changeset 47126:188ef162f019

8180055: Upgrade the Marlin renderer in Java2D Summary: added the double-precision variant + MarlinFX backports + Improved MarlinTileGenerator + higher precision of the cubic / quadratic curve Reviewed-by: flar, pnarayanan
author lbourges
date Wed, 17 May 2017 22:05:11 +0200
parents c42dc7b58b4d
children 56441eb0a8ec
files jdk/src/java.desktop/share/classes/sun/java2d/marlin/ArrayCacheConst.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/ByteArrayCache.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/CollinearSimplifier.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/Curve.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/DCollinearSimplifier.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/DCurve.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/DDasher.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/DHelpers.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/DMarlinRenderingEngine.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/DPathConsumer2D.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/DRenderer.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/DRendererContext.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/DStroker.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/DTransformingPathConsumer2D.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/Dasher.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/DoubleArrayCache.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatArrayCache.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatMath.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/Helpers.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/IRendererContext.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/IntArrayCache.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinCache.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinConst.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinProperties.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinRenderer.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinRenderingEngine.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinTileGenerator.java 
jdk/src/java.desktop/share/classes/sun/java2d/marlin/OffHeapArray.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/Renderer.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererContext.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/Stroker.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/TransformingPathConsumer2D.java jdk/src/java.desktop/share/classes/sun/java2d/marlin/Version.java jdk/src/java.desktop/share/classes/sun/java2d/pipe/RenderingEngine.java
diffstat 34 files changed, 7695 insertions(+), 894 deletions(-) [+]
line wrap: on
line diff
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/ArrayCacheConst.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/ArrayCacheConst.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -242,6 +242,8 @@
             int factor = 1;
             if (name.contains("Int") || name.contains("Float")) {
                 factor = 4;
+            } else if (name.contains("Double")) {
+                factor = 8;
             }
             return factor;
         }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/ByteArrayCache.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/ByteArrayCache.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,6 +22,7 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
+
 package sun.java2d.marlin;
 
 import static sun.java2d.marlin.ArrayCacheConst.ARRAY_SIZES;
@@ -37,13 +38,14 @@
 import sun.java2d.marlin.ArrayCacheConst.CacheStats;
 
 /*
- * Note that the [BYTE/INT/FLOAT]ArrayCache files are nearly identical except
+ * Note that the [BYTE/INT/FLOAT/DOUBLE]ArrayCache files are nearly identical except
  * for a few type and name differences. Typically, the [BYTE]ArrayCache.java file
- * is edited manually and then [INT]ArrayCache.java and [FLOAT]ArrayCache.java
+ * is edited manually and then [INT/FLOAT/DOUBLE]ArrayCache.java
  * files are generated with the following command lines:
  */
 // % sed -e 's/(b\yte)[ ]*//g' -e 's/b\yte/int/g' -e 's/B\yte/Int/g' < B\yteArrayCache.java > IntArrayCache.java
-// % sed -e 's/(b\yte)[ ]*/(float) /g' -e 's/b\yte/float/g' -e 's/B\yte/Float/g' < B\yteArrayCache.java > FloatArrayCache.java
+// % sed -e 's/(b\yte)[ ]*0/0.0f/g' -e 's/(b\yte)[ ]*/(float) /g' -e 's/b\yte/float/g' -e 's/B\yte/Float/g' < B\yteArrayCache.java > FloatArrayCache.java
+// % sed -e 's/(b\yte)[ ]*0/0.0d/g' -e 's/(b\yte)[ ]*/(double) /g' -e 's/b\yte/double/g' -e 's/B\yte/Double/g' < B\yteArrayCache.java > DoubleArrayCache.java
 
 final class ByteArrayCache implements MarlinConst {
 
@@ -231,8 +233,8 @@
         if (clean) {
             return new byte[length];
         }
-       // use JDK9 Unsafe.allocateUninitializedArray(class, length):
-       return (byte[]) OffHeapArray.UNSAFE.allocateUninitializedArray(byte.class, length);
+        // use JDK9 Unsafe.allocateUninitializedArray(class, length):
+        return (byte[]) OffHeapArray.UNSAFE.allocateUninitializedArray(byte.class, length);
     }
 
     static void fill(final byte[] array, final int fromIndex,
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/CollinearSimplifier.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/CollinearSimplifier.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -146,7 +146,7 @@
 
     private static float getSlope(float x1, float y1, float x2, float y2) {
         float dy = y2 - y1;
-        if (dy == 0f) {
+        if (dy == 0.0f) {
             return (x2 > x1) ? Float.POSITIVE_INFINITY
                    : Float.NEGATIVE_INFINITY;
         }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Curve.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Curve.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,8 +29,6 @@
 
     float ax, ay, bx, by, cx, cy, dx, dy;
     float dax, day, dbx, dby;
-    // shared iterator instance
-    private final BreakPtrIterator iterator = new BreakPtrIterator();
 
     Curve() {
     }
@@ -58,31 +56,31 @@
              float x3, float y3,
              float x4, float y4)
     {
-        ax = 3f * (x2 - x3) + x4 - x1;
-        ay = 3f * (y2 - y3) + y4 - y1;
-        bx = 3f * (x1 - 2f * x2 + x3);
-        by = 3f * (y1 - 2f * y2 + y3);
-        cx = 3f * (x2 - x1);
-        cy = 3f * (y2 - y1);
+        ax = 3.0f * (x2 - x3) + x4 - x1;
+        ay = 3.0f * (y2 - y3) + y4 - y1;
+        bx = 3.0f * (x1 - 2.0f * x2 + x3);
+        by = 3.0f * (y1 - 2.0f * y2 + y3);
+        cx = 3.0f * (x2 - x1);
+        cy = 3.0f * (y2 - y1);
         dx = x1;
         dy = y1;
-        dax = 3f * ax; day = 3f * ay;
-        dbx = 2f * bx; dby = 2f * by;
+        dax = 3.0f * ax; day = 3.0f * ay;
+        dbx = 2.0f * bx; dby = 2.0f * by;
     }
 
     void set(float x1, float y1,
              float x2, float y2,
              float x3, float y3)
     {
-        ax = 0f; ay = 0f;
-        bx = x1 - 2f * x2 + x3;
-        by = y1 - 2f * y2 + y3;
-        cx = 2f * (x2 - x1);
-        cy = 2f * (y2 - y1);
+        ax = 0.0f; ay = 0.0f;
+        bx = x1 - 2.0f * x2 + x3;
+        by = y1 - 2.0f * y2 + y3;
+        cx = 2.0f * (x2 - x1);
+        cy = 2.0f * (y2 - y1);
         dx = x1;
         dy = y1;
-        dax = 0f; day = 0f;
-        dbx = 2f * bx; dby = 2f * by;
+        dax = 0.0f; day = 0.0f;
+        dbx = 2.0f * bx; dby = 2.0f * by;
     }
 
     float xat(float t) {
@@ -113,7 +111,7 @@
         // Fortunately, this turns out to be quadratic, so there are at
         // most 2 inflection points.
         final float a = dax * dby - dbx * day;
-        final float b = 2f * (cy * dax - day * cx);
+        final float b = 2.0f * (cy * dax - day * cx);
         final float c = cy * dbx - cx * dby;
 
         return Helpers.quadraticRoots(a, b, c, pts, off);
@@ -128,11 +126,11 @@
         // these are the coefficients of some multiple of g(t) (not g(t),
         // because the roots of a polynomial are not changed after multiplication
         // by a constant, and this way we save a few multiplications).
-        final float a = 2f * (dax*dax + day*day);
-        final float b = 3f * (dax*dbx + day*dby);
-        final float c = 2f * (dax*cx + day*cy) + dbx*dbx + dby*dby;
+        final float a = 2.0f * (dax*dax + day*day);
+        final float b = 3.0f * (dax*dbx + day*dby);
+        final float c = 2.0f * (dax*cx + day*cy) + dbx*dbx + dby*dby;
         final float d = dbx*cx + dby*cy;
-        return Helpers.cubicRootsInAB(a, b, c, d, pts, off, 0f, 1f);
+        return Helpers.cubicRootsInAB(a, b, c, d, pts, off, 0.0f, 1.0f);
     }
 
     // Tries to find the roots of the function ROC(t)-w in [0, 1). It uses
@@ -153,14 +151,14 @@
         assert off <= 6 && roots.length >= 10;
         int ret = off;
         int numPerpdfddf = perpendiculardfddf(roots, off);
-        float t0 = 0, ft0 = ROCsq(t0) - w*w;
-        roots[off + numPerpdfddf] = 1f; // always check interval end points
+        float t0 = 0.0f, ft0 = ROCsq(t0) - w*w;
+        roots[off + numPerpdfddf] = 1.0f; // always check interval end points
         numPerpdfddf++;
         for (int i = off; i < off + numPerpdfddf; i++) {
             float t1 = roots[i], ft1 = ROCsq(t1) - w*w;
-            if (ft0 == 0f) {
+            if (ft0 == 0.0f) {
                 roots[ret++] = t0;
-            } else if (ft1 * ft0 < 0f) { // have opposite signs
+            } else if (ft1 * ft0 < 0.0f) { // have opposite signs
                 // (ROC(t)^2 == w^2) == (ROC(t) == w) is true because
                 // ROC(t) >= 0 for all t.
                 roots[ret++] = falsePositionROCsqMinusX(t0, t1, w*w, err);
@@ -220,7 +218,7 @@
 
     private static boolean sameSign(float x, float y) {
         // another way is to test if x*y > 0. This is bad for small x, y.
-        return (x < 0f && y < 0f) || (x > 0f && y > 0f);
+        return (x < 0.0f && y < 0.0f) || (x > 0.0f && y > 0.0f);
     }
 
     // returns the radius of curvature squared at t of this curve
@@ -229,76 +227,11 @@
         // dx=xat(t) and dy=yat(t). These calls have been inlined for efficiency
         final float dx = t * (t * dax + dbx) + cx;
         final float dy = t * (t * day + dby) + cy;
-        final float ddx = 2f * dax * t + dbx;
-        final float ddy = 2f * day * t + dby;
+        final float ddx = 2.0f * dax * t + dbx;
+        final float ddy = 2.0f * day * t + dby;
         final float dx2dy2 = dx*dx + dy*dy;
         final float ddx2ddy2 = ddx*ddx + ddy*ddy;
         final float ddxdxddydy = ddx*dx + ddy*dy;
         return dx2dy2*((dx2dy2*dx2dy2) / (dx2dy2 * ddx2ddy2 - ddxdxddydy*ddxdxddydy));
     }
-
-    // curve to be broken should be in pts
-    // this will change the contents of pts but not Ts
-    // TODO: There's no reason for Ts to be an array. All we need is a sequence
-    // of t values at which to subdivide. An array statisfies this condition,
-    // but is unnecessarily restrictive. Ts should be an Iterator<Float> instead.
-    // Doing this will also make dashing easier, since we could easily make
-    // LengthIterator an Iterator<Float> and feed it to this function to simplify
-    // the loop in Dasher.somethingTo.
-    BreakPtrIterator breakPtsAtTs(final float[] pts, final int type,
-                                  final float[] Ts, final int numTs)
-    {
-        assert pts.length >= 2*type && numTs <= Ts.length;
-
-        // initialize shared iterator:
-        iterator.init(pts, type, Ts, numTs);
-
-        return iterator;
-    }
-
-    static final class BreakPtrIterator {
-        private int nextCurveIdx;
-        private int curCurveOff;
-        private float prevT;
-        private float[] pts;
-        private int type;
-        private float[] ts;
-        private int numTs;
-
-        void init(final float[] pts, final int type,
-                  final float[] ts, final int numTs) {
-            this.pts = pts;
-            this.type = type;
-            this.ts = ts;
-            this.numTs = numTs;
-
-            nextCurveIdx = 0;
-            curCurveOff = 0;
-            prevT = 0f;
-        }
-
-        public boolean hasNext() {
-            return nextCurveIdx <= numTs;
-        }
-
-        public int next() {
-            int ret;
-            if (nextCurveIdx < numTs) {
-                float curT = ts[nextCurveIdx];
-                float splitT = (curT - prevT) / (1f - prevT);
-                Helpers.subdivideAt(splitT,
-                                    pts, curCurveOff,
-                                    pts, 0,
-                                    pts, type, type);
-                prevT = curT;
-                ret = 0;
-                curCurveOff = type;
-            } else {
-                ret = curCurveOff;
-            }
-            nextCurveIdx++;
-            return ret;
-        }
-    }
 }
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/DCollinearSimplifier.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+
+final class DCollinearSimplifier implements DPathConsumer2D {
+
+    enum SimplifierState {
+
+        Empty, PreviousPoint, PreviousLine
+    };
+    // slope precision threshold
+    static final double EPS = 1e-4d; // aaime proposed 1e-3d
+
+    DPathConsumer2D delegate;
+    SimplifierState state;
+    double px1, py1, px2, py2;
+    double pslope;
+
+    DCollinearSimplifier() {
+    }
+
+    public DCollinearSimplifier init(DPathConsumer2D delegate) {
+        this.delegate = delegate;
+        this.state = SimplifierState.Empty;
+
+        return this; // fluent API
+    }
+
+    @Override
+    public void pathDone() {
+        emitStashedLine();
+        state = SimplifierState.Empty;
+        delegate.pathDone();
+    }
+
+    @Override
+    public void closePath() {
+        emitStashedLine();
+        state = SimplifierState.Empty;
+        delegate.closePath();
+    }
+
+    @Override
+    public long getNativeConsumer() {
+        return 0;
+    }
+
+    @Override
+    public void quadTo(double x1, double y1, double x2, double y2) {
+        emitStashedLine();
+        delegate.quadTo(x1, y1, x2, y2);
+        // final end point:
+        state = SimplifierState.PreviousPoint;
+        px1 = x2;
+        py1 = y2;
+    }
+
+    @Override
+    public void curveTo(double x1, double y1, double x2, double y2,
+                        double x3, double y3) {
+        emitStashedLine();
+        delegate.curveTo(x1, y1, x2, y2, x3, y3);
+        // final end point:
+        state = SimplifierState.PreviousPoint;
+        px1 = x3;
+        py1 = y3;
+    }
+
+    @Override
+    public void moveTo(double x, double y) {
+        emitStashedLine();
+        delegate.moveTo(x, y);
+        state = SimplifierState.PreviousPoint;
+        px1 = x;
+        py1 = y;
+    }
+
+    @Override
+    public void lineTo(final double x, final double y) {
+        switch (state) {
+            case Empty: // no reference point recorded yet: forward the line unchanged
+                delegate.lineTo(x, y);
+                state = SimplifierState.PreviousPoint;
+                px1 = x;
+                py1 = y;
+                return;
+
+            case PreviousPoint: // stash segment (px1,py1)-(x,y) instead of emitting it
+                state = SimplifierState.PreviousLine;
+                px2 = x;
+                py2 = y;
+                pslope = getSlope(px1, py1, x, y);
+                return;
+
+            case PreviousLine: // compare the new segment with the stashed one
+                final double slope = getSlope(px2, py2, x, y);
+                // test for collinearity ('==' also matches equal infinite slopes, whose difference is NaN)
+                if ((slope == pslope) || (Math.abs(pslope - slope) < EPS)) {
+                    // merge segments: only the stashed end point moves, pslope is kept
+                    px2 = x;
+                    py2 = y;
+                    return;
+                }
+                // emit previous segment, then stash the new one in its place
+                delegate.lineTo(px2, py2);
+                px1 = px2;
+                py1 = py2;
+                px2 = x;
+                py2 = y;
+                pslope = slope;
+                return;
+            default:
+        }
+    }
+
+    private void emitStashedLine() {
+        if (state == SimplifierState.PreviousLine) {
+            delegate.lineTo(px2, py2);
+        }
+    }
+
+    private static double getSlope(double x1, double y1, double x2, double y2) {
+        double dy = y2 - y1;
+        if (dy == 0.0d) { // horizontal or zero-length segment: avoid dividing by zero
+            return (x2 > x1) ? Double.POSITIVE_INFINITY
+                   : Double.NEGATIVE_INFINITY; // also returned when x2 == x1
+        }
+        return (x2 - x1) / dy; // the slope is expressed as dx/dy (not dy/dx)
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/DCurve.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+final class DCurve {
+
+    double ax, ay, bx, by, cx, cy, dx, dy;
+    double dax, day, dbx, dby;
+
+    DCurve() {
+    }
+
+    void set(double[] points, int type) {
+        switch(type) {
+        case 8:
+            set(points[0], points[1],
+                points[2], points[3],
+                points[4], points[5],
+                points[6], points[7]);
+            return;
+        case 6:
+            set(points[0], points[1],
+                points[2], points[3],
+                points[4], points[5]);
+            return;
+        default:
+            throw new InternalError("Curves can only be cubic or quadratic");
+        }
+    }
+
+    void set(double x1, double y1,
+             double x2, double y2,
+             double x3, double y3,
+             double x4, double y4)
+    {
+        ax = 3.0d * (x2 - x3) + x4 - x1;
+        ay = 3.0d * (y2 - y3) + y4 - y1;
+        bx = 3.0d * (x1 - 2.0d * x2 + x3);
+        by = 3.0d * (y1 - 2.0d * y2 + y3);
+        cx = 3.0d * (x2 - x1);
+        cy = 3.0d * (y2 - y1);
+        dx = x1;
+        dy = y1;
+        dax = 3.0d * ax; day = 3.0d * ay;
+        dbx = 2.0d * bx; dby = 2.0d * by;
+    }
+
+    void set(double x1, double y1,
+             double x2, double y2,
+             double x3, double y3)
+    {
+        ax = 0.0d; ay = 0.0d;
+        bx = x1 - 2.0d * x2 + x3;
+        by = y1 - 2.0d * y2 + y3;
+        cx = 2.0d * (x2 - x1);
+        cy = 2.0d * (y2 - y1);
+        dx = x1;
+        dy = y1;
+        dax = 0.0d; day = 0.0d;
+        dbx = 2.0d * bx; dby = 2.0d * by;
+    }
+
+    double xat(double t) {
+        return t * (t * (t * ax + bx) + cx) + dx;
+    }
+    double yat(double t) {
+        return t * (t * (t * ay + by) + cy) + dy;
+    }
+
+    double dxat(double t) {
+        return t * (t * dax + dbx) + cx;
+    }
+
+    double dyat(double t) {
+        return t * (t * day + dby) + cy;
+    }
+
+    int dxRoots(double[] roots, int off) {
+        return DHelpers.quadraticRoots(dax, dbx, cx, roots, off);
+    }
+
+    int dyRoots(double[] roots, int off) {
+        return DHelpers.quadraticRoots(day, dby, cy, roots, off);
+    }
+
+    int infPoints(double[] pts, int off) {
+        // inflection point at t if -f'(t)x*f''(t)y + f'(t)y*f''(t)x == 0
+        // Fortunately, this turns out to be quadratic, so there are at
+        // most 2 inflection points.
+        final double a = dax * dby - dbx * day;
+        final double b = 2.0d * (cy * dax - day * cx);
+        final double c = cy * dbx - cx * dby;
+
+        return DHelpers.quadraticRoots(a, b, c, pts, off);
+    }
+
+    // finds points where the first and second derivative are
+    // perpendicular. This happens when g(t) = f'(t)*f''(t) == 0 (where
+    // * is a dot product). Unfortunately, we have to solve a cubic.
+    private int perpendiculardfddf(double[] pts, int off) {
+        assert pts.length >= off + 4;
+
+        // these are the coefficients of some multiple of g(t) (not g(t),
+        // because the roots of a polynomial are not changed after multiplication
+        // by a constant, and this way we save a few multiplications).
+        final double a = 2.0d * (dax*dax + day*day);
+        final double b = 3.0d * (dax*dbx + day*dby);
+        final double c = 2.0d * (dax*cx + day*cy) + dbx*dbx + dby*dby;
+        final double d = dbx*cx + dby*cy;
+        return DHelpers.cubicRootsInAB(a, b, c, d, pts, off, 0.0d, 1.0d);
+    }
+
+    // Tries to find the roots of the function ROC(t)-w in [0, 1). It uses
+    // a variant of the false position algorithm to find the roots. False
+    // position requires that 2 initial values x0,x1 be given, and that the
+    // function must have opposite signs at those values. To find such
+    // values, we need the local extrema of the ROC function, for which we
+    // need the roots of its derivative; however, it's harder to find the
+    // roots of the derivative in this case than it is to find the roots
+    // of the original function. So, we find all points where this curve's
+    // first and second derivative are perpendicular, and we pretend these
+    // are our local extrema. There are at most 3 of these, so we will check
+    // at most 4 sub-intervals of (0,1). ROC has asymptotes at inflection
+    // points, so roc-w can have at least 6 roots. This shouldn't be a
+    // problem for what we're trying to do (draw a nice looking curve).
+    int rootsOfROCMinusW(double[] roots, int off, final double w, final double err) {
+        // no OOB exception, because by now off<=6, and roots.length >= 10
+        assert off <= 6 && roots.length >= 10;
+        int ret = off;
+        int numPerpdfddf = perpendiculardfddf(roots, off);
+        double t0 = 0.0d, ft0 = ROCsq(t0) - w*w;
+        roots[off + numPerpdfddf] = 1.0d; // always check interval end points
+        numPerpdfddf++;
+        for (int i = off; i < off + numPerpdfddf; i++) {
+            double t1 = roots[i], ft1 = ROCsq(t1) - w*w;
+            if (ft0 == 0.0d) {
+                roots[ret++] = t0;
+            } else if (ft1 * ft0 < 0.0d) { // have opposite signs
+                // (ROC(t)^2 == w^2) == (ROC(t) == w) is true because
+                // ROC(t) >= 0 for all t.
+                roots[ret++] = falsePositionROCsqMinusX(t0, t1, w*w, err);
+            }
+            t0 = t1;
+            ft0 = ft1;
+        }
+
+        return ret - off;
+    }
+
+    // clamps +/-Infinity to +/-Double.MAX_VALUE (sign kept); finite values and NaN pass through
+    private static double eliminateInf(double x) {
+        return Math.max(-Double.MAX_VALUE, Math.min(Double.MAX_VALUE, x));
+    }
+
+    // A slight modification of the false position algorithm on wikipedia.
+    // This only works for the ROCsq-x functions. It might be nice to have
+    // the function as an argument, but that would be awkward in java6.
+    // TODO: It is something to consider for java8 (or whenever lambda
+    // expressions make it into the language), depending on how closures
+    // turn out. The same goes for the Newton's method
+    // algorithm in DHelpers.java
+    private double falsePositionROCsqMinusX(double x0, double x1,
+                                           final double x, final double err)
+    {
+        final int iterLimit = 100;
+        int side = 0; // sign: end point replaced last (<0: t, >0: s); magnitude: consecutive repeats
+        double t = x1, ft = eliminateInf(ROCsq(t) - x);
+        double s = x0, fs = eliminateInf(ROCsq(s) - x);
+        double r = s, fr;
+        for (int i = 0; i < iterLimit && Math.abs(t - s) > err * Math.abs(t + s); i++) {
+            r = (fs * t - ft * s) / (fs - ft); // secant estimate through (s, fs) and (t, ft)
+            fr = ROCsq(r) - x;
+            if (sameSign(fr, ft)) {
+                ft = fr; t = r;
+                if (side < 0) {
+                    fs /= (1 << (-side)); // NOTE(review): int shift; goes negative at 31 and wraps beyond - confirm unreachable
+                    side--;
+                } else {
+                    side = -1;
+                }
+            } else if (fr * fs > 0) { // same sign as fs (product test, unlike sameSign() above)
+                fs = fr; s = r;
+                if (side > 0) {
+                    ft /= (1 << side); // mirror of the scaling applied to fs above
+                    side++;
+                } else {
+                    side = 1;
+                }
+            } else {
+                break; // neither sign test matched (e.g. fr == 0): stop with the current r
+            }
+        }
+        return r;
+    }
+
+    private static boolean sameSign(double x, double y) {
+        // another way is to test if x*y > 0. This is bad for small x, y.
+        return (x < 0.0d && y < 0.0d) || (x > 0.0d && y > 0.0d);
+    }
+
+    // returns the radius of curvature squared at t of this curve
+    // see http://en.wikipedia.org/wiki/Radius_of_curvature_(applications)
+    private double ROCsq(final double t) {
+        // dx=dxat(t) and dy=dyat(t). These calls have been inlined for efficiency
+        final double dx = t * (t * dax + dbx) + cx;
+        final double dy = t * (t * day + dby) + cy;
+        final double ddx = 2.0d * dax * t + dbx; // second derivative, x component
+        final double ddy = 2.0d * day * t + dby; // second derivative, y component
+        final double dx2dy2 = dx*dx + dy*dy;
+        final double ddx2ddy2 = ddx*ddx + ddy*ddy;
+        final double ddxdxddydy = ddx*dx + ddy*dy;
+        return dx2dy2*((dx2dy2*dx2dy2) / (dx2dy2 * ddx2ddy2 - ddxdxddydy*ddxdxddydy));
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/DDasher.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,746 @@
+/*
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.util.Arrays;
+
+/**
+ * The <code>DDasher</code> class takes a series of linear commands
+ * (<code>moveTo</code>, <code>lineTo</code>, <code>close</code> and
+ * <code>end</code>) and breaks them into smaller segments according to a
+ * dash pattern array and a starting dash phase.
+ *
+ * <p> Issues: in J2SE, a zero length dash segment is drawn as a very
+ * short dash, whereas Pisces does not draw anything.  The PostScript
+ * semantics are unclear.
+ *
+ */
+final class DDasher implements DPathConsumer2D, MarlinConst {
+
+    static final int REC_LIMIT = 4;
+    static final double ERR = 0.01d;
+    static final double MIN_T_INC = 1.0d / (1 << REC_LIMIT);
+
+    // More than 24 bits of mantissa means we can no longer accurately
+    // measure the number of times cycled through the dash array so we
+    // punt and override the phase to just be 0 past that point.
+    static final double MAX_CYCLES = 16000000.0d;
+
+    private DPathConsumer2D out;
+    private double[] dash;
+    private int dashLen;
+    private double startPhase;
+    private boolean startDashOn;
+    private int startIdx;
+
+    private boolean starting;
+    private boolean needsMoveTo;
+
+    private int idx;
+    private boolean dashOn;
+    private double phase;
+
+    private double sx, sy;
+    private double x0, y0;
+
+    // temporary storage for the current curve
+    private final double[] curCurvepts;
+
+    // per-thread renderer context
+    final DRendererContext rdrCtx;
+
+    // flag to recycle dash array copy
+    boolean recycleDashes;
+
+    // dashes ref (dirty)
+    final DoubleArrayCache.Reference dashes_ref;
+    // firstSegmentsBuffer ref (dirty)
+    final DoubleArrayCache.Reference firstSegmentsBuffer_ref;
+
+    /**
+     * Constructs a <code>DDasher</code>.
+     * @param rdrCtx per-thread renderer context
+     */
+    DDasher(final DRendererContext rdrCtx) {
+        this.rdrCtx = rdrCtx;
+
+        dashes_ref = rdrCtx.newDirtyDoubleArrayRef(INITIAL_ARRAY); // 1K
+
+        firstSegmentsBuffer_ref = rdrCtx.newDirtyDoubleArrayRef(INITIAL_ARRAY); // 1K
+        firstSegmentsBuffer     = firstSegmentsBuffer_ref.initial;
+
+        // we need curCurvepts to be able to contain 2 curves because when
+        // dashing curves, we need to subdivide it
+        curCurvepts = new double[8 * 2];
+    }
+
+    /**
+     * Initialize the <code>DDasher</code>.
+     *
+     * @param out an output <code>DPathConsumer2D</code>.
+     * @param dash array of <code>double</code>s whose first dashLen entries are the dash pattern
+     * @param dashLen number of pattern entries in dash (a recycled array may be longer)
+     * @param phase a <code>double</code> containing the dash phase
+     * @param recycleDashes true to indicate to recycle the given dash array
+     * @return this instance
+     */
+    DDasher init(final DPathConsumer2D out, double[] dash, int dashLen,
+                double phase, boolean recycleDashes)
+    {
+        this.out = out;
+
+        // Normalize so 0 <= phase < dash[sidx]; only dash[0..dashLen) is valid
+        int sidx = 0;
+        dashOn = true;
+        double sum = 0.0d;
+        for (int i = 0; i < dashLen; i++) {
+            sum += dash[i];
+        }
+        double cycles = phase / sum;
+        if (phase < 0.0d) {
+            if (-cycles >= MAX_CYCLES) {
+                phase = 0.0d;
+            } else {
+                int fullcycles = FloatMath.floor_int(-cycles);
+                if ((fullcycles & dashLen & 1) != 0) { // odd cycles over an odd-length pattern
+                    dashOn = !dashOn;
+                }
+                phase += fullcycles * sum;
+                while (phase < 0.0d) {
+                    if (--sidx < 0) {
+                        sidx = dashLen - 1;
+                    }
+                    phase += dash[sidx];
+                    dashOn = !dashOn;
+                }
+            }
+        } else if (phase > 0.0d) {
+            if (cycles >= MAX_CYCLES) {
+                phase = 0.0d;
+            } else {
+                int fullcycles = FloatMath.floor_int(cycles);
+                if ((fullcycles & dashLen & 1) != 0) { // odd cycles over an odd-length pattern
+                    dashOn = !dashOn;
+                }
+                phase -= fullcycles * sum;
+                double d;
+                while (phase >= (d = dash[sidx])) {
+                    phase -= d;
+                    sidx = (sidx + 1) % dashLen;
+                    dashOn = !dashOn;
+                }
+            }
+        }
+
+        this.dash = dash;
+        this.dashLen = dashLen;
+        this.startPhase = this.phase = phase;
+        this.startDashOn = dashOn;
+        this.startIdx = sidx;
+        this.starting = true;
+        needsMoveTo = false;
+        firstSegidx = 0;
+
+        this.recycleDashes = recycleDashes;
+
+        return this; // fluent API
+    }
+
+    /**
+     * Disposes this dasher:
+     * clean up before reusing this instance
+     */
+    void dispose() {
+        if (DO_CLEAN_DIRTY) {
+            // Force zero-fill dirty arrays:
+            Arrays.fill(curCurvepts, 0.0d);
+        }
+        // Return arrays:
+        if (recycleDashes) {
+            dash = dashes_ref.putArray(dash);
+        }
+        firstSegmentsBuffer = firstSegmentsBuffer_ref.putArray(firstSegmentsBuffer);
+    }
+
+    double[] copyDashArray(final float[] dashes) {
+        final int count = dashes.length;
+        final double[] copy;
+        if (count > INITIAL_ARRAY) {
+            if (DO_STATS) {
+                rdrCtx.stats.stat_array_dasher_dasher.add(count);
+            }
+            copy = dashes_ref.getArray(count);
+        } else {
+            copy = dashes_ref.initial;
+        }
+        for (int k = 0; k < count; k++) { copy[k] = dashes[k]; } // widen float -> double
+        return copy;
+    }
+
+    @Override
+    public void moveTo(double x0, double y0) {
+        if (firstSegidx > 0) {
+            out.moveTo(sx, sy);
+            emitFirstSegments();
+        }
+        needsMoveTo = true;
+        this.idx = startIdx;
+        this.dashOn = this.startDashOn;
+        this.phase = this.startPhase;
+        this.sx = this.x0 = x0;
+        this.sy = this.y0 = y0;
+        this.starting = true;
+    }
+
+    private void emitSeg(double[] buf, int off, int type) {
+        switch (type) {
+        case 8:
+            out.curveTo(buf[off+0], buf[off+1],
+                        buf[off+2], buf[off+3],
+                        buf[off+4], buf[off+5]);
+            return;
+        case 6:
+            out.quadTo(buf[off+0], buf[off+1],
+                       buf[off+2], buf[off+3]);
+            return;
+        case 4:
+            out.lineTo(buf[off], buf[off+1]);
+            return;
+        default:
+        }
+    }
+
+    private void emitFirstSegments() {
+        final double[] segs = firstSegmentsBuffer;
+        // each buffered record is a type tag followed by (type - 2) coordinates
+        for (int pos = 0; pos < firstSegidx; ) {
+            final int type = (int) segs[pos++];
+            emitSeg(segs, pos, type);
+            pos += (type - 2);
+        }
+        firstSegidx = 0;
+    }
+    // We don't emit the first dash right away. If we did, caps would be
+    // drawn on it, but we need joins to be drawn if there's a closePath()
+    // So, we store the path elements that make up the first dash in the
+    // buffer below.
+    private double[] firstSegmentsBuffer; // dynamic array
+    private int firstSegidx;
+
+    // emits or buffers one segment; pts[off..] are absolute points (same space as x0,y0), then x0,y0 move to its end
+    private void goTo(double[] pts, int off, final int type) {
+        double x = pts[off + type - 4];
+        double y = pts[off + type - 3];
+        if (dashOn) {
+            if (starting) {
+                int len = type - 1; // (type - 2) coordinates + 1 slot for the type tag
+                int segIdx = firstSegidx;
+                double[] buf = firstSegmentsBuffer;
+                if (segIdx + len  > buf.length) {
+                    if (DO_STATS) {
+                        rdrCtx.stats.stat_array_dasher_firstSegmentsBuffer
+                            .add(segIdx + len);
+                    }
+                    firstSegmentsBuffer = buf
+                        = firstSegmentsBuffer_ref.widenArray(buf, segIdx,
+                                                             segIdx + len);
+                }
+                buf[segIdx++] = type;
+                len--;
+                // small arraycopy (2, 4 or 6) but with offset:
+                System.arraycopy(pts, off, buf, segIdx, len);
+                segIdx += len;
+                firstSegidx = segIdx;
+            } else {
+                if (needsMoveTo) {
+                    out.moveTo(x0, y0);
+                    needsMoveTo = false;
+                }
+                emitSeg(pts, off, type);
+            }
+        } else {
+            starting = false;
+            needsMoveTo = true;
+        }
+        this.x0 = x;
+        this.y0 = y;
+    }
+
+    @Override
+    public void lineTo(double x1, double y1) {
+        double dx = x1 - x0;
+        double dy = y1 - y0;
+
+        double len = dx*dx + dy*dy;
+        if (len == 0.0d) {
+            return;
+        }
+        len = Math.sqrt(len);
+
+        // The scaling factors needed to get the dx and dy of the
+        // transformed dash segments.
+        final double cx = dx / len;
+        final double cy = dy / len;
+
+        final double[] _curCurvepts = curCurvepts;
+        final double[] _dash = dash;
+
+        double leftInThisDashSegment;
+        double dashdx, dashdy, p;
+
+        while (true) {
+            leftInThisDashSegment = _dash[idx] - phase;
+
+            if (len <= leftInThisDashSegment) {
+                _curCurvepts[0] = x1;
+                _curCurvepts[1] = y1;
+                goTo(_curCurvepts, 0, 4);
+
+                // Advance phase within current dash segment
+                phase += len;
+                // TODO: compare double values using epsilon:
+                if (len == leftInThisDashSegment) {
+                    phase = 0.0d;
+                    idx = (idx + 1) % dashLen;
+                    dashOn = !dashOn;
+                }
+                return;
+            }
+
+            dashdx = _dash[idx] * cx;
+            dashdy = _dash[idx] * cy;
+
+            if (phase == 0.0d) {
+                _curCurvepts[0] = x0 + dashdx;
+                _curCurvepts[1] = y0 + dashdy;
+            } else {
+                p = leftInThisDashSegment / _dash[idx];
+                _curCurvepts[0] = x0 + p * dashdx;
+                _curCurvepts[1] = y0 + p * dashdy;
+            }
+
+            goTo(_curCurvepts, 0, 4);
+
+            len -= leftInThisDashSegment;
+            // Advance to next dash segment
+            idx = (idx + 1) % dashLen;
+            dashOn = !dashOn;
+            phase = 0.0d;
+        }
+    }
+
+    // shared instance in DDasher
+    private final LengthIterator li = new LengthIterator();
+
+    // preconditions: curCurvepts must be an array of length at least 2 * type,
+    // that contains the curve we want to dash in the first type elements
+    private void somethingTo(int type) {
+        if (pointCurve(curCurvepts, type)) {
+            return;
+        }
+        li.initializeIterationOnCurve(curCurvepts, type);
+
+        // initially the current curve is at curCurvepts[0...type]
+        int curCurveoff = 0;
+        double lastSplitT = 0.0d;
+        double t;
+        double leftInThisDashSegment = dash[idx] - phase;
+
+        while ((t = li.next(leftInThisDashSegment)) < 1.0d) {
+            if (t != 0.0d) {
+                DHelpers.subdivideAt((t - lastSplitT) / (1.0d - lastSplitT),
+                                    curCurvepts, curCurveoff,
+                                    curCurvepts, 0,
+                                    curCurvepts, type, type);
+                lastSplitT = t;
+                goTo(curCurvepts, 2, type);
+                curCurveoff = type;
+            }
+            // Advance to next dash segment
+            idx = (idx + 1) % dashLen;
+            dashOn = !dashOn;
+            phase = 0.0d;
+            leftInThisDashSegment = dash[idx];
+        }
+        goTo(curCurvepts, curCurveoff+2, type);
+        phase += li.lastSegLen();
+        if (phase >= dash[idx]) {
+            phase = 0.0d;
+            idx = (idx + 1) % dashLen;
+            dashOn = !dashOn;
+        }
+        // reset LengthIterator:
+        li.reset();
+    }
+
+    private static boolean pointCurve(double[] curve, int type) {
+        // a point curve has every coordinate equal to the one two slots before it
+        int i = 2;
+        while (i < type && curve[i] == curve[i-2]) {
+            i++;
+        }
+        return (i >= type);
+    }
+
+    // Objects of this class are used to iterate through curves. They return
+    // t values where the left side of the curve has a specified length.
+    // It does this by subdividing the input curve until a certain error
+    // condition has been met. A recursive subdivision procedure would
+    // return as many as 1<<limit curves, but this is an iterator and we
+    // don't need all the curves all at once, so we carry out a
+    // lazy inorder traversal of the recursion tree (meaning we only move
+    // through the tree when we need the next subdivided curve). This saves
+    // us a lot of memory because at any one time we only need to store
+    // limit+1 curves - one for each level of the tree + 1.
+    // NOTE: the way we do things here is not enough to traverse a general
+    // tree; however, the trees we are interested in have the property that
+    // every non leaf node has exactly 2 children
+    static final class LengthIterator {
+        private enum Side {LEFT, RIGHT};
+        // Holds the curves at various levels of the recursion. The root
+        // (i.e. the original curve) is at recCurveStack[0] (but then it
+        // gets subdivided, the left half is put at 1, so most of the time
+        // only the right half of the original curve is at 0)
+        private final double[][] recCurveStack; // dirty
+        // sides[i] indicates whether the node at level i+1 in the path from
+        // the root to the current leaf is a left or right child of its parent.
+        private final Side[] sides; // dirty
+        private int curveType;
+        // lastT and nextT delimit the current leaf.
+        private double nextT;
+        private double lenAtNextT;
+        private double lastT;
+        private double lenAtLastT;
+        private double lenAtLastSplit;
+        private double lastSegLen;
+        // the current level in the recursion tree. 0 is the root. limit
+        // is the deepest possible leaf.
+        private int recLevel;
+        private boolean done;
+
+        // the lengths of the lines of the control polygon. Only its first
+        // curveType/2 - 1 elements are valid. This is an optimization. See
+        // next() for more detail.
+        private final double[] curLeafCtrlPolyLengths = new double[3];
+
+        LengthIterator() {
+            this.recCurveStack = new double[REC_LIMIT + 1][8];
+            this.sides = new Side[REC_LIMIT];
+            // if any methods are called without first initializing this object
+            // on a curve, we want it to fail ASAP.
+            this.nextT = Double.MAX_VALUE;
+            this.lenAtNextT = Double.MAX_VALUE;
+            this.lenAtLastSplit = Double.MIN_VALUE;
+            this.recLevel = Integer.MIN_VALUE;
+            this.lastSegLen = Double.MAX_VALUE;
+            this.done = true;
+        }
+
+        /**
+         * Reset this LengthIterator.
+         */
+        void reset() {
+            // keep data dirty
+            // as it appears not useful to reset data:
+            if (DO_CLEAN_DIRTY) {
+                final int recLimit = recCurveStack.length - 1;
+                for (int i = recLimit; i >= 0; i--) {
+                    Arrays.fill(recCurveStack[i], 0.0d);
+                }
+                Arrays.fill(sides, Side.LEFT);
+                Arrays.fill(curLeafCtrlPolyLengths, 0.0d);
+                Arrays.fill(nextRoots, 0.0d);
+                Arrays.fill(flatLeafCoefCache, 0.0d);
+                flatLeafCoefCache[2] = -1.0d;
+            }
+        }
+
+        void initializeIterationOnCurve(double[] pts, int type) {
+            // optimize arraycopy (8 values faster than 6 = type):
+            System.arraycopy(pts, 0, recCurveStack[0], 0, 8);
+            this.curveType = type;
+            this.recLevel = 0;
+            this.lastT = 0.0d;
+            this.lenAtLastT = 0.0d;
+            this.nextT = 0.0d;
+            this.lenAtNextT = 0.0d;
+            goLeft(); // initializes nextT and lenAtNextT properly
+            this.lenAtLastSplit = 0.0d;
+            if (recLevel > 0) {
+                this.sides[0] = Side.LEFT;
+                this.done = false;
+            } else {
+                // the root of the tree is a leaf so we're done.
+                this.sides[0] = Side.RIGHT;
+                this.done = true;
+            }
+            this.lastSegLen = 0.0d;
+        }
+
+        // 0 == false, 1 == true, -1 == invalid cached value.
+        private int cachedHaveLowAcceleration = -1;
+
+        private boolean haveLowAcceleration(double err) {
+            if (cachedHaveLowAcceleration == -1) {
+                final double len1 = curLeafCtrlPolyLengths[0];
+                final double len2 = curLeafCtrlPolyLengths[1];
+                // the test below is equivalent to !within(len1/len2, 1, err).
+                // It is using a multiplication instead of a division, so it
+                // should be a bit faster.
+                if (!DHelpers.within(len1, len2, err * len2)) {
+                    cachedHaveLowAcceleration = 0;
+                    return false;
+                }
+                if (curveType == 8) {
+                    final double len3 = curLeafCtrlPolyLengths[2];
+                    // if len1 is close to 2 and 2 is close to 3, that probably
+                    // means 1 is close to 3 so the second part of this test might
+                    // not be needed, but it doesn't hurt to include it.
+                    final double errLen3 = err * len3;
+                    if (!(DHelpers.within(len2, len3, errLen3) &&
+                          DHelpers.within(len1, len3, errLen3))) {
+                        cachedHaveLowAcceleration = 0;
+                        return false;
+                    }
+                }
+                cachedHaveLowAcceleration = 1;
+                return true;
+            }
+
+            return (cachedHaveLowAcceleration == 1);
+        }
+
+        // we want to avoid allocations/gc so we keep this array so we
+        // can put roots in it,
+        private final double[] nextRoots = new double[4];
+
+        // caches the coefficients of the current leaf in its flattened
+        // form (see inside next() for what that means). The cache is
+        // invalid when its third element is negative, since in any
+        // valid flattened curve, this would be >= 0.
+        private final double[] flatLeafCoefCache = new double[]{0.0d, 0.0d, -1.0d, 0.0d};
+
+        // returns the t value where the remaining curve should be split in
+        // order for the left subdivided curve to have length len. If len
+        // is >= than the length of the uniterated curve, it returns 1.
+        double next(final double len) {
+            final double targetLength = lenAtLastSplit + len;
+            while (lenAtNextT < targetLength) {
+                if (done) {
+                    lastSegLen = lenAtNextT - lenAtLastSplit;
+                    return 1.0d;
+                }
+                goToNextLeaf();
+            }
+            lenAtLastSplit = targetLength;
+            final double leaflen = lenAtNextT - lenAtLastT;
+            double t = (targetLength - lenAtLastT) / leaflen;
+
+            // cubicRootsInAB is a fairly expensive call, so we just don't do it
+            // if the acceleration in this section of the curve is small enough.
+            if (!haveLowAcceleration(0.05d)) {
+                // We flatten the current leaf along the x axis, so that we're
+                // left with a, b, c which define a 1D Bezier curve. We then
+                // solve this to get the parameter of the original leaf that
+                // gives us the desired length.
+                final double[] _flatLeafCoefCache = flatLeafCoefCache;
+
+                if (_flatLeafCoefCache[2] < 0.0d) {
+                    double x =     curLeafCtrlPolyLengths[0],
+                          y = x + curLeafCtrlPolyLengths[1];
+                    if (curveType == 8) {
+                        double z = y + curLeafCtrlPolyLengths[2];
+                        _flatLeafCoefCache[0] = 3.0d * (x - y) + z;
+                        _flatLeafCoefCache[1] = 3.0d * (y - 2.0d * x);
+                        _flatLeafCoefCache[2] = 3.0d * x;
+                        _flatLeafCoefCache[3] = -z;
+                    } else if (curveType == 6) {
+                        _flatLeafCoefCache[0] = 0.0d;
+                        _flatLeafCoefCache[1] = y - 2.0d * x;
+                        _flatLeafCoefCache[2] = 2.0d * x;
+                        _flatLeafCoefCache[3] = -y;
+                    }
+                }
+                double a = _flatLeafCoefCache[0];
+                double b = _flatLeafCoefCache[1];
+                double c = _flatLeafCoefCache[2];
+                double d = t * _flatLeafCoefCache[3];
+
+                // we use cubicRootsInAB here, because we want only roots in 0, 1,
+                // and our quadratic root finder doesn't filter, so it's just a
+                // matter of convenience.
+                int n = DHelpers.cubicRootsInAB(a, b, c, d, nextRoots, 0, 0.0d, 1.0d);
+                if (n == 1 && !Double.isNaN(nextRoots[0])) {
+                    t = nextRoots[0];
+                }
+            }
+            // t is relative to the current leaf, so we must make it a valid parameter
+            // of the original curve.
+            t = t * (nextT - lastT) + lastT;
+            if (t >= 1.0d) {
+                t = 1.0d;
+                done = true;
+            }
+            // even if done = true, if we're here, that means targetLength
+            // is equal to, or very, very close to the total length of the
+            // curve, so lastSegLen won't be too high. In cases where len
+            // overshoots the curve, this method will exit in the while
+            // loop, and lastSegLen will still be set to the right value.
+            lastSegLen = len;
+            return t;
+        }
+
+        double lastSegLen() {
+            return lastSegLen;
+        }
+
+        // go to the next leaf (in an inorder traversal) in the recursion tree
+        // preconditions: must be on a leaf, and that leaf must not be the root.
+        private void goToNextLeaf() {
+            // We must go to the first ancestor node that has an unvisited
+            // right child.
+            int _recLevel = recLevel;
+            final Side[] _sides = sides;
+
+            _recLevel--;
+            while(_sides[_recLevel] == Side.RIGHT) {
+                if (_recLevel == 0) {
+                    recLevel = 0;
+                    done = true;
+                    return;
+                }
+                _recLevel--;
+            }
+
+            _sides[_recLevel] = Side.RIGHT;
+            // optimize arraycopy (8 values faster than 6 = type):
+            System.arraycopy(recCurveStack[_recLevel], 0,
+                             recCurveStack[_recLevel+1], 0, 8);
+            _recLevel++;
+
+            recLevel = _recLevel;
+            goLeft();
+        }
+
+        // go to the leftmost leaf below the current node; returns nothing: its length is added to lenAtNextT.
+        private void goLeft() {
+            double len = onLeaf();
+            if (len >= 0.0d) {
+                lastT = nextT;
+                lenAtLastT = lenAtNextT;
+                nextT += (1 << (REC_LIMIT - recLevel)) * MIN_T_INC; // i.e. 1 / 2^recLevel
+                lenAtNextT += len;
+                // invalidate caches
+                flatLeafCoefCache[2] = -1.0d;
+                cachedHaveLowAcceleration = -1;
+            } else {
+                DHelpers.subdivide(recCurveStack[recLevel], 0,
+                                  recCurveStack[recLevel+1], 0,
+                                  recCurveStack[recLevel], 0, curveType);
+                sides[recLevel] = Side.LEFT;
+                recLevel++;
+                goLeft();
+            }
+        }
+
+        // this is a bit of a hack. It returns -1 if we're not on a leaf, and
+        // the length of the leaf if we are on a leaf.
+        private double onLeaf() {
+            double[] curve = recCurveStack[recLevel];
+            double polyLen = 0.0d;
+
+            double x0 = curve[0], y0 = curve[1];
+            for (int i = 2; i < curveType; i += 2) {
+                final double x1 = curve[i], y1 = curve[i+1];
+                final double len = DHelpers.linelen(x0, y0, x1, y1);
+                polyLen += len;
+                curLeafCtrlPolyLengths[i/2 - 1] = len;
+                x0 = x1;
+                y0 = y1;
+            }
+
+            final double lineLen = DHelpers.linelen(curve[0], curve[1],
+                                                  curve[curveType-2],
+                                                  curve[curveType-1]);
+            if ((polyLen - lineLen) < ERR || recLevel == REC_LIMIT) {
+                return (polyLen + lineLen) / 2.0d;
+            }
+            return -1.0d;
+        }
+    }
+
+    @Override
+    public void curveTo(double x1, double y1,
+                        double x2, double y2,
+                        double x3, double y3)
+    {
+        final double[] _curCurvepts = curCurvepts;
+        _curCurvepts[0] = x0;        _curCurvepts[1] = y0;
+        _curCurvepts[2] = x1;        _curCurvepts[3] = y1;
+        _curCurvepts[4] = x2;        _curCurvepts[5] = y2;
+        _curCurvepts[6] = x3;        _curCurvepts[7] = y3;
+        somethingTo(8);
+    }
+
+    @Override
+    public void quadTo(double x1, double y1, double x2, double y2) {
+        final double[] _curCurvepts = curCurvepts;
+        _curCurvepts[0] = x0;        _curCurvepts[1] = y0;
+        _curCurvepts[2] = x1;        _curCurvepts[3] = y1;
+        _curCurvepts[4] = x2;        _curCurvepts[5] = y2;
+        somethingTo(6);
+    }
+
+    @Override
+    public void closePath() {
+        lineTo(sx, sy);
+        if (firstSegidx > 0) {
+            if (!dashOn || needsMoveTo) {
+                out.moveTo(sx, sy);
+            }
+            emitFirstSegments();
+        }
+        moveTo(sx, sy);
+    }
+
+    @Override
+    public void pathDone() {
+        if (firstSegidx > 0) {
+            out.moveTo(sx, sy);
+            emitFirstSegments();
+        }
+        out.pathDone();
+
+        // Dispose this instance:
+        dispose();
+    }
+
+    @Override
+    public long getNativeConsumer() {
+        throw new InternalError("DDasher does not use a native consumer");
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/DHelpers.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import static java.lang.Math.PI;
+import static java.lang.Math.cos;
+import static java.lang.Math.sqrt;
+import static java.lang.Math.cbrt;
+import static java.lang.Math.acos;
+
+/**
+ * Static numeric helpers working in double precision: polynomial root
+ * finding and evaluation, poly-line length, insertion sort and Bezier curve
+ * subdivision on flat coordinate arrays.
+ * <p>
+ * Curves are stored as consecutive (x, y) pairs: 6 values describe a
+ * quadratic curve and 8 values describe a cubic curve.
+ */
+final class DHelpers implements MarlinConst {
+
+    /** Always throws: this class only hosts static methods. */
+    private DHelpers() {
+        throw new Error("This is a non instantiable class");
+    }
+
+    /**
+     * Tests whether two values differ by at most {@code err}.
+     *
+     * @param x first value
+     * @param y second value
+     * @param err largest accepted absolute difference
+     * @return true if {@code -err <= (y - x) <= err}; false otherwise
+     * (in particular when any argument is NaN)
+     */
+    static boolean within(final double x, final double y, final double err) {
+        final double d = y - x;
+        return (d <= err && d >= -err);
+    }
+
+    /**
+     * Finds the real roots of {@code a*t^2 + b*t + c}.
+     * <p>
+     * When {@code a} is zero the equation is solved as the linear equation
+     * {@code b*t + c = 0}; when both {@code a} and {@code b} are zero no
+     * root is reported. A negative discriminant also reports no root.
+     *
+     * @param a quadratic coefficient
+     * @param b linear coefficient
+     * @param c constant coefficient
+     * @param zeroes array receiving the roots; needs room for 2 values
+     * starting at {@code off}
+     * @param off index of the first root stored into {@code zeroes}
+     * @return the number of roots stored (0, 1 or 2)
+     */
+    static int quadraticRoots(final double a, final double b,
+                              final double c, double[] zeroes, final int off)
+    {
+        int ret = off;
+        double t;
+        if (a != 0.0d) {
+            final double dis = b*b - 4*a*c;
+            if (dis > 0.0d) {
+                final double sqrtDis = Math.sqrt(dis);
+                // depending on the sign of b we use a slightly different
+                // algorithm than the traditional one to find one of the roots
+                // so we can avoid adding numbers of different signs (which
+                // might result in loss of precision).
+                if (b >= 0.0d) {
+                    zeroes[ret++] = (2.0d * c) / (-b - sqrtDis);
+                    zeroes[ret++] = (-b - sqrtDis) / (2.0d * a);
+                } else {
+                    zeroes[ret++] = (-b + sqrtDis) / (2.0d * a);
+                    zeroes[ret++] = (2.0d * c) / (-b + sqrtDis);
+                }
+            } else if (dis == 0.0d) {
+                // double root
+                t = (-b) / (2.0d * a);
+                zeroes[ret++] = t;
+            }
+        } else {
+            // degenerate case: linear equation b*t + c = 0
+            if (b != 0.0d) {
+                t = (-c) / b;
+                zeroes[ret++] = t;
+            }
+        }
+        return ret - off;
+    }
+
+    /**
+     * Finds the roots of {@code g(t) = d*t^3 + a*t^2 + b*t + c} that lie in
+     * the half-open interval {@code [A, B)}.
+     * <p>
+     * When {@code d} is zero the polynomial is handled by
+     * {@link #quadraticRoots}.
+     *
+     * @param d cubic coefficient
+     * @param a quadratic coefficient
+     * @param b linear coefficient
+     * @param c constant coefficient
+     * @param pts array receiving the roots; needs room for 3 values
+     * starting at {@code off} as it is also used as scratch storage
+     * @param off index of the first root stored into {@code pts}
+     * @param A inclusive lower bound of the accepted roots
+     * @param B exclusive upper bound of the accepted roots
+     * @return the number of roots kept in {@code pts}
+     */
+    static int cubicRootsInAB(double d, double a, double b, double c,
+                              double[] pts, final int off,
+                              final double A, final double B)
+    {
+        if (d == 0.0d) {
+            int num = quadraticRoots(a, b, c, pts, off);
+            return filterOutNotInAB(pts, off, num, A, B) - off;
+        }
+        // From Graphics Gems:
+        // http://tog.acm.org/resources/GraphicsGems/gems/Roots3And4.c
+        // (also from awt.geom.CubicCurve2D. But here we don't need as
+        // much accuracy and we don't want to create arrays so we use
+        // our own customized version).
+
+        // normal form: x^3 + ax^2 + bx + c = 0
+        a /= d;
+        b /= d;
+        c /= d;
+
+        //  substitute x = y - a/3 to eliminate quadratic term:
+        //     y^3 + Py + Q = 0
+        // (a is the normalized quadratic coefficient, not the bound A)
+        //
+        // Since we actually need P/3 and Q/2 for all of the
+        // calculations that follow, we will calculate
+        // p = P/3
+        // q = Q/2
+        // instead and use those values for simplicity of the code.
+        double sq_A = a * a;
+        double p = (1.0d/3.0d) * ((-1.0d/3.0d) * sq_A + b);
+        double q = (1.0d/2.0d) * ((2.0d/27.0d) * a * sq_A - (1.0d/3.0d) * a * b + c);
+
+        // use Cardano's formula
+
+        double cb_p = p * p * p;
+        double D = q * q + cb_p;
+
+        int num;
+        if (D < 0.0d) {
+            // three distinct real roots (trigonometric method)
+            // see: http://en.wikipedia.org/wiki/Cubic_function#Trigonometric_.28and_hyperbolic.29_method
+            final double phi = (1.0d/3.0d) * acos(-q / sqrt(-cb_p));
+            final double t = 2.0d * sqrt(-p);
+
+            pts[ off+0 ] = ( t * cos(phi));
+            pts[ off+1 ] = (-t * cos(phi + (PI / 3.0d)));
+            pts[ off+2 ] = (-t * cos(phi - (PI / 3.0d)));
+            num = 3;
+        } else {
+            final double sqrt_D = sqrt(D);
+            final double u = cbrt(sqrt_D - q);
+            final double v = - cbrt(sqrt_D + q);
+
+            pts[ off ] = (u + v);
+            num = 1;
+
+            // D close to zero: also report the double root
+            if (within(D, 0.0d, 1e-8d)) {
+                pts[off+1] = -(pts[off] / 2.0d);
+                num = 2;
+            }
+        }
+
+        // undo the substitution x = y - a/3
+        final double sub = (1.0d/3.0d) * a;
+
+        for (int i = 0; i < num; ++i) {
+            pts[ off+i ] -= sub;
+        }
+
+        return filterOutNotInAB(pts, off, num, A, B) - off;
+    }
+
+    /**
+     * Evaluates {@code a*t^3 + b*t^2 + c*t + d} using Horner's scheme.
+     *
+     * @param a cubic coefficient
+     * @param b quadratic coefficient
+     * @param c linear coefficient
+     * @param d constant coefficient
+     * @param t value at which the polynomial is evaluated
+     * @return the value of the polynomial at {@code t}
+     */
+    static double evalCubic(final double a, final double b,
+                           final double c, final double d,
+                           final double t)
+    {
+        return t * (t * (t * a + b) + c) + d;
+    }
+
+    /**
+     * Evaluates {@code a*t^2 + b*t + c} using Horner's scheme.
+     *
+     * @param a quadratic coefficient
+     * @param b linear coefficient
+     * @param c constant coefficient
+     * @param t value at which the polynomial is evaluated
+     * @return the value of the polynomial at {@code t}
+     */
+    static double evalQuad(final double a, final double b,
+                          final double c, final double t)
+    {
+        return t * (t * a + b) + c;
+    }
+
+    /**
+     * Keeps, in place and in their original order, the values of
+     * {@code nums[off .. off + len - 1]} that lie in {@code [a, b)}.
+     *
+     * @param nums array filtered in place
+     * @param off index of the first value to examine
+     * @param len number of values to examine
+     * @param a inclusive lower bound
+     * @param b exclusive upper bound
+     * @return the index 1 past the last valid element remaining after
+     * filtering
+     */
+    static int filterOutNotInAB(double[] nums, final int off, final int len,
+                                final double a, final double b)
+    {
+        int ret = off;
+        for (int i = off, end = off + len; i < end; i++) {
+            if (nums[i] >= a && nums[i] < b) {
+                nums[ret++] = nums[i];
+            }
+        }
+        return ret;
+    }
+
+    /**
+     * Computes the length of the poly-line whose points are stored as
+     * consecutive (x, y) pairs in {@code poly}.
+     *
+     * @param poly array holding the coordinates
+     * @param off index of the first coordinate
+     * @param nCoords number of coordinates (twice the number of points)
+     * @return the sum of the lengths of the segments joining consecutive
+     * points; 0 when there are fewer than 2 points
+     */
+    static double polyLineLength(double[] poly, final int off, final int nCoords) {
+        assert nCoords % 2 == 0 && poly.length >= off + nCoords : "";
+        double acc = 0.0d;
+        for (int i = off + 2; i < off + nCoords; i += 2) {
+            acc += linelen(poly[i], poly[i+1], poly[i-2], poly[i-1]);
+        }
+        return acc;
+    }
+
+    /**
+     * Computes the Euclidean distance between (x1, y1) and (x2, y2).
+     *
+     * @param x1 x coordinate of the first point
+     * @param y1 y coordinate of the first point
+     * @param x2 x coordinate of the second point
+     * @param y2 y coordinate of the second point
+     * @return the distance between both points
+     */
+    static double linelen(double x1, double y1, double x2, double y2) {
+        final double dx = x2 - x1;
+        final double dy = y2 - y1;
+        return Math.sqrt(dx*dx + dy*dy);
+    }
+
+    /**
+     * Subdivides a curve in two halves, dispatching on its coordinate count.
+     *
+     * @param src the array holding the coordinates for the source curve
+     * @param srcoff the offset of the first source coordinate
+     * @param left the array receiving the first half (may be null)
+     * @param leftoff the offset of the first left coordinate
+     * @param right the array receiving the second half (may be null)
+     * @param rightoff the offset of the first right coordinate
+     * @param type number of coordinates of the curve: 6 (quadratic) or
+     * 8 (cubic)
+     * @throws InternalError if {@code type} is neither 6 nor 8
+     */
+    static void subdivide(double[] src, int srcoff, double[] left, int leftoff,
+                          double[] right, int rightoff, int type)
+    {
+        switch(type) {
+        case 6:
+            DHelpers.subdivideQuad(src, srcoff, left, leftoff, right, rightoff);
+            return;
+        case 8:
+            DHelpers.subdivideCubic(src, srcoff, left, leftoff, right, rightoff);
+            return;
+        default:
+            throw new InternalError("Unsupported curve type");
+        }
+    }
+
+    /**
+     * Sorts {@code a[off .. off + len - 1]} in ascending order using an
+     * insertion sort.
+     *
+     * @param a array sorted in place
+     * @param off index of the first value to sort
+     * @param len number of values to sort
+     */
+    static void isort(double[] a, int off, int len) {
+        for (int i = off + 1, end = off + len; i < end; i++) {
+            double ai = a[i];
+            int j = i - 1;
+            // shift the larger values one slot to the right
+            for (; j >= off && a[j] > ai; j--) {
+                a[j+1] = a[j];
+            }
+            a[j+1] = ai;
+        }
+    }
+
+    // Most of these are copied from classes in java.awt.geom because we need
+    // both single and double precision variants of these functions, and Line2D,
+    // CubicCurve2D, QuadCurve2D don't provide them.
+    /**
+     * Subdivides the cubic curve specified by the coordinates
+     * stored in the <code>src</code> array at indices <code>srcoff</code>
+     * through (<code>srcoff</code>&nbsp;+&nbsp;7) and stores the
+     * resulting two subdivided curves into the two result arrays at the
+     * corresponding indices.
+     * Either or both of the <code>left</code> and <code>right</code>
+     * arrays may be <code>null</code> or a reference to the same array
+     * as the <code>src</code> array.
+     * Note that the last point in the first subdivided curve is the
+     * same as the first point in the second subdivided curve. Thus,
+     * it is possible to pass the same array for <code>left</code>
+     * and <code>right</code> and to use offsets, such as <code>rightoff</code>
+     * equals (<code>leftoff</code> + 6), in order
+     * to avoid allocating extra storage for this common point.
+     * @param src the array holding the coordinates for the source curve
+     * @param srcoff the offset into the array of the beginning of the
+     * the 8 source coordinates
+     * @param left the array for storing the coordinates for the first
+     * half of the subdivided curve
+     * @param leftoff the offset into the array of the beginning of the
+     * the 8 left coordinates
+     * @param right the array for storing the coordinates for the second
+     * half of the subdivided curve
+     * @param rightoff the offset into the array of the beginning of the
+     * the 8 right coordinates
+     * @since 1.7
+     */
+    static void subdivideCubic(double[] src, int srcoff,
+                               double[] left, int leftoff,
+                               double[] right, int rightoff)
+    {
+        // all source values are read before any write so that src may be
+        // the same array as left or right
+        double x1 = src[srcoff + 0];
+        double y1 = src[srcoff + 1];
+        double ctrlx1 = src[srcoff + 2];
+        double ctrly1 = src[srcoff + 3];
+        double ctrlx2 = src[srcoff + 4];
+        double ctrly2 = src[srcoff + 5];
+        double x2 = src[srcoff + 6];
+        double y2 = src[srcoff + 7];
+        if (left != null) {
+            left[leftoff + 0] = x1;
+            left[leftoff + 1] = y1;
+        }
+        if (right != null) {
+            right[rightoff + 6] = x2;
+            right[rightoff + 7] = y2;
+        }
+        // successive midpoints (de Casteljau construction at t = 1/2)
+        x1 = (x1 + ctrlx1) / 2.0d;
+        y1 = (y1 + ctrly1) / 2.0d;
+        x2 = (x2 + ctrlx2) / 2.0d;
+        y2 = (y2 + ctrly2) / 2.0d;
+        double centerx = (ctrlx1 + ctrlx2) / 2.0d;
+        double centery = (ctrly1 + ctrly2) / 2.0d;
+        ctrlx1 = (x1 + centerx) / 2.0d;
+        ctrly1 = (y1 + centery) / 2.0d;
+        ctrlx2 = (x2 + centerx) / 2.0d;
+        ctrly2 = (y2 + centery) / 2.0d;
+        centerx = (ctrlx1 + ctrlx2) / 2.0d;
+        centery = (ctrly1 + ctrly2) / 2.0d;
+        if (left != null) {
+            left[leftoff + 2] = x1;
+            left[leftoff + 3] = y1;
+            left[leftoff + 4] = ctrlx1;
+            left[leftoff + 5] = ctrly1;
+            left[leftoff + 6] = centerx;
+            left[leftoff + 7] = centery;
+        }
+        if (right != null) {
+            right[rightoff + 0] = centerx;
+            right[rightoff + 1] = centery;
+            right[rightoff + 2] = ctrlx2;
+            right[rightoff + 3] = ctrly2;
+            right[rightoff + 4] = x2;
+            right[rightoff + 5] = y2;
+        }
+    }
+
+    /**
+     * Subdivides the cubic curve stored in <code>src</code> at the
+     * parameter <code>t</code> instead of its middle; every intermediate
+     * point is the linear interpolation at <code>t</code> of two points.
+     * Either or both of the <code>left</code> and <code>right</code>
+     * arrays may be <code>null</code> or a reference to the same array
+     * as the <code>src</code> array.
+     * @param t the parameter at which the curve is split
+     * @param src the array holding the coordinates for the source curve
+     * @param srcoff the offset into the array of the beginning of the
+     * the 8 source coordinates
+     * @param left the array for storing the coordinates for the first
+     * part of the subdivided curve
+     * @param leftoff the offset into the array of the beginning of the
+     * the 8 left coordinates
+     * @param right the array for storing the coordinates for the second
+     * part of the subdivided curve
+     * @param rightoff the offset into the array of the beginning of the
+     * the 8 right coordinates
+     */
+    static void subdivideCubicAt(double t, double[] src, int srcoff,
+                                 double[] left, int leftoff,
+                                 double[] right, int rightoff)
+    {
+        // all source values are read before any write so that src may be
+        // the same array as left or right
+        double x1 = src[srcoff + 0];
+        double y1 = src[srcoff + 1];
+        double ctrlx1 = src[srcoff + 2];
+        double ctrly1 = src[srcoff + 3];
+        double ctrlx2 = src[srcoff + 4];
+        double ctrly2 = src[srcoff + 5];
+        double x2 = src[srcoff + 6];
+        double y2 = src[srcoff + 7];
+        if (left != null) {
+            left[leftoff + 0] = x1;
+            left[leftoff + 1] = y1;
+        }
+        if (right != null) {
+            right[rightoff + 6] = x2;
+            right[rightoff + 7] = y2;
+        }
+        // successive linear interpolations at t (de Casteljau construction)
+        x1 = x1 + t * (ctrlx1 - x1);
+        y1 = y1 + t * (ctrly1 - y1);
+        x2 = ctrlx2 + t * (x2 - ctrlx2);
+        y2 = ctrly2 + t * (y2 - ctrly2);
+        double centerx = ctrlx1 + t * (ctrlx2 - ctrlx1);
+        double centery = ctrly1 + t * (ctrly2 - ctrly1);
+        ctrlx1 = x1 + t * (centerx - x1);
+        ctrly1 = y1 + t * (centery - y1);
+        ctrlx2 = centerx + t * (x2 - centerx);
+        ctrly2 = centery + t * (y2 - centery);
+        centerx = ctrlx1 + t * (ctrlx2 - ctrlx1);
+        centery = ctrly1 + t * (ctrly2 - ctrly1);
+        if (left != null) {
+            left[leftoff + 2] = x1;
+            left[leftoff + 3] = y1;
+            left[leftoff + 4] = ctrlx1;
+            left[leftoff + 5] = ctrly1;
+            left[leftoff + 6] = centerx;
+            left[leftoff + 7] = centery;
+        }
+        if (right != null) {
+            right[rightoff + 0] = centerx;
+            right[rightoff + 1] = centery;
+            right[rightoff + 2] = ctrlx2;
+            right[rightoff + 3] = ctrly2;
+            right[rightoff + 4] = x2;
+            right[rightoff + 5] = y2;
+        }
+    }
+
+    /**
+     * Subdivides the quadratic curve stored in <code>src</code> at indices
+     * <code>srcoff</code> through (<code>srcoff</code>&nbsp;+&nbsp;5) in
+     * its middle and stores the two resulting curves into the result arrays.
+     * Either or both of the <code>left</code> and <code>right</code>
+     * arrays may be <code>null</code> or a reference to the same array
+     * as the <code>src</code> array.
+     * @param src the array holding the coordinates for the source curve
+     * @param srcoff the offset into the array of the beginning of the
+     * the 6 source coordinates
+     * @param left the array for storing the coordinates for the first
+     * half of the subdivided curve
+     * @param leftoff the offset into the array of the beginning of the
+     * the 6 left coordinates
+     * @param right the array for storing the coordinates for the second
+     * half of the subdivided curve
+     * @param rightoff the offset into the array of the beginning of the
+     * the 6 right coordinates
+     */
+    static void subdivideQuad(double[] src, int srcoff,
+                              double[] left, int leftoff,
+                              double[] right, int rightoff)
+    {
+        // all source values are read before any write so that src may be
+        // the same array as left or right
+        double x1 = src[srcoff + 0];
+        double y1 = src[srcoff + 1];
+        double ctrlx = src[srcoff + 2];
+        double ctrly = src[srcoff + 3];
+        double x2 = src[srcoff + 4];
+        double y2 = src[srcoff + 5];
+        if (left != null) {
+            left[leftoff + 0] = x1;
+            left[leftoff + 1] = y1;
+        }
+        if (right != null) {
+            right[rightoff + 4] = x2;
+            right[rightoff + 5] = y2;
+        }
+        // successive midpoints (de Casteljau construction at t = 1/2)
+        x1 = (x1 + ctrlx) / 2.0d;
+        y1 = (y1 + ctrly) / 2.0d;
+        x2 = (x2 + ctrlx) / 2.0d;
+        y2 = (y2 + ctrly) / 2.0d;
+        ctrlx = (x1 + x2) / 2.0d;
+        ctrly = (y1 + y2) / 2.0d;
+        if (left != null) {
+            left[leftoff + 2] = x1;
+            left[leftoff + 3] = y1;
+            left[leftoff + 4] = ctrlx;
+            left[leftoff + 5] = ctrly;
+        }
+        if (right != null) {
+            right[rightoff + 0] = ctrlx;
+            right[rightoff + 1] = ctrly;
+            right[rightoff + 2] = x2;
+            right[rightoff + 3] = y2;
+        }
+    }
+
+    /**
+     * Subdivides the quadratic curve stored in <code>src</code> at the
+     * parameter <code>t</code> instead of its middle; every intermediate
+     * point is the linear interpolation at <code>t</code> of two points.
+     * Either or both of the <code>left</code> and <code>right</code>
+     * arrays may be <code>null</code> or a reference to the same array
+     * as the <code>src</code> array.
+     * @param t the parameter at which the curve is split
+     * @param src the array holding the coordinates for the source curve
+     * @param srcoff the offset into the array of the beginning of the
+     * the 6 source coordinates
+     * @param left the array for storing the coordinates for the first
+     * part of the subdivided curve
+     * @param leftoff the offset into the array of the beginning of the
+     * the 6 left coordinates
+     * @param right the array for storing the coordinates for the second
+     * part of the subdivided curve
+     * @param rightoff the offset into the array of the beginning of the
+     * the 6 right coordinates
+     */
+    static void subdivideQuadAt(double t, double[] src, int srcoff,
+                                double[] left, int leftoff,
+                                double[] right, int rightoff)
+    {
+        // all source values are read before any write so that src may be
+        // the same array as left or right
+        double x1 = src[srcoff + 0];
+        double y1 = src[srcoff + 1];
+        double ctrlx = src[srcoff + 2];
+        double ctrly = src[srcoff + 3];
+        double x2 = src[srcoff + 4];
+        double y2 = src[srcoff + 5];
+        if (left != null) {
+            left[leftoff + 0] = x1;
+            left[leftoff + 1] = y1;
+        }
+        if (right != null) {
+            right[rightoff + 4] = x2;
+            right[rightoff + 5] = y2;
+        }
+        // successive linear interpolations at t (de Casteljau construction)
+        x1 = x1 + t * (ctrlx - x1);
+        y1 = y1 + t * (ctrly - y1);
+        x2 = ctrlx + t * (x2 - ctrlx);
+        y2 = ctrly + t * (y2 - ctrly);
+        ctrlx = x1 + t * (x2 - x1);
+        ctrly = y1 + t * (y2 - y1);
+        if (left != null) {
+            left[leftoff + 2] = x1;
+            left[leftoff + 3] = y1;
+            left[leftoff + 4] = ctrlx;
+            left[leftoff + 5] = ctrly;
+        }
+        if (right != null) {
+            right[rightoff + 0] = ctrlx;
+            right[rightoff + 1] = ctrly;
+            right[rightoff + 2] = x2;
+            right[rightoff + 3] = y2;
+        }
+    }
+
+    /**
+     * Subdivides a curve at the parameter {@code t}, dispatching on its
+     * coordinate count.
+     *
+     * @param t the parameter at which the curve is split
+     * @param src the array holding the coordinates for the source curve
+     * @param srcoff the offset of the first source coordinate
+     * @param left the array receiving the first part (may be null)
+     * @param leftoff the offset of the first left coordinate
+     * @param right the array receiving the second part (may be null)
+     * @param rightoff the offset of the first right coordinate
+     * @param size number of coordinates of the curve: 6 (quadratic) or
+     * 8 (cubic)
+     * @throws InternalError if {@code size} is neither 6 nor 8
+     */
+    static void subdivideAt(double t, double[] src, int srcoff,
+                            double[] left, int leftoff,
+                            double[] right, int rightoff, int size)
+    {
+        switch(size) {
+        case 8:
+            subdivideCubicAt(t, src, srcoff, left, leftoff, right, rightoff);
+            return;
+        case 6:
+            subdivideQuadAt(t, src, srcoff, left, leftoff, right, rightoff);
+            return;
+        default:
+            // fail fast like subdivide() rather than silently leaving the
+            // output arrays untouched
+            throw new InternalError("Unsupported curve type");
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/DMarlinRenderingEngine.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,1111 @@
+/*
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.awt.BasicStroke;
+import java.awt.Shape;
+import java.awt.geom.AffineTransform;
+import java.awt.geom.Path2D;
+import java.awt.geom.PathIterator;
+import java.security.AccessController;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+import sun.java2d.ReentrantContextProvider;
+import sun.java2d.ReentrantContextProviderCLQ;
+import sun.java2d.ReentrantContextProviderTL;
+import sun.java2d.pipe.AATileGenerator;
+import sun.java2d.pipe.Region;
+import sun.java2d.pipe.RenderingEngine;
+import sun.security.action.GetPropertyAction;
+
+/**
+ * Marlin RenderingEngine implementation (derived from Pisces)
+ */
+public final class DMarlinRenderingEngine extends RenderingEngine
+                                          implements MarlinConst
+{
+    /**
+     * Stroke normalization modes. Each constant decides which PathIterator
+     * is consumed in place of the source iterator.
+     */
+    private enum NormMode {
+        /** Normalization enabled together with antialiasing. */
+        ON_WITH_AA {
+            @Override
+            PathIterator getNormalizingPathIterator(final DRendererContext ctx,
+                                                    final PathIterator input)
+            {
+                // NearestPixelCenter normalizing iterator held by the context:
+                return ctx.nPCPathIterator.init(input);
+            }
+        },
+        /** Normalization enabled without antialiasing. */
+        ON_NO_AA {
+            @Override
+            PathIterator getNormalizingPathIterator(final DRendererContext ctx,
+                                                    final PathIterator input)
+            {
+                // NearestPixel normalizing iterator held by the context:
+                return ctx.nPQPathIterator.init(input);
+            }
+        },
+        /** Normalization disabled. */
+        OFF {
+            @Override
+            PathIterator getNormalizingPathIterator(final DRendererContext ctx,
+                                                    final PathIterator input)
+            {
+                // nothing to normalize: hand back the source iterator as is
+                return input;
+            }
+        };
+
+        /**
+         * Gives the iterator to use for the given source iterator.
+         *
+         * @param rdrCtx renderer context holding the reusable normalizing
+         *               iterators
+         * @param src source path iterator
+         * @return the normalizing iterator initialized with {@code src}, or
+         *         {@code src} itself when normalization is disabled
+         */
+        abstract PathIterator getNormalizingPathIterator(DRendererContext rdrCtx,
+                                                         PathIterator src);
+    }
+
+    // Line width handed to userSpaceLineWidth() by strokeTo() for thin
+    // strokes when antialiasing is enabled: 1 / NORM_SUBPIXELS
+    // (NORM_SUBPIXELS is declared outside this excerpt).
+    private static final float MIN_PEN_SIZE = 1.0f / NORM_SUBPIXELS;
+
+    // Half of Float.MAX_VALUE (as a double) and its negation.
+    // NOTE(review): not used in the visible part of this class; presumably
+    // the bounds applied to path coordinates - confirm against pathTo().
+    static final double UPPER_BND = Float.MAX_VALUE / 2.0d;
+    static final double LOWER_BND = -UPPER_BND;
+
+    /**
+     * Creates the rendering engine and passes its class name to
+     * logSettings().
+     */
+    public DMarlinRenderingEngine() {
+        // the superclass no-arg constructor is invoked implicitly
+        final String engineName = DMarlinRenderingEngine.class.getName();
+        logSettings(engineName);
+    }
+
+    /**
+     * Builds the outline of the given shape once it is stroked.
+     * <p>
+     * The {@code src} {@link Shape} is widened according to the given
+     * attributes, which have the meaning defined by {@link BasicStroke}.
+     * No transform and no stroke normalization are applied.
+     *
+     * @param src the source path to be widened
+     * @param width the width of the widened path as per {@code BasicStroke}
+     * @param caps the end cap decorations as per {@code BasicStroke}
+     * @param join the segment join decorations as per {@code BasicStroke}
+     * @param miterlimit the miter limit as per {@code BasicStroke}
+     * @param dashes the dash length array as per {@code BasicStroke}
+     * @param dashphase the initial dash phase as per {@code BasicStroke}
+     * @return the widened path stored in a new {@code Shape} object
+     * @since 1.7
+     */
+    @Override
+    public Shape createStrokedShape(Shape src,
+                                    float width,
+                                    int caps,
+                                    int join,
+                                    float miterlimit,
+                                    float[] dashes,
+                                    float dashphase)
+    {
+        final DRendererContext ctx = getRendererContext();
+        try {
+            // large Path2D provided by the context to avoid a lot of array
+            // growing while the outline is collected:
+            final Path2D.Double outline = ctx.getPath2D();
+
+            // consumer storing the stroker output into the Path2D:
+            final DPathConsumer2D sink = ctx.transformerPC2D.wrapPath2d(outline);
+
+            // null transform, normalization disabled:
+            strokeTo(ctx, src, null, width, NormMode.OFF,
+                     caps, join, miterlimit, dashes, dashphase, sink);
+
+            // the copy constructor trims the storage to the actual size
+            return new Path2D.Double(outline);
+
+        } finally {
+            // always give the DRendererContext instance back for reuse
+            returnRendererContext(ctx);
+        }
+    }
+
+    /**
+     * Sends the geometry for a widened path as specified by the parameters
+     * to the specified consumer.
+     * <p>
+     * The specified {@code src} {@link Shape} is widened according
+     * to the parameters specified by the {@link BasicStroke} object.
+     * Adjustments are made to the path as appropriate for the
+     * {@link java.awt.RenderingHints#VALUE_STROKE_NORMALIZE} hint if the
+     * {@code normalize} boolean parameter is true.
+     * Adjustments are made to the path as appropriate for the
+     * {@link java.awt.RenderingHints#VALUE_ANTIALIAS_ON} hint if the
+     * {@code antialias} boolean parameter is true.
+     * <p>
+     * The given consumer is wrapped by the adapter of the renderer context
+     * and the geometry of the widened path is forwarded to it as it is
+     * calculated.
+     *
+     * @param src the source path to be widened
+     * @param at the transform forwarded to the internal {@code strokeTo}
+     *           overload; may be null
+     * @param bs the {@code BasicStroke} object specifying the
+     *           decorations to be applied to the widened path
+     * @param thin forwarded to the internal {@code strokeTo} overload, which
+     *             then derives the line width from the transform instead of
+     *             using the width of {@code bs}
+     * @param normalize indicates whether stroke normalization should
+     *                  be applied
+     * @param antialias indicates whether or not adjustments appropriate
+     *                  to antialiased rendering should be applied
+     * @param consumer the {@code PathConsumer2D} instance to forward
+     *                 the widened geometry to
+     * @since 1.7
+     */
+    @Override
+    public void strokeTo(Shape src,
+                         AffineTransform at,
+                         BasicStroke bs,
+                         boolean thin,
+                         boolean normalize,
+                         boolean antialias,
+                         final sun.awt.geom.PathConsumer2D consumer)
+    {
+        // map both hints to the normalization mode:
+        final NormMode mode;
+        if (!normalize) {
+            mode = NormMode.OFF;
+        } else if (antialias) {
+            mode = NormMode.ON_WITH_AA;
+        } else {
+            mode = NormMode.ON_NO_AA;
+        }
+
+        final DRendererContext ctx = getRendererContext();
+        try {
+            // adapt the given consumer to the double-precision pipeline:
+            final DPathConsumer2D adapted = ctx.p2dAdapter.init(consumer);
+
+            strokeTo(ctx, src, at, bs, thin, mode, antialias, adapted);
+        } finally {
+            // always give the DRendererContext instance back for reuse
+            returnRendererContext(ctx);
+        }
+    }
+
+    /**
+     * Resolves the line width to use and delegates to the overload taking
+     * the individual stroke attributes.
+     * <p>
+     * For a thin stroke the width of {@code bs} is ignored: the width is
+     * computed by userSpaceLineWidth() from MIN_PEN_SIZE when antialiasing
+     * is enabled, or from 1.0 otherwise. All other attributes (caps, join,
+     * miter limit, dashes, dash phase) are always read from {@code bs}.
+     *
+     * @param rdrCtx renderer context
+     * @param src the source path to be widened
+     * @param at the transform; may be null
+     * @param bs the stroke attributes; must not be null
+     * @param thin true to derive the line width from the transform
+     * @param normalize the normalization mode
+     * @param antialias true if antialiasing is enabled
+     * @param pc2d the consumer receiving the widened geometry
+     */
+    final void strokeTo(final DRendererContext rdrCtx,
+                        Shape src,
+                        AffineTransform at,
+                        BasicStroke bs,
+                        boolean thin,
+                        NormMode normalize,
+                        boolean antialias,
+                        DPathConsumer2D pc2d)
+    {
+        final double lineWidth;
+        if (!thin) {
+            lineWidth = bs.getLineWidth();
+        } else {
+            // pen width expressed before the transform is undone:
+            final double penWidth = (antialias) ? MIN_PEN_SIZE : 1.0d;
+            lineWidth = userSpaceLineWidth(at, penWidth);
+        }
+
+        final int caps = bs.getEndCap();
+        final int join = bs.getLineJoin();
+        final float miterLimit = bs.getMiterLimit();
+        final float[] dashes = bs.getDashArray();
+        final float dashPhase = bs.getDashPhase();
+
+        strokeTo(rdrCtx, src, at, lineWidth, normalize,
+                 caps, join, miterLimit, dashes, dashPhase, pc2d);
+    }
+
+    /**
+     * Divides the given line width by a scale factor derived from the
+     * transform, giving the width to use before the transform is applied.
+     * <p>
+     * The scale factor is:
+     * <ul>
+     * <li>1 when {@code at} is null;</li>
+     * <li>the square root of the absolute value of the determinant when the
+     * transform type has the TYPE_GENERAL_TRANSFORM or TYPE_GENERAL_SCALE
+     * bit set;</li>
+     * <li>otherwise the largest length that the linear part of the transform
+     * gives to a unit vector.</li>
+     * </ul>
+     * The scale factor is not checked against zero: a singular transform
+     * gives an infinite or NaN result.
+     *
+     * @param at the transform; may be null
+     * @param lw the line width to convert
+     * @return {@code lw} divided by the scale factor
+     */
+    private final double userSpaceLineWidth(AffineTransform at, double lw) {
+
+        double widthScale;
+
+        if (at == null) {
+            widthScale = 1.0d;
+        } else if ((at.getType() & (AffineTransform.TYPE_GENERAL_TRANSFORM  |
+                                    AffineTransform.TYPE_GENERAL_SCALE)) != 0) {
+            // The determinant is negative when the transform flips an axis:
+            // use its absolute value, otherwise sqrt() returns NaN and the
+            // resulting line width is NaN too.
+            widthScale = Math.sqrt(Math.abs(at.getDeterminant()));
+        } else {
+            // First calculate the "maximum scale" of this transform.
+            double A = at.getScaleX();       // m00
+            double C = at.getShearX();       // m01
+            double B = at.getShearY();       // m10
+            double D = at.getScaleY();       // m11
+
+            /*
+             * Given a 2 x 2 affine matrix [ A B ] such that
+             *                             [ C D ]
+             * v' = [x' y'] = [Ax + Cy, Bx + Dy], we want to
+             * find the maximum magnitude (norm) of the vector v'
+             * with the constraint (x^2 + y^2 = 1).
+             * The equation to maximize is
+             *     |v'| = sqrt((Ax+Cy)^2+(Bx+Dy)^2)
+             * or  |v'| = sqrt((AA+BB)x^2 + 2(AC+BD)xy + (CC+DD)y^2).
+             * Since sqrt is monotonic we can maximize |v'|^2
+             * instead and plug in the substitution y = sqrt(1 - x^2).
+             * Trigonometric equalities can then be used to get
+             * rid of most of the sqrt terms.
+             */
+
+            double EA = A*A + B*B;          // x^2 coefficient
+            double EB = 2.0d * (A*C + B*D); // xy coefficient
+            double EC = C*C + D*D;          // y^2 coefficient
+
+            /*
+             * There is a lot of calculus omitted here.
+             *
+             * Conceptually, in the interests of understanding the
+             * terms that the calculus produced we can consider
+             * that EA and EC end up providing the lengths along
+             * the major axes and the hypot term ends up being an
+             * adjustment for the additional length along the off-axis
+             * angle of rotated or sheared ellipses as well as an
+             * adjustment for the fact that the equation below
+             * averages the two major axis lengths.  (Notice that
+             * the hypot term contains a part which resolves to the
+             * difference of these two axis lengths in the absence
+             * of rotation.)
+             *
+             * In the calculus, the ratio of the EB and (EA-EC) terms
+             * ends up being the tangent of 2*theta where theta is
+             * the angle that the long axis of the ellipse makes
+             * with the horizontal axis.  Thus, this equation is
+             * calculating the length of the hypotenuse of a triangle
+             * along that axis.
+             */
+
+            double hypot = Math.sqrt(EB*EB + (EA-EC)*(EA-EC));
+            // square of the maximum scale; its square root is taken below.
+            double widthsquared = ((EA + EC + hypot) / 2.0d);
+
+            widthScale = Math.sqrt(widthsquared);
+        }
+
+        return (lw / widthScale);
+    }
+
+    final void strokeTo(final DRendererContext rdrCtx,
+                        Shape src,
+                        AffineTransform at,
+                        double width,
+                        NormMode norm,
+                        int caps,
+                        int join,
+                        float miterlimit,
+                        float[] dashes,
+                        float dashphase,
+                        DPathConsumer2D pc2d)
+    {
+        // We use strokerat so that in Stroker and Dasher we can work only
+        // with the pre-transformation coordinates. This will repeat a lot of
+        // computations done in the path iterator, but the alternative is to
+        // work with transformed paths and compute untransformed coordinates
+        // as needed. This would be faster but I do not think the complexity
+        // of working with both untransformed and transformed coordinates in
+        // the same code is worth it.
+        // However, if a path's width is constant after a transformation,
+        // we can skip all this untransforming.
+
+        // As pathTo() will check transformed coordinates for invalid values
+        // (NaN / Infinity) to ignore such points, it is necessary to apply the
+        // transformation before the path processing.
+        AffineTransform strokerat = null;
+
+        int dashLen = -1;
+        boolean recycleDashes = false;
+        double[] dashesD = null;
+
+        // Ensure converting dashes to double precision:
+        if (dashes != null) {
+            recycleDashes = true;
+            dashLen = dashes.length;
+            dashesD = rdrCtx.dasher.copyDashArray(dashes);
+        }
+
+        if (at != null && !at.isIdentity()) {
+            final double a = at.getScaleX();
+            final double b = at.getShearX();
+            final double c = at.getShearY();
+            final double d = at.getScaleY();
+            final double det = a * d - c * b;
+
+            if (Math.abs(det) <= (2.0d * Double.MIN_VALUE)) {
+                // this rendering engine takes one dimensional curves and turns
+                // them into 2D shapes by giving them width.
+                // However, if everything is to be passed through a singular
+                // transformation, these 2D shapes will be squashed down to 1D
+                // again so, nothing can be drawn.
+
+                // Every path needs an initial moveTo and a pathDone. If these
+                // are not there this causes a SIGSEGV in libawt.so (at the time
+                // of writing of this comment (September 16, 2010)). Actually,
+                // I am not sure if the moveTo is necessary to avoid the SIGSEGV
+                // but the pathDone is definitely needed.
+                pc2d.moveTo(0.0d, 0.0d);
+                pc2d.pathDone();
+                return;
+            }
+
+            // If the transform is a constant multiple of an orthogonal transformation
+            // then every length is just multiplied by a constant, so we just
+            // need to transform input paths to stroker and tell stroker
+            // the scaled width. This condition is satisfied if
+            // a*b == -c*d && a*a+c*c == b*b+d*d. In the actual check below, we
+            // leave a bit of room for error.
+            if (nearZero(a*b + c*d) && nearZero(a*a + c*c - (b*b + d*d))) {
+                final double scale =  Math.sqrt(a*a + c*c);
+
+                if (dashesD != null) {
+                    for (int i = 0; i < dashLen; i++) {
+                        dashesD[i] *= scale;
+                    }
+                    dashphase *= scale;
+                }
+                width *= scale;
+
+                // by now strokerat == null. Input paths to
+                // stroker (and maybe dasher) will have the full transform at
+                // applied to them and nothing will happen to the output paths.
+            } else {
+                strokerat = at;
+
+                // by now strokerat == at. Input paths to
+                // stroker (and maybe dasher) will have the full transform at
+                // applied to them, then they will be normalized, and then
+                // the inverse of *only the non translation part of at* will
+                // be applied to the normalized paths. This won't cause problems
+                // in stroker, because, suppose at = T*A, where T is just the
+                // translation part of at, and A is the rest. T*A has already
+                // been applied to Stroker/Dasher's input. Then Ainv will be
+                // applied. Ainv*T*A is not equal to T, but it is a translation,
+                // which means that none of stroker's assumptions about its
+                // input will be violated. After all this, A will be applied
+                // to stroker's output.
+            }
+        } else {
+            // either at is null or it's the identity. In either case
+            // we don't transform the path.
+            at = null;
+        }
+
+        if (USE_SIMPLIFIER) {
+            // Use simplifier after stroker before Renderer
+            // to remove collinear segments (notably due to cap square)
+            pc2d = rdrCtx.simplifier.init(pc2d);
+        }
+
+        final DTransformingPathConsumer2D transformerPC2D = rdrCtx.transformerPC2D;
+        pc2d = transformerPC2D.deltaTransformConsumer(pc2d, strokerat);
+
+        pc2d = rdrCtx.stroker.init(pc2d, width, caps, join, miterlimit);
+
+        if (dashesD != null) {
+            pc2d = rdrCtx.dasher.init(pc2d, dashesD, dashLen, dashphase,
+                                      recycleDashes);
+        }
+        pc2d = transformerPC2D.inverseDeltaTransformConsumer(pc2d, strokerat);
+
+        final PathIterator pi = norm.getNormalizingPathIterator(rdrCtx,
+                                         src.getPathIterator(at));
+
+        pathTo(rdrCtx, pi, pc2d);
+
+        /*
+         * Pipeline seems to be:
+         * shape.getPathIterator(at)
+         * -> (NormalizingPathIterator)
+         * -> (inverseDeltaTransformConsumer)
+         * -> (Dasher)
+         * -> Stroker
+         * -> (deltaTransformConsumer)
+         *
+         * -> (CollinearSimplifier) to remove redundant segments
+         *
+         * -> pc2d = Renderer (bounding box)
+         */
+    }
+
+    private static boolean nearZero(final double num) {
+        return Math.abs(num) < 2.0d * Math.ulp(num);
+    }
+
+    abstract static class NormalizingPathIterator implements PathIterator {
+
+        private PathIterator src;
+
+        // the adjustment applied to the current position.
+        private double curx_adjust, cury_adjust;
+        // the adjustment applied to the last moveTo position.
+        private double movx_adjust, movy_adjust;
+
+        private final double[] tmp;
+
+        NormalizingPathIterator(final double[] tmp) {
+            this.tmp = tmp;
+        }
+
+        final NormalizingPathIterator init(final PathIterator src) {
+            this.src = src;
+            return this; // fluent API
+        }
+
+        /**
+         * Disposes this path iterator:
+         * clean up before reusing this instance
+         */
+        final void dispose() {
+            // free source PathIterator:
+            this.src = null;
+        }
+
+        @Override
+        public final int currentSegment(final double[] coords) {
+            int lastCoord;
+            final int type = src.currentSegment(coords);
+
+            switch(type) {
+                case PathIterator.SEG_MOVETO:
+                case PathIterator.SEG_LINETO:
+                    lastCoord = 0;
+                    break;
+                case PathIterator.SEG_QUADTO:
+                    lastCoord = 2;
+                    break;
+                case PathIterator.SEG_CUBICTO:
+                    lastCoord = 4;
+                    break;
+                case PathIterator.SEG_CLOSE:
+                    // we don't want to deal with this case later. We just exit now
+                    curx_adjust = movx_adjust;
+                    cury_adjust = movy_adjust;
+                    return type;
+                default:
+                    throw new InternalError("Unrecognized curve type");
+            }
+
+            // normalize endpoint
+            double coord, x_adjust, y_adjust;
+
+            coord = coords[lastCoord];
+            x_adjust = normCoord(coord); // new coord
+            coords[lastCoord] = x_adjust;
+            x_adjust -= coord;
+
+            coord = coords[lastCoord + 1];
+            y_adjust = normCoord(coord); // new coord
+            coords[lastCoord + 1] = y_adjust;
+            y_adjust -= coord;
+
+            // now that the end points are done, normalize the control points
+            switch(type) {
+                case PathIterator.SEG_MOVETO:
+                    movx_adjust = x_adjust;
+                    movy_adjust = y_adjust;
+                    break;
+                case PathIterator.SEG_LINETO:
+                    break;
+                case PathIterator.SEG_QUADTO:
+                    coords[0] += (curx_adjust + x_adjust) / 2.0d;
+                    coords[1] += (cury_adjust + y_adjust) / 2.0d;
+                    break;
+                case PathIterator.SEG_CUBICTO:
+                    coords[0] += curx_adjust;
+                    coords[1] += cury_adjust;
+                    coords[2] += x_adjust;
+                    coords[3] += y_adjust;
+                    break;
+                case PathIterator.SEG_CLOSE:
+                    // handled earlier
+                default:
+            }
+            curx_adjust = x_adjust;
+            cury_adjust = y_adjust;
+            return type;
+        }
+
+        abstract double normCoord(final double coord);
+
+        @Override
+        public final int currentSegment(final float[] coords) {
+            final double[] _tmp = tmp; // dirty
+            int type = this.currentSegment(_tmp);
+            for (int i = 0; i < 6; i++) {
+                coords[i] = (float)_tmp[i];
+            }
+            return type;
+        }
+
+        @Override
+        public final int getWindingRule() {
+            return src.getWindingRule();
+        }
+
+        @Override
+        public final boolean isDone() {
+            if (src.isDone()) {
+                // Dispose this instance:
+                dispose();
+                return true;
+            }
+            return false;
+        }
+
+        @Override
+        public final void next() {
+            src.next();
+        }
+
+        static final class NearestPixelCenter
+                                extends NormalizingPathIterator
+        {
+            NearestPixelCenter(final double[] tmp) {
+                super(tmp);
+            }
+
+            @Override
+            double normCoord(final double coord) {
+                // round to nearest pixel center
+                return Math.floor(coord) + 0.5d;
+            }
+        }
+
+        static final class NearestPixelQuarter
+                                extends NormalizingPathIterator
+        {
+            NearestPixelQuarter(final double[] tmp) {
+                super(tmp);
+            }
+
+            @Override
+            double normCoord(final double coord) {
+                // round to nearest (0.25, 0.25) pixel quarter
+                return Math.floor(coord + 0.25d) + 0.25d;
+            }
+        }
+    }
+
+    private static void pathTo(final DRendererContext rdrCtx, final PathIterator pi,
+                               final DPathConsumer2D pc2d)
+    {
+        // mark context as DIRTY:
+        rdrCtx.dirty = true;
+
+        final double[] coords = rdrCtx.double6;
+
+        pathToLoop(coords, pi, pc2d);
+
+        // mark context as CLEAN:
+        rdrCtx.dirty = false;
+    }
+
+    private static void pathToLoop(final double[] coords, final PathIterator pi,
+                                   final DPathConsumer2D pc2d)
+    {
+        // ported from DuctusRenderingEngine.feedConsumer() but simplified:
+        // - removed skip flag = !subpathStarted
+        // - removed pathClosed (ie subpathStarted not set to false)
+        boolean subpathStarted = false;
+
+        for (; !pi.isDone(); pi.next()) {
+            switch (pi.currentSegment(coords)) {
+            case PathIterator.SEG_MOVETO:
+                /* Checking SEG_MOVETO coordinates if they are out of the
+                 * [LOWER_BND, UPPER_BND] range. This check also handles NaN
+                 * and Infinity values. Skipping next path segment in case of
+                 * invalid data.
+                 */
+                if (coords[0] < UPPER_BND && coords[0] > LOWER_BND &&
+                    coords[1] < UPPER_BND && coords[1] > LOWER_BND)
+                {
+                    pc2d.moveTo(coords[0], coords[1]);
+                    subpathStarted = true;
+                }
+                break;
+            case PathIterator.SEG_LINETO:
+                /* Checking SEG_LINETO coordinates if they are out of the
+                 * [LOWER_BND, UPPER_BND] range. This check also handles NaN
+                 * and Infinity values. Ignoring current path segment in case
+                 * of invalid data. If segment is skipped its endpoint
+                 * (if valid) is used to begin new subpath.
+                 */
+                if (coords[0] < UPPER_BND && coords[0] > LOWER_BND &&
+                    coords[1] < UPPER_BND && coords[1] > LOWER_BND)
+                {
+                    if (subpathStarted) {
+                        pc2d.lineTo(coords[0], coords[1]);
+                    } else {
+                        pc2d.moveTo(coords[0], coords[1]);
+                        subpathStarted = true;
+                    }
+                }
+                break;
+            case PathIterator.SEG_QUADTO:
+                // Quadratic curves take two points
+                /* Checking SEG_QUADTO coordinates if they are out of the
+                 * [LOWER_BND, UPPER_BND] range. This check also handles NaN
+                 * and Infinity values. Ignoring current path segment in case
+                 * of invalid endpoint data. Equivalent to the SEG_LINETO
+                 * if endpoint coordinates are valid but there are invalid data
+                 * among other coordinates
+                 */
+                if (coords[2] < UPPER_BND && coords[2] > LOWER_BND &&
+                    coords[3] < UPPER_BND && coords[3] > LOWER_BND)
+                {
+                    if (subpathStarted) {
+                        if (coords[0] < UPPER_BND && coords[0] > LOWER_BND &&
+                            coords[1] < UPPER_BND && coords[1] > LOWER_BND)
+                        {
+                            pc2d.quadTo(coords[0], coords[1],
+                                        coords[2], coords[3]);
+                        } else {
+                            pc2d.lineTo(coords[2], coords[3]);
+                        }
+                    } else {
+                        pc2d.moveTo(coords[2], coords[3]);
+                        subpathStarted = true;
+                    }
+                }
+                break;
+            case PathIterator.SEG_CUBICTO:
+                // Cubic curves take three points
+                /* Checking SEG_CUBICTO coordinates if they are out of the
+                 * [LOWER_BND, UPPER_BND] range. This check also handles NaN
+                 * and Infinity values. Ignoring current path segment in case
+                 * of invalid endpoint data. Equivalent to the SEG_LINETO
+                 * if endpoint coordinates are valid but there are invalid data
+                 * among other coordinates
+                 */
+                if (coords[4] < UPPER_BND && coords[4] > LOWER_BND &&
+                    coords[5] < UPPER_BND && coords[5] > LOWER_BND)
+                {
+                    if (subpathStarted) {
+                        if (coords[0] < UPPER_BND && coords[0] > LOWER_BND &&
+                            coords[1] < UPPER_BND && coords[1] > LOWER_BND &&
+                            coords[2] < UPPER_BND && coords[2] > LOWER_BND &&
+                            coords[3] < UPPER_BND && coords[3] > LOWER_BND)
+                        {
+                            pc2d.curveTo(coords[0], coords[1],
+                                         coords[2], coords[3],
+                                         coords[4], coords[5]);
+                        } else {
+                            pc2d.lineTo(coords[4], coords[5]);
+                        }
+                    } else {
+                        pc2d.moveTo(coords[4], coords[5]);
+                        subpathStarted = true;
+                    }
+                }
+                break;
+            case PathIterator.SEG_CLOSE:
+                if (subpathStarted) {
+                    pc2d.closePath();
+                    // do not set subpathStarted to false
+                    // in case of missing moveTo() after close()
+                }
+                break;
+            default:
+            }
+        }
+        pc2d.pathDone();
+    }
+
+    /**
+     * Construct an antialiased tile generator for the given shape with
+     * the given rendering attributes and store the bounds of the tile
+     * iteration in the bbox parameter.
+     * The {@code at} parameter specifies a transform that should affect
+     * both the shape and the {@code BasicStroke} attributes.
+     * The {@code clip} parameter specifies the current clip in effect
+     * in device coordinates and can be used to prune the data for the
+     * operation, but the renderer is not required to perform any
+     * clipping.
+     * If the {@code BasicStroke} parameter is null then the shape
+     * should be filled as is, otherwise the attributes of the
+     * {@code BasicStroke} should be used to specify a draw operation.
+     * The {@code thin} parameter indicates whether or not the
+     * transformed {@code BasicStroke} represents coordinates smaller
+     * than the minimum resolution of the antialiasing rasterizer as
+     * specified by the {@code getMinimumAAPenSize()} method.
+     * <p>
+     * Upon returning, this method will fill the {@code bbox} parameter
+     * with 4 values indicating the bounds of the iteration of the
+     * tile generator.
+     * The iteration order of the tiles will be as specified by the
+     * pseudo-code:
+     * <pre>
+     *     for (y = bbox[1]; y < bbox[3]; y += tileheight) {
+     *         for (x = bbox[0]; x < bbox[2]; x += tilewidth) {
+     *         }
+     *     }
+     * </pre>
+     * If there is no output to be rendered, this method may return
+     * null.
+     *
+     * @param s the shape to be rendered (fill or draw)
+     * @param at the transform to be applied to the shape and the
+     *           stroke attributes
+     * @param clip the current clip in effect in device coordinates
+     * @param bs if non-null, a {@code BasicStroke} whose attributes
+     *           should be applied to this operation
+     * @param thin true if the transformed stroke attributes are smaller
+     *             than the minimum dropout pen width
+     * @param normalize true if the {@code VALUE_STROKE_NORMALIZE}
+     *                  {@code RenderingHint} is in effect
+     * @param bbox returns the bounds of the iteration
+     * @return the {@code AATileGenerator} instance to be consulted
+     *         for tile coverages, or null if there is no output to render
+     * @since 1.7
+     */
+    @Override
+    public AATileGenerator getAATileGenerator(Shape s,
+                                              AffineTransform at,
+                                              Region clip,
+                                              BasicStroke bs,
+                                              boolean thin,
+                                              boolean normalize,
+                                              int[] bbox)
+    {
+        MarlinTileGenerator ptg = null;
+        DRenderer r = null;
+
+        final DRendererContext rdrCtx = getRendererContext();
+        try {
+            // Test if at is identity:
+            final AffineTransform _at = (at != null && !at.isIdentity()) ? at
+                                        : null;
+
+            final NormMode norm = (normalize) ? NormMode.ON_WITH_AA : NormMode.OFF;
+
+            if (bs == null) {
+                // fill shape:
+                final PathIterator pi = norm.getNormalizingPathIterator(rdrCtx,
+                                                 s.getPathIterator(_at));
+
+                // note: Winding rule may be EvenOdd ONLY for fill operations !
+                r = rdrCtx.renderer.init(clip.getLoX(), clip.getLoY(),
+                                         clip.getWidth(), clip.getHeight(),
+                                         pi.getWindingRule());
+
+                // TODO: subdivide quad/cubic curves into monotonic curves ?
+                pathTo(rdrCtx, pi, r);
+            } else {
+                // draw shape with given stroke:
+                r = rdrCtx.renderer.init(clip.getLoX(), clip.getLoY(),
+                                         clip.getWidth(), clip.getHeight(),
+                                         PathIterator.WIND_NON_ZERO);
+
+                strokeTo(rdrCtx, s, _at, bs, thin, norm, true, r);
+            }
+            if (r.endRendering()) {
+                ptg = rdrCtx.ptg.init();
+                ptg.getBbox(bbox);
+                // note: do not returnRendererContext(rdrCtx)
+                // as it will be called later by MarlinTileGenerator.dispose()
+                r = null;
+            }
+        } finally {
+            if (r != null) {
+                // dispose renderer and recycle the RendererContext instance:
+                r.dispose();
+            }
+        }
+
+        // Return null to cancel AA tile generation (nothing to render)
+        return ptg;
+    }
+
+    @Override
+    public final AATileGenerator getAATileGenerator(double x, double y,
+                                                    double dx1, double dy1,
+                                                    double dx2, double dy2,
+                                                    double lw1, double lw2,
+                                                    Region clip,
+                                                    int[] bbox)
+    {
+        // REMIND: Deal with large coordinates!
+        double ldx1, ldy1, ldx2, ldy2;
+        boolean innerpgram = (lw1 > 0.0d && lw2 > 0.0d);
+
+        if (innerpgram) {
+            ldx1 = dx1 * lw1;
+            ldy1 = dy1 * lw1;
+            ldx2 = dx2 * lw2;
+            ldy2 = dy2 * lw2;
+            x -= (ldx1 + ldx2) / 2.0d;
+            y -= (ldy1 + ldy2) / 2.0d;
+            dx1 += ldx1;
+            dy1 += ldy1;
+            dx2 += ldx2;
+            dy2 += ldy2;
+            if (lw1 > 1.0d && lw2 > 1.0d) {
+                // Inner parallelogram was entirely consumed by stroke...
+                innerpgram = false;
+            }
+        } else {
+            ldx1 = ldy1 = ldx2 = ldy2 = 0.0d;
+        }
+
+        MarlinTileGenerator ptg = null;
+        DRenderer r = null;
+
+        final DRendererContext rdrCtx = getRendererContext();
+        try {
+            r = rdrCtx.renderer.init(clip.getLoX(), clip.getLoY(),
+                                         clip.getWidth(), clip.getHeight(),
+                                         DRenderer.WIND_EVEN_ODD);
+
+            r.moveTo( x,  y);
+            r.lineTo( (x+dx1),  (y+dy1));
+            r.lineTo( (x+dx1+dx2),  (y+dy1+dy2));
+            r.lineTo( (x+dx2),  (y+dy2));
+            r.closePath();
+
+            if (innerpgram) {
+                x += ldx1 + ldx2;
+                y += ldy1 + ldy2;
+                dx1 -= 2.0d * ldx1;
+                dy1 -= 2.0d * ldy1;
+                dx2 -= 2.0d * ldx2;
+                dy2 -= 2.0d * ldy2;
+                r.moveTo( x,  y);
+                r.lineTo( (x+dx1),  (y+dy1));
+                r.lineTo( (x+dx1+dx2),  (y+dy1+dy2));
+                r.lineTo( (x+dx2),  (y+dy2));
+                r.closePath();
+            }
+            r.pathDone();
+
+            if (r.endRendering()) {
+                ptg = rdrCtx.ptg.init();
+                ptg.getBbox(bbox);
+                // note: do not returnRendererContext(rdrCtx)
+                // as it will be called later by MarlinTileGenerator.dispose()
+                r = null;
+            }
+        } finally {
+            if (r != null) {
+                // dispose renderer and recycle the RendererContext instance:
+                r.dispose();
+            }
+        }
+
+        // Return null to cancel AA tile generation (nothing to render)
+        return ptg;
+    }
+
+    /**
+     * Returns the minimum pen width that the antialiasing rasterizer
+     * can represent without dropouts occurring.
+     * @since 1.7
+     */
+    @Override
+    public float getMinimumAAPenSize() {
+        return MIN_PEN_SIZE;
+    }
+
+    static {
+        if (PathIterator.WIND_NON_ZERO != DRenderer.WIND_NON_ZERO ||
+            PathIterator.WIND_EVEN_ODD != DRenderer.WIND_EVEN_ODD ||
+            BasicStroke.JOIN_MITER != DStroker.JOIN_MITER ||
+            BasicStroke.JOIN_ROUND != DStroker.JOIN_ROUND ||
+            BasicStroke.JOIN_BEVEL != DStroker.JOIN_BEVEL ||
+            BasicStroke.CAP_BUTT != DStroker.CAP_BUTT ||
+            BasicStroke.CAP_ROUND != DStroker.CAP_ROUND ||
+            BasicStroke.CAP_SQUARE != DStroker.CAP_SQUARE)
+        {
+            throw new InternalError("mismatched renderer constants");
+        }
+    }
+
+    // --- DRendererContext handling ---
+    // use ThreadLocal or ConcurrentLinkedQueue to get one DRendererContext
+    private static final boolean USE_THREAD_LOCAL;
+
+    // reference type stored in either TL or CLQ
+    static final int REF_TYPE;
+
+    // Per-thread DRendererContext
+    private static final ReentrantContextProvider<DRendererContext> RDR_CTX_PROVIDER;
+
+    // Static initializer to use TL or CLQ mode
+    static {
+        USE_THREAD_LOCAL = MarlinProperties.isUseThreadLocal();
+
+        // Soft reference by default:
+        final String refType = AccessController.doPrivileged(
+                            new GetPropertyAction("sun.java2d.renderer.useRef",
+                            "soft"));
+
+        // Java 1.6 does not support strings in switch:
+        if ("hard".equalsIgnoreCase(refType)) {
+            REF_TYPE = ReentrantContextProvider.REF_HARD;
+        } else if ("weak".equalsIgnoreCase(refType)) {
+            REF_TYPE = ReentrantContextProvider.REF_WEAK;
+        } else {
+            REF_TYPE = ReentrantContextProvider.REF_SOFT;
+        }
+
+        if (USE_THREAD_LOCAL) {
+            RDR_CTX_PROVIDER = new ReentrantContextProviderTL<DRendererContext>(REF_TYPE)
+                {
+                    @Override
+                    protected DRendererContext newContext() {
+                        return DRendererContext.createContext();
+                    }
+                };
+        } else {
+            RDR_CTX_PROVIDER = new ReentrantContextProviderCLQ<DRendererContext>(REF_TYPE)
+                {
+                    @Override
+                    protected DRendererContext newContext() {
+                        return DRendererContext.createContext();
+                    }
+                };
+        }
+    }
+
+    private static boolean SETTINGS_LOGGED = !ENABLE_LOGS;
+
+    private static void logSettings(final String reClass) {
+        // log information at startup
+        if (SETTINGS_LOGGED) {
+            return;
+        }
+        SETTINGS_LOGGED = true;
+
+        String refType;
+        switch (REF_TYPE) {
+            default:
+            case ReentrantContextProvider.REF_HARD:
+                refType = "hard";
+                break;
+            case ReentrantContextProvider.REF_SOFT:
+                refType = "soft";
+                break;
+            case ReentrantContextProvider.REF_WEAK:
+                refType = "weak";
+                break;
+        }
+
+        logInfo("=========================================================="
+                + "=====================");
+
+        logInfo("Marlin software rasterizer           = ENABLED");
+        logInfo("Version                              = ["
+                + Version.getVersion() + "]");
+        logInfo("sun.java2d.renderer                  = "
+                + reClass);
+        logInfo("sun.java2d.renderer.useThreadLocal   = "
+                + USE_THREAD_LOCAL);
+        logInfo("sun.java2d.renderer.useRef           = "
+                + refType);
+
+        logInfo("sun.java2d.renderer.edges            = "
+                + MarlinConst.INITIAL_EDGES_COUNT);
+        logInfo("sun.java2d.renderer.pixelsize        = "
+                + MarlinConst.INITIAL_PIXEL_DIM);
+
+        logInfo("sun.java2d.renderer.subPixel_log2_X  = "
+                + MarlinConst.SUBPIXEL_LG_POSITIONS_X);
+        logInfo("sun.java2d.renderer.subPixel_log2_Y  = "
+                + MarlinConst.SUBPIXEL_LG_POSITIONS_Y);
+
+        logInfo("sun.java2d.renderer.tileSize_log2    = "
+                + MarlinConst.TILE_H_LG);
+        logInfo("sun.java2d.renderer.tileWidth_log2   = "
+                + MarlinConst.TILE_W_LG);
+        logInfo("sun.java2d.renderer.blockSize_log2   = "
+                + MarlinConst.BLOCK_SIZE_LG);
+
+        // RLE / blockFlags settings
+
+        logInfo("sun.java2d.renderer.forceRLE         = "
+                + MarlinProperties.isForceRLE());
+        logInfo("sun.java2d.renderer.forceNoRLE       = "
+                + MarlinProperties.isForceNoRLE());
+        logInfo("sun.java2d.renderer.useTileFlags     = "
+                + MarlinProperties.isUseTileFlags());
+        logInfo("sun.java2d.renderer.useTileFlags.useHeuristics = "
+                + MarlinProperties.isUseTileFlagsWithHeuristics());
+        logInfo("sun.java2d.renderer.rleMinWidth      = "
+                + MarlinCache.RLE_MIN_WIDTH);
+
+        // optimisation parameters
+        logInfo("sun.java2d.renderer.useSimplifier    = "
+                + MarlinConst.USE_SIMPLIFIER);
+
+        // debugging parameters
+        logInfo("sun.java2d.renderer.doStats          = "
+                + MarlinConst.DO_STATS);
+        logInfo("sun.java2d.renderer.doMonitors       = "
+                + MarlinConst.DO_MONITORS);
+        logInfo("sun.java2d.renderer.doChecks         = "
+                + MarlinConst.DO_CHECKS);
+
+        // logging parameters
+        logInfo("sun.java2d.renderer.useLogger        = "
+                + MarlinConst.USE_LOGGER);
+        logInfo("sun.java2d.renderer.logCreateContext = "
+                + MarlinConst.LOG_CREATE_CONTEXT);
+        logInfo("sun.java2d.renderer.logUnsafeMalloc  = "
+                + MarlinConst.LOG_UNSAFE_MALLOC);
+
+        // quality settings
+        logInfo("sun.java2d.renderer.cubic_dec_d2     = "
+                + MarlinProperties.getCubicDecD2());
+        logInfo("sun.java2d.renderer.cubic_inc_d1     = "
+                + MarlinProperties.getCubicIncD1());
+        logInfo("sun.java2d.renderer.quad_dec_d2      = "
+                + MarlinProperties.getQuadDecD2());
+
+        logInfo("Renderer settings:");
+        logInfo("CUB_DEC_BND  = " + DRenderer.CUB_DEC_BND);
+        logInfo("CUB_INC_BND  = " + DRenderer.CUB_INC_BND);
+        logInfo("QUAD_DEC_BND = " + DRenderer.QUAD_DEC_BND);
+
+        logInfo("INITIAL_EDGES_CAPACITY               = "
+                + MarlinConst.INITIAL_EDGES_CAPACITY);
+        logInfo("INITIAL_CROSSING_COUNT               = "
+                + DRenderer.INITIAL_CROSSING_COUNT);
+
+        logInfo("=========================================================="
+                + "=====================");
+    }
+
+    /**
+     * Get the DRendererContext instance dedicated to the current thread
+     * @return DRendererContext instance
+     */
+    @SuppressWarnings({"unchecked"})
+    static DRendererContext getRendererContext() {
+        final DRendererContext rdrCtx = RDR_CTX_PROVIDER.acquire();
+        if (DO_MONITORS) {
+            rdrCtx.stats.mon_pre_getAATileGenerator.start();
+        }
+        return rdrCtx;
+    }
+
+    /**
+     * Reset and return the given DRendererContext instance for reuse
+     * @param rdrCtx DRendererContext instance
+     */
+    static void returnRendererContext(final DRendererContext rdrCtx) {
+        rdrCtx.dispose();
+
+        if (DO_MONITORS) {
+            rdrCtx.stats.mon_pre_getAATileGenerator.stop();
+        }
+        RDR_CTX_PROVIDER.release(rdrCtx);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/DPathConsumer2D.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+public interface DPathConsumer2D {
+    /**
+     * @see java.awt.geom.Path2D.Double#moveTo
+     */
+    public void moveTo(double x, double y);
+
+    /**
+     * @see java.awt.geom.Path2D.Double#lineTo
+     */
+    public void lineTo(double x, double y);
+
+    /**
+     * @see java.awt.geom.Path2D.Double#quadTo
+     */
+    public void quadTo(double x1, double y1,
+                       double x2, double y2);
+
+    /**
+     * @see java.awt.geom.Path2D.Double#curveTo
+     */
+    public void curveTo(double x1, double y1,
+                        double x2, double y2,
+                        double x3, double y3);
+
+    /**
+     * @see java.awt.geom.Path2D.Double#closePath
+     */
+    public void closePath();
+
+    /**
+     * Called after the last segment of the last subpath when the
+     * iteration of the path segments is completely done.  This
+     * method serves to trigger the end of path processing in the
+     * consumer that would normally be triggered when a
+     * {@link java.awt.geom.PathIterator PathIterator}
+     * returns {@code true} from its {@code done} method.
+     */
+    public void pathDone();
+
+    /**
+     * If a given PathConsumer performs all or most of its work
+     * natively then it can return a (non-zero) pointer to a
+     * native function vector that defines C functions for all
+     * of the above methods.
+     * The specific pointer it returns is a pointer to a
+     * PathConsumerVec structure as defined in the include file
+     * src/share/native/sun/java2d/pipe/PathConsumer2D.h
+     * @return a native pointer to a PathConsumerVec structure.
+     */
+    public long getNativeConsumer();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/DRenderer.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,1526 @@
+/*
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import static sun.java2d.marlin.OffHeapArray.SIZE_INT;
+import jdk.internal.misc.Unsafe;
+
+final class DRenderer implements DPathConsumer2D, MarlinRenderer {
+
+    static final boolean DISABLE_RENDER = false; // if true, _endRendering() returns at once
+
+    static final boolean ENABLE_BLOCK_FLAGS = MarlinProperties.isUseTileFlags();
+    static final boolean ENABLE_BLOCK_FLAGS_HEURISTICS = MarlinProperties.isUseTileFlagsWithHeuristics();
+
+    private static final int ALL_BUT_LSB = 0xFFFFFFFE; // mask clearing the lowest bit
+    private static final int ERR_STEP_MAX = 0x7FFFFFFF; // = 2^31 - 1
+
+    private static final double POWER_2_TO_32 = 0x1.0p32d; // = 2^32 (fixed-point scale used by addLine)
+
+    // use double to make tosubpix methods faster (no int to double conversion)
+    static final double SUBPIXEL_SCALE_X = SUBPIXEL_POSITIONS_X;
+    static final double SUBPIXEL_SCALE_Y = SUBPIXEL_POSITIONS_Y;
+    static final int SUBPIXEL_MASK_X = SUBPIXEL_POSITIONS_X - 1;
+    static final int SUBPIXEL_MASK_Y = SUBPIXEL_POSITIONS_Y - 1;
+
+    // number of subpixels corresponding to a tile line
+    private static final int SUBPIXEL_TILE
+        = TILE_H << SUBPIXEL_LG_POSITIONS_Y;
+
+    // 2048 (pixelSize) pixels (height) x 8 subpixels = 64K
+    static final int INITIAL_BUCKET_ARRAY
+        = INITIAL_PIXEL_DIM * SUBPIXEL_POSITIONS_Y;
+
+    // crossing capacity = edges count / 4 ~ 1024
+    static final int INITIAL_CROSSING_COUNT = INITIAL_EDGES_COUNT >> 2;
+
+    public static final int WIND_EVEN_ODD = 0;
+    public static final int WIND_NON_ZERO = 1;
+
+    // Edge record layout, common to all types of input path segments:
+    // byte OFFSETs of the fields within one edge record;
+    // every field is an int (SIZE_INT bytes):
+    public static final long OFF_CURX_OR  = 0; // current x, orientation in the lowest bit
+    public static final long OFF_ERROR    = OFF_CURX_OR  + SIZE_INT;
+    public static final long OFF_BUMP_X   = OFF_ERROR    + SIZE_INT;
+    public static final long OFF_BUMP_ERR = OFF_BUMP_X   + SIZE_INT;
+    public static final long OFF_NEXT     = OFF_BUMP_ERR + SIZE_INT; // next edge of the same bucket
+    public static final long OFF_YMAX     = OFF_NEXT     + SIZE_INT; // y max (exclusive)
+
+    // size of one edge in bytes
+    public static final int SIZEOF_EDGE_BYTES = (int)(OFF_YMAX + SIZE_INT);
+
+    // curve break into lines
+    // cubic error in subpixels to decrement step
+    private static final double CUB_DEC_ERR_SUBPIX
+        = MarlinProperties.getCubicDecD2() * (NORM_SUBPIXELS / 8.0d); // 1 pixel
+    // cubic error in subpixels to increment step
+    private static final double CUB_INC_ERR_SUBPIX
+        = MarlinProperties.getCubicIncD1() * (NORM_SUBPIXELS / 8.0d); // 0.4 pixel
+
+    // TestNonAARasterization (JDK-8170879): cubics
+    // bad paths (59294/100000 == 59,29%, 94335 bad pixels (avg = 1,59), 3966 warnings (avg = 0,07)
+
+    // cubic bound length to decrement step (halve the step at or above it)
+    public static final double CUB_DEC_BND
+        = 8.0d * CUB_DEC_ERR_SUBPIX;
+    // cubic bound length to increment step (double the step at or below it)
+    public static final double CUB_INC_BND
+        = 8.0d * CUB_INC_ERR_SUBPIX;
+
+    // cubic countlg
+    public static final int CUB_COUNT_LG = 2;
+    // cubic count = 2^countlg
+    private static final int CUB_COUNT = 1 << CUB_COUNT_LG;
+    // cubic count^2 = 4^countlg
+    private static final int CUB_COUNT_2 = 1 << (2 * CUB_COUNT_LG);
+    // cubic count^3 = 8^countlg
+    private static final int CUB_COUNT_3 = 1 << (3 * CUB_COUNT_LG);
+    // cubic dt = 1 / count
+    private static final double CUB_INV_COUNT = 1.0d / CUB_COUNT;
+    // cubic dt^2 = 1 / count^2 = 1 / 4^countlg
+    private static final double CUB_INV_COUNT_2 = 1.0d / CUB_COUNT_2;
+    // cubic dt^3 = 1 / count^3 = 1 / 8^countlg
+    private static final double CUB_INV_COUNT_3 = 1.0d / CUB_COUNT_3;
+
+    // quad break into lines
+    // quadratic error in subpixels
+    private static final double QUAD_DEC_ERR_SUBPIX
+        = MarlinProperties.getQuadDecD2() * (NORM_SUBPIXELS / 8.0d); // 0.5 pixel
+
+    // TestNonAARasterization (JDK-8170879): quads
+    // bad paths (62916/100000 == 62,92%, 103818 bad pixels (avg = 1,65), 6514 warnings (avg = 0,10)
+
+    // quadratic bound length to decrement step (halve the step at or above it)
+    public static final double QUAD_DEC_BND
+        = 8.0d * QUAD_DEC_ERR_SUBPIX;
+
+//////////////////////////////////////////////////////////////////////////////
+//  SCAN LINE
+//////////////////////////////////////////////////////////////////////////////
+    // crossings, i.e. subpixel edge x coordinates
+    private int[] crossings;
+    // auxiliary storage for crossings (merge sort)
+    private int[] aux_crossings;
+
+    // indices into the segment pointer lists. They indicate the "active"
+    // sublist in the segment lists (the portion of the list that contains
+    // all the segments that cross the next scan line).
+    private int edgeCount; // read as the initial crossing count by _endRendering()
+    private int[] edgePtrs; // byte offsets of the active edges in the off-heap edges array
+    // auxiliary storage for edge pointers (merge sort)
+    private int[] aux_edgePtrs;
+
+    // max used for both edgePtrs and crossings (stats only)
+    private int activeEdgeMaxUsed;
+
+    // crossings ref (dirty: created by rdrCtx.newDirtyIntArrayRef)
+    private final IntArrayCache.Reference crossings_ref;
+    // edgePtrs ref (dirty)
+    private final IntArrayCache.Reference edgePtrs_ref;
+    // merge sort initial arrays (large enough to satisfy most usages) (1024)
+    // aux_crossings ref (dirty)
+    private final IntArrayCache.Reference aux_crossings_ref;
+    // aux_edgePtrs ref (dirty)
+    private final IntArrayCache.Reference aux_edgePtrs_ref;
+
+//////////////////////////////////////////////////////////////////////////////
+//  EDGE LIST
+//////////////////////////////////////////////////////////////////////////////
+    private int edgeMinY = Integer.MAX_VALUE; // reset by init(); lowered by addLine()
+    private int edgeMaxY = Integer.MIN_VALUE; // reset by init(); raised by addLine()
+    private double edgeMinX = Double.POSITIVE_INFINITY; // reset by init(); lowered by addLine()
+    private double edgeMaxX = Double.NEGATIVE_INFINITY; // reset by init(); raised by addLine()
+
+    // edges [ints] stored in off-heap memory
+    private final OffHeapArray edges;
+
+    private int[] edgeBuckets; // per scan line: byte offset of the first edge of its linked list
+    private int[] edgeBucketCounts; // 2*newedges + (1 if pruning needed)
+    // used range for edgeBuckets / edgeBucketCounts
+    private int buckets_minY;
+    private int buckets_maxY;
+
+    // edgeBuckets ref (clean: created by rdrCtx.newCleanIntArrayRef)
+    private final IntArrayCache.Reference edgeBuckets_ref;
+    // edgeBucketCounts ref (clean)
+    private final IntArrayCache.Reference edgeBucketCounts_ref;
+
+    // Flattens the quadratic curve c (from x0,y0 to x2,y2) into line segments
+    // using forward differencing: the step count is doubled until the second
+    // difference is below QUAD_DEC_BND; every segment is passed to addLine().
+    private void quadBreakIntoLinesAndAdd(double x0, double y0,
+                                          final DCurve c,
+                                          final double x2, final double y2)
+    {
+        int count = 1; // dt = 1 / count
+
+        // maximum(ddX|Y) = norm(dbx, dby) * dt^2 (= 1)
+        double maxDD = Math.abs(c.dbx) + Math.abs(c.dby);
+
+        final double _DEC_BND = QUAD_DEC_BND;
+
+        while (maxDD >= _DEC_BND) {
+            // divide step by half:
+            maxDD /= 4.0d; // error divided by 2^2 = 4
+
+            count <<= 1;
+            if (DO_STATS) {
+                rdrCtx.stats.stat_rdr_quadBreak_dec.add(count);
+            }
+        }
+
+        int nL = 0; // line count
+        if (count > 1) {
+            final double icount = 1.0d / count; // dt
+            final double icount2 = icount * icount; // dt^2
+
+            final double ddx = c.dbx * icount2; // constant second difference
+            final double ddy = c.dby * icount2;
+            double dx = c.bx * icount2 + c.cx * icount; // first difference at t = 0
+            double dy = c.by * icount2 + c.cy * icount;
+
+            double x1, y1;
+
+            while (--count > 0) { // emits (count - 1) intermediate segments
+                x1 = x0 + dx;
+                dx += ddx;
+                y1 = y0 + dy;
+                dy += ddy;
+
+                addLine(x0, y0, x1, y1);
+
+                if (DO_STATS) { nL++; }
+                x0 = x1;
+                y0 = y1;
+            }
+        }
+        addLine(x0, y0, x2, y2); // last segment ends exactly on the given end point
+
+        if (DO_STATS) {
+            rdrCtx.stats.stat_rdr_quadBreak.add(nL + 1);
+        }
+    }
+
+    // x0, y0 and x3,y3 are the endpoints of the curve. We could compute these
+    // using c.xat(0),c.yat(0) and c.xat(1),c.yat(1), but this might introduce
+    // numerical errors, and our callers already have the exact values.
+    // Another alternative would be to pass all the control points, and call
+    // c.set here, but then too many numbers are passed around.
+    private void curveBreakIntoLinesAndAdd(double x0, double y0,
+                                           final DCurve c,
+                                           final double x3, final double y3)
+    {
+        int count           = CUB_COUNT; // remaining steps at the current step size
+        final double icount  = CUB_INV_COUNT;   // dt
+        final double icount2 = CUB_INV_COUNT_2; // dt^2
+        final double icount3 = CUB_INV_COUNT_3; // dt^3
+
+        // the dx and dy refer to forward differencing variables, not the last
+        // coefficients of the "points" polynomial
+        double dddx, dddy, ddx, ddy, dx, dy;
+        dddx = 2.0d * c.dax * icount3;
+        dddy = 2.0d * c.day * icount3;
+        ddx = dddx + c.dbx * icount2;
+        ddy = dddy + c.dby * icount2;
+        dx = c.ax * icount3 + c.bx * icount2 + c.cx * icount;
+        dy = c.ay * icount3 + c.by * icount2 + c.cy * icount;
+
+        // x1, y1 walk along the curve; x0, y0 is the start of the current line
+        double x1 = x0, y1 = y0;
+        int nL = 0; // line count
+
+        final double _DEC_BND = CUB_DEC_BND;
+        final double _INC_BND = CUB_INC_BND;
+
+        while (count > 0) {
+            // divide step by half:
+            while (Math.abs(ddx) + Math.abs(ddy) >= _DEC_BND) {
+                dddx /= 8.0d;
+                dddy /= 8.0d;
+                ddx = ddx / 4.0d - dddx;
+                ddy = ddy / 4.0d - dddy;
+                dx = (dx - ddx) / 2.0d;
+                dy = (dy - ddy) / 2.0d;
+
+                count <<= 1; // twice as many (smaller) steps remain
+                if (DO_STATS) {
+                    rdrCtx.stats.stat_rdr_curveBreak_dec.add(count);
+                }
+            }
+
+            // double step:
+            // can only do this on even "count" values, because we must divide count by 2
+            while (count % 2 == 0
+                   && Math.abs(dx) + Math.abs(dy) <= _INC_BND)
+            {
+                dx = 2.0d * dx + ddx;
+                dy = 2.0d * dy + ddy;
+                ddx = 4.0d * (ddx + dddx);
+                ddy = 4.0d * (ddy + dddy);
+                dddx *= 8.0d;
+                dddy *= 8.0d;
+
+                count >>= 1; // half as many (larger) steps remain
+                if (DO_STATS) {
+                    rdrCtx.stats.stat_rdr_curveBreak_inc.add(count);
+                }
+            }
+            if (--count > 0) {
+                x1 += dx;
+                dx += ddx;
+                ddx += dddx;
+                y1 += dy;
+                dy += ddy;
+                ddy += dddy;
+            } else {
+                x1 = x3; // last step: use the exact end point given by the caller
+                y1 = y3;
+            }
+
+            addLine(x0, y0, x1, y1);
+
+            if (DO_STATS) { nL++; }
+            x0 = x1;
+            y0 = y1;
+        }
+        if (DO_STATS) {
+            rdrCtx.stats.stat_rdr_curveBreak.add(nL);
+        }
+    }
+
+    private void addLine(double x1, double y1, double x2, double y2) { // subpixel coordinates
+        if (DO_MONITORS) {
+            rdrCtx.stats.mon_rdr_addLine.start();
+        }
+        if (DO_STATS) {
+            rdrCtx.stats.stat_rdr_addLine.add(1);
+        }
+        int or = 1; // orientation of the line. 1 if y increases, 0 otherwise.
+        if (y2 < y1) { // swap the end points so that y1 <= y2
+            or = 0;
+            double tmp = y2;
+            y2 = y1;
+            y1 = tmp;
+            tmp = x2;
+            x2 = x1;
+            x1 = tmp;
+        }
+
+        // convert subpixel coordinates [double] into pixel positions [int]
+
+        // The index of the pixel that holds the next HPC is at ceil(trueY - 0.5)
+        // Since y1 and y2 are biased by -0.5 in tosubpixy(), this is simply
+        // ceil(y1) or ceil(y2)
+        // upper integer (inclusive)
+        final int firstCrossing = FloatMath.max(FloatMath.ceil_int(y1), boundsMinY);
+
+        // note: use boundsMaxY (last Y exclusive) to compute correct coverage
+        // upper integer (exclusive)
+        final int lastCrossing  = FloatMath.min(FloatMath.ceil_int(y2), boundsMaxY);
+
+        /* skip horizontal lines in pixel space and clip edges
+           out of y range [boundsMinY; boundsMaxY] */
+        if (firstCrossing >= lastCrossing) {
+            if (DO_MONITORS) {
+                rdrCtx.stats.mon_rdr_addLine.stop();
+            }
+            if (DO_STATS) {
+                rdrCtx.stats.stat_rdr_addLine_skip.add(1);
+            }
+            return;
+        }
+
+        // edge min/max X/Y are in subpixel space (half-open interval):
+        // note: Use integer crossings to ensure consistent range within
+        // edgeBuckets / edgeBucketCounts arrays in case of NaN values (int = 0)
+        if (firstCrossing < edgeMinY) {
+            edgeMinY = firstCrossing;
+        }
+        if (lastCrossing > edgeMaxY) {
+            edgeMaxY = lastCrossing;
+        }
+
+        final double slope = (x1 - x2) / (y1 - y2); // dx / dy
+
+        if (slope >= 0.0d) { // <==> x1 < x2
+            if (x1 < edgeMinX) {
+                edgeMinX = x1;
+            }
+            if (x2 > edgeMaxX) {
+                edgeMaxX = x2;
+            }
+        } else {
+            if (x2 < edgeMinX) {
+                edgeMinX = x2;
+            }
+            if (x1 > edgeMaxX) {
+                edgeMaxX = x1;
+            }
+        }
+
+        // local variables for performance:
+        final int _SIZEOF_EDGE_BYTES = SIZEOF_EDGE_BYTES;
+
+        final OffHeapArray _edges = edges;
+
+        // get free pointer (ie length in bytes)
+        final int edgePtr = _edges.used;
+
+        // use subtraction to avoid integer overflow:
+        if (_edges.length - edgePtr < _SIZEOF_EDGE_BYTES) {
+            // suppose _edges.length > _SIZEOF_EDGE_BYTES
+            // so doubling size is enough to add needed bytes
+            // note: throw IOOB if neededSize > 2Gb:
+            final long edgeNewSize = ArrayCacheConst.getNewLargeSize(
+                                        _edges.length,
+                                        edgePtr + _SIZEOF_EDGE_BYTES);
+
+            if (DO_STATS) {
+                rdrCtx.stats.stat_rdr_edges_resizes.add(edgeNewSize);
+            }
+            _edges.resize(edgeNewSize);
+        }
+
+
+        final Unsafe _unsafe = OffHeapArray.UNSAFE;
+        final long SIZE_INT = 4L; // local constant shadowing the statically imported SIZE_INT
+        long addr   = _edges.address + edgePtr; // absolute address of the new edge record
+
+        // The x value must be bumped up to its position at the next HPC we will evaluate.
+        // "firstcrossing" is the (sub)pixel number where the next crossing occurs
+        // thus, the actual coordinate of the next HPC is "firstcrossing + 0.5"
+        // so the Y distance we cover is "firstcrossing + 0.5 - trueY".
+        // Note that since y1 (and y2) are already biased by -0.5 in tosubpixy(), we have
+        // y1 = trueY - 0.5
+        // trueY = y1 + 0.5
+        // firstcrossing + 0.5 - trueY = firstcrossing + 0.5 - (y1 + 0.5)
+        //                             = firstcrossing - y1
+        // The x coordinate at that HPC is then:
+        // x1_intercept = x1 + (firstcrossing - y1) * slope
+        // The next VPC is then given by:
+        // VPC index = ceil(x1_intercept - 0.5), or alternately
+        // VPC index = floor(x1_intercept - 0.5 + 1 - epsilon)
+        // epsilon is hard to pin down in floating point, but easy in fixed point, so if
+        // we convert to fixed point then these operations get easier:
+        // long x1_fixed = x1_intercept * 2^32;  (fixed point 32.32 format)
+        // curx = next VPC = fixed_floor(x1_fixed - 2^31 + 2^32 - 1)
+        //                 = fixed_floor(x1_fixed + 2^31 - 1)
+        //                 = fixed_floor(x1_fixed + 0x7FFFFFFF)
+        // and error       = fixed_fract(x1_fixed + 0x7FFFFFFF)
+        final double x1_intercept = x1 + (firstCrossing - y1) * slope;
+
+        // inlined scalb(x1_intercept, 32):
+        final long x1_fixed_biased = ((long) (POWER_2_TO_32 * x1_intercept))
+                                     + 0x7FFFFFFFL;
+        // curx:
+        // last bit corresponds to the orientation
+        _unsafe.putInt(addr, (((int) (x1_fixed_biased >> 31L)) & ALL_BUT_LSB) | or);
+        addr += SIZE_INT;
+        _unsafe.putInt(addr,  ((int)  x1_fixed_biased) >>> 1); // error
+        addr += SIZE_INT;
+
+        // inlined scalb(slope, 32):
+        final long slope_fixed = (long) (POWER_2_TO_32 * slope);
+
+        // last bit set to 0 to keep orientation:
+        _unsafe.putInt(addr, (((int) (slope_fixed >> 31L)) & ALL_BUT_LSB)); // bump x
+        addr += SIZE_INT;
+        _unsafe.putInt(addr,  ((int)  slope_fixed) >>> 1); // bump error
+        addr += SIZE_INT;
+
+        final int[] _edgeBuckets      = edgeBuckets;
+        final int[] _edgeBucketCounts = edgeBucketCounts;
+
+        final int _boundsMinY = boundsMinY;
+
+        // each bucket is a linked list. this method adds ptr to the
+        // start of the "bucket"th linked list.
+        final int bucketIdx = firstCrossing - _boundsMinY;
+
+        // pointer from bucket
+        _unsafe.putInt(addr, _edgeBuckets[bucketIdx]); // next: previous head of this bucket
+        addr += SIZE_INT;
+        // y max (exclusive)
+        _unsafe.putInt(addr,  lastCrossing);
+
+        // Update buckets:
+        // directly the edge struct "pointer"
+        _edgeBuckets[bucketIdx]       = edgePtr;
+        _edgeBucketCounts[bucketIdx] += 2; // 1 << 1
+        // last bit means edge end
+        _edgeBucketCounts[lastCrossing - _boundsMinY] |= 0x1;
+
+        // update free pointer (ie length in bytes)
+        _edges.used += _SIZEOF_EDGE_BYTES;
+
+        if (DO_MONITORS) {
+            rdrCtx.stats.mon_rdr_addLine.stop();
+        }
+    }
+
+// END EDGE LIST
+//////////////////////////////////////////////////////////////////////////////
+
+    // Cache to store RLE-encoded coverage mask of the current primitive
+    final MarlinCache cache;
+
+    // Bounds of the drawing region, at subpixel precision (set by init()).
+    private int boundsMinX, boundsMinY, boundsMaxX, boundsMaxY;
+
+    // Current winding rule (WIND_EVEN_ODD or WIND_NON_ZERO)
+    private int windingRule;
+
+    // Current drawing position, i.e., final point of last segment (subpixel coordinates)
+    private double x0, y0;
+
+    // Position of most recent 'moveTo' command (subpixel coordinates)
+    private double sx0, sy0;
+
+    // per-thread renderer context
+    final DRendererContext rdrCtx;
+    // dirty curve (shared instance taken from rdrCtx.curve)
+    private final DCurve curve;
+
+    // clean alpha array (zero filled)
+    private int[] alphaLine;
+
+    // alphaLine ref (clean)
+    private final IntArrayCache.Reference alphaLine_ref;
+
+    private boolean enableBlkFlags = false;
+    private boolean prevUseBlkFlags = false;
+
+    /* block flags (0|1) */
+    private int[] blkFlags;
+
+    // blkFlags ref (clean)
+    private final IntArrayCache.Reference blkFlags_ref;
+
+    DRenderer(final DRendererContext rdrCtx) { // binds the renderer to its context and initial arrays
+        this.rdrCtx = rdrCtx;
+
+        this.edges = rdrCtx.newOffHeapArray(INITIAL_EDGES_CAPACITY); // 96K
+
+        this.curve = rdrCtx.curve; // shared with the context, not a private copy
+
+        edgeBuckets_ref      = rdrCtx.newCleanIntArrayRef(INITIAL_BUCKET_ARRAY); // 64K
+        edgeBucketCounts_ref = rdrCtx.newCleanIntArrayRef(INITIAL_BUCKET_ARRAY); // 64K
+
+        edgeBuckets      = edgeBuckets_ref.initial;
+        edgeBucketCounts = edgeBucketCounts_ref.initial;
+
+        // 2048 (pixelsize) pixel large
+        alphaLine_ref = rdrCtx.newCleanIntArrayRef(INITIAL_AA_ARRAY); // 8K
+        alphaLine     = alphaLine_ref.initial;
+
+        this.cache = rdrCtx.cache;
+
+        crossings_ref     = rdrCtx.newDirtyIntArrayRef(INITIAL_CROSSING_COUNT); // 2K
+        aux_crossings_ref = rdrCtx.newDirtyIntArrayRef(INITIAL_CROSSING_COUNT); // 2K
+        edgePtrs_ref      = rdrCtx.newDirtyIntArrayRef(INITIAL_CROSSING_COUNT); // 2K
+        aux_edgePtrs_ref  = rdrCtx.newDirtyIntArrayRef(INITIAL_CROSSING_COUNT); // 2K
+
+        crossings     = crossings_ref.initial;
+        aux_crossings = aux_crossings_ref.initial;
+        edgePtrs      = edgePtrs_ref.initial;
+        aux_edgePtrs  = aux_edgePtrs_ref.initial;
+
+        blkFlags_ref = rdrCtx.newCleanIntArrayRef(INITIAL_ARRAY); // 1K = 1 tile line
+        blkFlags     = blkFlags_ref.initial;
+    }
+
+    DRenderer init(final int pix_boundsX, final int pix_boundsY,
+                  final int pix_boundsWidth, final int pix_boundsHeight,
+                  final int windingRule)
+    { // takes the clip in pixels, stores it in subpixels and resets the edge state
+        this.windingRule = windingRule;
+
+        // bounds as half-open intervals: minX <= x < maxX and minY <= y < maxY
+        this.boundsMinX =  pix_boundsX << SUBPIXEL_LG_POSITIONS_X;
+        this.boundsMaxX =
+            (pix_boundsX + pix_boundsWidth) << SUBPIXEL_LG_POSITIONS_X;
+        this.boundsMinY =  pix_boundsY << SUBPIXEL_LG_POSITIONS_Y;
+        this.boundsMaxY =
+            (pix_boundsY + pix_boundsHeight) << SUBPIXEL_LG_POSITIONS_Y;
+
+        if (DO_LOG_BOUNDS) {
+            MarlinUtils.logInfo("boundsXY = [" + boundsMinX + " ... "
+                                + boundsMaxX + "[ [" + boundsMinY + " ... "
+                                + boundsMaxY + "[");
+        }
+
+        // see addLine: ceil(boundsMaxY) => boundsMaxY + 1
+        // +1 for edgeBucketCounts
+        final int edgeBucketsLength = (boundsMaxY - boundsMinY) + 1;
+
+        if (edgeBucketsLength > INITIAL_BUCKET_ARRAY) { // initial arrays are too small
+            if (DO_STATS) {
+                rdrCtx.stats.stat_array_renderer_edgeBuckets
+                    .add(edgeBucketsLength);
+                rdrCtx.stats.stat_array_renderer_edgeBucketCounts
+                    .add(edgeBucketsLength);
+            }
+            edgeBuckets = edgeBuckets_ref.getArray(edgeBucketsLength);
+            edgeBucketCounts = edgeBucketCounts_ref.getArray(edgeBucketsLength);
+        }
+
+        edgeMinY = Integer.MAX_VALUE; // empty edge bounding box
+        edgeMaxY = Integer.MIN_VALUE;
+        edgeMinX = Double.POSITIVE_INFINITY;
+        edgeMaxX = Double.NEGATIVE_INFINITY;
+
+        // reset used mark:
+        edgeCount = 0;
+        activeEdgeMaxUsed = 0;
+        edges.used = 0;
+
+        return this; // fluent API
+    }
+
+    /**
+     * Disposes this renderer: returns its arrays and cleans up before this instance is reused
+     */
+    void dispose() {
+        if (DO_STATS) {
+            rdrCtx.stats.stat_rdr_activeEdges.add(activeEdgeMaxUsed);
+            rdrCtx.stats.stat_rdr_edges.add(edges.used);
+            rdrCtx.stats.stat_rdr_edges_count.add(edges.used / SIZEOF_EDGE_BYTES);
+            rdrCtx.stats.hist_rdr_edges_count.add(edges.used / SIZEOF_EDGE_BYTES);
+            rdrCtx.stats.totalOffHeap += edges.length;
+        }
+        // Return arrays:
+        crossings = crossings_ref.putArray(crossings);
+        aux_crossings = aux_crossings_ref.putArray(aux_crossings);
+
+        edgePtrs = edgePtrs_ref.putArray(edgePtrs);
+        aux_edgePtrs = aux_edgePtrs_ref.putArray(aux_edgePtrs);
+
+        alphaLine = alphaLine_ref.putArray(alphaLine, 0, 0); // already zero filled
+        blkFlags  = blkFlags_ref.putArray(blkFlags, 0, 0); // already zero filled
+
+        if (edgeMinY != Integer.MAX_VALUE) { // at least one edge was added since init()
+            // if context is marked as DIRTY:
+            if (rdrCtx.dirty) {
+                // may happen if an exception is thrown in the pipeline processing:
+                // clear completely buckets arrays:
+                buckets_minY = 0;
+                buckets_maxY = boundsMaxY - boundsMinY;
+            }
+            // clear only used part
+            edgeBuckets = edgeBuckets_ref.putArray(edgeBuckets, buckets_minY,
+                                                                buckets_maxY);
+            edgeBucketCounts = edgeBucketCounts_ref.putArray(edgeBucketCounts,
+                                                             buckets_minY,
+                                                             buckets_maxY + 1);
+        } else {
+            // unused arrays
+            edgeBuckets = edgeBuckets_ref.putArray(edgeBuckets, 0, 0);
+            edgeBucketCounts = edgeBucketCounts_ref.putArray(edgeBucketCounts, 0, 0);
+        }
+
+        // At last: resize back off-heap edges to initial size
+        if (edges.length != INITIAL_EDGES_CAPACITY) {
+            // note: may throw OOME:
+            edges.resize(INITIAL_EDGES_CAPACITY);
+        }
+        if (DO_CLEAN_DIRTY) {
+            // Force zero-fill dirty arrays:
+            edges.fill(BYTE_0);
+        }
+        if (DO_MONITORS) {
+            rdrCtx.stats.mon_rdr_endRendering.stop();
+        }
+        // recycle the RendererContext instance
+        DMarlinRenderingEngine.returnRendererContext(rdrCtx);
+    }
+
+    private static double tosubpixx(final double pix_x) { // pixel x -> subpixel x
+        return SUBPIXEL_SCALE_X * pix_x; // plain scaling, no bias (unlike tosubpixy)
+    }
+
+    private static double tosubpixy(final double pix_y) { // pixel y -> biased subpixel y
+        // shift y by -0.5 so that addLine() can use ceil(y) instead of ceil(y - 0.5):
+        return SUBPIXEL_SCALE_Y * pix_y - 0.5d;
+    }
+
+    @Override
+    public void moveTo(double pix_x0, double pix_y0) { // pixel coordinates
+        closePath(); // adds the closing edge of the previous subpath first
+        final double sx = tosubpixx(pix_x0);
+        final double sy = tosubpixy(pix_y0);
+        this.sx0 = sx; // new subpath start
+        this.sy0 = sy;
+        this.x0 = sx; // new current point
+        this.y0 = sy;
+    }
+
+    @Override
+    public void lineTo(double pix_x1, double pix_y1) { // pixel coordinates
+        final double x1 = tosubpixx(pix_x1);
+        final double y1 = tosubpixy(pix_y1);
+        addLine(x0, y0, x1, y1); // edge from the current point, in subpixel space
+        x0 = x1; // the end point becomes the current point
+        y0 = y1;
+    }
+
+    @Override
+    public void curveTo(double x1, double y1,
+                        double x2, double y2,
+                        double x3, double y3)
+    { // arguments are pixel coordinates; the current point x0, y0 is already in subpixels
+        final double xe = tosubpixx(x3);
+        final double ye = tosubpixy(y3);
+        curve.set(x0, y0, tosubpixx(x1), tosubpixy(y1),
+                          tosubpixx(x2), tosubpixy(y2), xe, ye);
+        curveBreakIntoLinesAndAdd(x0, y0, curve, xe, ye); // flattens the cubic into edges
+        x0 = xe; // the end point becomes the current point
+        y0 = ye;
+    }
+
+    @Override
+    public void quadTo(double x1, double y1, double x2, double y2) { // pixel coordinates
+        final double xe = tosubpixx(x2);
+        final double ye = tosubpixy(y2);
+        curve.set(x0, y0, tosubpixx(x1), tosubpixy(y1), xe, ye);
+        quadBreakIntoLinesAndAdd(x0, y0, curve, xe, ye); // flattens the quad into edges
+        x0 = xe; // the end point becomes the current point
+        y0 = ye;
+    }
+
+    @Override
+    public void closePath() {
+        addLine(x0, y0, sx0, sy0); // edge from the current point back to the last moveTo point
+        x0 = sx0; // the subpath start becomes the current point
+        y0 = sy0;
+    }
+
+    @Override
+    public void pathDone() {
+        closePath(); // only closes the last subpath; no other work is done here
+    }
+
+    @Override
+    public long getNativeConsumer() { // never returns normally
+        throw new InternalError("Renderer does not use a native consumer.");
+    }
+
+    private void _endRendering(final int ymin, final int ymax) {
+        if (DISABLE_RENDER) {
+            return;
+        }
+
+        // Get X bounds as true pixel boundaries to compute correct pixel coverage:
+        final int bboxx0 = bbox_spminX;
+        final int bboxx1 = bbox_spmaxX;
+
+        final boolean windingRuleEvenOdd = (windingRule == WIND_EVEN_ODD);
+
+        // Useful when processing tile line by tile line
+        final int[] _alpha = alphaLine;
+
+        // local vars (performance):
+        final MarlinCache _cache = cache;
+        final OffHeapArray _edges = edges;
+        final int[] _edgeBuckets = edgeBuckets;
+        final int[] _edgeBucketCounts = edgeBucketCounts;
+
+        int[] _crossings = this.crossings;
+        int[] _edgePtrs  = this.edgePtrs;
+
+        // merge sort auxiliary storage:
+        int[] _aux_crossings = this.aux_crossings;
+        int[] _aux_edgePtrs  = this.aux_edgePtrs;
+
+        // copy constants:
+        final long _OFF_ERROR    = OFF_ERROR;
+        final long _OFF_BUMP_X   = OFF_BUMP_X;
+        final long _OFF_BUMP_ERR = OFF_BUMP_ERR;
+
+        final long _OFF_NEXT     = OFF_NEXT;
+        final long _OFF_YMAX     = OFF_YMAX;
+
+        final int _ALL_BUT_LSB   = ALL_BUT_LSB;
+        final int _ERR_STEP_MAX  = ERR_STEP_MAX;
+
+        // unsafe I/O:
+        final Unsafe _unsafe = OffHeapArray.UNSAFE;
+        final long    addr0  = _edges.address;
+        long addr;
+        final int _SUBPIXEL_LG_POSITIONS_X = SUBPIXEL_LG_POSITIONS_X;
+        final int _SUBPIXEL_LG_POSITIONS_Y = SUBPIXEL_LG_POSITIONS_Y;
+        final int _SUBPIXEL_MASK_X = SUBPIXEL_MASK_X;
+        final int _SUBPIXEL_MASK_Y = SUBPIXEL_MASK_Y;
+        final int _SUBPIXEL_POSITIONS_X = SUBPIXEL_POSITIONS_X;
+
+        final int _MIN_VALUE = Integer.MIN_VALUE;
+        final int _MAX_VALUE = Integer.MAX_VALUE;
+
+        // Now we iterate through the scanlines. We must tell emitRow the coord
+        // of the first non-transparent pixel, so we must keep accumulators for
+        // the first and last pixels of the section of the current pixel row
+        // that we will emit.
+        // We also need to accumulate pix_bbox, but the iterator does it
+        // for us. We will just get the values from it once this loop is done
+        int minX = _MAX_VALUE;
+        int maxX = _MIN_VALUE;
+
+        int y = ymin;
+        int bucket = y - boundsMinY;
+
+        int numCrossings = this.edgeCount;
+        int edgePtrsLen = _edgePtrs.length;
+        int crossingsLen = _crossings.length;
+        int _arrayMaxUsed = activeEdgeMaxUsed;
+        int ptrLen = 0, newCount, ptrEnd;
+
+        int bucketcount, i, j, ecur;
+        int cross, lastCross;
+        int x0, x1, tmp, sum, prev, curx, curxo, crorientation, err;
+        int pix_x, pix_xmaxm1, pix_xmax;
+
+        int low, high, mid, prevNumCrossings;
+        boolean useBinarySearch;
+
+        final int[] _blkFlags = blkFlags;
+        final int _BLK_SIZE_LG = BLOCK_SIZE_LG;
+        final int _BLK_SIZE = BLOCK_SIZE;
+
+        final boolean _enableBlkFlagsHeuristics = ENABLE_BLOCK_FLAGS_HEURISTICS && this.enableBlkFlags;
+
+        // Use block flags if large pixel span and few crossings:
+        // ie mean(distance between crossings) is high
+        boolean useBlkFlags = this.prevUseBlkFlags;
+
+        final int stroking = rdrCtx.stroking;
+
+        int lastY = -1; // last emited row
+
+
+        // Iteration on scanlines
+        for (; y < ymax; y++, bucket++) {
+            // --- from former ScanLineIterator.next()
+            bucketcount = _edgeBucketCounts[bucket];
+
+            // marker on previously sorted edges:
+            prevNumCrossings = numCrossings;
+
+            // bucketCount indicates new edge / edge end:
+            if (bucketcount != 0) {
+                if (DO_STATS) {
+                    rdrCtx.stats.stat_rdr_activeEdges_updates.add(numCrossings);
+                }
+
+                // last bit set to 1 means that some edges end
+                if ((bucketcount & 0x1) != 0) {
+                    // eviction in active edge list
+                    // cache edges[] address + offset
+                    addr = addr0 + _OFF_YMAX;
+
+                    for (i = 0, newCount = 0; i < numCrossings; i++) {
+                        // get the pointer to the edge
+                        ecur = _edgePtrs[i];
+                        // random access so use unsafe:
+                        if (_unsafe.getInt(addr + ecur) > y) {
+                            _edgePtrs[newCount++] = ecur;
+                        }
+                    }
+                    // update marker on sorted edges minus removed edges:
+                    prevNumCrossings = numCrossings = newCount;
+                }
+
+                ptrLen = bucketcount >> 1; // number of new edge
+
+                if (ptrLen != 0) {
+                    if (DO_STATS) {
+                        rdrCtx.stats.stat_rdr_activeEdges_adds.add(ptrLen);
+                        if (ptrLen > 10) {
+                            rdrCtx.stats.stat_rdr_activeEdges_adds_high.add(ptrLen);
+                        }
+                    }
+                    ptrEnd = numCrossings + ptrLen;
+
+                    if (edgePtrsLen < ptrEnd) {
+                        if (DO_STATS) {
+                            rdrCtx.stats.stat_array_renderer_edgePtrs.add(ptrEnd);
+                        }
+                        this.edgePtrs = _edgePtrs
+                            = edgePtrs_ref.widenArray(_edgePtrs, numCrossings,
+                                                      ptrEnd);
+
+                        edgePtrsLen = _edgePtrs.length;
+                        // Get larger auxiliary storage:
+                        aux_edgePtrs_ref.putArray(_aux_edgePtrs);
+
+                        // use ArrayCache.getNewSize() to use the same growing
+                        // factor as widenArray():
+                        if (DO_STATS) {
+                            rdrCtx.stats.stat_array_renderer_aux_edgePtrs.add(ptrEnd);
+                        }
+                        this.aux_edgePtrs = _aux_edgePtrs
+                            = aux_edgePtrs_ref.getArray(
+                                ArrayCacheConst.getNewSize(numCrossings, ptrEnd)
+                            );
+                    }
+
+                    // cache edges[] address + offset
+                    addr = addr0 + _OFF_NEXT;
+
+                    // add new edges to active edge list:
+                    for (ecur = _edgeBuckets[bucket];
+                         numCrossings < ptrEnd; numCrossings++)
+                    {
+                        // store the pointer to the edge
+                        _edgePtrs[numCrossings] = ecur;
+                        // random access so use unsafe:
+                        ecur = _unsafe.getInt(addr + ecur);
+                    }
+
+                    if (crossingsLen < numCrossings) {
+                        // Get larger array:
+                        crossings_ref.putArray(_crossings);
+
+                        if (DO_STATS) {
+                            rdrCtx.stats.stat_array_renderer_crossings
+                                .add(numCrossings);
+                        }
+                        this.crossings = _crossings
+                            = crossings_ref.getArray(numCrossings);
+
+                        // Get larger auxiliary storage:
+                        aux_crossings_ref.putArray(_aux_crossings);
+
+                        if (DO_STATS) {
+                            rdrCtx.stats.stat_array_renderer_aux_crossings
+                                .add(numCrossings);
+                        }
+                        this.aux_crossings = _aux_crossings
+                            = aux_crossings_ref.getArray(numCrossings);
+
+                        crossingsLen = _crossings.length;
+                    }
+                    if (DO_STATS) {
+                        // update max used mark
+                        if (numCrossings > _arrayMaxUsed) {
+                            _arrayMaxUsed = numCrossings;
+                        }
+                    }
+                } // ptrLen != 0
+            } // bucketCount != 0
+
+
+            if (numCrossings != 0) {
+                /*
+                 * thresholds to switch to optimized merge sort
+                 * for newly added edges + final merge pass.
+                 */
+                if ((ptrLen < 10) || (numCrossings < 40)) {
+                    if (DO_STATS) {
+                        rdrCtx.stats.hist_rdr_crossings.add(numCrossings);
+                        rdrCtx.stats.hist_rdr_crossings_adds.add(ptrLen);
+                    }
+
+                    /*
+                     * threshold to use binary insertion sort instead of
+                     * straight insertion sort (to minimize comparisons).
+                     */
+                    useBinarySearch = (numCrossings >= 20);
+
+                    // if small enough:
+                    lastCross = _MIN_VALUE;
+
+                    for (i = 0; i < numCrossings; i++) {
+                        // get the pointer to the edge
+                        ecur = _edgePtrs[i];
+
+                        /* convert subpixel coordinates into pixel
+                            positions for coming scanline */
+                        /* note: it is faster to always update edges even
+                           if it is removed from AEL for coming or last scanline */
+
+                        // random access so use unsafe:
+                        addr = addr0 + ecur; // ecur + OFF_F_CURX
+
+                        // get current crossing:
+                        curx = _unsafe.getInt(addr);
+
+                        // update crossing with orientation at last bit:
+                        cross = curx;
+
+                        // Increment x using DDA (fixed point):
+                        curx += _unsafe.getInt(addr + _OFF_BUMP_X);
+
+                        // Increment error:
+                        err  =  _unsafe.getInt(addr + _OFF_ERROR)
+                              + _unsafe.getInt(addr + _OFF_BUMP_ERR);
+
+                        // Manual carry handling:
+                        // keep sign and carry bit only and ignore last bit (preserve orientation):
+                        _unsafe.putInt(addr,               curx - ((err >> 30) & _ALL_BUT_LSB));
+                        _unsafe.putInt(addr + _OFF_ERROR, (err & _ERR_STEP_MAX));
+
+                        if (DO_STATS) {
+                            rdrCtx.stats.stat_rdr_crossings_updates.add(numCrossings);
+                        }
+
+                        // insertion sort of crossings:
+                        if (cross < lastCross) {
+                            if (DO_STATS) {
+                                rdrCtx.stats.stat_rdr_crossings_sorts.add(i);
+                            }
+
+                            /* use binary search for newly added edges
+                               in crossings if arrays are large enough */
+                            if (useBinarySearch && (i >= prevNumCrossings)) {
+                                if (DO_STATS) {
+                                    rdrCtx.stats.stat_rdr_crossings_bsearch.add(i);
+                                }
+                                low = 0;
+                                high = i - 1;
+
+                                do {
+                                    // note: use signed shift (not >>>) for performance
+                                    // as indices are too small for (low + high) to exceed Integer.MAX_VALUE
+                                    mid = (low + high) >> 1;
+
+                                    if (_crossings[mid] < cross) {
+                                        low = mid + 1;
+                                    } else {
+                                        high = mid - 1;
+                                    }
+                                } while (low <= high);
+
+                                for (j = i - 1; j >= low; j--) {
+                                    _crossings[j + 1] = _crossings[j];
+                                    _edgePtrs [j + 1] = _edgePtrs[j];
+                                }
+                                _crossings[low] = cross;
+                                _edgePtrs [low] = ecur;
+
+                            } else {
+                                j = i - 1;
+                                _crossings[i] = _crossings[j];
+                                _edgePtrs[i] = _edgePtrs[j];
+
+                                while ((--j >= 0) && (_crossings[j] > cross)) {
+                                    _crossings[j + 1] = _crossings[j];
+                                    _edgePtrs [j + 1] = _edgePtrs[j];
+                                }
+                                _crossings[j + 1] = cross;
+                                _edgePtrs [j + 1] = ecur;
+                            }
+
+                        } else {
+                            _crossings[i] = lastCross = cross;
+                        }
+                    }
+                } else {
+                    if (DO_STATS) {
+                        rdrCtx.stats.stat_rdr_crossings_msorts.add(numCrossings);
+                        rdrCtx.stats.hist_rdr_crossings_ratio
+                            .add((1000 * ptrLen) / numCrossings);
+                        rdrCtx.stats.hist_rdr_crossings_msorts.add(numCrossings);
+                        rdrCtx.stats.hist_rdr_crossings_msorts_adds.add(ptrLen);
+                    }
+
+                    // Copy sorted data in auxiliary arrays
+                    // and perform insertion sort on almost sorted data
+                    // (ie i < prevNumCrossings):
+
+                    lastCross = _MIN_VALUE;
+
+                    for (i = 0; i < numCrossings; i++) {
+                        // get the pointer to the edge
+                        ecur = _edgePtrs[i];
+
+                        /* convert subpixel coordinates into pixel
+                            positions for coming scanline */
+                        /* note: it is faster to always update edges even
+                           if it is removed from AEL for coming or last scanline */
+
+                        // random access so use unsafe:
+                        addr = addr0 + ecur; // ecur + OFF_F_CURX
+
+                        // get current crossing:
+                        curx = _unsafe.getInt(addr);
+
+                        // update crossing with orientation at last bit:
+                        cross = curx;
+
+                        // Increment x using DDA (fixed point):
+                        curx += _unsafe.getInt(addr + _OFF_BUMP_X);
+
+                        // Increment error:
+                        err  =  _unsafe.getInt(addr + _OFF_ERROR)
+                              + _unsafe.getInt(addr + _OFF_BUMP_ERR);
+
+                        // Manual carry handling:
+                        // keep sign and carry bit only and ignore last bit (preserve orientation):
+                        _unsafe.putInt(addr,               curx - ((err >> 30) & _ALL_BUT_LSB));
+                        _unsafe.putInt(addr + _OFF_ERROR, (err & _ERR_STEP_MAX));
+
+                        if (DO_STATS) {
+                            rdrCtx.stats.stat_rdr_crossings_updates.add(numCrossings);
+                        }
+
+                        if (i >= prevNumCrossings) {
+                            // simply store crossing as edgePtrs is in-place:
+                            // will be copied and sorted efficiently by mergesort later:
+                            _crossings[i]     = cross;
+
+                        } else if (cross < lastCross) {
+                            if (DO_STATS) {
+                                rdrCtx.stats.stat_rdr_crossings_sorts.add(i);
+                            }
+
+                            // (straight) insertion sort of crossings:
+                            j = i - 1;
+                            _aux_crossings[i] = _aux_crossings[j];
+                            _aux_edgePtrs[i] = _aux_edgePtrs[j];
+
+                            while ((--j >= 0) && (_aux_crossings[j] > cross)) {
+                                _aux_crossings[j + 1] = _aux_crossings[j];
+                                _aux_edgePtrs [j + 1] = _aux_edgePtrs[j];
+                            }
+                            _aux_crossings[j + 1] = cross;
+                            _aux_edgePtrs [j + 1] = ecur;
+
+                        } else {
+                            // auxiliary storage:
+                            _aux_crossings[i] = lastCross = cross;
+                            _aux_edgePtrs [i] = ecur;
+                        }
+                    }
+
+                    // use Mergesort using auxiliary arrays (sort only right part)
+                    MergeSort.mergeSortNoCopy(_crossings,     _edgePtrs,
+                                              _aux_crossings, _aux_edgePtrs,
+                                              numCrossings,   prevNumCrossings);
+                }
+
+                // reset ptrLen
+                ptrLen = 0;
+                // --- from former ScanLineIterator.next()
+
+
+                /* note: bboxx0 and bboxx1 must be pixel boundaries
+                   to have correct coverage computation */
+
+                // right shift on crossings to get the x-coordinate:
+                curxo = _crossings[0];
+                x0    = curxo >> 1;
+                if (x0 < minX) {
+                    minX = x0; // subpixel coordinate
+                }
+
+                x1 = _crossings[numCrossings - 1] >> 1;
+                if (x1 > maxX) {
+                    maxX = x1; // subpixel coordinate
+                }
+
+
+                // compute pixel coverages
+                prev = curx = x0;
+                // to turn {0, 1} into {-1, 1}, multiply by 2 and subtract 1.
+                // last bit contains orientation (0 or 1)
+                crorientation = ((curxo & 0x1) << 1) - 1;
+
+                if (windingRuleEvenOdd) {
+                    sum = crorientation;
+
+                    // Even Odd winding rule: take care of mask ie sum(orientations)
+                    for (i = 1; i < numCrossings; i++) {
+                        curxo = _crossings[i];
+                        curx  =  curxo >> 1;
+                        // to turn {0, 1} into {-1, 1}, multiply by 2 and subtract 1.
+                        // last bit contains orientation (0 or 1)
+                        crorientation = ((curxo & 0x1) << 1) - 1;
+
+                        if ((sum & 0x1) != 0) {
+                            // TODO: perform line clipping on left-right sides
+                            // to avoid such bound checks:
+                            x0 = (prev > bboxx0) ? prev : bboxx0;
+
+                            if (curx < bboxx1) {
+                                x1 = curx;
+                            } else {
+                                x1 = bboxx1;
+                                // skip right side (fast exit loop):
+                                i = numCrossings;
+                            }
+
+                            if (x0 < x1) {
+                                x0 -= bboxx0; // turn x0, x1 from coords to indices
+                                x1 -= bboxx0; // in the alpha array.
+
+                                pix_x      =  x0      >> _SUBPIXEL_LG_POSITIONS_X;
+                                pix_xmaxm1 = (x1 - 1) >> _SUBPIXEL_LG_POSITIONS_X;
+
+                                if (pix_x == pix_xmaxm1) {
+                                    // Start and end in same pixel
+                                    tmp = (x1 - x0); // number of subpixels
+                                    _alpha[pix_x    ] += tmp;
+                                    _alpha[pix_x + 1] -= tmp;
+
+                                    if (useBlkFlags) {
+                                        // flag used blocks:
+                                        // note: block processing handles extra pixel:
+                                        _blkFlags[pix_x    >> _BLK_SIZE_LG] = 1;
+                                    }
+                                } else {
+                                    tmp = (x0 & _SUBPIXEL_MASK_X);
+                                    _alpha[pix_x    ]
+                                        += (_SUBPIXEL_POSITIONS_X - tmp);
+                                    _alpha[pix_x + 1]
+                                        += tmp;
+
+                                    pix_xmax = x1 >> _SUBPIXEL_LG_POSITIONS_X;
+
+                                    tmp = (x1 & _SUBPIXEL_MASK_X);
+                                    _alpha[pix_xmax    ]
+                                        -= (_SUBPIXEL_POSITIONS_X - tmp);
+                                    _alpha[pix_xmax + 1]
+                                        -= tmp;
+
+                                    if (useBlkFlags) {
+                                        // flag used blocks:
+                                        // note: block processing handles extra pixel:
+                                        _blkFlags[pix_x    >> _BLK_SIZE_LG] = 1;
+                                        _blkFlags[pix_xmax >> _BLK_SIZE_LG] = 1;
+                                    }
+                                }
+                            }
+                        }
+
+                        sum += crorientation;
+                        prev = curx;
+                    }
+                } else {
+                    // Non-zero winding rule: optimize that case (default)
+                    // and avoid processing intermediate crossings
+                    for (i = 1, sum = 0;; i++) {
+                        sum += crorientation;
+
+                        if (sum != 0) {
+                            // prev = min(curx)
+                            if (prev > curx) {
+                                prev = curx;
+                            }
+                        } else {
+                            // TODO: perform line clipping on left-right sides
+                            // to avoid such bound checks:
+                            x0 = (prev > bboxx0) ? prev : bboxx0;
+
+                            if (curx < bboxx1) {
+                                x1 = curx;
+                            } else {
+                                x1 = bboxx1;
+                                // skip right side (fast exit loop):
+                                i = numCrossings;
+                            }
+
+                            if (x0 < x1) {
+                                x0 -= bboxx0; // turn x0, x1 from coords to indices
+                                x1 -= bboxx0; // in the alpha array.
+
+                                pix_x      =  x0      >> _SUBPIXEL_LG_POSITIONS_X;
+                                pix_xmaxm1 = (x1 - 1) >> _SUBPIXEL_LG_POSITIONS_X;
+
+                                if (pix_x == pix_xmaxm1) {
+                                    // Start and end in same pixel
+                                    tmp = (x1 - x0); // number of subpixels
+                                    _alpha[pix_x    ] += tmp;
+                                    _alpha[pix_x + 1] -= tmp;
+
+                                    if (useBlkFlags) {
+                                        // flag used blocks:
+                                        // note: block processing handles extra pixel:
+                                        _blkFlags[pix_x    >> _BLK_SIZE_LG] = 1;
+                                    }
+                                } else {
+                                    tmp = (x0 & _SUBPIXEL_MASK_X);
+                                    _alpha[pix_x    ]
+                                        += (_SUBPIXEL_POSITIONS_X - tmp);
+                                    _alpha[pix_x + 1]
+                                        += tmp;
+
+                                    pix_xmax = x1 >> _SUBPIXEL_LG_POSITIONS_X;
+
+                                    tmp = (x1 & _SUBPIXEL_MASK_X);
+                                    _alpha[pix_xmax    ]
+                                        -= (_SUBPIXEL_POSITIONS_X - tmp);
+                                    _alpha[pix_xmax + 1]
+                                        -= tmp;
+
+                                    if (useBlkFlags) {
+                                        // flag used blocks:
+                                        // note: block processing handles extra pixel:
+                                        _blkFlags[pix_x    >> _BLK_SIZE_LG] = 1;
+                                        _blkFlags[pix_xmax >> _BLK_SIZE_LG] = 1;
+                                    }
+                                }
+                            }
+                            prev = _MAX_VALUE;
+                        }
+
+                        if (i == numCrossings) {
+                            break;
+                        }
+
+                        curxo = _crossings[i];
+                        curx  =  curxo >> 1;
+                        // to turn {0, 1} into {-1, 1}, multiply by 2 and subtract 1.
+                        // last bit contains orientation (0 or 1)
+                        crorientation = ((curxo & 0x1) << 1) - 1;
+                    }
+                }
+            } // numCrossings > 0
+
+            // even if this last row had no crossings, alpha will be zeroed
+            // from the last emitRow call. But this doesn't matter because
+            // maxX < minX, so no row will be emitted to the MarlinCache.
+            if ((y & _SUBPIXEL_MASK_Y) == _SUBPIXEL_MASK_Y) {
+                lastY = y >> _SUBPIXEL_LG_POSITIONS_Y;
+
+                // convert subpixel to pixel coordinate within boundaries:
+                minX = FloatMath.max(minX, bboxx0) >> _SUBPIXEL_LG_POSITIONS_X;
+                maxX = FloatMath.min(maxX, bboxx1) >> _SUBPIXEL_LG_POSITIONS_X;
+
+                if (maxX >= minX) {
+                    // note: alpha array will be zeroed by copyAARow()
+                    // +1 because alpha [pix_minX; pix_maxX[
+                    // fix range [x0; x1[
+                    // note: if x1=bboxx1, then alpha is written up to bboxx1+1
+                    // inclusive: alpha[bboxx1] ignored, alpha[bboxx1+1] == 0
+                    // (normally so never cleared below)
+                    copyAARow(_alpha, lastY, minX, maxX + 1, useBlkFlags);
+
+                    // speculative for next pixel row (scanline coherence):
+                    if (_enableBlkFlagsHeuristics) {
+                        // Use block flags if large pixel span and few crossings:
+                        // ie mean(distance between crossings) is larger than
+                        // 1 block size;
+
+                        // fast check width:
+                        maxX -= minX;
+
+                        // if stroking: numCrossings /= 2
+                        // => shift numCrossings by 1
+                        // condition = (width / (numCrossings - 1)) > blockSize
+                        useBlkFlags = (maxX > _BLK_SIZE) && (maxX >
+                            (((numCrossings >> stroking) - 1) << _BLK_SIZE_LG));
+
+                        if (DO_STATS) {
+                            tmp = FloatMath.max(1,
+                                    ((numCrossings >> stroking) - 1));
+                            rdrCtx.stats.hist_tile_generator_encoding_dist
+                                .add(maxX / tmp);
+                        }
+                    }
+                } else {
+                    _cache.clearAARow(lastY);
+                }
+                minX = _MAX_VALUE;
+                maxX = _MIN_VALUE;
+            }
+        } // scan line iterator
+
+        // Emit final row
+        y--;
+        y >>= _SUBPIXEL_LG_POSITIONS_Y;
+
+        // convert subpixel to pixel coordinate within boundaries:
+        minX = FloatMath.max(minX, bboxx0) >> _SUBPIXEL_LG_POSITIONS_X;
+        maxX = FloatMath.min(maxX, bboxx1) >> _SUBPIXEL_LG_POSITIONS_X;
+
+        if (maxX >= minX) {
+            // note: alpha array will be zeroed by copyAARow()
+            // +1 because alpha [pix_minX; pix_maxX[
+            // fix range [x0; x1[
+            // note: if x1=bboxx1, then alpha is written up to bboxx1+1
+            // inclusive: alpha[bboxx1] ignored then cleared and
+            // alpha[bboxx1+1] == 0 (normally so never cleared after)
+            copyAARow(_alpha, y, minX, maxX + 1, useBlkFlags);
+        } else if (y != lastY) {
+            _cache.clearAARow(y);
+        }
+
+        // update member:
+        edgeCount = numCrossings;
+        prevUseBlkFlags = useBlkFlags;
+
+        if (DO_STATS) {
+            // update max used mark
+            activeEdgeMaxUsed = _arrayMaxUsed;
+        }
+    }
+
+    /**
+     * Finishes edge collection: computes the clipped bounding box, gets
+     * larger working arrays if needed and processes the first tile line.
+     *
+     * @return false if the edge bounds are undefined or the clipped bounds
+     * are empty; true once the first tile line has been processed
+     */
+    boolean endRendering() {
+        if (DO_MONITORS) {
+            rdrCtx.stats.mon_rdr_endRendering.start();
+        }
+        if (edgeMinY == Integer.MAX_VALUE) {
+            // undefined edges bounds
+            return false;
+        }
+
+        // subpixel bounds as half-open intervals (X is clipped to bounds);
+        // edge Min/Max Y are already rounded to subpixels within bounds:
+        final int subMinX = FloatMath.max(FloatMath.ceil_int(edgeMinX - 0.5d), boundsMinX);
+        final int subMaxX = FloatMath.min(FloatMath.ceil_int(edgeMaxX - 0.5d), boundsMaxX);
+        final int subMinY = edgeMinY;
+        final int subMaxY = edgeMaxY;
+
+        buckets_minY = subMinY - boundsMinY;
+        buckets_maxY = subMaxY - boundsMinY;
+
+        if (DO_LOG_BOUNDS) {
+            MarlinUtils.logInfo("edgesXY = [" + edgeMinX + " ... " + edgeMaxX
+                                + "[ [" + edgeMinY + " ... " + edgeMaxY + "[");
+            MarlinUtils.logInfo("spXY    = [" + subMinX + " ... " + subMaxX
+                                + "[ [" + subMinY + " ... " + subMaxY + "[");
+        }
+
+        // test clipping for shapes out of bounds: both ranges must be non-empty
+        if (!((subMinX < subMaxX) && (subMinY < subMaxY))) {
+            return false;
+        }
+
+        // pixel bounds as half-open intervals:
+        // min values are inclusive, max values are exclusive:
+        final int pixMinX =  subMinX                    >> SUBPIXEL_LG_POSITIONS_X;
+        final int pixMaxX = (subMaxX + SUBPIXEL_MASK_X) >> SUBPIXEL_LG_POSITIONS_X;
+        final int pixMinY =  subMinY                    >> SUBPIXEL_LG_POSITIONS_Y;
+        final int pixMaxY = (subMaxY + SUBPIXEL_MASK_Y) >> SUBPIXEL_LG_POSITIONS_Y;
+        final int pixWidth = pixMaxX - pixMinX;
+
+        // store BBox to answer ptg.getBBox():
+        cache.init(pixMinX, pixMinY, pixMaxX, pixMaxY);
+
+        // Heuristics for using block flags:
+        if (ENABLE_BLOCK_FLAGS) {
+            final boolean rle = cache.useRLE;
+            enableBlkFlags = rle;
+            prevUseBlkFlags = rle && !ENABLE_BLOCK_FLAGS_HEURISTICS;
+
+            // ensure blockFlags array is large enough:
+            // note: +2 to ensure enough space left at end
+            final int blkLen = (pixWidth >> BLOCK_SIZE_LG) + 2;
+            if (rle && (blkLen > INITIAL_ARRAY)) {
+                blkFlags = blkFlags_ref.getArray(blkLen);
+            }
+        }
+
+        /* memorize the rendering bounding box:
+           note: bbox_spminX and bbox_spmaxX must be pixel boundaries
+           to have correct coverage computation */
+        bbox_spminX = pixMinX << SUBPIXEL_LG_POSITIONS_X; // inclusive
+        bbox_spmaxX = pixMaxX << SUBPIXEL_LG_POSITIONS_X; // exclusive
+        bbox_spminY = subMinY; // inclusive
+        bbox_spmaxY = subMaxY; // exclusive
+
+        if (DO_LOG_BOUNDS) {
+            MarlinUtils.logInfo("pXY       = [" + pixMinX + " ... " + pixMaxX
+                                + "[ [" + pixMinY + " ... " + pixMaxY + "[");
+            MarlinUtils.logInfo("bbox_spXY = [" + bbox_spminX + " ... "
+                                + bbox_spmaxX + "[ [" + bbox_spminY + " ... "
+                                + bbox_spmaxY + "[");
+        }
+
+        // Prepare alpha line:
+        // add 2 to better deal with the last pixel in a pixel row.
+        final int width = pixWidth + 2;
+
+        // Useful when processing tile line by tile line
+        if (width > INITIAL_AA_ARRAY) {
+            if (DO_STATS) {
+                rdrCtx.stats.stat_array_renderer_alphaline.add(width);
+            }
+            alphaLine = alphaLine_ref.getArray(width);
+        }
+
+        // process first tile line:
+        endRendering(pixMinY);
+
+        return true;
+    }
+
+    private int bbox_spminX, bbox_spmaxX, bbox_spminY, bbox_spmaxY; // subpixel bbox, set by endRendering()
+
+    /**
+     * Processes the tile line starting at pixel row pminY, restricted to the
+     * memorized subpixel row range [bbox_spminY; bbox_spmaxY[.
+     */
+    void endRendering(final int pminY) {
+        if (DO_MONITORS) {
+            rdrCtx.stats.mon_rdr_endRendering_Y.start();
+        }
+        // first subpixel row of the requested tile line:
+        final int tileTop = pminY << SUBPIXEL_LG_POSITIONS_Y;
+        // never start before the first row of the bounding box:
+        final int firstRow = FloatMath.max(bbox_spminY, tileTop);
+        // avoid rendering for last call to nextTile()
+        if (firstRow < bbox_spmaxY) {
+            // only one tile line: at most SUBPIXEL_TILE subpixel rows
+            final int endRow = FloatMath.min(bbox_spmaxY, tileTop + SUBPIXEL_TILE);
+            cache.resetTileLine(pminY);
+            _endRendering(firstRow, endRow);
+        }
+        if (DO_MONITORS) {
+            rdrCtx.stats.mon_rdr_endRendering_Y.stop();
+        }
+    }
+
+    /**
+     * Copies one alpha row into the cache through copyAARowRLE_WithBlockFlags()
+     * when useBlockFlags is set, through copyAARowNoRLE() otherwise.
+     */
+    void copyAARow(final int[] alphaRow, final int pix_y, final int pix_from,
+                   final int pix_to, final boolean useBlockFlags) {
+        if (DO_MONITORS) {
+            rdrCtx.stats.mon_rdr_copyAARow.start();
+        }
+        if (DO_STATS) {
+            // records the chosen path: 1 = with block flags, 0 = without
+            rdrCtx.stats.hist_tile_generator_encoding.add(useBlockFlags ? 1 : 0);
+        }
+        if (!useBlockFlags) {
+            cache.copyAARowNoRLE(alphaRow, pix_y, pix_from, pix_to);
+        } else {
+            cache.copyAARowRLE_WithBlockFlags(blkFlags, alphaRow, pix_y, pix_from, pix_to);
+        }
+        if (DO_MONITORS) {
+            rdrCtx.stats.mon_rdr_copyAARow.stop();
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/DRendererContext.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.awt.geom.Path2D;
+import java.lang.ref.WeakReference;
+import java.util.concurrent.atomic.AtomicInteger;
+import sun.java2d.ReentrantContext;
+import sun.java2d.marlin.ArrayCacheConst.CacheStats;
+import sun.java2d.marlin.DMarlinRenderingEngine.NormalizingPathIterator;
+
+/**
+ * This class is a renderer context dedicated to a single thread (double-precision pipeline stages)
+ */
+final class DRendererContext extends ReentrantContext implements IRendererContext {
+
+    // RendererContext creation counter
+    private static final AtomicInteger CTX_COUNT = new AtomicInteger(1);
+
+    /**
+     * Create a new renderer context named "ctx" + the next CTX_COUNT value
+     *
+     * @return new RendererContext instance
+     */
+    static DRendererContext createContext() {
+        return new DRendererContext("ctx"
+                       + Integer.toString(CTX_COUNT.getAndIncrement()));
+    }
+
+    // Smallest object used as Cleaner's parent reference
+    private final Object cleanerObj;
+    // dirty flag indicating an exception occurred during pipeline in pathTo()
+    boolean dirty = false;
+    // shared data (scratch array handed to both NormalizingPathIterator instances)
+    final double[] double6 = new double[6];
+    // shared curve (dirty) (Renderer / Stroker)
+    final DCurve curve = new DCurve();
+    // MarlinRenderingEngine NormalizingPathIterator NearestPixelCenter:
+    final NormalizingPathIterator nPCPathIterator;
+    // MarlinRenderingEngine NearestPixelQuarter NormalizingPathIterator:
+    final NormalizingPathIterator nPQPathIterator;
+    // MarlinRenderingEngine.TransformingPathConsumer2D
+    final DTransformingPathConsumer2D transformerPC2D;
+    // recycled Path2D instance (weak)
+    private WeakReference<Path2D.Double> refPath2D = null;
+    final DRenderer renderer; // created right after cache in the constructor
+    final DStroker stroker;
+    // Simplifies out collinear lines
+    final DCollinearSimplifier simplifier = new DCollinearSimplifier();
+    final DDasher dasher;
+    final MarlinTileGenerator ptg; // built from stats, renderer and cache
+    final MarlinCache cache;
+    // flag indicating the shape is stroked (1) or filled (0)
+    int stroking = 0;
+
+    // Array caches:
+    /* clean int[] cache (zero-filled) = 5 refs */
+    private final IntArrayCache cleanIntCache = new IntArrayCache(true, 5);
+    /* dirty int[] cache = 4 refs */
+    private final IntArrayCache dirtyIntCache = new IntArrayCache(false, 4);
+    /* dirty double[] cache = 3 refs */
+    private final DoubleArrayCache dirtyDoubleCache = new DoubleArrayCache(false, 3);
+    /* dirty byte[] cache = 1 ref */
+    private final ByteArrayCache dirtyByteCache = new ByteArrayCache(false, 1);
+
+    // RendererContext statistics (null unless DO_STATS or DO_MONITORS is set)
+    final RendererStats stats;
+
+    final PathConsumer2DAdapter p2dAdapter = new PathConsumer2DAdapter(); // double -> float consumer bridge
+
+
+    /**
+     * Constructor: creates the stats (if enabled) and every pipeline stage of this context
+     *
+     * @param name context name (debugging)
+     */
+    DRendererContext(final String name) {
+        if (LOG_CREATE_CONTEXT) {
+            MarlinUtils.logInfo("new RendererContext = " + name);
+        }
+        this.cleanerObj = new Object();
+
+        // create first stats (needed by newOffHeapArray):
+        if (DO_STATS || DO_MONITORS) {
+            stats = RendererStats.createInstance(cleanerObj, name);
+            // push cache stats:
+            stats.cacheStats = new CacheStats[] { cleanIntCache.stats,
+                dirtyIntCache.stats, dirtyDoubleCache.stats, dirtyByteCache.stats
+            };
+        } else {
+            stats = null;
+        }
+
+        // NormalizingPathIterator instances:
+        nPCPathIterator = new NormalizingPathIterator.NearestPixelCenter(double6);
+        nPQPathIterator  = new NormalizingPathIterator.NearestPixelQuarter(double6);
+
+        // MarlinRenderingEngine.TransformingPathConsumer2D
+        transformerPC2D = new DTransformingPathConsumer2D();
+
+        // Renderer:
+        cache = new MarlinCache(this);
+        renderer = new DRenderer(this); // needs MarlinCache from rdrCtx.cache
+        ptg = new MarlinTileGenerator(stats, renderer, cache);
+
+        stroker = new DStroker(this);
+        dasher = new DDasher(this);
+    }
+
+    /**
+     * Disposes this renderer context:
+     * clean up before reusing this context
+     */
+    void dispose() {
+        if (DO_STATS) {
+            if (stats.totalOffHeap > stats.totalOffHeapMax) {
+                stats.totalOffHeapMax = stats.totalOffHeap; // keep the largest value seen so far
+            }
+            stats.totalOffHeap = 0L;
+        }
+        stroking = 0;
+        // if context is marked as DIRTY:
+        if (dirty) {
+            // may happen if an exception is thrown in the pipeline processing:
+            // force cleanup of all possible pipelined blocks (except Renderer):
+
+            // NormalizingPathIterator instances:
+            this.nPCPathIterator.dispose();
+            this.nPQPathIterator.dispose();
+            // Dasher:
+            this.dasher.dispose();
+            // Stroker:
+            this.stroker.dispose();
+
+            // mark context as CLEAN:
+            dirty = false;
+        }
+    }
+
+    Path2D.Double getPath2D() { // returns the recycled (or a new) Path2D.Double, always reset
+        // resolve reference:
+        Path2D.Double p2d
+            = (refPath2D != null) ? refPath2D.get() : null;
+
+        // create a new Path2D ?
+        if (p2d == null) {
+            p2d = new Path2D.Double(Path2D.WIND_NON_ZERO, INITIAL_EDGES_COUNT); // 32K
+
+            // update weak reference:
+            refPath2D = new WeakReference<Path2D.Double>(p2d);
+        }
+        // reset the path anyway:
+        p2d.reset();
+        return p2d;
+    }
+
+    @Override
+    public RendererStats stats() {
+        return stats; // may be null, see the constructor
+    }
+
+    @Override
+    public OffHeapArray newOffHeapArray(final long initialSize) {
+        if (DO_STATS) {
+            stats.totalOffHeapInitial += initialSize;
+        }
+        return new OffHeapArray(cleanerObj, initialSize); // cleanerObj is handed over as the parent reference
+    }
+
+    @Override
+    public IntArrayCache.Reference newCleanIntArrayRef(final int initialSize) {
+        return cleanIntCache.createRef(initialSize); // reference backed by the clean (zero-filled) int cache
+    }
+
+    IntArrayCache.Reference newDirtyIntArrayRef(final int initialSize) {
+        return dirtyIntCache.createRef(initialSize); // reference backed by the dirty int cache
+    }
+
+    DoubleArrayCache.Reference newDirtyDoubleArrayRef(final int initialSize) {
+        return dirtyDoubleCache.createRef(initialSize); // reference backed by the dirty double cache
+    }
+
+    ByteArrayCache.Reference newDirtyByteArrayRef(final int initialSize) {
+        return dirtyByteCache.createRef(initialSize); // reference backed by the dirty byte cache
+    }
+
+    static final class PathConsumer2DAdapter implements DPathConsumer2D { // forwards to a float-based consumer
+        private sun.awt.geom.PathConsumer2D out; // delegate, set by init()
+
+        PathConsumer2DAdapter() {}
+
+        PathConsumer2DAdapter init(sun.awt.geom.PathConsumer2D out) {
+            this.out = out;
+            return this; // allows call chaining
+        }
+
+        @Override
+        public void moveTo(double x0, double y0) {
+            out.moveTo((float)x0, (float)y0); // coordinates are narrowed to float
+        }
+
+        @Override
+        public void lineTo(double x1, double y1) {
+            out.lineTo((float)x1, (float)y1); // coordinates are narrowed to float
+        }
+
+        @Override
+        public void closePath() {
+            out.closePath();
+        }
+
+        @Override
+        public void pathDone() {
+            out.pathDone();
+        }
+
+        @Override
+        public void curveTo(double x1, double y1,
+                            double x2, double y2,
+                            double x3, double y3)
+        {
+            out.curveTo((float)x1, (float)y1,
+                    (float)x2, (float)y2,
+                    (float)x3, (float)y3);
+        }
+
+        @Override
+        public void quadTo(double x1, double y1, double x2, double y2) {
+            out.quadTo((float)x1, (float)y1, (float)x2, (float)y2); // coordinates are narrowed to float
+        }
+
+        @Override
+        public long getNativeConsumer() {
+            throw new InternalError("Not using a native peer"); // this adapter never has a native peer
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/DStroker.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,1325 @@
+/*
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.util.Arrays;
+
+// TODO: some of the arithmetic here is too verbose and prone to
+// hard-to-debug typos. We should consider making a small Point/Vector class
+// that has methods like plus(Point), minus(Point), dot(Point), cross(Point) and such
+final class DStroker implements DPathConsumer2D, MarlinConst {
+
+    private static final int MOVE_TO = 0;
+    private static final int DRAWING_OP_TO = 1; // ie. curve, line, or quad
+    private static final int CLOSE = 2;
+
+    /**
+     * Constant value for join style.
+     */
+    public static final int JOIN_MITER = 0;
+
+    /**
+     * Constant value for join style.
+     */
+    public static final int JOIN_ROUND = 1;
+
+    /**
+     * Constant value for join style.
+     */
+    public static final int JOIN_BEVEL = 2;
+
+    /**
+     * Constant value for end cap style.
+     */
+    public static final int CAP_BUTT = 0;
+
+    /**
+     * Constant value for end cap style.
+     */
+    public static final int CAP_ROUND = 1;
+
+    /**
+     * Constant value for end cap style.
+     */
+    public static final int CAP_SQUARE = 2;
+
+    // pisces used to use fixed point arithmetic with 16 decimal digits. I
+    // didn't want to change the values of the constant below when I converted
+    // it to floating point, so that's why the divisions by 2^16 are there.
+    private static final double ROUND_JOIN_THRESHOLD = 1000.0d/65536.0d;
+
+    private static final double C = 0.5522847498307933d;
+
+    private static final int MAX_N_CURVES = 11;
+
+    private DPathConsumer2D out;
+
+    private int capStyle;
+    private int joinStyle;
+
+    private double lineWidth2;
+    private double invHalfLineWidth2Sq;
+
+    private final double[] offset0 = new double[2];
+    private final double[] offset1 = new double[2];
+    private final double[] offset2 = new double[2];
+    private final double[] miter = new double[2];
+    private double miterLimitSq;
+
+    private int prev;
+
+    // The starting point of the path, and the slope there.
+    private double sx0, sy0, sdx, sdy;
+    // the current point and the slope there.
+    private double cx0, cy0, cdx, cdy; // c stands for current
+    // vectors that when added to (sx0,sy0) and (cx0,cy0) respectively yield the
+    // first and last points on the left parallel path. Since this path is
+    // parallel, its slope at any point is parallel to the slope of the
+    // original path (though they may have different directions), so these
+    // could be computed from sdx,sdy and cdx,cdy (and vice versa), but that
+    // would be error prone and hard to read, so we keep these anyway.
+    private double smx, smy, cmx, cmy;
+
+    private final PolyStack reverse;
+
+    // This is where the curve to be processed is put. We give it
+    // enough room to store all curves.
+    private final double[] middle = new double[MAX_N_CURVES * 6 + 2];
+    private final double[] lp = new double[8];
+    private final double[] rp = new double[8];
+    private final double[] subdivTs = new double[MAX_N_CURVES - 1];
+
+    // per-thread renderer context
+    final DRendererContext rdrCtx;
+
+    // dirty curve
+    final DCurve curve;
+
+    /**
+     * Constructs a <code>DStroker</code>.
+     * @param rdrCtx per-thread renderer context
+     */
+    DStroker(final DRendererContext rdrCtx) {
+        this.rdrCtx = rdrCtx;
+
+        this.reverse = new PolyStack(rdrCtx); // collects the emit*Rev() segments until emitReverse()
+        this.curve = rdrCtx.curve; // reuses the context's shared DCurve instance
+    }
+
+    /**
+     * Inits the <code>DStroker</code>.
+     *
+     * @param pc2d an output <code>DPathConsumer2D</code>.
+     * @param lineWidth the desired line width in pixels
+     * @param capStyle the desired end cap style, one of
+     * <code>CAP_BUTT</code>, <code>CAP_ROUND</code> or
+     * <code>CAP_SQUARE</code>.
+     * @param joinStyle the desired line join style, one of
+     * <code>JOIN_MITER</code>, <code>JOIN_ROUND</code> or
+     * <code>JOIN_BEVEL</code>.
+     * @param miterLimit the desired miter limit
+     * @return this instance
+     */
+    DStroker init(DPathConsumer2D pc2d,
+              double lineWidth,
+              int capStyle,
+              int joinStyle,
+              double miterLimit)
+    {
+        this.out = pc2d;
+
+        this.lineWidth2 = lineWidth / 2.0d; // half width = length of the offset vectors
+        this.invHalfLineWidth2Sq = 1.0d / (2.0d * lineWidth2 * lineWidth2); // 1 / (2 * halfWidth^2)
+        this.capStyle = capStyle;
+        this.joinStyle = joinStyle;
+
+        double limit = miterLimit * lineWidth2;
+        this.miterLimitSq = limit * limit; // squared: drawMiter() compares squared lengths
+
+        this.prev = CLOSE; // no sub-path in progress yet
+
+        rdrCtx.stroking = 1; // flag the context as stroking
+
+        return this; // fluent API
+    }
+
+    /**
+     * Disposes this stroker:
+     * clean up before reusing this instance
+     */
+    void dispose() {
+        reverse.dispose(); // always release the reverse stack
+
+        if (DO_CLEAN_DIRTY) {
+            // Force zero-fill dirty arrays:
+            Arrays.fill(offset0, 0.0d);
+            Arrays.fill(offset1, 0.0d);
+            Arrays.fill(offset2, 0.0d);
+            Arrays.fill(miter, 0.0d);
+            Arrays.fill(middle, 0.0d);
+            Arrays.fill(lp, 0.0d);
+            Arrays.fill(rp, 0.0d);
+            Arrays.fill(subdivTs, 0.0d);
+        }
+    }
+
+    private static void computeOffset(final double lx, final double ly,
+                                      final double w, final double[] m)
+    { // stores in m[0], m[1] the vector (ly, -lx) scaled to length w, or (0, 0) for a null (lx, ly)
+        double len = lx*lx + ly*ly; // squared length first, to test for the null vector
+        if (len == 0.0d) {
+            m[0] = 0.0d;
+            m[1] = 0.0d;
+        } else {
+            len = Math.sqrt(len); // now the actual length
+            m[0] =  (ly * w) / len;
+            m[1] = -(lx * w) / len;
+        }
+    }
+
+    // Returns true if the vectors (dx1, dy1) and (dx2, dy2) are
+    // clockwise (if dx1,dy1 needs to be rotated clockwise to close
+    // the smallest angle between it and dx2,dy2).
+    // This is equivalent to detecting whether a point q is on the right side
+    // of a line passing through points p1, p2 where p2 = p1+(dx1,dy1) and
+    // q = p2+(dx2,dy2), which is the same as saying p1, p2, q are in a
+    // clockwise order.
+    // NOTE: "clockwise" here assumes coordinates with 0,0 at the bottom left.
+    private static boolean isCW(final double dx1, final double dy1,
+                                final double dx2, final double dy2)
+    {
+        return dx1 * dy2 <= dy1 * dx2; // cross product <= 0; collinear vectors also return true
+    }
+
+    private void drawRoundJoin(double x, double y,
+                               double omx, double omy, double mx, double my,
+                               boolean rev,
+                               double threshold)
+    { // filters out degenerate joins, then delegates to the overload without threshold
+        if ((omx == 0.0d && omy == 0.0d) || (mx == 0.0d && my == 0.0d)) {
+            return; // one of the offset vectors is null: nothing to join
+        }
+
+        double domx = omx - mx;
+        double domy = omy - my;
+        double len = domx*domx + domy*domy; // squared distance between the two offsets
+        if (len < threshold) {
+            return; // offsets (almost) identical: skip the join
+        }
+
+        if (rev) { // negate both offsets for the reversed case
+            omx = -omx;
+            omy = -omy;
+            mx  = -mx;
+            my  = -my;
+        }
+        drawRoundJoin(x, y, omx, omy, mx, my, rev);
+    }
+
+    private void drawRoundJoin(double cx, double cy,
+                               double omx, double omy,
+                               double mx, double my,
+                               boolean rev)
+    {
+        // The sign of the dot product of mx,my and omx,omy is equal to the
+        // sign of the cosine of ext
+        // (ext is the angle between omx,omy and mx,my).
+        final double cosext = omx * mx + omy * my;
+        // If it is >=0, we know that abs(ext) is <= 90 degrees, so we only
+        // need 1 curve to approximate the circle section that joins omx,omy
+        // and mx,my.
+        final int numCurves = (cosext >= 0.0d) ? 1 : 2;
+
+        switch (numCurves) {
+        case 1:
+            drawBezApproxForArc(cx, cy, omx, omy, mx, my, rev);
+            break;
+        case 2:
+            // we need to split the arc into 2 arcs spanning the same angle.
+            // The point we want will be one of the 2 intersections of the
+            // perpendicular bisector of the chord (omx,omy)->(mx,my) and the
+            // circle. We could find this by scaling the vector
+            // (omx+mx, omy+my)/2 so that it has length=lineWidth2 (and thus lies
+            // on the circle), but that can have numerical problems when the angle
+            // between omx,omy and mx,my is close to 180 degrees. So we compute a
+            // normal of (omx,omy)-(mx,my). This will be the direction of the
+            // perpendicular bisector. To get one of the intersections, we just scale
+            // this vector so that its length is lineWidth2 (this works because the
+            // perpendicular bisector goes through the origin). This scaling doesn't
+            // have numerical problems because we know that lineWidth2 divided by
+            // this normal's length is at least 0.5 and at most sqrt(2)/2 (because
+            // we know the angle of the arc is > 90 degrees).
+            double nx = my - omy, ny = omx - mx;
+            double nlen = Math.sqrt(nx*nx + ny*ny);
+            double scale = lineWidth2/nlen;
+            double mmx = nx * scale, mmy = ny * scale; // middle offset, of length lineWidth2
+
+            // if (isCW(omx, omy, mx, my) != isCW(mmx, mmy, mx, my)) then we've
+            // computed the wrong intersection so we get the other one.
+            // The test above is equivalent to if (rev).
+            if (rev) {
+                mmx = -mmx;
+                mmy = -mmy;
+            }
+            drawBezApproxForArc(cx, cy, omx, omy, mmx, mmy, rev); // first half of the arc
+            drawBezApproxForArc(cx, cy, mmx, mmy, mx, my, rev); // second half of the arc
+            break;
+        default:
+        }
+    }
+
+    // the input arc defined by omx,omy and mx,my must span <= 90 degrees.
+    private void drawBezApproxForArc(final double cx, final double cy,
+                                     final double omx, final double omy,
+                                     final double mx, final double my,
+                                     boolean rev)
+    {
+        final double cosext2 = (omx * mx + omy * my) * invHalfLineWidth2Sq; // dot / (2 * halfWidth^2)
+
+        // check round off errors producing cos(ext) > 1 and a NaN below
+        // cos(ext) == 1 implies colinear segments and an empty join anyway
+        if (cosext2 >= 0.5d) {
+            // just return to avoid generating a flat curve:
+            return;
+        }
+
+        // cv is the length of P1-P0 and P2-P3 divided by the radius of the arc
+        // (so, cv assumes the arc has radius 1). P0, P1, P2, P3 are the points that
+        // define the bezier curve we're computing.
+        // It is computed using the constraints that P1-P0 and P3-P2 are parallel
+        // to the arc tangents at the endpoints, and that |P1-P0|=|P3-P2|.
+        double cv = ((4.0d / 3.0d) * Math.sqrt(0.5d - cosext2) /
+                            (1.0d + Math.sqrt(cosext2 + 0.5d)));
+        // if clockwise, we need to negate cv.
+        if (rev) { // rev is equivalent to isCW(omx, omy, mx, my)
+            cv = -cv;
+        }
+        final double x1 = cx + omx; // P0: start of the arc
+        final double y1 = cy + omy;
+        final double x2 = x1 - cv * omy; // P1: P0 moved along the perpendicular of (omx, omy)
+        final double y2 = y1 + cv * omx;
+
+        final double x4 = cx + mx; // P3: end of the arc
+        final double y4 = cy + my;
+        final double x3 = x4 + cv * my; // P2: P3 moved along the perpendicular of (mx, my)
+        final double y3 = y4 - cv * mx;
+
+        emitCurveTo(x1, y1, x2, y2, x3, y3, x4, y4, rev);
+    }
+
+    private void drawRoundCap(double cx, double cy, double mx, double my) { // two cubics around (cx, cy)
+        final double Cmx = C * mx; // C scales the offset to get the control-point distance
+        final double Cmy = C * my;
+        emitCurveTo(cx + mx - Cmy, cy + my + Cmx,
+                    cx - my + Cmx, cy + mx + Cmy,
+                    cx - my,       cy + mx); // first cubic ends at (cx - my, cy + mx)
+        emitCurveTo(cx - my - Cmx, cy + mx - Cmy,
+                    cx - mx - Cmy, cy - my + Cmx,
+                    cx - mx,       cy - my); // second cubic ends at (cx - mx, cy - my)
+    }
+
+    // Return the intersection point of the lines (x0, y0) -> (x1, y1)
+    // and (x0p, y0p) -> (x1p, y1p) in m[off] and m[off+1]
+    private static void computeMiter(final double x0, final double y0,
+                                     final double x1, final double y1,
+                                     final double x0p, final double y0p,
+                                     final double x1p, final double y1p,
+                                     final double[] m, int off)
+    {
+        double x10 = x1 - x0; // direction of the first line
+        double y10 = y1 - y0;
+        double x10p = x1p - x0p; // direction of the second line
+        double y10p = y1p - y0p;
+
+        // if this is 0, the lines are parallel. If they go in the
+        // same direction, there is no intersection so m[off] and
+        // m[off+1] will contain infinity, so no miter will be drawn.
+        // If they go in the same direction that means that the start of the
+        // current segment and the end of the previous segment have the same
+        // tangent, in which case this method won't even be involved in
+        // miter drawing because it won't be called by drawMiter (because
+        // (mx == omx && my == omy) will be true, and drawMiter will return
+        // immediately).
+        double den = x10*y10p - x10p*y10; // cross product of the two directions
+        double t = x10p*(y0-y0p) - y10p*(x0-x0p);
+        t /= den; // no guard here: den == 0 is not checked (see the comment above)
+        m[off++] = x0 + t*x10; // point at parameter t on the first line
+        m[off]   = y0 + t*y10;
+    }
+
+    // Return the intersection point of the lines (x0, y0) -> (x1, y1)
+    // and (x0p, y0p) -> (x1p, y1p) in m[off] and m[off+1]
+    private static void safeComputeMiter(final double x0, final double y0,
+                                         final double x1, final double y1,
+                                         final double x0p, final double y0p,
+                                         final double x1p, final double y1p,
+                                         final double[] m, int off)
+    {
+        double x10 = x1 - x0; // direction of the first line
+        double y10 = y1 - y0;
+        double x10p = x1p - x0p; // direction of the second line
+        double y10p = y1p - y0p;
+
+        // den is the cross product of the two direction vectors:
+        // if this is 0, the lines are parallel (or one of them is
+        // degenerate) and there is no single intersection point.
+        // computeMiter() divides by den regardless, so its result is
+        // not finite in that case; this "safe" variant instead stores
+        // the midpoint of (x0, y0) and (x0p, y0p) in m[off] and
+        // m[off+1] and returns early.
+        // Otherwise the intersection is computed with the same
+        // formula as in computeMiter().
+        double den = x10*y10p - x10p*y10;
+        if (den == 0.0d) {
+            m[off++] = (x0 + x0p) / 2.0d; // midpoint fallback
+            m[off]   = (y0 + y0p) / 2.0d;
+            return;
+        }
+        double t = x10p*(y0-y0p) - y10p*(x0-x0p);
+        t /= den;
+        m[off++] = x0 + t*x10; // point at parameter t on the first line
+        m[off] = y0 + t*y10;
+    }
+
+    private void drawMiter(final double pdx, final double pdy,
+                           final double x0, final double y0,
+                           final double dx, final double dy,
+                           double omx, double omy, double mx, double my,
+                           boolean rev)
+    { // emits the miter tip at (x0, y0) if its squared distance is below miterLimitSq
+        if ((mx == omx && my == omy) ||
+            (pdx == 0.0d && pdy == 0.0d) ||
+            (dx == 0.0d && dy == 0.0d))
+        {
+            return; // identical offsets or a null direction: no miter
+        }
+
+        if (rev) { // negate both offsets for the reversed case
+            omx = -omx;
+            omy = -omy;
+            mx  = -mx;
+            my  = -my;
+        }
+
+        computeMiter((x0 - pdx) + omx, (y0 - pdy) + omy, x0 + omx, y0 + omy,
+                     (dx + x0) + mx, (dy + y0) + my, x0 + mx, y0 + my,
+                     miter, 0); // intersects the previous and current offset lines into miter[0..1]
+
+        final double miterX = miter[0];
+        final double miterY = miter[1];
+        double lenSq = (miterX-x0)*(miterX-x0) + (miterY-y0)*(miterY-y0); // squared distance to (x0, y0)
+
+        // If the lines are parallel, lenSq will be either NaN or +inf
+        // (actually, I'm not sure if the latter is possible. The important
+        // thing is that -inf is not possible, because lenSq is a square).
+        // For both of those values, the comparison below will fail and
+        // no miter will be drawn, which is correct.
+        if (lenSq < miterLimitSq) {
+            emitLineTo(miterX, miterY, rev);
+        }
+    }
+
+    @Override
+    public void moveTo(double x0, double y0) {
+        if (prev == DRAWING_OP_TO) {
+            finish(); // terminate the sub-path that was being drawn
+        }
+        this.sx0 = this.cx0 = x0; // new start point is also the current point
+        this.sy0 = this.cy0 = y0;
+        this.cdx = this.sdx = 1.0d; // default slope (1, 0) until a drawing op sets it
+        this.cdy = this.sdy = 0.0d;
+        this.prev = MOVE_TO;
+    }
+
+    @Override
+    public void lineTo(double x1, double y1) {
+        double dx = x1 - cx0;
+        double dy = y1 - cy0;
+        if (dx == 0.0d && dy == 0.0d) {
+            dx = 1.0d; // zero-length segment: use direction (1, 0) so the offset is not null
+        }
+        computeOffset(dx, dy, lineWidth2, offset0);
+        final double mx = offset0[0];
+        final double my = offset0[1];
+
+        drawJoin(cdx, cdy, cx0, cy0, dx, dy, cmx, cmy, mx, my);
+
+        emitLineTo(cx0 + mx, cy0 + my); // segment shifted by +offset goes straight to out
+        emitLineTo( x1 + mx,  y1 + my);
+
+        emitLineToRev(cx0 - mx, cy0 - my); // segment shifted by -offset goes to the reverse stack
+        emitLineToRev( x1 - mx,  y1 - my);
+
+        this.cmx = mx; // remember offset, slope and position for the next join
+        this.cmy = my;
+        this.cdx = dx;
+        this.cdy = dy;
+        this.cx0 = x1;
+        this.cy0 = y1;
+        this.prev = DRAWING_OP_TO;
+    }
+
+    @Override
+    public void closePath() {
+        if (prev != DRAWING_OP_TO) {
+            if (prev == CLOSE) {
+                return; // nothing drawn or moved since the last close
+            }
+            emitMoveTo(cx0, cy0 - lineWidth2); // prev == MOVE_TO: stroke the lone point via finish()
+            this.cmx = this.smx = 0.0d;
+            this.cmy = this.smy = -lineWidth2;
+            this.cdx = this.sdx = 1.0d;
+            this.cdy = this.sdy = 0.0d;
+            finish();
+            return;
+        }
+
+        if (cx0 != sx0 || cy0 != sy0) {
+            lineTo(sx0, sy0); // add the closing segment back to the start point
+        }
+
+        drawJoin(cdx, cdy, cx0, cy0, sdx, sdy, cmx, cmy, smx, smy); // join last and first segments
+
+        emitLineTo(sx0 + smx, sy0 + smy);
+
+        emitMoveTo(sx0 - smx, sy0 - smy); // the reversed side is emitted after its own moveTo
+        emitReverse();
+
+        this.prev = CLOSE;
+        emitClose();
+    }
+
+    private void emitReverse() {
+        reverse.popAll(out); // pops everything pushed by the emit*Rev() methods into out
+    }
+
+    @Override
+    public void pathDone() {
+        if (prev == DRAWING_OP_TO) {
+            finish(); // terminate the sub-path that was being drawn
+        }
+
+        out.pathDone();
+
+        // this shouldn't matter since this object won't be used
+        // after the call to this method.
+        this.prev = CLOSE;
+
+        // Dispose this instance:
+        dispose();
+    }
+
+    private void finish() { // cap at the current point, reversed side, cap at the start point, then close
+        if (capStyle == CAP_ROUND) {
+            drawRoundCap(cx0, cy0, cmx, cmy);
+        } else if (capStyle == CAP_SQUARE) {
+            emitLineTo(cx0 - cmy + cmx, cy0 + cmx + cmy);
+            emitLineTo(cx0 - cmy - cmx, cy0 + cmx - cmy);
+        } // any other cap style (CAP_BUTT) emits nothing here
+
+        emitReverse();
+
+        if (capStyle == CAP_ROUND) {
+            drawRoundCap(sx0, sy0, -smx, -smy); // same cap code, with the start offset negated
+        } else if (capStyle == CAP_SQUARE) {
+            emitLineTo(sx0 + smy - smx, sy0 - smx - smy);
+            emitLineTo(sx0 + smy + smx, sy0 - smx + smy);
+        }
+
+        emitClose();
+    }
+
+    private void emitMoveTo(final double x0, final double y0) {
+        out.moveTo(x0, y0); // forwarded directly to the output consumer
+    }
+
+    private void emitLineTo(final double x1, final double y1) {
+        out.lineTo(x1, y1); // forwarded directly to the output consumer
+    }
+
+    private void emitLineToRev(final double x1, final double y1) {
+        reverse.pushLine(x1, y1); // deferred: stays on the reverse stack until emitReverse()
+    }
+
+    private void emitLineTo(final double x1, final double y1,
+                            final boolean rev)
+    { // rev selects the reverse stack, otherwise the line goes straight to out
+        if (rev) {
+            emitLineToRev(x1, y1);
+        } else {
+            emitLineTo(x1, y1);
+        }
+    }
+
+    private void emitQuadTo(final double x1, final double y1,
+                            final double x2, final double y2)
+    {
+        out.quadTo(x1, y1, x2, y2); // forwarded directly to the output consumer
+    }
+
+    private void emitQuadToRev(final double x0, final double y0,
+                               final double x1, final double y1)
+    {
+        reverse.pushQuad(x0, y0, x1, y1); // deferred: stays on the reverse stack until emitReverse()
+    }
+
+    private void emitCurveTo(final double x1, final double y1,
+                             final double x2, final double y2,
+                             final double x3, final double y3)
+    {
+        out.curveTo(x1, y1, x2, y2, x3, y3); // forwarded directly to the output consumer
+    }
+
+    private void emitCurveToRev(final double x0, final double y0,
+                                final double x1, final double y1,
+                                final double x2, final double y2)
+    {
+        reverse.pushCubic(x0, y0, x1, y1, x2, y2); // deferred: stays on the reverse stack until emitReverse()
+    }
+
+    private void emitCurveTo(final double x0, final double y0,
+                             final double x1, final double y1,
+                             final double x2, final double y2,
+                             final double x3, final double y3, final boolean rev)
+    { // takes all four control points and uses the three that the chosen direction needs
+        if (rev) {
+            reverse.pushCubic(x0, y0, x1, y1, x2, y2); // reversed: (x3, y3) is not pushed
+        } else {
+            out.curveTo(x1, y1, x2, y2, x3, y3); // forward: (x0, y0) is not passed
+        }
+    }
+
+    private void emitClose() {
+        out.closePath(); // forwarded directly to the output consumer
+    }
+
+    private void drawJoin(double pdx, double pdy,
+                          double x0, double y0,
+                          double dx, double dy,
+                          double omx, double omy,
+                          double mx, double my)
+    { // starts the outline on the first drawing op, otherwise joins the previous and new segments
+        if (prev != DRAWING_OP_TO) {
+            emitMoveTo(x0 + mx, y0 + my); // first drawing op of the sub-path: start the outline
+            this.sdx = dx; // record the starting slope and offset (used by closePath / finish)
+            this.sdy = dy;
+            this.smx = mx;
+            this.smy = my;
+        } else {
+            boolean cw = isCW(pdx, pdy, dx, dy);
+            if (joinStyle == JOIN_MITER) {
+                drawMiter(pdx, pdy, x0, y0, dx, dy, omx, omy, mx, my, cw);
+            } else if (joinStyle == JOIN_ROUND) {
+                drawRoundJoin(x0, y0,
+                              omx, omy,
+                              mx, my, cw,
+                              ROUND_JOIN_THRESHOLD);
+            } // any other join style (JOIN_BEVEL) draws no extra geometry
+            emitLineTo(x0, y0, !cw); // the join point itself goes to the side opposite the join
+        }
+        prev = DRAWING_OP_TO;
+    }
+
+    /**
+     * Tells whether the points (x1, y1) and (x2, y2) are close to each
+     * other: both coordinate pairs must pass {@code DHelpers.within} with
+     * the tolerance ERR. Comparing per axis avoids computing a true
+     * distance; ERR is expected to be small so little is lost.
+     *
+     * @param ERR tolerance, asserted to be strictly positive
+     */
+    private static boolean within(final double x1, final double y1,
+                                  final double x2, final double y2,
+                                  final double ERR)
+    {
+        assert ERR > 0 : "";
+        if (!DHelpers.within(x1, x2, ERR)) {
+            // x coordinates already differ too much: y is not checked
+            return false;
+        }
+        return DHelpers.within(y1, y2, ERR);
+    }
+
+    /**
+     * Fills {@code left} and {@code right} with the two lines obtained by
+     * shifting the segment (x1, y1)-(x2, y2) by plus and minus the offset
+     * that {@code computeOffset} stores in {@code offset0} for the segment
+     * direction and {@code lineWidth2}.
+     *
+     * @param left receives 4 coordinates: both end points plus the offset
+     * @param right receives 4 coordinates: both end points minus the offset
+     */
+    private void getLineOffsets(double x1, double y1,
+                                double x2, double y2,
+                                double[] left, double[] right) {
+        final double dx = x2 - x1;
+        final double dy = y2 - y1;
+        computeOffset(dx, dy, lineWidth2, offset0);
+
+        final double ox = offset0[0];
+        final double oy = offset0[1];
+
+        // left side: segment shifted by +offset
+        left[0] = x1 + ox;
+        left[1] = y1 + oy;
+        left[2] = x2 + ox;
+        left[3] = y2 + oy;
+
+        // right side: segment shifted by -offset
+        right[0] = x1 - ox;
+        right[1] = y1 - oy;
+        right[2] = x2 - ox;
+        right[3] = y2 - oy;
+    }
+
+    /**
+     * Computes the left and right offset curves of the cubic stored in
+     * {@code pts[off .. off + 7]} (points p1..p4 as x, y pairs). Offsets
+     * are obtained from {@code computeOffset} using {@code lineWidth2}.
+     *
+     * @param pts coordinates array holding the cubic control points
+     * @param off index of the first coordinate (x1) in {@code pts}
+     * @param leftOff receives the curve built with the positive offsets
+     * @param rightOff receives the curve built with the negative offsets
+     * @return the number of coordinates written to each output array:
+     *         4 when the curve is handled as the line p1-p4,
+     *         8 when a cubic is produced
+     */
+    private int computeOffsetCubic(double[] pts, final int off,
+                                   double[] leftOff, double[] rightOff)
+    {
+        // if p1=p2 or p3=p4 it means that the derivative at the endpoint
+        // vanishes, which creates problems with computeOffset. Usually
+        // this happens when this stroker object is trying to widen
+        // a curve with a cusp. What happens is that curveTo splits
+        // the input curve at the cusp, and passes it to this function.
+        // because of inaccuracies in the splitting, we consider points
+        // equal if they're very close to each other.
+        final double x1 = pts[off + 0], y1 = pts[off + 1];
+        final double x2 = pts[off + 2], y2 = pts[off + 3];
+        final double x3 = pts[off + 4], y3 = pts[off + 5];
+        final double x4 = pts[off + 6], y4 = pts[off + 7];
+
+        double dx4 = x4 - x3;
+        double dy4 = y4 - y3;
+        double dx1 = x2 - x1;
+        double dy1 = y2 - y1;
+
+        // if p1 == p2 && p3 == p4: handle the curve as the line p1->p4.
+        // if only one end is degenerate, use the chord to the next control
+        // point (p3 - p1 or p4 - p2) as the tangent at that end instead.
+        final boolean p1eqp2 = within(x1, y1, x2, y2, 6.0d * Math.ulp(y2));
+        final boolean p3eqp4 = within(x3, y3, x4, y4, 6.0d * Math.ulp(y4));
+        if (p1eqp2 && p3eqp4) {
+            getLineOffsets(x1, y1, x4, y4, leftOff, rightOff);
+            return 4;
+        } else if (p1eqp2) {
+            dx1 = x3 - x1;
+            dy1 = y3 - y1;
+        } else if (p3eqp4) {
+            dx4 = x4 - x2;
+            dy4 = y4 - y2;
+        }
+
+        // if p2-p1 and p4-p3 are parallel, that must mean this curve is a line
+        // (tested as dot^2 == |d1|^2 * |d4|^2 within a few ulps)
+        double dotsq = (dx1 * dx4 + dy1 * dy4);
+        dotsq *= dotsq;
+        double l1sq = dx1 * dx1 + dy1 * dy1, l4sq = dx4 * dx4 + dy4 * dy4;
+        if (DHelpers.within(dotsq, l1sq * l4sq, 4.0d * Math.ulp(dotsq))) {
+            getLineOffsets(x1, y1, x4, y4, leftOff, rightOff);
+            return 4;
+        }
+
+//      What we're trying to do in this function is to approximate an ideal
+//      offset curve (call it I) of the input curve B using a bezier curve Bp.
+//      The constraints I use to get the equations are:
+//
+//      1. The computed curve Bp should go through I(0) and I(1). These are
+//      x1p, y1p, x4p, y4p, which are p1p and p4p. We still need to find
+//      4 variables: the x and y components of p2p and p3p (i.e. x2p, y2p, x3p, y3p).
+//
+//      2. Bp should have slope equal in absolute value to I at the endpoints. So,
+//      (by the way, the operator || in the comments below means "aligned with".
+//      It is defined on vectors, so when we say I'(0) || Bp'(0) we mean that
+//      vectors I'(0) and Bp'(0) are aligned, which is the same as saying
+//      that the tangent lines of I and Bp at 0 are parallel. Mathematically
+//      this means (I'(t) || Bp'(t)) <==> (I'(t) = c * Bp'(t)) where c is some
+//      nonzero constant.)
+//      I'(0) || Bp'(0) and I'(1) || Bp'(1). Obviously, I'(0) || B'(0) and
+//      I'(1) || B'(1); therefore, Bp'(0) || B'(0) and Bp'(1) || B'(1).
+//      We know that Bp'(0) || (p2p-p1p) and Bp'(1) || (p4p-p3p) and the same
+//      is true for any bezier curve; therefore, we get the equations
+//          (1) p2p = c1 * (p2-p1) + p1p
+//          (2) p3p = c2 * (p4-p3) + p4p
+//      We know p1p, p4p, p2, p1, p3, and p4; therefore, this reduces the number
+//      of unknowns from 4 to 2 (i.e. just c1 and c2).
+//      To eliminate these 2 unknowns we use the following constraint:
+//
+//      3. Bp(0.5) == I(0.5). Bp(0.5)=(x,y) and I(0.5)=(xi,yi), and I should note
+//      that I(0.5) is *the only* reason for computing dxm,dym. This gives us
+//          (3) Bp(0.5) = (p1p + 3 * (p2p + p3p) + p4p)/8, which is equivalent to
+//          (4) p2p + p3p = (Bp(0.5)*8 - p1p - p4p) / 3
+//      We can substitute (1) and (2) from above into (4) and we get:
+//          (5) c1*(p2-p1) + c2*(p4-p3) = (Bp(0.5)*8 - p1p - p4p)/3 - p1p - p4p
+//      which is equivalent to
+//          (6) c1*(p2-p1) + c2*(p4-p3) = (4/3) * (Bp(0.5) * 2 - p1p - p4p)
+//
+//      The right side of this is a 2D vector, and we know I(0.5), which gives us
+//      Bp(0.5), which gives us the value of the right side.
+//      The left side is just a matrix vector multiplication in disguise. It is
+//
+//      [x2-x1, x4-x3][c1]
+//      [y2-y1, y4-y3][c2]
+//      which, is equal to
+//      [dx1, dx4][c1]
+//      [dy1, dy4][c2]
+//      At this point we are left with a simple linear system and we solve it by
+//      getting the inverse of the matrix above. Then we use [c1,c2] to compute
+//      p2p and p3p.
+
+        // (x, y) is B(0.5)
+        double x = (x1 + 3.0d * (x2 + x3) + x4) / 8.0d;
+        double y = (y1 + 3.0d * (y2 + y3) + y4) / 8.0d;
+        // (dxm,dym) is some tangent of B at t=0.5. This means it's equal to
+        // c*B'(0.5) for some constant c.
+        double dxm = x3 + x4 - x1 - x2, dym = y3 + y4 - y1 - y2;
+
+        // this computes the offsets at t=0, 0.5, 1, using the property that
+        // for any bezier curve the vectors p2-p1 and p4-p3 are parallel to
+        // the (dx/dt, dy/dt) vectors at the endpoints.
+        computeOffset(dx1, dy1, lineWidth2, offset0);
+        computeOffset(dxm, dym, lineWidth2, offset1);
+        computeOffset(dx4, dy4, lineWidth2, offset2);
+        double x1p = x1 + offset0[0]; // start
+        double y1p = y1 + offset0[1]; // point
+        double xi  = x  + offset1[0]; // interpolation
+        double yi  = y  + offset1[1]; // point
+        double x4p = x4 + offset2[0]; // end
+        double y4p = y4 + offset2[1]; // point
+
+        // (4/3) / det of the matrix [dx1, dx4; dy1, dy4] from equation (6)
+        double invdet43 = 4.0d / (3.0d * (dx1 * dy4 - dy1 * dx4));
+
+        // right side of equation (6) without the 4/3 factor: 2*I(0.5) - p1p - p4p
+        double two_pi_m_p1_m_p4x = 2.0d * xi - x1p - x4p;
+        double two_pi_m_p1_m_p4y = 2.0d * yi - y1p - y4p;
+        double c1 = invdet43 * (dy4 * two_pi_m_p1_m_p4x - dx4 * two_pi_m_p1_m_p4y);
+        double c2 = invdet43 * (dx1 * two_pi_m_p1_m_p4y - dy1 * two_pi_m_p1_m_p4x);
+
+        // equations (1) and (2)
+        double x2p, y2p, x3p, y3p;
+        x2p = x1p + c1*dx1;
+        y2p = y1p + c1*dy1;
+        x3p = x4p + c2*dx4;
+        y3p = y4p + c2*dy4;
+
+        leftOff[0] = x1p; leftOff[1] = y1p;
+        leftOff[2] = x2p; leftOff[3] = y2p;
+        leftOff[4] = x3p; leftOff[5] = y3p;
+        leftOff[6] = x4p; leftOff[7] = y4p;
+
+        // same computation for the other side using the negated offsets.
+        // xi, yi already contain +offset1, hence the factor 2 to reach -offset1.
+        x1p = x1 - offset0[0]; y1p = y1 - offset0[1];
+        xi = xi - 2.0d * offset1[0]; yi = yi - 2.0d * offset1[1];
+        x4p = x4 - offset2[0]; y4p = y4 - offset2[1];
+
+        two_pi_m_p1_m_p4x = 2.0d * xi - x1p - x4p;
+        two_pi_m_p1_m_p4y = 2.0d * yi - y1p - y4p;
+        c1 = invdet43 * (dy4 * two_pi_m_p1_m_p4x - dx4 * two_pi_m_p1_m_p4y);
+        c2 = invdet43 * (dx1 * two_pi_m_p1_m_p4y - dy1 * two_pi_m_p1_m_p4x);
+
+        x2p = x1p + c1*dx1;
+        y2p = y1p + c1*dy1;
+        x3p = x4p + c2*dx4;
+        y3p = y4p + c2*dy4;
+
+        rightOff[0] = x1p; rightOff[1] = y1p;
+        rightOff[2] = x2p; rightOff[3] = y2p;
+        rightOff[4] = x3p; rightOff[5] = y3p;
+        rightOff[6] = x4p; rightOff[7] = y4p;
+        return 8;
+    }
+
+    /**
+     * Computes the left and right offset curves of the quadratic stored in
+     * {@code pts[off .. off + 5]} (points p1..p3 as x, y pairs).
+     * The end points are shifted by the offsets that {@code computeOffset}
+     * returns for the end tangents and {@code lineWidth2}; the middle
+     * control point is filled in by {@code safeComputeMiter} at index 2.
+     * NOTE(review): safeComputeMiter is presumably the intersection of the
+     * two offset tangent lines - confirm against its definition.
+     *
+     * @param pts coordinates array holding the quad control points
+     * @param off index of the first coordinate (x1) in {@code pts}
+     * @param leftOff receives the curve built with the positive offsets
+     * @param rightOff receives the curve built with the negative offsets
+     * @return the kind of curve stored in the left and right arrays, as a
+     *         coordinate count: 4 when the curve is handled as the line
+     *         p1-p3, 6 when a quad is produced
+     */
+    private int computeOffsetQuad(double[] pts, final int off,
+                                  double[] leftOff, double[] rightOff)
+    {
+        final double x1 = pts[off + 0], y1 = pts[off + 1];
+        final double x2 = pts[off + 2], y2 = pts[off + 3];
+        final double x3 = pts[off + 4], y3 = pts[off + 5];
+
+        final double dx3 = x3 - x2;
+        final double dy3 = y3 - y2;
+        final double dx1 = x2 - x1;
+        final double dy1 = y2 - y1;
+
+        // if p1=p2 or p2=p3 it means that the derivative at the endpoint
+        // vanishes, which creates problems with computeOffset. Usually
+        // this happens when this stroker object is trying to widen
+        // a curve with a cusp. What happens is that quadTo splits
+        // the input curve at the cusp, and passes it to this function.
+        // because of inaccuracies in the splitting, we consider points
+        // equal if they're very close to each other.
+
+        // if p1 == p2 or p2 == p3: handle the curve as the line p1->p3.
+        final boolean p1eqp2 = within(x1, y1, x2, y2, 6.0d * Math.ulp(y2));
+        final boolean p2eqp3 = within(x2, y2, x3, y3, 6.0d * Math.ulp(y3));
+        if (p1eqp2 || p2eqp3) {
+            getLineOffsets(x1, y1, x3, y3, leftOff, rightOff);
+            return 4;
+        }
+
+        // if p2-p1 and p3-p2 are parallel, that must mean this curve is a line
+        // (tested as dot^2 == |d1|^2 * |d3|^2 within a few ulps)
+        double dotsq = (dx1 * dx3 + dy1 * dy3);
+        dotsq *= dotsq;
+        double l1sq = dx1 * dx1 + dy1 * dy1, l3sq = dx3 * dx3 + dy3 * dy3;
+        if (DHelpers.within(dotsq, l1sq * l3sq, 4.0d * Math.ulp(dotsq))) {
+            getLineOffsets(x1, y1, x3, y3, leftOff, rightOff);
+            return 4;
+        }
+
+        // this computes the offsets at t=0 and t=1, using the property that
+        // for any bezier curve the vectors p2-p1 and p3-p2 are parallel to
+        // the (dx/dt, dy/dt) vectors at the endpoints.
+        computeOffset(dx1, dy1, lineWidth2, offset0);
+        computeOffset(dx3, dy3, lineWidth2, offset1);
+
+        double x1p = x1 + offset0[0]; // start
+        double y1p = y1 + offset0[1]; // point
+        double x3p = x3 + offset1[0]; // end
+        double y3p = y3 + offset1[1]; // point
+        // middle control point goes to leftOff[2], leftOff[3]
+        safeComputeMiter(x1p, y1p, x1p+dx1, y1p+dy1, x3p, y3p, x3p-dx3, y3p-dy3, leftOff, 2);
+        leftOff[0] = x1p; leftOff[1] = y1p;
+        leftOff[4] = x3p; leftOff[5] = y3p;
+
+        // same computation for the other side using the negated offsets
+        x1p = x1 - offset0[0]; y1p = y1 - offset0[1];
+        x3p = x3 - offset1[0]; y3p = y3 - offset1[1];
+        safeComputeMiter(x1p, y1p, x1p+dx1, y1p+dy1, x3p, y3p, x3p-dx3, y3p-dy3, rightOff, 2);
+        rightOff[0] = x1p; rightOff[1] = y1p;
+        rightOff[4] = x3p; rightOff[5] = y3p;
+        return 6;
+    }
+
+    /**
+     * Finds the values of t at which the curve in {@code pts} should be
+     * subdivided in order to get good offset curves at a distance
+     * {@code w} from the middle curve.
+     *
+     * @param c curve helper, (re)initialized by this method
+     * @param pts control points of the curve, starting at index 0
+     * @param ts receives the subdivision parameters, sorted
+     * @param type number of coordinates of the curve: 8 (cubic) or 6 (quad)
+     * @param w offset distance
+     * @return the number of values stored in {@code ts}
+     */
+    private static int findSubdivPoints(final DCurve c, double[] pts, double[] ts,
+                                        final int type, final double w)
+    {
+        final double dx12 = pts[2] - pts[0];
+        final double dy12 = pts[3] - pts[1];
+
+        if (dy12 == 0.0d || dx12 == 0.0d) {
+            // the curve is already parallel to either axis: we gain
+            // nothing from rotating it.
+            c.set(pts, type);
+        } else {
+            // rotate the curve so that the first vector of the control
+            // polygon is parallel to the x-axis. This ensures that rotated
+            // quarter circles won't be subdivided.
+            final double hypot = Math.sqrt(dx12 * dx12 + dy12 * dy12);
+            final double cos = dx12 / hypot;
+            final double sin = dy12 / hypot;
+
+            final double x1 = cos * pts[0] + sin * pts[1];
+            final double y1 = cos * pts[1] - sin * pts[0];
+            final double x2 = cos * pts[2] + sin * pts[3];
+            final double y2 = cos * pts[3] - sin * pts[2];
+            final double x3 = cos * pts[4] + sin * pts[5];
+            final double y3 = cos * pts[5] - sin * pts[4];
+
+            if (type == 8) {
+                final double x4 = cos * pts[6] + sin * pts[7];
+                final double y4 = cos * pts[7] - sin * pts[6];
+                c.set(x1, y1, x2, y2, x3, y3, x4, y4);
+            } else if (type == 6) {
+                c.set(x1, y1, x2, y2, x3, y3);
+            }
+            // any other type leaves the curve helper untouched
+        }
+
+        int count = 0;
+        // subdivide at values of t such that the remaining rotated
+        // curves are monotonic in x and y.
+        count += c.dxRoots(ts, count);
+        count += c.dyRoots(ts, count);
+
+        // subdivide at inflection points (cubics only: quadratic curves
+        // can't have inflection points).
+        if (type == 8) {
+            count += c.infPoints(ts, count);
+        }
+
+        // subdivide at points where one of the offset curves will have a
+        // cusp, i.e. at ts where the radius of curvature is equal to w.
+        count += c.rootsOfROCMinusW(ts, count, w, 0.0001d);
+
+        count = DHelpers.filterOutNotInAB(ts, 0, count, 0.0001d, 0.9999d);
+        DHelpers.isort(ts, 0, count);
+        return count;
+    }
+
+    /**
+     * Widens the cubic going from the current point (cx0, cy0) through the
+     * control points (x1, y1) and (x2, y2) to (x3, y3).
+     * The join with the previous segment is drawn first, then the curve is
+     * subdivided at the parameters returned by findSubdivPoints and, for
+     * each piece, the left offset is emitted directly while the right
+     * offset goes through the *Rev emitters. Finally the current offset,
+     * direction and position are updated.
+     */
+    @Override public void curveTo(double x1, double y1,
+                                  double x2, double y2,
+                                  double x3, double y3)
+    {
+        final double[] mid = middle;
+
+        mid[0] = cx0; mid[1] = cy0;
+        mid[2] = x1;  mid[3] = y1;
+        mid[4] = x2;  mid[5] = y2;
+        mid[6] = x3;  mid[7] = y3;
+
+        // need these so we can update the state at the end of this method
+        final double xf = mid[6], yf = mid[7];
+        // start (s) and final (f) tangent directions
+        double dxs = mid[2] - mid[0];
+        double dys = mid[3] - mid[1];
+        double dxf = mid[6] - mid[4];
+        double dyf = mid[7] - mid[5];
+
+        // when an end tangent vanishes, fall back to the chord towards the
+        // next control point, then to the chord p1->p4
+        boolean p1eqp2 = (dxs == 0.0d && dys == 0.0d);
+        boolean p3eqp4 = (dxf == 0.0d && dyf == 0.0d);
+        if (p1eqp2) {
+            dxs = mid[4] - mid[0];
+            dys = mid[5] - mid[1];
+            if (dxs == 0.0d && dys == 0.0d) {
+                dxs = mid[6] - mid[0];
+                dys = mid[7] - mid[1];
+            }
+        }
+        if (p3eqp4) {
+            dxf = mid[6] - mid[2];
+            dyf = mid[7] - mid[3];
+            if (dxf == 0.0d && dyf == 0.0d) {
+                dxf = mid[6] - mid[0];
+                dyf = mid[7] - mid[1];
+            }
+        }
+        if (dxs == 0.0d && dys == 0.0d) {
+            // this happens if the "curve" is just a point
+            lineTo(mid[0], mid[1]);
+            return;
+        }
+
+        // if these vectors are too small, normalize them, to avoid future
+        // precision problems.
+        if (Math.abs(dxs) < 0.1d && Math.abs(dys) < 0.1d) {
+            double len = Math.sqrt(dxs*dxs + dys*dys);
+            dxs /= len;
+            dys /= len;
+        }
+        if (Math.abs(dxf) < 0.1d && Math.abs(dyf) < 0.1d) {
+            double len = Math.sqrt(dxf*dxf + dyf*dyf);
+            dxf /= len;
+            dyf /= len;
+        }
+
+        // join with the previous segment using the offset of the start tangent
+        computeOffset(dxs, dys, lineWidth2, offset0);
+        drawJoin(cdx, cdy, cx0, cy0, dxs, dys, cmx, cmy, offset0[0], offset0[1]);
+
+        final int nSplits = findSubdivPoints(curve, mid, subdivTs, 8, lineWidth2);
+
+        // subdivide in place: pieces are laid out every 6 coordinates as
+        // consecutive cubics share one point. (t - prevT) / (1 - prevT)
+        // re-expresses t relative to the part that remains after prevT.
+        // NOTE(review): argument order presumably
+        // (t, src, srcOff, left, leftOff, right, rightOff) - confirm in DHelpers
+        double prevT = 0.0d;
+        for (int i = 0, off = 0; i < nSplits; i++, off += 6) {
+            final double t = subdivTs[i];
+            DHelpers.subdivideCubicAt((t - prevT) / (1.0d - prevT),
+                                     mid, off, mid, off, mid, off + 6);
+            prevT = t;
+        }
+
+        final double[] l = lp;
+        final double[] r = rp;
+
+        // kind is the coordinate count of the last computed offset: 8 or 4
+        int kind = 0;
+        for (int i = 0, off = 0; i <= nSplits; i++, off += 6) {
+            kind = computeOffsetCubic(mid, off, l, r);
+
+            // connect to the start of this piece's left offset
+            emitLineTo(l[0], l[1]);
+
+            switch(kind) {
+            case 8:
+                emitCurveTo(l[2], l[3], l[4], l[5], l[6], l[7]);
+                emitCurveToRev(r[0], r[1], r[2], r[3], r[4], r[5]);
+                break;
+            case 4:
+                emitLineTo(l[2], l[3]);
+                emitLineToRev(r[0], r[1]);
+                break;
+            default:
+            }
+            // end point of this piece's right offset
+            emitLineToRev(r[kind - 2], r[kind - 1]);
+        }
+
+        // half the difference between the last left and right end points,
+        // i.e. the offset at the end of the curve
+        this.cmx = (l[kind - 2] - r[kind - 2]) / 2.0d;
+        this.cmy = (l[kind - 1] - r[kind - 1]) / 2.0d;
+        this.cdx = dxf;
+        this.cdy = dyf;
+        this.cx0 = xf;
+        this.cy0 = yf;
+        this.prev = DRAWING_OP_TO;
+    }
+
+    /**
+     * Widens the quadratic going from the current point (cx0, cy0) through
+     * the control point (x1, y1) to (x2, y2).
+     * The join with the previous segment is drawn first, then the curve is
+     * subdivided at the parameters returned by findSubdivPoints and, for
+     * each piece, the left offset is emitted directly while the right
+     * offset goes through the *Rev emitters. Finally the current offset,
+     * direction and position are updated.
+     */
+    @Override public void quadTo(double x1, double y1, double x2, double y2) {
+        final double[] mid = middle;
+
+        mid[0] = cx0; mid[1] = cy0;
+        mid[2] = x1;  mid[3] = y1;
+        mid[4] = x2;  mid[5] = y2;
+
+        // need these so we can update the state at the end of this method
+        final double xf = mid[4], yf = mid[5];
+        // start (s) and final (f) tangent directions
+        double dxs = mid[2] - mid[0];
+        double dys = mid[3] - mid[1];
+        double dxf = mid[4] - mid[2];
+        double dyf = mid[5] - mid[3];
+        // when either end tangent vanishes, use the chord p1->p3 for both
+        if ((dxs == 0.0d && dys == 0.0d) || (dxf == 0.0d && dyf == 0.0d)) {
+            dxs = dxf = mid[4] - mid[0];
+            dys = dyf = mid[5] - mid[1];
+        }
+        if (dxs == 0.0d && dys == 0.0d) {
+            // this happens if the "curve" is just a point
+            lineTo(mid[0], mid[1]);
+            return;
+        }
+        // if these vectors are too small, normalize them, to avoid future
+        // precision problems.
+        if (Math.abs(dxs) < 0.1d && Math.abs(dys) < 0.1d) {
+            double len = Math.sqrt(dxs*dxs + dys*dys);
+            dxs /= len;
+            dys /= len;
+        }
+        if (Math.abs(dxf) < 0.1d && Math.abs(dyf) < 0.1d) {
+            double len = Math.sqrt(dxf*dxf + dyf*dyf);
+            dxf /= len;
+            dyf /= len;
+        }
+
+        // join with the previous segment using the offset of the start tangent
+        computeOffset(dxs, dys, lineWidth2, offset0);
+        drawJoin(cdx, cdy, cx0, cy0, dxs, dys, cmx, cmy, offset0[0], offset0[1]);
+
+        int nSplits = findSubdivPoints(curve, mid, subdivTs, 6, lineWidth2);
+
+        // subdivide in place: pieces are laid out every 4 coordinates as
+        // consecutive quads share one point. (t - prevt) / (1 - prevt)
+        // re-expresses t relative to the part that remains after prevt.
+        // NOTE(review): argument order presumably
+        // (t, src, srcOff, left, leftOff, right, rightOff) - confirm in DHelpers
+        double prevt = 0.0d;
+        for (int i = 0, off = 0; i < nSplits; i++, off += 4) {
+            final double t = subdivTs[i];
+            DHelpers.subdivideQuadAt((t - prevt) / (1.0d - prevt),
+                                    mid, off, mid, off, mid, off + 4);
+            prevt = t;
+        }
+
+        final double[] l = lp;
+        final double[] r = rp;
+
+        // kind is the coordinate count of the last computed offset: 6 or 4
+        int kind = 0;
+        for (int i = 0, off = 0; i <= nSplits; i++, off += 4) {
+            kind = computeOffsetQuad(mid, off, l, r);
+
+            // connect to the start of this piece's left offset
+            emitLineTo(l[0], l[1]);
+
+            switch(kind) {
+            case 6:
+                emitQuadTo(l[2], l[3], l[4], l[5]);
+                emitQuadToRev(r[0], r[1], r[2], r[3]);
+                break;
+            case 4:
+                emitLineTo(l[2], l[3]);
+                emitLineToRev(r[0], r[1]);
+                break;
+            default:
+            }
+            // end point of this piece's right offset
+            emitLineToRev(r[kind - 2], r[kind - 1]);
+        }
+
+        // half the difference between the last left and right end points,
+        // i.e. the offset at the end of the curve
+        this.cmx = (l[kind - 2] - r[kind - 2]) / 2.0d;
+        this.cmy = (l[kind - 1] - r[kind - 1]) / 2.0d;
+        this.cdx = dxf;
+        this.cdy = dyf;
+        this.cx0 = xf;
+        this.cy0 = yf;
+        this.prev = DRAWING_OP_TO;
+    }
+
+    /**
+     * Not supported by this class.
+     *
+     * @throws InternalError always
+     */
+    @Override
+    public long getNativeConsumer() {
+        final InternalError unsupported =
+            new InternalError("Stroker doesn't use a native consumer");
+        throw unsupported;
+    }
+
+    /**
+     * A stack of polynomial curves (lines, quads and cubics) where each
+     * curve shares endpoints with adjacent ones. Coordinates are stored in
+     * {@code curves}; {@code curveTypes} holds one type tag per curve.
+     */
+    static final class PolyStack {
+        private static final byte TYPE_LINETO  = (byte) 0;
+        private static final byte TYPE_QUADTO  = (byte) 1;
+        private static final byte TYPE_CUBICTO = (byte) 2;
+
+        // curves capacity = edges count (8192) = edges x 2 (coords)
+        private static final int INITIAL_CURVES_COUNT = INITIAL_EDGES_COUNT << 1;
+
+        // types capacity = edges count (4096)
+        private static final int INITIAL_TYPES_COUNT = INITIAL_EDGES_COUNT;
+
+        // coordinates of the stored curves
+        double[] curves;
+        // index of the next free slot in curves
+        int end;
+        // type tag (TYPE_*) of each stored curve
+        byte[] curveTypes;
+        // number of stored curves = next free slot in curveTypes
+        int numCurves;
+
+        // per-thread renderer context
+        final DRendererContext rdrCtx;
+
+        // curves ref (dirty)
+        final DoubleArrayCache.Reference curves_ref;
+        // curveTypes ref (dirty)
+        final ByteArrayCache.Reference curveTypes_ref;
+
+        // used marks (stats only)
+        int curveTypesUseMark;
+        int curvesUseMark;
+
+        /**
+         * Constructor
+         * @param rdrCtx per-thread renderer context
+         */
+        PolyStack(final DRendererContext rdrCtx) {
+            this.rdrCtx = rdrCtx;
+
+            curves_ref = rdrCtx.newDirtyDoubleArrayRef(INITIAL_CURVES_COUNT); // 32K
+            curves     = curves_ref.initial;
+
+            curveTypes_ref = rdrCtx.newDirtyByteArrayRef(INITIAL_TYPES_COUNT); // 4K
+            curveTypes     = curveTypes_ref.initial;
+            numCurves = 0;
+            end = 0;
+
+            if (DO_STATS) {
+                curveTypesUseMark = 0;
+                curvesUseMark = 0;
+            }
+        }
+
+        /**
+         * Disposes this PolyStack:
+         * clean up before reusing this instance
+         */
+        void dispose() {
+            end = 0;
+            numCurves = 0;
+
+            if (DO_STATS) {
+                rdrCtx.stats.stat_rdr_poly_stack_types.add(curveTypesUseMark);
+                rdrCtx.stats.stat_rdr_poly_stack_curves.add(curvesUseMark);
+                rdrCtx.stats.hist_rdr_poly_stack_curves.add(curvesUseMark);
+
+                // reset marks
+                curveTypesUseMark = 0;
+                curvesUseMark = 0;
+            }
+
+            // Return arrays:
+            // curves and curveTypes are kept dirty
+            curves     = curves_ref.putArray(curves);
+            curveTypes = curveTypes_ref.putArray(curveTypes);
+        }
+
+        /**
+         * Makes sure that {@code n} more coordinates and one more curve
+         * type can be stored, widening the arrays when needed.
+         * @param n number of coordinates about to be pushed
+         */
+        private void ensureSpace(final int n) {
+            // use subtraction to avoid integer overflow:
+            if (curves.length - end < n) {
+                if (DO_STATS) {
+                    rdrCtx.stats.stat_array_stroker_polystack_curves
+                        .add(end + n);
+                }
+                curves = curves_ref.widenArray(curves, end, end + n);
+            }
+            if (curveTypes.length <= numCurves) {
+                if (DO_STATS) {
+                    rdrCtx.stats.stat_array_stroker_polystack_curveTypes
+                        .add(numCurves + 1);
+                }
+                curveTypes = curveTypes_ref.widenArray(curveTypes,
+                                                       numCurves,
+                                                       numCurves + 1);
+            }
+        }
+
+        /**
+         * Pushes a cubic given by three points; the points are stored in
+         * reverse order: (x2, y2), (x1, y1), (x0, y0).
+         */
+        void pushCubic(double x0, double y0,
+                       double x1, double y1,
+                       double x2, double y2)
+        {
+            ensureSpace(6);
+            curveTypes[numCurves++] = TYPE_CUBICTO;
+            // we reverse the coordinate order to make popping easier
+            final double[] _curves = curves;
+            int e = end;
+            _curves[e++] = x2;    _curves[e++] = y2;
+            _curves[e++] = x1;    _curves[e++] = y1;
+            _curves[e++] = x0;    _curves[e++] = y0;
+            end = e;
+        }
+
+        /**
+         * Pushes a quad given by two points; the points are stored in
+         * reverse order: (x1, y1), (x0, y0).
+         */
+        void pushQuad(double x0, double y0,
+                      double x1, double y1)
+        {
+            ensureSpace(4);
+            curveTypes[numCurves++] = TYPE_QUADTO;
+            // same reversed coordinate order as pushCubic
+            final double[] _curves = curves;
+            int e = end;
+            _curves[e++] = x1;    _curves[e++] = y1;
+            _curves[e++] = x0;    _curves[e++] = y0;
+            end = e;
+        }
+
+        /**
+         * Pushes a line given by its point (x, y).
+         */
+        void pushLine(double x, double y) {
+            ensureSpace(2);
+            curveTypes[numCurves++] = TYPE_LINETO;
+            curves[end++] = x;    curves[end++] = y;
+        }
+
+        /**
+         * Pops all stored curves, last pushed first, sending each one to
+         * the given consumer, then leaves this stack empty.
+         * @param io consumer receiving the lineTo / quadTo / curveTo calls
+         */
+        void popAll(DPathConsumer2D io) {
+            if (DO_STATS) {
+                // update used marks:
+                if (numCurves > curveTypesUseMark) {
+                    curveTypesUseMark = numCurves;
+                }
+                if (end > curvesUseMark) {
+                    curvesUseMark = end;
+                }
+            }
+            final byte[]  _curveTypes = curveTypes;
+            final double[] _curves = curves;
+            int nc = numCurves;
+            int e  = end;
+
+            while (nc != 0) {
+                switch(_curveTypes[--nc]) {
+                case TYPE_LINETO:
+                    e -= 2;
+                    io.lineTo(_curves[e], _curves[e+1]);
+                    continue;
+                case TYPE_QUADTO:
+                    e -= 4;
+                    io.quadTo(_curves[e+0], _curves[e+1],
+                              _curves[e+2], _curves[e+3]);
+                    continue;
+                case TYPE_CUBICTO:
+                    e -= 6;
+                    io.curveTo(_curves[e+0], _curves[e+1],
+                               _curves[e+2], _curves[e+3],
+                               _curves[e+4], _curves[e+5]);
+                    continue;
+                default:
+                }
+            }
+            numCurves = 0;
+            end = 0;
+        }
+
+        /**
+         * Returns one text line per stored curve, last pushed first, made
+         * of the curve kind followed by its stored coordinates.
+         */
+        @Override
+        public String toString() {
+            // StringBuilder avoids re-copying the whole text on every curve
+            final StringBuilder sb = new StringBuilder();
+            int nc = numCurves;
+            int last = end;
+            int len;
+            while (nc != 0) {
+                switch(curveTypes[--nc]) {
+                case TYPE_LINETO:
+                    len = 2;
+                    sb.append("line: ");
+                    break;
+                case TYPE_QUADTO:
+                    len = 4;
+                    sb.append("quad: ");
+                    break;
+                case TYPE_CUBICTO:
+                    len = 6;
+                    sb.append("cubic: ");
+                    break;
+                default:
+                    len = 0;
+                }
+                last -= len;
+                sb.append(Arrays.toString(Arrays.copyOfRange(curves, last, last+len)))
+                  .append("\n");
+            }
+            return sb.toString();
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/DTransformingPathConsumer2D.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import java.awt.geom.AffineTransform;
+import java.awt.geom.Path2D;
+
+final class DTransformingPathConsumer2D {
+
+    DTransformingPathConsumer2D() {
+        // package-private constructor: used by DRendererContext
+    }
+
+    // recycled DPathConsumer2D instance from wrapPath2d()
+    private final Path2DWrapper        wp_Path2DWrapper        = new Path2DWrapper();
+
+    // Returns the recycled wrapper, re-initialized to forward path commands to p2d.
+    DPathConsumer2D wrapPath2d(Path2D.Double p2d) {
+        return wp_Path2DWrapper.init(p2d);
+    }
+
+    // recycled DPathConsumer2D instances from deltaTransformConsumer()
+    private final DeltaScaleFilter     dt_DeltaScaleFilter     = new DeltaScaleFilter();
+    private final DeltaTransformFilter dt_DeltaTransformFilter = new DeltaTransformFilter();
+
+    // Wraps out so that incoming coordinates are multiplied by the linear part
+    // of at (translation is not read); returns out itself for null / identity.
+    DPathConsumer2D deltaTransformConsumer(DPathConsumer2D out,
+                                          AffineTransform at)
+    {
+        if (at == null) {
+            return out;
+        }
+        final double scaleX = at.getScaleX();
+        final double shearX = at.getShearX();
+        final double shearY = at.getShearY();
+        final double scaleY = at.getScaleY();
+        if (shearX != 0.0d || shearY != 0.0d) {
+            // general 2x2 matrix:
+            return dt_DeltaTransformFilter.init(out, scaleX, shearX, shearY, scaleY);
+        }
+        if (scaleX == 1.0d && scaleY == 1.0d) {
+            return out; // identity: no filter needed
+        }
+        return dt_DeltaScaleFilter.init(out, scaleX, scaleY);
+    }
+
+    // recycled DPathConsumer2D instances from inverseDeltaTransformConsumer()
+    private final DeltaScaleFilter     iv_DeltaScaleFilter     = new DeltaScaleFilter();
+    private final DeltaTransformFilter iv_DeltaTransformFilter = new DeltaTransformFilter();
+
+    // Like deltaTransformConsumer() but applies the inverse of the linear part
+    // of at. NOTE(review): no singularity check (zero scale / det == 0) is done.
+    DPathConsumer2D inverseDeltaTransformConsumer(DPathConsumer2D out,
+                                                 AffineTransform at)
+    {
+        if (at == null) {
+            return out;
+        }
+        final double scaleX = at.getScaleX();
+        final double shearX = at.getShearX();
+        final double shearY = at.getShearY();
+        final double scaleY = at.getScaleY();
+        if (shearX != 0.0d || shearY != 0.0d) {
+            // general case: inverse of a 2x2 matrix = adjugate / determinant
+            final double det = scaleX * scaleY - shearX * shearY;
+            final double ixx =  scaleY / det;
+            final double ixy = -shearX / det;
+            final double iyx = -shearY / det;
+            final double iyy =  scaleX / det;
+            return iv_DeltaTransformFilter.init(out, ixx, ixy, iyx, iyy);
+        }
+        if (scaleX == 1.0d && scaleY == 1.0d) {
+            return out; // identity: nothing to invert
+        }
+        return iv_DeltaScaleFilter.init(out, 1.0d / scaleX, 1.0d / scaleY);
+    }
+
+
+    // Path consumer multiplying x by sx and y by sy before forwarding to out.
+    static final class DeltaScaleFilter implements DPathConsumer2D {
+        private DPathConsumer2D out;
+        private double sx, sy;
+
+        DeltaScaleFilter() {}
+
+        // (Re)binds this recycled filter to its downstream consumer and factors.
+        DeltaScaleFilter init(DPathConsumer2D out, double mxx, double myy) {
+            this.out = out;
+            this.sx = mxx;
+            this.sy = myy;
+            return this; // fluent API
+        }
+
+        @Override
+        public void moveTo(double x0, double y0) {
+            out.moveTo(sx * x0, sy * y0);
+        }
+
+        @Override
+        public void lineTo(double x1, double y1) {
+            out.lineTo(sx * x1, sy * y1);
+        }
+
+        @Override
+        public void quadTo(double x1, double y1, double x2, double y2) {
+            final double kx = sx, ky = sy;
+            // scale both points:
+            out.quadTo(x1 * kx, y1 * ky,
+                       x2 * kx, y2 * ky);
+        }
+
+        @Override
+        public void curveTo(double x1, double y1, double x2, double y2,
+                            double x3, double y3) {
+            final double kx = sx, ky = sy;
+            // scale all three points:
+            out.curveTo(x1 * kx, y1 * ky,
+                        x2 * kx, y2 * ky,
+                        x3 * kx, y3 * ky);
+        }
+
+        @Override
+        public void closePath() {
+            out.closePath();
+        }
+
+        @Override
+        public void pathDone() {
+            out.pathDone();
+        }
+
+        @Override
+        public long getNativeConsumer() {
+            return 0;
+        }
+    }
+
+    // Path consumer applying the 2x2 matrix [mxx mxy; myx myy] to each point.
+    static final class DeltaTransformFilter implements DPathConsumer2D {
+        private DPathConsumer2D out;
+        private double mxx, mxy, myx, myy;
+
+        DeltaTransformFilter() {}
+
+        // (Re)binds this recycled filter to its downstream consumer and matrix.
+        DeltaTransformFilter init(DPathConsumer2D out, double mxx, double mxy,
+                                  double myx, double myy) {
+            this.out = out;
+            this.mxx = mxx;
+            this.mxy = mxy;
+            this.myx = myx;
+            this.myy = myy;
+            return this; // fluent API
+        }
+
+        // transformed x coordinate of (x, y)
+        private double tx(double x, double y) {
+            return x * mxx + y * mxy;
+        }
+
+        // transformed y coordinate of (x, y)
+        private double ty(double x, double y) {
+            return x * myx + y * myy;
+        }
+
+        @Override
+        public void moveTo(double x0, double y0) {
+            out.moveTo(tx(x0, y0), ty(x0, y0));
+        }
+
+        @Override
+        public void lineTo(double x1, double y1) {
+            out.lineTo(tx(x1, y1), ty(x1, y1));
+        }
+
+        @Override
+        public void quadTo(double x1, double y1, double x2, double y2) {
+            out.quadTo(tx(x1, y1), ty(x1, y1),
+                       tx(x2, y2), ty(x2, y2));
+        }
+
+        @Override
+        public void curveTo(double x1, double y1, double x2, double y2,
+                            double x3, double y3) {
+            out.curveTo(tx(x1, y1), ty(x1, y1),
+                        tx(x2, y2), ty(x2, y2),
+                        tx(x3, y3), ty(x3, y3));
+        }
+
+        // closePath / pathDone carry no coordinates: forwarded unchanged
+        @Override
+        public void closePath() {
+            out.closePath();
+        }
+
+        @Override
+        public void pathDone() {
+            out.pathDone();
+        }
+
+        @Override
+        public long getNativeConsumer() {
+            return 0;
+        }
+    }
+
+    // Adapter exposing a Path2D.Double as a DPathConsumer2D (commands forwarded as-is).
+    static final class Path2DWrapper implements DPathConsumer2D {
+        private Path2D.Double p2d;
+
+        Path2DWrapper() {}
+
+        // (Re)binds this recycled wrapper to the given path.
+        Path2DWrapper init(Path2D.Double p2d) {
+            this.p2d = p2d;
+            return this;
+        }
+
+        @Override
+        public void moveTo(double x0, double y0) {
+            p2d.moveTo(x0, y0);
+        }
+
+        @Override
+        public void lineTo(double x1, double y1) {
+            p2d.lineTo(x1, y1);
+        }
+
+        @Override
+        public void quadTo(double x1, double y1, double x2, double y2) {
+            p2d.quadTo(x1, y1, x2, y2);
+        }
+
+        @Override
+        public void curveTo(double x1, double y1, double x2, double y2,
+                            double x3, double y3) {
+            p2d.curveTo(x1, y1, x2, y2, x3, y3);
+        }
+
+        @Override
+        public void closePath() {
+            p2d.closePath();
+        }
+
+        @Override
+        public void pathDone() {} // nothing is forwarded to the Path2D here
+
+        @Override
+        public long getNativeConsumer() {
+            throw new InternalError("Not using a native peer");
+        }
+    }
+}
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Dasher.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Dasher.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,11 +39,16 @@
  * semantics are unclear.
  *
  */
-final class Dasher implements sun.awt.geom.PathConsumer2D, MarlinConst {
+final class Dasher implements PathConsumer2D, MarlinConst {
 
     static final int REC_LIMIT = 4;
     static final float ERR = 0.01f;
-    static final float MIN_T_INC = 1f / (1 << REC_LIMIT);
+    static final float MIN_T_INC = 1.0f / (1 << REC_LIMIT);
+
+    // More than 24 bits of mantissa means we can no longer accurately
+    // measure the number of times cycled through the dash array so we
+    // punt and override the phase to just be 0 past that point.
+    static final float MAX_CYCLES = 16000000.0f;
 
     private PathConsumer2D out;
     private float[] dash;
@@ -106,26 +111,56 @@
     Dasher init(final PathConsumer2D out, float[] dash, int dashLen,
                 float phase, boolean recycleDashes)
     {
-        if (phase < 0f) {
-            throw new IllegalArgumentException("phase < 0 !");
-        }
         this.out = out;
 
         // Normalize so 0 <= phase < dash[0]
-        int idx = 0;
+        int sidx = 0;
         dashOn = true;
-        float d;
-        while (phase >= (d = dash[idx])) {
-            phase -= d;
-            idx = (idx + 1) % dashLen;
-            dashOn = !dashOn;
+        float sum = 0.0f; // length of one full dash cycle
+        for (int i = 0; i < dashLen; i++) { // only dash[0..dashLen[ is valid: the array may be a longer recycled one
+            sum += dash[i];
+        }
+        float cycles = phase / sum;
+        if (phase < 0.0f) {
+            if (-cycles >= MAX_CYCLES) {
+                phase = 0.0f;
+            } else {
+                int fullcycles = FloatMath.floor_int(-cycles);
+                if ((fullcycles & dashLen & 1) != 0) { // odd cycle count over an odd number of dashes flips on/off
+                    dashOn = !dashOn;
+                }
+                phase += fullcycles * sum;
+                while (phase < 0.0f) {
+                    if (--sidx < 0) {
+                        sidx = dashLen - 1; // wrap to the last valid dash entry
+                    }
+                    phase += dash[sidx];
+                    dashOn = !dashOn;
+                }
+            }
+        } else if (phase > 0.0f) {
+            if (cycles >= MAX_CYCLES) {
+                phase = 0.0f;
+            } else {
+                int fullcycles = FloatMath.floor_int(cycles);
+                if ((fullcycles & dashLen & 1) != 0) { // odd cycle count over an odd number of dashes flips on/off
+                    dashOn = !dashOn;
+                }
+                phase -= fullcycles * sum;
+                float d;
+                while (phase >= (d = dash[sidx])) {
+                    phase -= d;
+                    sidx = (sidx + 1) % dashLen; // wrap within the valid dash entries
+                    dashOn = !dashOn;
+                }
+            }
         }
 
         this.dash = dash;
         this.dashLen = dashLen;
         this.startPhase = this.phase = phase;
         this.startDashOn = dashOn;
-        this.startIdx = idx;
+        this.startIdx = sidx;
         this.starting = true;
         needsMoveTo = false;
         firstSegidx = 0;
@@ -142,7 +177,7 @@
     void dispose() {
         if (DO_CLEAN_DIRTY) {
             // Force zero-fill dirty arrays:
-            Arrays.fill(curCurvepts, 0f);
+            Arrays.fill(curCurvepts, 0.0f);
         }
         // Return arrays:
         if (recycleDashes) {
@@ -151,6 +186,21 @@
         firstSegmentsBuffer = firstSegmentsBuffer_ref.putArray(firstSegmentsBuffer);
     }
 
+    // Copies dashes into a recycled array (which can be larger than dashes.length).
+    float[] copyDashArray(final float[] dashes) {
+        final int len = dashes.length;
+        final boolean oversized = (len > MarlinConst.INITIAL_ARRAY);
+        if (oversized && DO_STATS) {
+            rdrCtx.stats.stat_array_dasher_dasher.add(len);
+        }
+        // dash arrays up to INITIAL_ARRAY elements use the initial array:
+        final float[] copy = (oversized) ? dashes_ref.getArray(len)
+                                         : dashes_ref.initial;
+
+        System.arraycopy(dashes, 0, copy, 0, len);
+        return copy;
+    }
+
     @Override
     public void moveTo(float x0, float y0) {
         if (firstSegidx > 0) {
@@ -202,13 +252,12 @@
     private int firstSegidx;
 
     // precondition: pts must be in relative coordinates (relative to x0,y0)
-    // fullCurve is true iff the curve in pts has not been split.
     private void goTo(float[] pts, int off, final int type) {
         float x = pts[off + type - 4];
         float y = pts[off + type - 3];
         if (dashOn) {
             if (starting) {
-                int len = type - 2 + 1;
+                int len = type - 1; // - 2 + 1
                 int segIdx = firstSegidx;
                 float[] buf = firstSegmentsBuffer;
                 if (segIdx + len  > buf.length) {
@@ -247,7 +296,7 @@
         float dy = y1 - y0;
 
         float len = dx*dx + dy*dy;
-        if (len == 0f) {
+        if (len == 0.0f) {
             return;
         }
         len = (float) Math.sqrt(len);
@@ -275,7 +324,7 @@
                 phase += len;
                 // TODO: compare float values using epsilon:
                 if (len == leftInThisDashSegment) {
-                    phase = 0f;
+                    phase = 0.0f;
                     idx = (idx + 1) % dashLen;
                     dashOn = !dashOn;
                 }
@@ -285,7 +334,7 @@
             dashdx = _dash[idx] * cx;
             dashdy = _dash[idx] * cy;
 
-            if (phase == 0f) {
+            if (phase == 0.0f) {
                 _curCurvepts[0] = x0 + dashdx;
                 _curCurvepts[1] = y0 + dashdy;
             } else {
@@ -300,7 +349,7 @@
             // Advance to next dash segment
             idx = (idx + 1) % dashLen;
             dashOn = !dashOn;
-            phase = 0f;
+            phase = 0.0f;
         }
     }
 
@@ -317,13 +366,13 @@
 
         // initially the current curve is at curCurvepts[0...type]
         int curCurveoff = 0;
-        float lastSplitT = 0f;
+        float lastSplitT = 0.0f;
         float t;
         float leftInThisDashSegment = dash[idx] - phase;
 
-        while ((t = li.next(leftInThisDashSegment)) < 1f) {
-            if (t != 0f) {
-                Helpers.subdivideAt((t - lastSplitT) / (1f - lastSplitT),
+        while ((t = li.next(leftInThisDashSegment)) < 1.0f) {
+            if (t != 0.0f) {
+                Helpers.subdivideAt((t - lastSplitT) / (1.0f - lastSplitT),
                                     curCurvepts, curCurveoff,
                                     curCurvepts, 0,
                                     curCurvepts, type, type);
@@ -334,13 +383,13 @@
             // Advance to next dash segment
             idx = (idx + 1) % dashLen;
             dashOn = !dashOn;
-            phase = 0f;
+            phase = 0.0f;
             leftInThisDashSegment = dash[idx];
         }
         goTo(curCurvepts, curCurveoff+2, type);
         phase += li.lastSegLen();
         if (phase >= dash[idx]) {
-            phase = 0f;
+            phase = 0.0f;
             idx = (idx + 1) % dashLen;
             dashOn = !dashOn;
         }
@@ -395,7 +444,7 @@
 
         // the lengths of the lines of the control polygon. Only its first
         // curveType/2 - 1 elements are valid. This is an optimization. See
-        // next(float) for more detail.
+        // next() for more detail.
         private final float[] curLeafCtrlPolyLengths = new float[3];
 
         LengthIterator() {
@@ -420,13 +469,13 @@
             if (DO_CLEAN_DIRTY) {
                 final int recLimit = recCurveStack.length - 1;
                 for (int i = recLimit; i >= 0; i--) {
-                    Arrays.fill(recCurveStack[i], 0f);
+                    Arrays.fill(recCurveStack[i], 0.0f);
                 }
                 Arrays.fill(sides, Side.LEFT);
-                Arrays.fill(curLeafCtrlPolyLengths, 0f);
-                Arrays.fill(nextRoots, 0f);
-                Arrays.fill(flatLeafCoefCache, 0f);
-                flatLeafCoefCache[2] = -1f;
+                Arrays.fill(curLeafCtrlPolyLengths, 0.0f);
+                Arrays.fill(nextRoots, 0.0f);
+                Arrays.fill(flatLeafCoefCache, 0.0f);
+                flatLeafCoefCache[2] = -1.0f;
             }
         }
 
@@ -435,12 +484,12 @@
             System.arraycopy(pts, 0, recCurveStack[0], 0, 8);
             this.curveType = type;
             this.recLevel = 0;
-            this.lastT = 0f;
-            this.lenAtLastT = 0f;
-            this.nextT = 0f;
-            this.lenAtNextT = 0f;
+            this.lastT = 0.0f;
+            this.lenAtLastT = 0.0f;
+            this.nextT = 0.0f;
+            this.lenAtNextT = 0.0f;
             goLeft(); // initializes nextT and lenAtNextT properly
-            this.lenAtLastSplit = 0f;
+            this.lenAtLastSplit = 0.0f;
             if (recLevel > 0) {
                 this.sides[0] = Side.LEFT;
                 this.done = false;
@@ -449,7 +498,7 @@
                 this.sides[0] = Side.RIGHT;
                 this.done = true;
             }
-            this.lastSegLen = 0f;
+            this.lastSegLen = 0.0f;
         }
 
         // 0 == false, 1 == true, -1 == invalid cached value.
@@ -462,7 +511,7 @@
                 // the test below is equivalent to !within(len1/len2, 1, err).
                 // It is using a multiplication instead of a division, so it
                 // should be a bit faster.
-                if (!Helpers.within(len1, len2, err*len2)) {
+                if (!Helpers.within(len1, len2, err * len2)) {
                     cachedHaveLowAcceleration = 0;
                     return false;
                 }
@@ -493,7 +542,7 @@
         // form (see inside next() for what that means). The cache is
         // invalid when it's third element is negative, since in any
         // valid flattened curve, this would be >= 0.
-        private final float[] flatLeafCoefCache = new float[]{0f, 0f, -1f, 0f};
+        private final float[] flatLeafCoefCache = new float[]{0.0f, 0.0f, -1.0f, 0.0f};
 
         // returns the t value where the remaining curve should be split in
         // order for the left subdivided curve to have length len. If len
@@ -503,7 +552,7 @@
             while (lenAtNextT < targetLength) {
                 if (done) {
                     lastSegLen = lenAtNextT - lenAtLastSplit;
-                    return 1f;
+                    return 1.0f;
                 }
                 goToNextLeaf();
             }
@@ -520,19 +569,19 @@
                 // gives us the desired length.
                 final float[] _flatLeafCoefCache = flatLeafCoefCache;
 
-                if (_flatLeafCoefCache[2] < 0) {
-                    float x = 0f + curLeafCtrlPolyLengths[0],
-                          y = x  + curLeafCtrlPolyLengths[1];
+                if (_flatLeafCoefCache[2] < 0.0f) {
+                    float x =     curLeafCtrlPolyLengths[0],
+                          y = x + curLeafCtrlPolyLengths[1];
                     if (curveType == 8) {
                         float z = y + curLeafCtrlPolyLengths[2];
-                        _flatLeafCoefCache[0] = 3f * (x - y) + z;
-                        _flatLeafCoefCache[1] = 3f * (y - 2f * x);
-                        _flatLeafCoefCache[2] = 3f * x;
+                        _flatLeafCoefCache[0] = 3.0f * (x - y) + z;
+                        _flatLeafCoefCache[1] = 3.0f * (y - 2.0f * x);
+                        _flatLeafCoefCache[2] = 3.0f * x;
                         _flatLeafCoefCache[3] = -z;
                     } else if (curveType == 6) {
-                        _flatLeafCoefCache[0] = 0f;
-                        _flatLeafCoefCache[1] = y - 2f * x;
-                        _flatLeafCoefCache[2] = 2f * x;
+                        _flatLeafCoefCache[0] = 0.0f;
+                        _flatLeafCoefCache[1] = y - 2.0f * x;
+                        _flatLeafCoefCache[2] = 2.0f * x;
                         _flatLeafCoefCache[3] = -y;
                     }
                 }
@@ -544,7 +593,7 @@
                 // we use cubicRootsInAB here, because we want only roots in 0, 1,
                 // and our quadratic root finder doesn't filter, so it's just a
                 // matter of convenience.
-                int n = Helpers.cubicRootsInAB(a, b, c, d, nextRoots, 0, 0, 1);
+                int n = Helpers.cubicRootsInAB(a, b, c, d, nextRoots, 0, 0.0f, 1.0f);
                 if (n == 1 && !Float.isNaN(nextRoots[0])) {
                     t = nextRoots[0];
                 }
@@ -552,8 +601,8 @@
             // t is relative to the current leaf, so we must make it a valid parameter
             // of the original curve.
             t = t * (nextT - lastT) + lastT;
-            if (t >= 1f) {
-                t = 1f;
+            if (t >= 1.0f) {
+                t = 1.0f;
                 done = true;
             }
             // even if done = true, if we're here, that means targetLength
@@ -600,13 +649,13 @@
         // go to the leftmost node from the current node. Return its length.
         private void goLeft() {
             float len = onLeaf();
-            if (len >= 0f) {
+            if (len >= 0.0f) {
                 lastT = nextT;
                 lenAtLastT = lenAtNextT;
                 nextT += (1 << (REC_LIMIT - recLevel)) * MIN_T_INC;
                 lenAtNextT += len;
                 // invalidate caches
-                flatLeafCoefCache[2] = -1f;
+                flatLeafCoefCache[2] = -1.0f;
                 cachedHaveLowAcceleration = -1;
             } else {
                 Helpers.subdivide(recCurveStack[recLevel], 0,
@@ -622,7 +671,7 @@
         // the length of the leaf if we are on a leaf.
         private float onLeaf() {
             float[] curve = recCurveStack[recLevel];
-            float polyLen = 0f;
+            float polyLen = 0.0f;
 
             float x0 = curve[0], y0 = curve[1];
             for (int i = 2; i < curveType; i += 2) {
@@ -638,9 +687,9 @@
                                                   curve[curveType-2],
                                                   curve[curveType-1]);
             if ((polyLen - lineLen) < ERR || recLevel == REC_LIMIT) {
-                return (polyLen + lineLen) / 2f;
+                return (polyLen + lineLen) / 2.0f;
             }
-            return -1f;
+            return -1.0f;
         }
     }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/DoubleArrayCache.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+import static sun.java2d.marlin.ArrayCacheConst.ARRAY_SIZES;
+import static sun.java2d.marlin.ArrayCacheConst.BUCKETS;
+import static sun.java2d.marlin.ArrayCacheConst.MAX_ARRAY_SIZE;
+import static sun.java2d.marlin.MarlinUtils.logInfo;
+import static sun.java2d.marlin.MarlinUtils.logException;
+
+import java.lang.ref.WeakReference;
+import java.util.Arrays;
+
+import sun.java2d.marlin.ArrayCacheConst.BucketStats;
+import sun.java2d.marlin.ArrayCacheConst.CacheStats;
+
+/*
+ * Note that the [BYTE/INT/FLOAT/DOUBLE]ArrayCache files are nearly identical except
+ * for a few type and name differences. Typically, the [BYTE]ArrayCache.java file
+ * is edited manually and then [INT/FLOAT/DOUBLE]ArrayCache.java
+ * files are generated with the following command lines:
+ */
+// % sed -e 's/(b\yte)[ ]*//g' -e 's/b\yte/int/g' -e 's/B\yte/Int/g' < B\yteArrayCache.java > IntArrayCache.java
+// % sed -e 's/(b\yte)[ ]*0/0.0f/g' -e 's/(b\yte)[ ]*/(float) /g' -e 's/b\yte/float/g' -e 's/B\yte/Float/g' < B\yteArrayCache.java > FloatArrayCache.java
+// % sed -e 's/(b\yte)[ ]*0/0.0d/g' -e 's/(b\yte)[ ]*/(double) /g' -e 's/b\yte/double/g' -e 's/B\yte/Double/g' < B\yteArrayCache.java > DoubleArrayCache.java
+
+final class DoubleArrayCache implements MarlinConst {
+
+    final boolean clean;
+    private final int bucketCapacity;
+    private WeakReference<Bucket[]> refBuckets = null;
+    final CacheStats stats;
+
+    // Creates the cache; statistics are only allocated when DO_STATS is enabled.
+    DoubleArrayCache(final boolean clean, final int bucketCapacity) {
+        this.clean = clean;
+        this.bucketCapacity = bucketCapacity;
+        this.stats = (!DO_STATS) ? null : new CacheStats(getLogPrefix(clean) + "DoubleArrayCache");
+    }
+
+    Bucket getCacheBucket(final int length) { // bucket used for arrays of this length
+        final int bucketIndex = ArrayCacheConst.getBucket(length);
+        return getBuckets()[bucketIndex];
+    }
+
+    // Returns the bucket array, rebuilding it if the weak reference was cleared.
+    private Bucket[] getBuckets() {
+        final WeakReference<Bucket[]> ref = refBuckets;
+        final Bucket[] cached = (ref == null) ? null : ref.get();
+        if (cached != null) {
+            return cached; // still reachable: reuse
+        }
+
+        // first use, or the previous buckets were garbage collected:
+        final Bucket[] created = new Bucket[BUCKETS];
+        for (int i = 0; i < created.length; i++) {
+            final BucketStats bucketStats = (DO_STATS) ? stats.bucketStats[i] : null;
+            created[i] = new Bucket(clean, ARRAY_SIZES[i], bucketCapacity, bucketStats);
+        }
+        // update weak reference:
+        refBuckets = new WeakReference<Bucket[]>(created);
+        return created;
+    }
+
+    Reference createRef(final int initialSize) { // new handle with its own initial array of initialSize elements
+        return new Reference(this, initialSize);
+    }
+
+    static final class Reference { // handle pairing one initial array with access to the shared cache
+
+        // initial array reference (direct access)
+        final double[] initial;
+        private final boolean clean;
+        private final DoubleArrayCache cache;
+
+        Reference(final DoubleArrayCache cache, final int initialSize) {
+            this.cache = cache;
+            this.clean = cache.clean;
+            this.initial = createArray(initialSize, clean);
+            if (DO_STATS) {
+                cache.stats.totalInitial += initialSize;
+            }
+        }
+
+        double[] getArray(final int length) {
+            if (length <= MAX_ARRAY_SIZE) {
+                return cache.getCacheBucket(length).getArray();
+            }
+            if (DO_STATS) {
+                cache.stats.oversize++;
+            }
+            if (DO_LOG_OVERSIZE) {
+                logInfo(getLogPrefix(clean) + "DoubleArrayCache: "
+                        + "getArray[oversize]: length=\t" + length);
+            }
+            return createArray(length, clean); // oversized arrays are allocated directly, never cached
+        }
+
+        double[] widenArray(final double[] array, final int usedSize,
+                          final int needSize)
+        {
+            final int length = array.length;
+            if (DO_CHECKS && length >= needSize) {
+                return array;
+            }
+            if (DO_STATS) {
+                cache.stats.resize++;
+            }
+
+            // maybe change bucket:
+            // presumably getNewSize() returns a size >= needSize (TODO confirm):
+            final double[] res = getArray(ArrayCacheConst.getNewSize(usedSize, needSize));
+
+            // copy BEFORE the old array is cleared / recycled below:
+            System.arraycopy(array, 0, res, 0, usedSize); // copy only used elements
+
+            // give the old array back (the initial array is cleaned but never cached):
+            putArray(array, 0, usedSize); // ensure array is cleared
+
+            if (DO_LOG_WIDEN_ARRAY) {
+                logInfo(getLogPrefix(clean) + "DoubleArrayCache: "
+                        + "widenArray[" + res.length
+                        + "]: usedSize=\t" + usedSize + "\tlength=\t" + length
+                        + "\tneeded length=\t" + needSize);
+            }
+            return res;
+        }
+
+        double[] putArray(final double[] array)
+        {
+            // dirty array helper: treats the whole array as the used range
+            return putArray(array, 0, array.length);
+        }
+
+        double[] putArray(final double[] array, final int fromIndex,
+                        final int toIndex)
+        {
+            if (array.length <= MAX_ARRAY_SIZE) {
+                if ((clean || DO_CLEAN_DIRTY) && (toIndex != 0)) {
+                    // clean-up array of dirty part[fromIndex; toIndex[
+                    fill(array, fromIndex, toIndex, 0.0d);
+                }
+                // ensure to never store initial arrays in cache:
+                if (array != initial) {
+                    cache.getCacheBucket(array.length).putArray(array);
+                }
+            }
+            return initial; // always the initial array, so callers can write: array = ref.putArray(array)
+        }
+    }
+
+    // Fixed-capacity LIFO stack meant to hold double[] arrays of length arraySize.
+    static final class Bucket {
+        private int tail = 0; // number of arrays currently stored
+        private final int arraySize;
+        private final boolean clean;
+        private final double[][] arrays;
+        private final BucketStats stats;
+
+        Bucket(final boolean clean, final int arraySize,
+               final int capacity, final BucketStats stats)
+        {
+            this.clean = clean;
+            this.arraySize = arraySize;
+            this.arrays = new double[capacity][];
+            this.stats = stats;
+        }
+
+        // Pops a cached array, or allocates a new one when the bucket is empty.
+        double[] getArray() {
+            if (DO_STATS) {
+                stats.getOp++;
+            }
+            if (tail == 0) {
+                // nothing cached: allocate
+                if (DO_STATS) {
+                    stats.createOp++;
+                }
+                return createArray(arraySize, clean);
+            }
+            final int top = --tail;
+            final double[] array = arrays[top];
+            arrays[top] = null; // drop the reference held by the cache
+            return array;
+        }
+
+        // Pushes the array back; it is dropped when the bucket is already full.
+        void putArray(final double[] array) {
+            if (DO_CHECKS && (array.length != arraySize)) {
+                logInfo(getLogPrefix(clean) + "DoubleArrayCache: "
+                        + "bad length = " + array.length);
+                return;
+            }
+            if (DO_STATS) {
+                stats.returnOp++;
+            }
+            if (tail < arrays.length) {
+                arrays[tail++] = array;
+                if (DO_STATS) {
+                    stats.updateMaxSize(tail);
+                }
+            } else if (DO_CHECKS) {
+                logInfo(getLogPrefix(clean) + "DoubleArrayCache: "
+                        + "array capacity exceeded !");
+            }
+        }
+    }
+
+    // clean: zero-filled array; dirty: JDK9 Unsafe.allocateUninitializedArray(class, length)
+    static double[] createArray(final int length, final boolean clean) {
+        if (!clean) {
+            return (double[]) OffHeapArray.UNSAFE.allocateUninitializedArray(double.class, length);
+        }
+        return new double[length];
+    }
+
+    // Sets array[fromIndex; toIndex[ to value, then verifies it when DO_CHECKS.
+    static void fill(final double[] array, final int fromIndex,
+                     final int toIndex, final double value) {
+        Arrays.fill(array, fromIndex, toIndex, value);
+        if (!DO_CHECKS) {
+            return;
+        }
+        check(array, fromIndex, toIndex, value);
+    }
+
+    // Debug helper (no-op unless DO_CHECKS): verifies that the WHOLE array, not
+    // only [fromIndex; toIndex[, holds value; on mismatch it logs and refills.
+    static void check(final double[] array, final int fromIndex,
+                      final int toIndex, final double value) {
+        if (!DO_CHECKS) {
+            return;
+        }
+        int bad = 0;
+        while (bad < array.length && array[bad] == value) {
+            bad++;
+        }
+        if (bad < array.length) {
+            // first offending element found: report it with the full content
+            logException("Invalid value at: " + bad + " = " + array[bad]
+                    + " from: " + fromIndex + " to: " + toIndex + "\n"
+                    + Arrays.toString(array), new Throwable());
+            Arrays.fill(array, value); // ensure array is correctly filled
+        }
+    }
+
+    static String getLogPrefix(final boolean clean) { // prefix used in log messages and stats names
+        return clean ? "Clean" : "Dirty";
+    }
+}
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatArrayCache.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatArrayCache.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,6 +22,7 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
+
 package sun.java2d.marlin;
 
 import static sun.java2d.marlin.ArrayCacheConst.ARRAY_SIZES;
@@ -37,13 +38,14 @@
 import sun.java2d.marlin.ArrayCacheConst.CacheStats;
 
 /*
- * Note that the [BYTE/INT/FLOAT]ArrayCache files are nearly identical except
+ * Note that the [BYTE/INT/FLOAT/DOUBLE]ArrayCache files are nearly identical except
  * for a few type and name differences. Typically, the [BYTE]ArrayCache.java file
- * is edited manually and then [INT]ArrayCache.java and [FLOAT]ArrayCache.java
+ * is edited manually and then [INT/FLOAT/DOUBLE]ArrayCache.java
  * files are generated with the following command lines:
  */
 // % sed -e 's/(b\yte)[ ]*//g' -e 's/b\yte/int/g' -e 's/B\yte/Int/g' < B\yteArrayCache.java > IntArrayCache.java
-// % sed -e 's/(b\yte)[ ]*/(float) /g' -e 's/b\yte/float/g' -e 's/B\yte/Float/g' < B\yteArrayCache.java > FloatArrayCache.java
+// % sed -e 's/(b\yte)[ ]*0/0.0f/g' -e 's/(b\yte)[ ]*/(float) /g' -e 's/b\yte/float/g' -e 's/B\yte/Float/g' < B\yteArrayCache.java > FloatArrayCache.java
+// % sed -e 's/(b\yte)[ ]*0/0.0d/g' -e 's/(b\yte)[ ]*/(double) /g' -e 's/b\yte/double/g' -e 's/B\yte/Double/g' < B\yteArrayCache.java > DoubleArrayCache.java
 
 final class FloatArrayCache implements MarlinConst {
 
@@ -159,7 +161,7 @@
             if (array.length <= MAX_ARRAY_SIZE) {
                 if ((clean || DO_CLEAN_DIRTY) && (toIndex != 0)) {
                     // clean-up array of dirty part[fromIndex; toIndex[
-                    fill(array, fromIndex, toIndex, (float) 0);
+                    fill(array, fromIndex, toIndex, 0.0f);
                 }
                 // ensure to never store initial arrays in cache:
                 if (array != initial) {
@@ -231,8 +233,8 @@
         if (clean) {
             return new float[length];
         }
-       // use JDK9 Unsafe.allocateUninitializedArray(class, length):
-       return (float[]) OffHeapArray.UNSAFE.allocateUninitializedArray(float.class, length);
+        // use JDK9 Unsafe.allocateUninitializedArray(class, length):
+        return (float[]) OffHeapArray.UNSAFE.allocateUninitializedArray(float.class, length);
     }
 
     static void fill(final float[] array, final int fromIndex,
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatMath.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/FloatMath.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,10 +22,9 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
+
 package sun.java2d.marlin;
 
-import jdk.internal.math.FloatConsts;
-
 /**
  * Faster Math ceil / floor routines derived from StrictMath
  */
@@ -34,17 +33,17 @@
     // overflow / NaN handling enabled:
     static final boolean CHECK_OVERFLOW = true;
     static final boolean CHECK_NAN = true;
+    // Copied from sun.misc.FloatConsts:
+    public static final int FLOAT_SIGNIFICAND_WIDTH = 24;   // sun.misc.FloatConsts.SIGNIFICAND_WIDTH
+    public static final int FLOAT_EXP_BIAS = 127;           // sun.misc.FloatConsts.EXP_BIAS
+    public static final int FLOAT_EXP_BIT_MASK = 2139095040;// sun.misc.FloatConsts.EXP_BIT_MASK
+    public static final int FLOAT_SIGNIF_BIT_MASK = 8388607;// sun.misc.FloatConsts.SIGNIF_BIT_MASK
 
     private FloatMath() {
         // utility class
     }
 
     // faster inlined min/max functions in the branch prediction is high
-    static float max(final float a, final float b) {
-        // no NaN handling
-        return (a >= b) ? a : b;
-    }
-
     static int max(final int a, final int b) {
         return (a >= b) ? a : b;
     }
@@ -77,9 +76,9 @@
         // compute only once Float.floatToRawIntBits(a)
         final int doppel = Float.floatToRawIntBits(a);
 
-        final int exponent = ((doppel & FloatConsts.EXP_BIT_MASK)
-                >> (FloatConsts.SIGNIFICAND_WIDTH - 1))
-                - FloatConsts.EXP_BIAS;
+        final int exponent = ((doppel & FLOAT_EXP_BIT_MASK)
+                >> (FLOAT_SIGNIFICAND_WIDTH - 1))
+                - FLOAT_EXP_BIAS;
 
         if (exponent < 0) {
             /*
@@ -87,8 +86,8 @@
              * floorOrceil(-0.0) => -0.0
              * floorOrceil(+0.0) => +0.0
              */
-            return ((a == 0) ? a :
-                    ( (a < 0f) ? -0f : 1f) );
+            return ((a == 0.0f) ? a :
+                    ( (a < 0.0f) ? -0.0f : 1.0f) );
         }
         if (CHECK_OVERFLOW && (exponent >= 23)) { // 52 for double
             /*
@@ -101,7 +100,7 @@
         assert exponent >= 0 && exponent <= 22; // 51 for double
 
         final int intpart = doppel
-                & (~(FloatConsts.SIGNIF_BIT_MASK >> exponent));
+                & (~(FLOAT_SIGNIF_BIT_MASK >> exponent));
 
         if (intpart == doppel) {
             return a; // integral value (including 0)
@@ -134,9 +133,9 @@
         // compute only once Float.floatToRawIntBits(a)
         final int doppel = Float.floatToRawIntBits(a);
 
-        final int exponent = ((doppel & FloatConsts.EXP_BIT_MASK)
-                >> (FloatConsts.SIGNIFICAND_WIDTH - 1))
-                - FloatConsts.EXP_BIAS;
+        final int exponent = ((doppel & FLOAT_EXP_BIT_MASK)
+                >> (FLOAT_SIGNIFICAND_WIDTH - 1))
+                - FLOAT_EXP_BIAS;
 
         if (exponent < 0) {
             /*
@@ -144,8 +143,8 @@
              * floorOrceil(-0.0) => -0.0
              * floorOrceil(+0.0) => +0.0
              */
-            return ((a == 0) ? a :
-                    ( (a < 0f) ? -1f : 0f) );
+            return ((a == 0.0f) ? a :
+                    ( (a < 0.0f) ? -1.0f : 0.0f) );
         }
         if (CHECK_OVERFLOW && (exponent >= 23)) { // 52 for double
             /*
@@ -158,7 +157,7 @@
         assert exponent >= 0 && exponent <= 22; // 51 for double
 
         final int intpart = doppel
-                & (~(FloatConsts.SIGNIF_BIT_MASK >> exponent));
+                & (~(FLOAT_SIGNIF_BIT_MASK >> exponent));
 
         if (intpart == doppel) {
             return a; // integral value (including 0)
@@ -191,6 +190,26 @@
     }
 
     /**
+     * Faster alternative to ceil(double) optimized for the integer domain
+     * and supporting NaN and +/-Infinity.
+     *
+     * @param a a value.
+     * @return the smallest (closest to negative infinity) integer value
+     * that is greater than or equal to the argument and is equal to a
+     * mathematical integer.
+     */
+    public static int ceil_int(final double a) {
+        final int intpart = (int) a;
+
+        if (a <= intpart
+                || (CHECK_OVERFLOW && intpart == Integer.MAX_VALUE)
+                || CHECK_NAN && Double.isNaN(a)) {
+            return intpart;
+        }
+        return intpart + 1;
+    }
+
+    /**
      * Faster alternative to floor(float) optimized for the integer domain
      * and supporting NaN and +/-Infinity.
      *
@@ -209,4 +228,24 @@
         }
         return intpart - 1;
     }
+
+    /**
+     * Faster alternative to floor(double) optimized for the integer domain
+     * and supporting NaN and +/-Infinity.
+     *
+     * @param a a value.
+     * @return the largest (closest to positive infinity) integer value
+     * that is less than or equal to the argument and is equal to a
+     * mathematical integer.
+     */
+    public static int floor_int(final double a) {
+        final int intpart = (int) a;
+
+        if (a >= intpart
+                || (CHECK_OVERFLOW && intpart == Integer.MIN_VALUE)
+                || CHECK_NAN && Double.isNaN(a)) {
+            return intpart;
+        }
+        return intpart - 1;
+    }
 }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Helpers.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Helpers.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -52,27 +52,27 @@
     {
         int ret = off;
         float t;
-        if (a != 0f) {
+        if (a != 0.0f) {
             final float dis = b*b - 4*a*c;
-            if (dis > 0f) {
-                final float sqrtDis = (float)Math.sqrt(dis);
+            if (dis > 0.0f) {
+                final float sqrtDis = (float) Math.sqrt(dis);
                 // depending on the sign of b we use a slightly different
                 // algorithm than the traditional one to find one of the roots
                 // so we can avoid adding numbers of different signs (which
                 // might result in loss of precision).
-                if (b >= 0f) {
-                    zeroes[ret++] = (2f * c) / (-b - sqrtDis);
-                    zeroes[ret++] = (-b - sqrtDis) / (2f * a);
+                if (b >= 0.0f) {
+                    zeroes[ret++] = (2.0f * c) / (-b - sqrtDis);
+                    zeroes[ret++] = (-b - sqrtDis) / (2.0f * a);
                 } else {
-                    zeroes[ret++] = (-b + sqrtDis) / (2f * a);
-                    zeroes[ret++] = (2f * c) / (-b + sqrtDis);
+                    zeroes[ret++] = (-b + sqrtDis) / (2.0f * a);
+                    zeroes[ret++] = (2.0f * c) / (-b + sqrtDis);
                 }
-            } else if (dis == 0f) {
-                t = (-b) / (2f * a);
+            } else if (dis == 0.0f) {
+                t = (-b) / (2.0f * a);
                 zeroes[ret++] = t;
             }
         } else {
-            if (b != 0f) {
+            if (b != 0.0f) {
                 t = (-c) / b;
                 zeroes[ret++] = t;
             }
@@ -85,7 +85,7 @@
                               float[] pts, final int off,
                               final float A, final float B)
     {
-        if (d == 0f) {
+        if (d == 0.0f) {
             int num = quadraticRoots(a, b, c, pts, off);
             return filterOutNotInAB(pts, off, num, A, B) - off;
         }
@@ -109,8 +109,8 @@
         // q = Q/2
         // instead and use those values for simplicity of the code.
         double sq_A = a * a;
-        double p = (1.0/3.0) * ((-1.0/3.0) * sq_A + b);
-        double q = (1.0/2.0) * ((2.0/27.0) * a * sq_A - (1.0/3.0) * a * b + c);
+        double p = (1.0d/3.0d) * ((-1.0d/3.0d) * sq_A + b);
+        double q = (1.0d/2.0d) * ((2.0d/27.0d) * a * sq_A - (1.0d/3.0d) * a * b + c);
 
         // use Cardano's formula
 
@@ -118,30 +118,30 @@
         double D = q * q + cb_p;
 
         int num;
-        if (D < 0.0) {
+        if (D < 0.0d) {
             // see: http://en.wikipedia.org/wiki/Cubic_function#Trigonometric_.28and_hyperbolic.29_method
-            final double phi = (1.0/3.0) * acos(-q / sqrt(-cb_p));
-            final double t = 2.0 * sqrt(-p);
+            final double phi = (1.0d/3.0d) * acos(-q / sqrt(-cb_p));
+            final double t = 2.0d * sqrt(-p);
 
-            pts[ off+0 ] =  (float)( t * cos(phi));
-            pts[ off+1 ] =  (float)(-t * cos(phi + (PI / 3.0)));
-            pts[ off+2 ] =  (float)(-t * cos(phi - (PI / 3.0)));
+            pts[ off+0 ] = (float) ( t * cos(phi));
+            pts[ off+1 ] = (float) (-t * cos(phi + (PI / 3.0d)));
+            pts[ off+2 ] = (float) (-t * cos(phi - (PI / 3.0d)));
             num = 3;
         } else {
             final double sqrt_D = sqrt(D);
             final double u = cbrt(sqrt_D - q);
             final double v = - cbrt(sqrt_D + q);
 
-            pts[ off ] = (float)(u + v);
+            pts[ off ] = (float) (u + v);
             num = 1;
 
-            if (within(D, 0.0, 1e-8)) {
-                pts[off+1] = -(pts[off] / 2f);
+            if (within(D, 0.0d, 1e-8d)) {
+                pts[off+1] = -(pts[off] / 2.0f);
                 num = 2;
             }
         }
 
-        final float sub = (1f/3f) * a;
+        final float sub = (1.0f/3.0f) * a;
 
         for (int i = 0; i < num; ++i) {
             pts[ off+i ] -= sub;
@@ -178,7 +178,7 @@
 
     static float polyLineLength(float[] poly, final int off, final int nCoords) {
         assert nCoords % 2 == 0 && poly.length >= off + nCoords : "";
-        float acc = 0;
+        float acc = 0.0f;
         for (int i = off + 2; i < off + nCoords; i += 2) {
             acc += linelen(poly[i], poly[i+1], poly[i-2], poly[i-1]);
         }
@@ -188,7 +188,7 @@
     static float linelen(float x1, float y1, float x2, float y2) {
         final float dx = x2 - x1;
         final float dy = y2 - y1;
-        return (float)Math.sqrt(dx*dx + dy*dy);
+        return (float) Math.sqrt(dx*dx + dy*dy);
     }
 
     static void subdivide(float[] src, int srcoff, float[] left, int leftoff,
@@ -218,8 +218,8 @@
     }
 
     // Most of these are copied from classes in java.awt.geom because we need
-    // float versions of these functions, and Line2D, CubicCurve2D,
-    // QuadCurve2D don't provide them.
+    // both single and double precision variants of these functions, and Line2D,
+    // CubicCurve2D, QuadCurve2D don't provide them.
     /**
      * Subdivides the cubic curve specified by the coordinates
      * stored in the <code>src</code> array at indices <code>srcoff</code>
@@ -268,18 +268,18 @@
             right[rightoff + 6] = x2;
             right[rightoff + 7] = y2;
         }
-        x1 = (x1 + ctrlx1) / 2f;
-        y1 = (y1 + ctrly1) / 2f;
-        x2 = (x2 + ctrlx2) / 2f;
-        y2 = (y2 + ctrly2) / 2f;
-        float centerx = (ctrlx1 + ctrlx2) / 2f;
-        float centery = (ctrly1 + ctrly2) / 2f;
-        ctrlx1 = (x1 + centerx) / 2f;
-        ctrly1 = (y1 + centery) / 2f;
-        ctrlx2 = (x2 + centerx) / 2f;
-        ctrly2 = (y2 + centery) / 2f;
-        centerx = (ctrlx1 + ctrlx2) / 2f;
-        centery = (ctrly1 + ctrly2) / 2f;
+        x1 = (x1 + ctrlx1) / 2.0f;
+        y1 = (y1 + ctrly1) / 2.0f;
+        x2 = (x2 + ctrlx2) / 2.0f;
+        y2 = (y2 + ctrly2) / 2.0f;
+        float centerx = (ctrlx1 + ctrlx2) / 2.0f;
+        float centery = (ctrly1 + ctrly2) / 2.0f;
+        ctrlx1 = (x1 + centerx) / 2.0f;
+        ctrly1 = (y1 + centery) / 2.0f;
+        ctrlx2 = (x2 + centerx) / 2.0f;
+        ctrly2 = (y2 + centery) / 2.0f;
+        centerx = (ctrlx1 + ctrlx2) / 2.0f;
+        centery = (ctrly1 + ctrly2) / 2.0f;
         if (left != null) {
             left[leftoff + 2] = x1;
             left[leftoff + 3] = y1;
@@ -367,12 +367,12 @@
             right[rightoff + 4] = x2;
             right[rightoff + 5] = y2;
         }
-        x1 = (x1 + ctrlx) / 2f;
-        y1 = (y1 + ctrly) / 2f;
-        x2 = (x2 + ctrlx) / 2f;
-        y2 = (y2 + ctrly) / 2f;
-        ctrlx = (x1 + x2) / 2f;
-        ctrly = (y1 + y2) / 2f;
+        x1 = (x1 + ctrlx) / 2.0f;
+        y1 = (y1 + ctrly) / 2.0f;
+        x2 = (x2 + ctrlx) / 2.0f;
+        y2 = (y2 + ctrly) / 2.0f;
+        ctrlx = (x1 + x2) / 2.0f;
+        ctrly = (y1 + y2) / 2.0f;
         if (left != null) {
             left[leftoff + 2] = x1;
             left[leftoff + 3] = y1;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/IRendererContext.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+interface IRendererContext extends MarlinConst {
+
+    public RendererStats stats();
+
+    public OffHeapArray newOffHeapArray(final long initialSize);
+
+    public IntArrayCache.Reference newCleanIntArrayRef(final int initialSize);
+
+}
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/IntArrayCache.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/IntArrayCache.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,6 +22,7 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
+
 package sun.java2d.marlin;
 
 import static sun.java2d.marlin.ArrayCacheConst.ARRAY_SIZES;
@@ -37,13 +38,14 @@
 import sun.java2d.marlin.ArrayCacheConst.CacheStats;
 
 /*
- * Note that the [BYTE/INT/FLOAT]ArrayCache files are nearly identical except
+ * Note that the [BYTE/INT/FLOAT/DOUBLE]ArrayCache files are nearly identical except
  * for a few type and name differences. Typically, the [BYTE]ArrayCache.java file
- * is edited manually and then [INT]ArrayCache.java and [FLOAT]ArrayCache.java
+ * is edited manually and then [INT/FLOAT/DOUBLE]ArrayCache.java
  * files are generated with the following command lines:
  */
 // % sed -e 's/(b\yte)[ ]*//g' -e 's/b\yte/int/g' -e 's/B\yte/Int/g' < B\yteArrayCache.java > IntArrayCache.java
-// % sed -e 's/(b\yte)[ ]*/(float) /g' -e 's/b\yte/float/g' -e 's/B\yte/Float/g' < B\yteArrayCache.java > FloatArrayCache.java
+// % sed -e 's/(b\yte)[ ]*0/0.0f/g' -e 's/(b\yte)[ ]*/(float) /g' -e 's/b\yte/float/g' -e 's/B\yte/Float/g' < B\yteArrayCache.java > FloatArrayCache.java
+// % sed -e 's/(b\yte)[ ]*0/0.0d/g' -e 's/(b\yte)[ ]*/(double) /g' -e 's/b\yte/double/g' -e 's/B\yte/Double/g' < B\yteArrayCache.java > DoubleArrayCache.java
 
 final class IntArrayCache implements MarlinConst {
 
@@ -231,8 +233,8 @@
         if (clean) {
             return new int[length];
         }
-       // use JDK9 Unsafe.allocateUninitializedArray(class, length):
-       return (int[]) OffHeapArray.UNSAFE.allocateUninitializedArray(int.class, length);
+        // use JDK9 Unsafe.allocateUninitializedArray(class, length):
+        return (int[]) OffHeapArray.UNSAFE.allocateUninitializedArray(int.class, length);
     }
 
     static void fill(final int[] array, final int fromIndex,
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinCache.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinCache.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -45,7 +45,7 @@
 
     // 2048 (pixelSize) alpha values (width) x 32 rows (tile) = 64K bytes
     // x1 instead of 4 bytes (RLE) ie 1/4 capacity or average good RLE compression
-    static final long INITIAL_CHUNK_ARRAY = TILE_SIZE * INITIAL_PIXEL_DIM; // 64K
+    static final long INITIAL_CHUNK_ARRAY = TILE_H * INITIAL_PIXEL_DIM; // 64K
 
     // The alpha map used by this object (taken out of our map cache) to convert
     // pixel coverage counts gotten from MarlinCache (which are in the range
@@ -72,17 +72,17 @@
 
     // 1D dirty arrays
     // row index in rowAAChunk[]
-    final long[] rowAAChunkIndex = new long[TILE_SIZE];
+    final long[] rowAAChunkIndex = new long[TILE_H];
     // first pixel (inclusive) for each row
-    final int[] rowAAx0 = new int[TILE_SIZE];
+    final int[] rowAAx0 = new int[TILE_H];
     // last pixel (exclusive) for each row
-    final int[] rowAAx1 = new int[TILE_SIZE];
+    final int[] rowAAx1 = new int[TILE_H];
     // encoding mode (0=raw, 1=RLE encoding) for each row
-    final int[] rowAAEnc = new int[TILE_SIZE];
+    final int[] rowAAEnc = new int[TILE_H];
     // coded length (RLE encoding) for each row
-    final long[] rowAALen = new long[TILE_SIZE];
+    final long[] rowAALen = new long[TILE_H];
     // last position in RLE decoding for each row (getAlpha):
-    final long[] rowAAPos = new long[TILE_SIZE];
+    final long[] rowAAPos = new long[TILE_H];
 
     // dirty off-heap array containing pixel coverages for (32) rows (packed)
     // if encoding=raw, it contains alpha coverage values (val) as integer
@@ -97,8 +97,8 @@
     // x=j*TILE_SIZE+bboxX0.
     int[] touchedTile;
 
-    // per-thread renderer context
-    final RendererContext rdrCtx;
+    // per-thread renderer stats
+    final RendererStats rdrStats;
 
     // touchedTile ref (clean)
     private final IntArrayCache.Reference touchedTile_ref;
@@ -107,8 +107,8 @@
 
     boolean useRLE = false;
 
-    MarlinCache(final RendererContext rdrCtx) {
-        this.rdrCtx = rdrCtx;
+    MarlinCache(final IRendererContext rdrCtx) {
+        this.rdrStats = rdrCtx.stats();
 
         rowAAChunk = rdrCtx.newOffHeapArray(INITIAL_CHUNK_ARRAY); // 64K
 
@@ -120,7 +120,7 @@
         tileMax = Integer.MIN_VALUE;
     }
 
-    void init(int minx, int miny, int maxx, int maxy, int edgeSumDeltaY)
+    void init(int minx, int miny, int maxx, int maxy)
     {
         // assert maxy >= miny && maxx >= minx;
         bboxX0 = minx;
@@ -142,47 +142,16 @@
             if (width <= RLE_MIN_WIDTH || width >= RLE_MAX_WIDTH) {
                 useRLE = false;
             } else {
-                // perimeter approach: how fit the total length into given height:
-
-                // if stroking: meanCrossings /= 2 => divide edgeSumDeltaY by 2
-                final int heightSubPixel
-                    = (((maxy - miny) << SUBPIXEL_LG_POSITIONS_Y) << rdrCtx.stroking);
-
-                // check meanDist > block size:
-                // check width / (meanCrossings - 1) >= RLE_THRESHOLD
-
-                // fast case: (meanCrossingPerPixel <= 2) means 1 span only
-                useRLE = (edgeSumDeltaY <= (heightSubPixel << 1))
-                    // note: already checked (meanCrossingPerPixel <= 2)
-                    // rewritten to avoid division:
-                    || (width * heightSubPixel) >
-                            ((edgeSumDeltaY - heightSubPixel) << BLOCK_SIZE_LG);
-
-                if (DO_TRACE && !useRLE) {
-                    final float meanCrossings
-                        = ((float) edgeSumDeltaY) / heightSubPixel;
-                    final float meanDist = width / (meanCrossings - 1);
-
-                    System.out.println("High complexity: "
-                        + " for bbox[width = " + width
-                        + " height = " + (maxy - miny)
-                        + "] edgeSumDeltaY = " + edgeSumDeltaY
-                        + " heightSubPixel = " + heightSubPixel
-                        + " meanCrossings = "+ meanCrossings
-                        + " meanDist = " + meanDist
-                        + " width =  " + (width * heightSubPixel)
-                        + " <= criteria:  " + ((edgeSumDeltaY - heightSubPixel) << BLOCK_SIZE_LG)
-                    );
-                }
+                useRLE = true;
             }
         }
 
         // the ceiling of (maxy - miny + 1) / TILE_SIZE;
-        final int nxTiles = (width + TILE_SIZE) >> TILE_SIZE_LG;
+        final int nxTiles = (width + TILE_W) >> TILE_W_LG;
 
         if (nxTiles > INITIAL_ARRAY) {
             if (DO_STATS) {
-                rdrCtx.stats.stat_array_marlincache_touchedTile.add(nxTiles);
+                rdrStats.stat_array_marlincache_touchedTile.add(nxTiles);
             }
             touchedTile = touchedTile_ref.getArray(nxTiles);
         }
@@ -197,7 +166,7 @@
         resetTileLine(0);
 
         if (DO_STATS) {
-            rdrCtx.stats.totalOffHeap += rowAAChunk.length;
+            rdrStats.totalOffHeap += rowAAChunk.length;
         }
 
         // Return arrays:
@@ -220,14 +189,14 @@
 
         // reset current pos
         if (DO_STATS) {
-            rdrCtx.stats.stat_cache_rowAAChunk.add(rowAAChunkPos);
+            rdrStats.stat_cache_rowAAChunk.add(rowAAChunkPos);
         }
         rowAAChunkPos = 0L;
 
         // Reset touchedTile:
         if (tileMin != Integer.MAX_VALUE) {
             if (DO_STATS) {
-                rdrCtx.stats.stat_cache_tiles.add(tileMax - tileMin);
+                rdrStats.stat_cache_tiles.add(tileMax - tileMin);
             }
             // clean only dirty touchedTile:
             if (tileMax == 1) {
@@ -269,10 +238,6 @@
     void copyAARowNoRLE(final int[] alphaRow, final int y,
                    final int px0, final int px1)
     {
-        if (DO_MONITORS) {
-            rdrCtx.stats.mon_rdr_copyAARow.start();
-        }
-
         // skip useless pixels above boundary
         final int px_bbox1 = FloatMath.min(px1, bboxX1);
 
@@ -308,12 +273,12 @@
             expandRowAAChunk(needSize);
         }
         if (DO_STATS) {
-            rdrCtx.stats.stat_cache_rowAA.add(px_bbox1 - px0);
+            rdrStats.stat_cache_rowAA.add(px_bbox1 - px0);
         }
 
         // rowAA contains only alpha values for range[x0; x1[
         final int[] _touchedTile = touchedTile;
-        final int _TILE_SIZE_LG = TILE_SIZE_LG;
+        final int _TILE_SIZE_LG = TILE_W_LG;
 
         final int from = px0      - bboxX0; // first pixel inclusive
         final int to   = px_bbox1 - bboxX0; //  last pixel exclusive
@@ -342,9 +307,9 @@
 
             // store alpha sum (as byte):
             if (val == 0) {
-                _unsafe.putByte(addr_off, (byte)0); // [0..255]
+                _unsafe.putByte(addr_off, (byte)0); // [0-255]
             } else {
-                _unsafe.putByte(addr_off, _unsafe.getByte(addr_alpha + val)); // [0..255]
+                _unsafe.putByte(addr_off, _unsafe.getByte(addr_alpha + val)); // [0-255]
 
                 // update touchedTile
                 _touchedTile[x >> _TILE_SIZE_LG] += val;
@@ -368,25 +333,17 @@
         }
 
         // Clear alpha row for reuse:
-        IntArrayCache.fill(alphaRow, from, px1 - bboxX0, 0);
-
-        if (DO_MONITORS) {
-            rdrCtx.stats.mon_rdr_copyAARow.stop();
-        }
+        IntArrayCache.fill(alphaRow, from, px1 + 1 - bboxX0, 0);
     }
 
     void copyAARowRLE_WithBlockFlags(final int[] blkFlags, final int[] alphaRow,
                       final int y, final int px0, final int px1)
     {
-        if (DO_MONITORS) {
-            rdrCtx.stats.mon_rdr_copyAARow.start();
-        }
-
         // Copy rowAA data into the piscesCache if one is present
         final int _bboxX0 = bboxX0;
 
         // process tile line [0 - 32]
-        final int row  = y - bboxY0;
+        final int row  =   y -  bboxY0;
         final int from = px0 - _bboxX0; // first pixel inclusive
 
         // skip useless pixels above boundary
@@ -418,12 +375,14 @@
         long addr_off = _rowAAChunk.address + initialPos;
 
         final int[] _touchedTile = touchedTile;
-        final int _TILE_SIZE_LG = TILE_SIZE_LG;
+        final int _TILE_SIZE_LG = TILE_W_LG;
         final int _BLK_SIZE_LG  = BLOCK_SIZE_LG;
 
         // traverse flagged blocks:
         final int blkW = (from >> _BLK_SIZE_LG);
         final int blkE = (to   >> _BLK_SIZE_LG) + 1;
+        // ensure last block flag = 0 to process final block:
+        blkFlags[blkE] = 0;
 
         // Perform run-length encoding and store results in the piscesCache
         int val = 0;
@@ -481,7 +440,7 @@
                             } else {
                                 _unsafe.putInt(addr_off,
                                     ((_bboxX0 + cx) << 8)
-                                    | (((int) _unsafe.getByte(addr_alpha + val)) & 0xFF) // [0..255]
+                                    | (((int) _unsafe.getByte(addr_alpha + val)) & 0xFF) // [0-255]
                                 );
 
                                 if (runLen == 1) {
@@ -493,7 +452,7 @@
                             addr_off += SIZE_INT;
 
                             if (DO_STATS) {
-                                rdrCtx.stats.hist_tile_generator_encoding_runLen
+                                rdrStats.hist_tile_generator_encoding_runLen
                                     .add(runLen);
                             }
                             cx0 = cx;
@@ -544,7 +503,7 @@
         } else {
             _unsafe.putInt(addr_off,
                 ((_bboxX0 + to) << 8)
-                | (((int) _unsafe.getByte(addr_alpha + val)) & 0xFF) // [0..255]
+                | (((int) _unsafe.getByte(addr_alpha + val)) & 0xFF) // [0-255]
             );
 
             if (runLen == 1) {
@@ -556,7 +515,7 @@
         addr_off += SIZE_INT;
 
         if (DO_STATS) {
-            rdrCtx.stats.hist_tile_generator_encoding_runLen.add(runLen);
+            rdrStats.hist_tile_generator_encoding_runLen.add(runLen);
         }
 
         long len = (addr_off - _rowAAChunk.address);
@@ -568,8 +527,8 @@
         rowAAChunkPos = len;
 
         if (DO_STATS) {
-            rdrCtx.stats.stat_cache_rowAA.add(rowAALen[row]);
-            rdrCtx.stats.hist_tile_generator_encoding_ratio.add(
+            rdrStats.stat_cache_rowAA.add(rowAALen[row]);
+            rdrStats.hist_tile_generator_encoding_ratio.add(
                 (100 * skip) / (blkE - blkW)
             );
         }
@@ -586,17 +545,10 @@
         }
 
         // Clear alpha row for reuse:
-        if (px1 > bboxX1) {
-            alphaRow[to    ] = 0;
-            alphaRow[to + 1] = 0;
-        }
+        alphaRow[to] = 0;
         if (DO_CHECKS) {
             IntArrayCache.check(blkFlags, blkW, blkE, 0);
-            IntArrayCache.check(alphaRow, from, px1 - bboxX0, 0);
-        }
-
-        if (DO_MONITORS) {
-            rdrCtx.stats.mon_rdr_copyAARow.stop();
+            IntArrayCache.check(alphaRow, from, px1 + 1 - bboxX0, 0);
         }
     }
 
@@ -613,7 +565,7 @@
 
     private void expandRowAAChunk(final long needSize) {
         if (DO_STATS) {
-            rdrCtx.stats.stat_array_marlincache_rowAAChunk.add(needSize);
+            rdrStats.stat_array_marlincache_rowAAChunk.add(needSize);
         }
 
         // note: throw IOOB if neededSize > 2Gb:
@@ -629,7 +581,7 @@
     {
         // the x and y of the current row, minus bboxX0, bboxY0
         // process tile line [0 - 32]
-        final int _TILE_SIZE_LG = TILE_SIZE_LG;
+        final int _TILE_SIZE_LG = TILE_W_LG;
 
         // update touchedTile
         int tx = (x0 >> _TILE_SIZE_LG);
@@ -666,7 +618,7 @@
     }
 
     int alphaSumInTile(final int x) {
-        return touchedTile[(x - bboxX0) >> TILE_SIZE_LG];
+        return touchedTile[(x - bboxX0) >> TILE_W_LG];
     }
 
     @Override
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinConst.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinConst.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -95,10 +95,10 @@
     // 4096 edges for initial capacity
     static final int INITIAL_EDGES_COUNT = MarlinProperties.getInitialEdges();
 
-    // initial edges = 3/4 * edges count (4096)
+    // initial edges = edges count (4096)
     // 6 ints per edges = 24 bytes
-    // edges capacity = 24 x initial edges = 18 * edges count (4096) = 72K
-    static final int INITIAL_EDGES_CAPACITY = INITIAL_EDGES_COUNT * 18;
+    // edges capacity = 24 x initial edges = 24 * edges count (4096) = 96K
+    static final int INITIAL_EDGES_CAPACITY = INITIAL_EDGES_COUNT * 24;
 
     // zero value as byte
     static final byte BYTE_0 = (byte) 0;
@@ -114,14 +114,17 @@
     public static final int SUBPIXEL_POSITIONS_Y = 1 << (SUBPIXEL_LG_POSITIONS_Y);
 
     public static final float NORM_SUBPIXELS
-        = (float)Math.sqrt(( SUBPIXEL_POSITIONS_X * SUBPIXEL_POSITIONS_X
-                           + SUBPIXEL_POSITIONS_Y * SUBPIXEL_POSITIONS_Y)/2.0);
+        = (float) Math.sqrt(( SUBPIXEL_POSITIONS_X * SUBPIXEL_POSITIONS_X
+                            + SUBPIXEL_POSITIONS_Y * SUBPIXEL_POSITIONS_Y) / 2.0d);
 
     public static final int MAX_AA_ALPHA
         = SUBPIXEL_POSITIONS_X * SUBPIXEL_POSITIONS_Y;
 
-    public static final int TILE_SIZE_LG = MarlinProperties.getTileSize_Log2();
-    public static final int TILE_SIZE = 1 << TILE_SIZE_LG; // 32 by default
+    public static final int TILE_H_LG = MarlinProperties.getTileSize_Log2();
+    public static final int TILE_H = 1 << TILE_H_LG; // 32 by default
+
+    public static final int TILE_W_LG = MarlinProperties.getTileWidth_Log2();
+    public static final int TILE_W = 1 << TILE_W_LG; // 32 by default
 
     public static final int BLOCK_SIZE_LG = MarlinProperties.getBlockSize_Log2();
     public static final int BLOCK_SIZE    = 1 << BLOCK_SIZE_LG;
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinProperties.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinProperties.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -68,21 +68,21 @@
     /**
      * Return the log(2) corresponding to subpixel on x-axis (
      *
-     * @return 1 (2 subpixels) < initial pixel size < 4 (256 subpixels)
+     * @return 0 (1 subpixel) < initial pixel size < 8 (256 subpixels)
      * (3 by default ie 8 subpixels)
      */
     public static int getSubPixel_Log2_X() {
-        return getInteger("sun.java2d.renderer.subPixel_log2_X", 3, 1, 8);
+        return getInteger("sun.java2d.renderer.subPixel_log2_X", 3, 0, 8);
     }
 
     /**
      * Return the log(2) corresponding to subpixel on y-axis (
      *
-     * @return 1 (2 subpixels) < initial pixel size < 8 (256 subpixels)
+     * @return 0 (1 subpixel) < initial pixel size < 8 (256 subpixels)
      * (3 by default ie 8 subpixels)
      */
     public static int getSubPixel_Log2_Y() {
-        return getInteger("sun.java2d.renderer.subPixel_log2_Y", 3, 1, 8);
+        return getInteger("sun.java2d.renderer.subPixel_log2_Y", 3, 0, 8);
     }
 
     /**
@@ -92,7 +92,18 @@
      * (5 by default ie 32x32 pixels)
      */
     public static int getTileSize_Log2() {
-        return getInteger("sun.java2d.renderer.tileSize_log2", 5, 3, 8);
+        return getInteger("sun.java2d.renderer.tileSize_log2", 5, 3, 10);
+    }
+
+    /**
+     * Return the log(2) corresponding to the tile width in pixels
+     *
+     * @return 3 (8 pixels) < tile width < 10 (1024 pixels)
+     * (by default is given by the square tile size)
+     */
+    public static int getTileWidth_Log2() {
+        final int tileSize = getTileSize_Log2();
+        return getInteger("sun.java2d.renderer.tileWidth_log2", tileSize, 3, 10);
     }
 
     /**
@@ -166,6 +177,20 @@
         return getBoolean("sun.java2d.renderer.logUnsafeMalloc", "false");
     }
 
+    // quality settings
+
+    public static float getCubicDecD2() {
+        return getFloat("sun.java2d.renderer.cubic_dec_d2", 1.0f, 0.01f, 4.0f);
+    }
+
+    public static float getCubicIncD1() {
+        return getFloat("sun.java2d.renderer.cubic_inc_d1", 0.4f, 0.01f, 2.0f);
+    }
+
+    public static float getQuadDecD2() {
+        return getFloat("sun.java2d.renderer.quad_dec_d2", 0.5f, 0.01f, 4.0f);
+    }
+
     // system property utilities
     static boolean getBoolean(final String key, final String def) {
         return Boolean.valueOf(AccessController.doPrivileged(
@@ -197,7 +222,58 @@
     }
 
     static int align(final int val, final int norm) {
-        final int ceil = FloatMath.ceil_int( ((float)val) / norm);
+        final int ceil = FloatMath.ceil_int( ((float) val) / norm);
         return ceil * norm;
     }
+
+    /**
+     * Return the double value of the given system property, or the default
+     * value if the property is undefined, cannot be parsed, or lies outside
+     * the range [min, max] (NaN is always rejected).
+     *
+     * @param key system property name
+     * @param def default value
+     * @param min minimum accepted value (inclusive)
+     * @param max maximum accepted value (inclusive)
+     * @return parsed property value within [min, max] or the default value
+     */
+    public static double getDouble(final String key, final double def,
+                                   final double min, final double max)
+    {
+        double value = def;
+        final String property = AccessController.doPrivileged(
+                                    new GetPropertyAction(key));
+
+        if (property != null) {
+            try {
+                value = Double.parseDouble(property);
+            } catch (NumberFormatException nfe) {
+                logInfo("Invalid value for " + key + " = " + property + " !");
+            }
+        }
+        // check for invalid values; the negated form also rejects NaN,
+        // for which both (value < min) and (value > max) are false:
+        if (!(value >= min && value <= max)) {
+            logInfo("Invalid value for " + key + " = " + value
+                    + "; expect value in range[" + min + ", " + max + "] !");
+            value = def;
+        }
+        return value;
+    }
+
+    /**
+     * Return the float value of the given system property, using the same
+     * parsing and range validation as getDouble() before narrowing to float.
+     *
+     * @param key system property name
+     * @param def default value
+     * @param min minimum accepted value (inclusive)
+     * @param max maximum accepted value (inclusive)
+     * @return parsed property value within [min, max] or the default value
+     */
+    public static float getFloat(final String key, final float def,
+                                 final float min, final float max)
+    {
+        return (float)getDouble(key, def, min, max);
+    }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinRenderer.java	Wed May 17 22:05:11 2017 +0200
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.java2d.marlin;
+
+public interface MarlinRenderer extends MarlinConst {
+
+}
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinRenderingEngine.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinRenderingEngine.java	Wed May 17 22:05:11 2017 +0200
@@ -44,8 +44,8 @@
 /**
  * Marlin RendererEngine implementation (derived from Pisces)
  */
-public class MarlinRenderingEngine extends RenderingEngine
-                                   implements MarlinConst
+public final class MarlinRenderingEngine extends RenderingEngine
+                                         implements MarlinConst
 {
     private static enum NormMode {
         ON_WITH_AA {
@@ -80,7 +80,7 @@
                                                          PathIterator src);
     }
 
-    private static final float MIN_PEN_SIZE = 1f / NORM_SUBPIXELS;
+    private static final float MIN_PEN_SIZE = 1.0f / NORM_SUBPIXELS;
 
     static final float UPPER_BND = Float.MAX_VALUE / 2.0f;
     static final float LOWER_BND = -UPPER_BND;
@@ -259,7 +259,7 @@
              */
 
             double EA = A*A + B*B;          // x^2 coefficient
-            double EB = 2.0*(A*C + B*D);    // xy coefficient
+            double EB = 2.0d * (A*C + B*D); // xy coefficient
             double EC = C*C + D*D;          // y^2 coefficient
 
             /*
@@ -287,7 +287,7 @@
 
             double hypot = Math.sqrt(EB*EB + (EA-EC)*(EA-EC));
             // sqrt omitted, compare to squared limits below.
-            double widthsquared = ((EA + EC + hypot)/2.0);
+            double widthsquared = ((EA + EC + hypot) / 2.0d);
 
             widthScale = (float)Math.sqrt(widthsquared);
         }
@@ -332,7 +332,7 @@
             final double d = at.getScaleY();
             final double det = a * d - c * b;
 
-            if (Math.abs(det) <= (2f * Float.MIN_VALUE)) {
+            if (Math.abs(det) <= (2.0f * Float.MIN_VALUE)) {
                 // this rendering engine takes one dimensional curves and turns
                 // them into 2D shapes by giving them width.
                 // However, if everything is to be passed through a singular
@@ -344,7 +344,7 @@
                 // of writing of this comment (September 16, 2010)). Actually,
                 // I am not sure if the moveTo is necessary to avoid the SIGSEGV
                 // but the pathDone is definitely needed.
-                pc2d.moveTo(0f, 0f);
+                pc2d.moveTo(0.0f, 0.0f);
                 pc2d.pathDone();
                 return;
             }
@@ -361,17 +361,7 @@
                 if (dashes != null) {
                     recycleDashes = true;
                     dashLen = dashes.length;
-                    final float[] newDashes;
-                    if (dashLen <= INITIAL_ARRAY) {
-                        newDashes = rdrCtx.dasher.dashes_ref.initial;
-                    } else {
-                        if (DO_STATS) {
-                            rdrCtx.stats.stat_array_dasher_dasher.add(dashLen);
-                        }
-                        newDashes = rdrCtx.dasher.dashes_ref.getArray(dashLen);
-                    }
-                    System.arraycopy(dashes, 0, newDashes, 0, dashLen);
-                    dashes = newDashes;
+                    dashes = rdrCtx.dasher.copyDashArray(dashes);
                     for (int i = 0; i < dashLen; i++) {
                         dashes[i] *= scale;
                     }
@@ -445,7 +435,7 @@
     }
 
     private static boolean nearZero(final double num) {
-        return Math.abs(num) < 2.0 * Math.ulp(num);
+        return Math.abs(num) < 2.0d * Math.ulp(num);
     }
 
     abstract static class NormalizingPathIterator implements PathIterator {
@@ -524,8 +514,8 @@
                 case PathIterator.SEG_LINETO:
                     break;
                 case PathIterator.SEG_QUADTO:
-                    coords[0] += (curx_adjust + x_adjust) / 2f;
-                    coords[1] += (cury_adjust + y_adjust) / 2f;
+                    coords[0] += (curx_adjust + x_adjust) / 2.0f;
+                    coords[1] += (cury_adjust + y_adjust) / 2.0f;
                     break;
                 case PathIterator.SEG_CUBICTO:
                     coords[0] += curx_adjust;
@@ -824,10 +814,8 @@
             }
         } finally {
             if (r != null) {
-                // dispose renderer:
+                // dispose renderer and recycle the RendererContext instance:
                 r.dispose();
-                // recycle the RendererContext instance
-                MarlinRenderingEngine.returnRendererContext(rdrCtx);
             }
         }
 
@@ -845,25 +833,25 @@
     {
         // REMIND: Deal with large coordinates!
         double ldx1, ldy1, ldx2, ldy2;
-        boolean innerpgram = (lw1 > 0.0 && lw2 > 0.0);
+        boolean innerpgram = (lw1 > 0.0d && lw2 > 0.0d);
 
         if (innerpgram) {
             ldx1 = dx1 * lw1;
             ldy1 = dy1 * lw1;
             ldx2 = dx2 * lw2;
             ldy2 = dy2 * lw2;
-            x -= (ldx1 + ldx2) / 2.0;
-            y -= (ldy1 + ldy2) / 2.0;
+            x -= (ldx1 + ldx2) / 2.0d;
+            y -= (ldy1 + ldy2) / 2.0d;
             dx1 += ldx1;
             dy1 += ldy1;
             dx2 += ldx2;
             dy2 += ldy2;
-            if (lw1 > 1.0 && lw2 > 1.0) {
+            if (lw1 > 1.0d && lw2 > 1.0d) {
                 // Inner parallelogram was entirely consumed by stroke...
                 innerpgram = false;
             }
         } else {
-            ldx1 = ldy1 = ldx2 = ldy2 = 0.0;
+            ldx1 = ldy1 = ldx2 = ldy2 = 0.0d;
         }
 
         MarlinTileGenerator ptg = null;
@@ -884,10 +872,10 @@
             if (innerpgram) {
                 x += ldx1 + ldx2;
                 y += ldy1 + ldy2;
-                dx1 -= 2.0 * ldx1;
-                dy1 -= 2.0 * ldy1;
-                dx2 -= 2.0 * ldx2;
-                dy2 -= 2.0 * ldy2;
+                dx1 -= 2.0d * ldx1;
+                dy1 -= 2.0d * ldy1;
+                dx2 -= 2.0d * ldx2;
+                dy2 -= 2.0d * ldy2;
                 r.moveTo((float) x, (float) y);
                 r.lineTo((float) (x+dx1), (float) (y+dy1));
                 r.lineTo((float) (x+dx1+dx2), (float) (y+dy1+dy2));
@@ -905,10 +893,8 @@
             }
         } finally {
             if (r != null) {
-                // dispose renderer:
+                // dispose renderer and recycle the RendererContext instance:
                 r.dispose();
-                // recycle the RendererContext instance
-                MarlinRenderingEngine.returnRendererContext(rdrCtx);
             }
         }
 
@@ -1035,12 +1021,11 @@
                 + MarlinConst.SUBPIXEL_LG_POSITIONS_X);
         logInfo("sun.java2d.renderer.subPixel_log2_Y  = "
                 + MarlinConst.SUBPIXEL_LG_POSITIONS_Y);
+
         logInfo("sun.java2d.renderer.tileSize_log2    = "
-                + MarlinConst.TILE_SIZE_LG);
-
-        logInfo("sun.java2d.renderer.blockSize_log2   = "
-                + MarlinConst.BLOCK_SIZE_LG);
-
+                + MarlinConst.TILE_H_LG);
+        logInfo("sun.java2d.renderer.tileWidth_log2   = "
+                + MarlinConst.TILE_W_LG);
         logInfo("sun.java2d.renderer.blockSize_log2   = "
                 + MarlinConst.BLOCK_SIZE_LG);
 
@@ -1078,8 +1063,14 @@
                 + MarlinConst.LOG_UNSAFE_MALLOC);
 
         // quality settings
+        logInfo("sun.java2d.renderer.cubic_dec_d2     = "
+                + MarlinProperties.getCubicDecD2());
+        logInfo("sun.java2d.renderer.cubic_inc_d1     = "
+                + MarlinProperties.getCubicIncD1());
+        logInfo("sun.java2d.renderer.quad_dec_d2      = "
+                + MarlinProperties.getQuadDecD2());
+
         logInfo("Renderer settings:");
-        logInfo("CUB_COUNT_LG = " + Renderer.CUB_COUNT_LG);
         logInfo("CUB_DEC_BND  = " + Renderer.CUB_DEC_BND);
         logInfo("CUB_INC_BND  = " + Renderer.CUB_INC_BND);
         logInfo("QUAD_DEC_BND = " + Renderer.QUAD_DEC_BND);
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinTileGenerator.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/MarlinTileGenerator.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,25 +25,51 @@
 
 package sun.java2d.marlin;
 
+import java.util.Arrays;
 import sun.java2d.pipe.AATileGenerator;
 import jdk.internal.misc.Unsafe;
 
 final class MarlinTileGenerator implements AATileGenerator, MarlinConst {
 
-    private static final int MAX_TILE_ALPHA_SUM = TILE_SIZE * TILE_SIZE
-                                                      * MAX_AA_ALPHA;
+    private static final int MAX_TILE_ALPHA_SUM = TILE_W * TILE_H * MAX_AA_ALPHA;
 
-    private final Renderer rdr;
+    private static final int TH_AA_ALPHA_FILL_EMPTY = ((MAX_AA_ALPHA + 1) / 3); // 33%
+    private static final int TH_AA_ALPHA_FILL_FULL  = ((MAX_AA_ALPHA + 1) * 2 / 3); // 66%
+
+    private static final int FILL_TILE_W = TILE_W >> 1; // half tile width
+
+    static {
+        if (MAX_TILE_ALPHA_SUM <= 0) {
+            throw new IllegalStateException("Invalid MAX_TILE_ALPHA_SUM: " + MAX_TILE_ALPHA_SUM);
+        }
+        if (DO_TRACE) {
+            System.out.println("MAX_AA_ALPHA           : " + MAX_AA_ALPHA);
+            System.out.println("TH_AA_ALPHA_FILL_EMPTY : " + TH_AA_ALPHA_FILL_EMPTY);
+            System.out.println("TH_AA_ALPHA_FILL_FULL  : " + TH_AA_ALPHA_FILL_FULL);
+            System.out.println("FILL_TILE_W            : " + FILL_TILE_W);
+        }
+    }
+
+    private final Renderer rdrF;
+    private final DRenderer rdrD;
     private final MarlinCache cache;
     private int x, y;
 
-    // per-thread renderer context
-    final RendererContext rdrCtx;
+    // per-thread renderer stats
+    final RendererStats rdrStats;
 
-    MarlinTileGenerator(Renderer r) {
-        this.rdr = r;
-        this.cache = r.cache;
-        this.rdrCtx = r.rdrCtx;
+    MarlinTileGenerator(final RendererStats stats, final MarlinRenderer r,
+                        final MarlinCache cache)
+    {
+        this.rdrStats = stats;
+        if (r instanceof Renderer) {
+            this.rdrF = (Renderer)r;
+            this.rdrD = null;
+        } else {
+            this.rdrF = null;
+            this.rdrD = (DRenderer)r;
+        }
+        this.cache = cache;
     }
 
     MarlinTileGenerator init() {
@@ -61,14 +87,17 @@
     public void dispose() {
         if (DO_MONITORS) {
             // called from AAShapePipe.renderTiles() (render tiles end):
-            rdrCtx.stats.mon_pipe_renderTiles.stop();
+            rdrStats.mon_pipe_renderTiles.stop();
         }
         // dispose cache:
         cache.dispose();
-        // dispose renderer:
-        rdr.dispose();
-        // recycle the RendererContext instance
-        MarlinRenderingEngine.returnRendererContext(rdrCtx);
+        // dispose renderer and recycle the RendererContext instance:
+        // bimorphic call optimization:
+        if (rdrF != null) {
+            rdrF.dispose();
+        } else if (rdrD != null) {
+            rdrD.dispose();
+        }
     }
 
     void getBbox(int[] bbox) {
@@ -86,9 +115,9 @@
     public int getTileWidth() {
         if (DO_MONITORS) {
             // called from AAShapePipe.renderTiles() (render tiles start):
-            rdrCtx.stats.mon_pipe_renderTiles.start();
+            rdrStats.mon_pipe_renderTiles.start();
         }
-        return TILE_SIZE;
+        return TILE_W;
     }
 
     /**
@@ -97,7 +126,7 @@
      */
     @Override
     public int getTileHeight() {
-        return TILE_SIZE;
+        return TILE_H;
     }
 
     /**
@@ -131,7 +160,7 @@
         final int alpha = (al == 0x00 ? 0x00
                               : (al == MAX_TILE_ALPHA_SUM ? 0xff : 0x80));
         if (DO_STATS) {
-            rdrCtx.stats.hist_tile_generator_alpha.add(alpha);
+            rdrStats.hist_tile_generator_alpha.add(alpha);
         }
         return alpha;
     }
@@ -143,14 +172,19 @@
      */
     @Override
     public void nextTile() {
-        if ((x += TILE_SIZE) >= cache.bboxX1) {
+        if ((x += TILE_W) >= cache.bboxX1) {
             x = cache.bboxX0;
-            y += TILE_SIZE;
+            y += TILE_H;
 
             if (y < cache.bboxY1) {
                 // compute for the tile line
                 // [ y; max(y + TILE_SIZE, bboxY1) ]
-                this.rdr.endRendering(y);
+                // bimorphic call optimization:
+                if (rdrF != null) {
+                    rdrF.endRendering(y);
+                } else if (rdrD != null) {
+                    rdrD.endRendering(y);
+                }
             }
         }
     }
@@ -180,7 +214,7 @@
                                final int rowstride)
     {
         if (DO_MONITORS) {
-            rdrCtx.stats.mon_ptg_getAlpha.start();
+            rdrStats.mon_ptg_getAlpha.start();
         }
 
         // local vars for performance:
@@ -190,11 +224,11 @@
         final int[] rowAAx1 = _cache.rowAAx1;
 
         final int x0 = this.x;
-        final int x1 = FloatMath.min(x0 + TILE_SIZE, _cache.bboxX1);
+        final int x1 = FloatMath.min(x0 + TILE_W, _cache.bboxX1);
 
         // note: process tile line [0 - 32[
         final int y0 = 0;
-        final int y1 = FloatMath.min(this.y + TILE_SIZE, _cache.bboxY1) - this.y;
+        final int y1 = FloatMath.min(this.y + TILE_H, _cache.bboxY1) - this.y;
 
         if (DO_LOG_BOUNDS) {
             MarlinUtils.logInfo("getAlpha = [" + x0 + " ... " + x1
@@ -237,14 +271,14 @@
                         }
                     }
 
-                    // now: cx >= x0 but cx < aax0 (x1 < aax0)
+                    // now: cx >= x0 and cx >= aax0
 
                     // Copy AA data (sum alpha data):
                     addr = addr_rowAA + rowAAChunkIndex[cy] + (cx - aax0);
 
                     for (end = (aax1 <= x1) ? aax1 : x1; cx < end; cx++) {
                         // cx inside tile[x0; x1[ :
-                        tile[idx++] = _unsafe.getByte(addr); // [0..255]
+                        tile[idx++] = _unsafe.getByte(addr); // [0-255]
                         addr += SIZE;
                     }
                 }
@@ -269,7 +303,7 @@
         nextTile();
 
         if (DO_MONITORS) {
-            rdrCtx.stats.mon_ptg_getAlpha.stop();
+            rdrStats.mon_ptg_getAlpha.stop();
         }
     }
 
@@ -282,7 +316,7 @@
                              final int rowstride)
     {
         if (DO_MONITORS) {
-            rdrCtx.stats.mon_ptg_getAlpha.start();
+            rdrStats.mon_ptg_getAlpha.start();
         }
 
         // Decode run-length encoded alpha mask data
@@ -300,24 +334,48 @@
         final long[] rowAAPos = _cache.rowAAPos;
 
         final int x0 = this.x;
-        final int x1 = FloatMath.min(x0 + TILE_SIZE, _cache.bboxX1);
+        final int x1 = FloatMath.min(x0 + TILE_W, _cache.bboxX1);
+        final int w  = x1 - x0;
 
         // note: process tile line [0 - 32[
         final int y0 = 0;
-        final int y1 = FloatMath.min(this.y + TILE_SIZE, _cache.bboxY1) - this.y;
+        final int y1 = FloatMath.min(this.y + TILE_H, _cache.bboxY1) - this.y;
 
         if (DO_LOG_BOUNDS) {
             MarlinUtils.logInfo("getAlpha = [" + x0 + " ... " + x1
                                 + "[ [" + y0 + " ... " + y1 + "[");
         }
 
+        // avoid too small area: fill is not faster !
+        final int clearTile;
+        final byte refVal;
+        final int area;
+
+        if ((w >= FILL_TILE_W) && (area = w * y1) > 64) { // 64 / 4 ie 16 words min (faster)
+            final int alphaSum = cache.alphaSumInTile(x0);
+
+            if (alphaSum < area * TH_AA_ALPHA_FILL_EMPTY) {
+                clearTile = 1;
+                refVal = 0;
+            } else if (alphaSum > area * TH_AA_ALPHA_FILL_FULL) {
+                clearTile = 2;
+                refVal = (byte)0xff;
+            } else {
+                clearTile = 0;
+                refVal = 0;
+            }
+        } else {
+            clearTile = 0;
+            refVal = 0;
+        }
+
         final Unsafe _unsafe = OffHeapArray.UNSAFE;
         final long SIZE_BYTE = 1L;
         final long SIZE_INT = 4L;
         final long addr_rowAA = _cache.rowAAChunk.address;
         long addr, addr_row, last_addr, addr_end;
 
-        final int skipRowPixels = (rowstride - (x1 - x0));
+        final int skipRowPixels = (rowstride - w);
 
         int cx, cy, cx1;
         int rx0, rx1, runLen, end;
@@ -325,137 +383,414 @@
         byte val;
         int idx = offset;
 
-        for (cy = y0; cy < y1; cy++) {
-            // empty line (default)
-            cx = x0;
+        switch (clearTile) {
+        case 1: // 0x00
+            // Clear full tile rows:
+            Arrays.fill(tile, offset, offset + (y1 * rowstride), refVal);
 
-            if (rowAAEnc[cy] == 0) {
-                // Raw encoding:
+            for (cy = y0; cy < y1; cy++) {
+                // empty line (default)
+                cx = x0;
 
-                final int aax1 = rowAAx1[cy]; // exclusive
+                if (rowAAEnc[cy] == 0) {
+                    // Raw encoding:
 
-                // quick check if there is AA data
-                // corresponding to this tile [x0; x1[
-                if (aax1 > x0) {
-                    final int aax0 = rowAAx0[cy]; // inclusive
+                    final int aax1 = rowAAx1[cy]; // exclusive
 
-                    if (aax0 < x1) {
-                        // note: cx is the cursor pointer in the tile array
-                        // (left to right)
-                        cx = aax0;
+                    // quick check if there is AA data
+                    // corresponding to this tile [x0; x1[
+                    if (aax1 > x0) {
+                        final int aax0 = rowAAx0[cy]; // inclusive
 
-                        // ensure cx >= x0
-                        if (cx <= x0) {
-                            cx = x0;
-                        } else {
-                            // fill line start until first AA pixel rowAA exclusive:
-                            for (end = x0; end < cx; end++) {
-                                tile[idx++] = 0;
+                        if (aax0 < x1) {
+                            // note: cx is the cursor pointer in the tile array
+                            // (left to right)
+                            cx = aax0;
+
+                            // ensure cx >= x0
+                            if (cx <= x0) {
+                                cx = x0;
+                            } else {
+                                // skip line start until first AA pixel rowAA exclusive:
+                                idx += (cx - x0); // > 0
+                            }
+
+                            // now: cx >= x0 and cx >= aax0
+
+                            // Copy AA data (sum alpha data):
+                            addr = addr_rowAA + rowAAChunkIndex[cy] + (cx - aax0);
+
+                            for (end = (aax1 <= x1) ? aax1 : x1; cx < end; cx++) {
+                                tile[idx++] = _unsafe.getByte(addr); // [0-255]
+                                addr += SIZE_BYTE;
+                            }
+                        }
+                    }
+                } else {
+                    // RLE encoding:
+
+                    // quick check if there is AA data
+                    // corresponding to this tile [x0; x1[
+                    if (rowAAx1[cy] > x0) { // last pixel exclusive
+
+                        cx = rowAAx0[cy]; // inclusive
+                        if (cx > x1) {
+                            cx = x1;
+                        }
+
+                        // skip line start until first AA pixel rowAA exclusive:
+                        if (cx > x0) {
+                            idx += (cx - x0); // > 0
+                        }
+
+                        // get row address:
+                        addr_row = addr_rowAA + rowAAChunkIndex[cy];
+                        // get row end address:
+                        addr_end = addr_row + rowAALen[cy]; // coded length
+
+                        // reuse previous iteration position:
+                        addr = addr_row + rowAAPos[cy];
+
+                        last_addr = 0L;
+
+                        while ((cx < x1) && (addr < addr_end)) {
+                            // keep current position:
+                            last_addr = addr;
+
+                            // packed value:
+                            packed = _unsafe.getInt(addr);
+
+                            // last exclusive pixel x-coordinate:
+                            cx1 = (packed >> 8);
+                            // as bytes:
+                            addr += SIZE_INT;
+
+                            rx0 = cx;
+                            if (rx0 < x0) {
+                                rx0 = x0;
+                            }
+                            rx1 = cx = cx1;
+                            if (rx1 > x1) {
+                                rx1 = x1;
+                                cx  = x1; // fix last x
+                            }
+                            // adjust runLen:
+                            runLen = rx1 - rx0;
+
+                            // ensure rx1 > rx0:
+                            if (runLen > 0) {
+                                packed &= 0xFF; // [0-255]
+
+                                if (packed == 0)
+                                {
+                                    idx += runLen;
+                                    continue;
+                                }
+                                val = (byte) packed; // [0-255]
+                                do {
+                                    tile[idx++] = val;
+                                } while (--runLen > 0);
                             }
                         }
 
-                        // now: cx >= x0 but cx < aax0 (x1 < aax0)
-
-                        // Copy AA data (sum alpha data):
-                        addr = addr_rowAA + rowAAChunkIndex[cy] + (cx - aax0);
-
-                        for (end = (aax1 <= x1) ? aax1 : x1; cx < end; cx++) {
-                            tile[idx++] = _unsafe.getByte(addr); // [0..255]
-                            addr += SIZE_BYTE;
+                        // Update last position in RLE entries:
+                        if (last_addr != 0L) {
+                            // Fix x0:
+                            rowAAx0[cy]  = cx; // inclusive
+                            // Fix position:
+                            rowAAPos[cy] = (last_addr - addr_row);
                         }
                     }
                 }
-            } else {
-                // RLE encoding:
 
-                // quick check if there is AA data
-                // corresponding to this tile [x0; x1[
-                if (rowAAx1[cy] > x0) { // last pixel exclusive
+                // skip line end
+                if (cx < x1) {
+                    idx += (x1 - cx); // > 0
+                }
 
-                    cx = rowAAx0[cy]; // inclusive
-                    if (cx > x1) {
-                        cx = x1;
+                if (DO_TRACE) {
+                    for (int i = idx - (x1 - x0); i < idx; i++) {
+                        System.out.print(hex(tile[i], 2));
                     }
+                    System.out.println();
+                }
 
-                    // fill line start until first AA pixel rowAA exclusive:
-                    for (int i = x0; i < cx; i++) {
-                        tile[idx++] = 0;
-                    }
+                idx += skipRowPixels;
+            }
+        break;
 
-                    // get row address:
-                    addr_row = addr_rowAA + rowAAChunkIndex[cy];
-                    // get row end address:
-                    addr_end = addr_row + rowAALen[cy]; // coded length
+        case 0:
+        default:
+            for (cy = y0; cy < y1; cy++) {
+                // empty line (default)
+                cx = x0;
 
-                    // reuse previous iteration position:
-                    addr = addr_row + rowAAPos[cy];
+                if (rowAAEnc[cy] == 0) {
+                    // Raw encoding:
 
-                    last_addr = 0L;
+                    final int aax1 = rowAAx1[cy]; // exclusive
 
-                    while ((cx < x1) && (addr < addr_end)) {
-                        // keep current position:
-                        last_addr = addr;
+                    // quick check if there is AA data
+                    // corresponding to this tile [x0; x1[
+                    if (aax1 > x0) {
+                        final int aax0 = rowAAx0[cy]; // inclusive
 
-                        // packed value:
-                        packed = _unsafe.getInt(addr);
+                        if (aax0 < x1) {
+                            // note: cx is the cursor pointer in the tile array
+                            // (left to right)
+                            cx = aax0;
 
-                        // last exclusive pixel x-coordinate:
-                        cx1 = (packed >> 8);
-                        // as bytes:
-                        addr += SIZE_INT;
+                            // ensure cx >= x0
+                            if (cx <= x0) {
+                                cx = x0;
+                            } else {
+                                for (end = x0; end < cx; end++) {
+                                    tile[idx++] = 0;
+                                }
+                            }
 
-                        rx0 = cx;
-                        if (rx0 < x0) {
-                            rx0 = x0;
-                        }
-                        rx1 = cx = cx1;
-                        if (rx1 > x1) {
-                            rx1 = x1;
-                            cx  = x1; // fix last x
-                        }
-                        // adjust runLen:
-                        runLen = rx1 - rx0;
+                            // now: cx >= x0 and cx >= aax0
 
-                        // ensure rx1 > rx0:
-                        if (runLen > 0) {
-                            val = (byte)(packed & 0xFF); // [0..255]
+                            // Copy AA data (sum alpha data):
+                            addr = addr_rowAA + rowAAChunkIndex[cy] + (cx - aax0);
 
-                            do {
-                                tile[idx++] = val;
-                            } while (--runLen > 0);
+                            for (end = (aax1 <= x1) ? aax1 : x1; cx < end; cx++) {
+                                tile[idx++] = _unsafe.getByte(addr); // [0-255]
+                                addr += SIZE_BYTE;
+                            }
                         }
                     }
+                } else {
+                    // RLE encoding:
 
-                    // Update last position in RLE entries:
-                    if (last_addr != 0L) {
-                        // Fix x0:
-                        rowAAx0[cy]  = cx; // inclusive
-                        // Fix position:
-                        rowAAPos[cy] = (last_addr - addr_row);
+                    // quick check if there is AA data
+                    // corresponding to this tile [x0; x1[
+                    if (rowAAx1[cy] > x0) { // last pixel exclusive
+
+                        cx = rowAAx0[cy]; // inclusive
+                        if (cx > x1) {
+                            cx = x1;
+                        }
+
+                        // fill line start until first AA pixel rowAA exclusive:
+                        for (end = x0; end < cx; end++) {
+                            tile[idx++] = 0;
+                        }
+
+                        // get row address:
+                        addr_row = addr_rowAA + rowAAChunkIndex[cy];
+                        // get row end address:
+                        addr_end = addr_row + rowAALen[cy]; // coded length
+
+                        // reuse previous iteration position:
+                        addr = addr_row + rowAAPos[cy];
+
+                        last_addr = 0L;
+
+                        while ((cx < x1) && (addr < addr_end)) {
+                            // keep current position:
+                            last_addr = addr;
+
+                            // packed value:
+                            packed = _unsafe.getInt(addr);
+
+                            // last exclusive pixel x-coordinate:
+                            cx1 = (packed >> 8);
+                            // as bytes:
+                            addr += SIZE_INT;
+
+                            rx0 = cx;
+                            if (rx0 < x0) {
+                                rx0 = x0;
+                            }
+                            rx1 = cx = cx1;
+                            if (rx1 > x1) {
+                                rx1 = x1;
+                                cx  = x1; // fix last x
+                            }
+                            // adjust runLen:
+                            runLen = rx1 - rx0;
+
+                            // ensure rx1 > rx0:
+                            if (runLen > 0) {
+                                packed &= 0xFF; // [0-255]
+
+                                val = (byte) packed; // [0-255]
+                                do {
+                                    tile[idx++] = val;
+                                } while (--runLen > 0);
+                            }
+                        }
+
+                        // Update last position in RLE entries:
+                        if (last_addr != 0L) {
+                            // Fix x0:
+                            rowAAx0[cy]  = cx; // inclusive
+                            // Fix position:
+                            rowAAPos[cy] = (last_addr - addr_row);
+                        }
                     }
                 }
+
+                // fill line end
+                while (cx < x1) {
+                    tile[idx++] = 0;
+                    cx++;
+                }
+
+                if (DO_TRACE) {
+                    for (int i = idx - (x1 - x0); i < idx; i++) {
+                        System.out.print(hex(tile[i], 2));
+                    }
+                    System.out.println();
+                }
+
+                idx += skipRowPixels;
             }
+        break;
 
-            // fill line end
-            while (cx < x1) {
-                tile[idx++] = 0;
-                cx++;
+        case 2: // 0xFF
+            // Fill full tile rows:
+            Arrays.fill(tile, offset, offset + (y1 * rowstride), refVal);
+
+            for (cy = y0; cy < y1; cy++) {
+                // empty line (default)
+                cx = x0;
+
+                if (rowAAEnc[cy] == 0) {
+                    // Raw encoding:
+
+                    final int aax1 = rowAAx1[cy]; // exclusive
+
+                    // quick check if there is AA data
+                    // corresponding to this tile [x0; x1[
+                    if (aax1 > x0) {
+                        final int aax0 = rowAAx0[cy]; // inclusive
+
+                        if (aax0 < x1) {
+                            // note: cx is the cursor pointer in the tile array
+                            // (left to right)
+                            cx = aax0;
+
+                            // ensure cx >= x0
+                            if (cx <= x0) {
+                                cx = x0;
+                            } else {
+                                // fill line start until first AA pixel rowAA exclusive:
+                                for (end = x0; end < cx; end++) {
+                                    tile[idx++] = 0;
+                                }
+                            }
+
+                            // now: cx >= x0 and cx >= aax0
+
+                            // Copy AA data (sum alpha data):
+                            addr = addr_rowAA + rowAAChunkIndex[cy] + (cx - aax0);
+
+                            for (end = (aax1 <= x1) ? aax1 : x1; cx < end; cx++) {
+                                tile[idx++] = _unsafe.getByte(addr); // [0-255]
+                                addr += SIZE_BYTE;
+                            }
+                        }
+                    }
+                } else {
+                    // RLE encoding:
+
+                    // quick check if there is AA data
+                    // corresponding to this tile [x0; x1[
+                    if (rowAAx1[cy] > x0) { // last pixel exclusive
+
+                        cx = rowAAx0[cy]; // inclusive
+                        if (cx > x1) {
+                            cx = x1;
+                        }
+
+                        // fill line start until first AA pixel rowAA exclusive:
+                        for (end = x0; end < cx; end++) {
+                            tile[idx++] = 0;
+                        }
+
+                        // get row address:
+                        addr_row = addr_rowAA + rowAAChunkIndex[cy];
+                        // get row end address:
+                        addr_end = addr_row + rowAALen[cy]; // coded length
+
+                        // reuse previous iteration position:
+                        addr = addr_row + rowAAPos[cy];
+
+                        last_addr = 0L;
+
+                        while ((cx < x1) && (addr < addr_end)) {
+                            // keep current position:
+                            last_addr = addr;
+
+                            // packed value:
+                            packed = _unsafe.getInt(addr);
+
+                            // last exclusive pixel x-coordinate:
+                            cx1 = (packed >> 8);
+                            // as bytes:
+                            addr += SIZE_INT;
+
+                            rx0 = cx;
+                            if (rx0 < x0) {
+                                rx0 = x0;
+                            }
+                            rx1 = cx = cx1;
+                            if (rx1 > x1) {
+                                rx1 = x1;
+                                cx  = x1; // fix last x
+                            }
+                            // adjust runLen:
+                            runLen = rx1 - rx0;
+
+                            // ensure rx1 > rx0:
+                            if (runLen > 0) {
+                                packed &= 0xFF; // [0-255]
+
+                                if (packed == 0xFF)
+                                {
+                                    idx += runLen;
+                                    continue;
+                                }
+                                val = (byte) packed; // [0-255]
+                                do {
+                                    tile[idx++] = val;
+                                } while (--runLen > 0);
+                            }
+                        }
+
+                        // Update last position in RLE entries:
+                        if (last_addr != 0L) {
+                            // Fix x0:
+                            rowAAx0[cy]  = cx; // inclusive
+                            // Fix position:
+                            rowAAPos[cy] = (last_addr - addr_row);
+                        }
+                    }
+                }
+
+                // fill line end
+                while (cx < x1) {
+                    tile[idx++] = 0;
+                    cx++;
+                }
+
+                if (DO_TRACE) {
+                    for (int i = idx - (x1 - x0); i < idx; i++) {
+                        System.out.print(hex(tile[i], 2));
+                    }
+                    System.out.println();
+                }
+
+                idx += skipRowPixels;
             }
-
-            if (DO_TRACE) {
-                for (int i = idx - (x1 - x0); i < idx; i++) {
-                    System.out.print(hex(tile[i], 2));
-                }
-                System.out.println();
-            }
-
-            idx += skipRowPixels;
         }
 
         nextTile();
 
         if (DO_MONITORS) {
-            rdrCtx.stats.mon_ptg_getAlpha.stop();
+            rdrStats.mon_ptg_getAlpha.stop();
         }
     }
 
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/OffHeapArray.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/OffHeapArray.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -89,6 +89,7 @@
                                 + this.length
                                 + " at addr = " + this.address);
         }
+        this.address = 0L;
     }
 
     void fill(final byte val) {
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Renderer.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Renderer.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,41 +25,38 @@
 
 package sun.java2d.marlin;
 
-import java.util.Arrays;
 import sun.awt.geom.PathConsumer2D;
 import static sun.java2d.marlin.OffHeapArray.SIZE_INT;
 import jdk.internal.misc.Unsafe;
 
-final class Renderer implements PathConsumer2D, MarlinConst {
+final class Renderer implements PathConsumer2D, MarlinRenderer {
 
     static final boolean DISABLE_RENDER = false;
 
     static final boolean ENABLE_BLOCK_FLAGS = MarlinProperties.isUseTileFlags();
     static final boolean ENABLE_BLOCK_FLAGS_HEURISTICS = MarlinProperties.isUseTileFlagsWithHeuristics();
 
-    private static final int ALL_BUT_LSB = 0xfffffffe;
-    private static final int ERR_STEP_MAX = 0x7fffffff; // = 2^31 - 1
+    private static final int ALL_BUT_LSB = 0xFFFFFFFE;
+    private static final int ERR_STEP_MAX = 0x7FFFFFFF; // = 2^31 - 1
 
-    private static final double POWER_2_TO_32 = 0x1.0p32;
+    private static final double POWER_2_TO_32 = 0x1.0p32d;
 
     // use float to make tosubpix methods faster (no int to float conversion)
-    public static final float F_SUBPIXEL_POSITIONS_X
-        = (float) SUBPIXEL_POSITIONS_X;
-    public static final float F_SUBPIXEL_POSITIONS_Y
-        = (float) SUBPIXEL_POSITIONS_Y;
-    public static final int SUBPIXEL_MASK_X = SUBPIXEL_POSITIONS_X - 1;
-    public static final int SUBPIXEL_MASK_Y = SUBPIXEL_POSITIONS_Y - 1;
+    static final float SUBPIXEL_SCALE_X = (float) SUBPIXEL_POSITIONS_X;
+    static final float SUBPIXEL_SCALE_Y = (float) SUBPIXEL_POSITIONS_Y;
+    static final int SUBPIXEL_MASK_X = SUBPIXEL_POSITIONS_X - 1;
+    static final int SUBPIXEL_MASK_Y = SUBPIXEL_POSITIONS_Y - 1;
 
     // number of subpixels corresponding to a tile line
     private static final int SUBPIXEL_TILE
-        = TILE_SIZE << SUBPIXEL_LG_POSITIONS_Y;
+        = TILE_H << SUBPIXEL_LG_POSITIONS_Y;
 
     // 2048 (pixelSize) pixels (height) x 8 subpixels = 64K
     static final int INITIAL_BUCKET_ARRAY
         = INITIAL_PIXEL_DIM * SUBPIXEL_POSITIONS_Y;
 
-    // crossing capacity = edges count / 8 ~ 512
-    static final int INITIAL_CROSSING_COUNT = INITIAL_EDGES_COUNT >> 3;
+    // crossing capacity = edges count / 4 ~ 1024
+    static final int INITIAL_CROSSING_COUNT = INITIAL_EDGES_COUNT >> 2;
 
     public static final int WIND_EVEN_ODD = 0;
     public static final int WIND_NON_ZERO = 1;
@@ -80,20 +77,20 @@
     // curve break into lines
     // cubic error in subpixels to decrement step
     private static final float CUB_DEC_ERR_SUBPIX
-        = 2.5f * (NORM_SUBPIXELS / 8f); // 2.5 subpixel for typical 8x8 subpixels
+        = MarlinProperties.getCubicDecD2() * (NORM_SUBPIXELS / 8.0f); // 1 pixel
     // cubic error in subpixels to increment step
     private static final float CUB_INC_ERR_SUBPIX
-        = 1f * (NORM_SUBPIXELS / 8f); // 1 subpixel for typical 8x8 subpixels
+        = MarlinProperties.getCubicIncD1() * (NORM_SUBPIXELS / 8.0f); // 0.4 pixel
 
-    // cubic bind length to decrement step = 8 * error in subpixels
-    // pisces: 20 / 8
-    // openjfx pisces: 8 / 3.2
-    // multiply by 8 = error scale factor:
+    // TestNonAARasterization (JDK-8170879): cubics
+    // bad paths: 59294/100000 == 59.29%, 94335 bad pixels (avg = 1.59), 3966 warnings (avg = 0.07)
+
+    // cubic bound length to decrement step
     public static final float CUB_DEC_BND
-        = 8f * CUB_DEC_ERR_SUBPIX; // 20f means 2.5 subpixel error
-    // cubic bind length to increment step = 8 * error in subpixels
+        = 8.0f * CUB_DEC_ERR_SUBPIX;
+    // cubic bound length to increment step
     public static final float CUB_INC_BND
-        = 8f * CUB_INC_ERR_SUBPIX; // 8f means 1 subpixel error
+        = 8.0f * CUB_INC_ERR_SUBPIX;
 
     // cubic countlg
     public static final int CUB_COUNT_LG = 2;
@@ -104,21 +101,23 @@
     // cubic count^3 = 8^countlg
     private static final int CUB_COUNT_3 = 1 << (3 * CUB_COUNT_LG);
     // cubic dt = 1 / count
-    private static final float CUB_INV_COUNT = 1f / CUB_COUNT;
+    private static final float CUB_INV_COUNT = 1.0f / CUB_COUNT;
     // cubic dt^2 = 1 / count^2 = 1 / 4^countlg
-    private static final float CUB_INV_COUNT_2 = 1f / CUB_COUNT_2;
+    private static final float CUB_INV_COUNT_2 = 1.0f / CUB_COUNT_2;
     // cubic dt^3 = 1 / count^3 = 1 / 8^countlg
-    private static final float CUB_INV_COUNT_3 = 1f / CUB_COUNT_3;
+    private static final float CUB_INV_COUNT_3 = 1.0f / CUB_COUNT_3;
 
     // quad break into lines
     // quadratic error in subpixels
     private static final float QUAD_DEC_ERR_SUBPIX
-        = 1f * (NORM_SUBPIXELS / 8f); // 1 subpixel for typical 8x8 subpixels
+        = MarlinProperties.getQuadDecD2() * (NORM_SUBPIXELS / 8.0f); // 0.5 pixel
 
-    // quadratic bind length to decrement step = 8 * error in subpixels
-    // pisces and openjfx pisces: 32
+    // TestNonAARasterization (JDK-8170879): quads
+    // bad paths: 62916/100000 == 62.92%, 103818 bad pixels (avg = 1.65), 6514 warnings (avg = 0.10)
+
+    // quadratic bound length to decrement step
     public static final float QUAD_DEC_BND
-        = 8f * QUAD_DEC_ERR_SUBPIX; // 8f means 1 subpixel error
+        = 8.0f * QUAD_DEC_ERR_SUBPIX;
 
 //////////////////////////////////////////////////////////////////////////////
 //  SCAN LINE
@@ -157,7 +156,7 @@
     private float edgeMinX = Float.POSITIVE_INFINITY;
     private float edgeMaxX = Float.NEGATIVE_INFINITY;
 
-    // edges [floats|ints] stored in off-heap memory
+    // edges [ints] stored in off-heap memory
     private final OffHeapArray edges;
 
     private int[] edgeBuckets;
@@ -165,8 +164,6 @@
     // used range for edgeBuckets / edgeBucketCounts
     private int buckets_minY;
     private int buckets_maxY;
-    // sum of each edge delta Y (subpixels)
-    private int edgeSumDeltaY;
 
     // edgeBuckets ref (clean)
     private final IntArrayCache.Reference edgeBuckets_ref;
@@ -183,13 +180,13 @@
         int count = 1; // dt = 1 / count
 
         // maximum(ddX|Y) = norm(dbx, dby) * dt^2 (= 1)
-        float maxDD = FloatMath.max(Math.abs(c.dbx), Math.abs(c.dby));
+        float maxDD = Math.abs(c.dbx) + Math.abs(c.dby);
 
         final float _DEC_BND = QUAD_DEC_BND;
 
         while (maxDD >= _DEC_BND) {
             // divide step by half:
-            maxDD /= 4f; // error divided by 2^2 = 4
+            maxDD /= 4.0f; // error divided by 2^2 = 4
 
             count <<= 1;
             if (DO_STATS) {
@@ -199,7 +196,7 @@
 
         int nL = 0; // line count
         if (count > 1) {
-            final float icount = 1f / count; // dt
+            final float icount = 1.0f / count; // dt
             final float icount2 = icount * icount; // dt^2
 
             final float ddx = c.dbx * icount2;
@@ -246,8 +243,8 @@
         // the dx and dy refer to forward differencing variables, not the last
         // coefficients of the "points" polynomial
         float dddx, dddy, ddx, ddy, dx, dy;
-        dddx = 2f * c.dax * icount3;
-        dddy = 2f * c.day * icount3;
+        dddx = 2.0f * c.dax * icount3;
+        dddy = 2.0f * c.day * icount3;
         ddx = dddx + c.dbx * icount2;
         ddy = dddy + c.dby * icount2;
         dx = c.ax * icount3 + c.bx * icount2 + c.cx * icount;
@@ -262,13 +259,13 @@
 
         while (count > 0) {
             // divide step by half:
-            while (Math.abs(ddx) >= _DEC_BND || Math.abs(ddy) >= _DEC_BND) {
-                dddx /= 8f;
-                dddy /= 8f;
-                ddx = ddx/4f - dddx;
-                ddy = ddy/4f - dddy;
-                dx = (dx - ddx) / 2f;
-                dy = (dy - ddy) / 2f;
+            while (Math.abs(ddx) + Math.abs(ddy) >= _DEC_BND) {
+                dddx /= 8.0f;
+                dddy /= 8.0f;
+                ddx = ddx / 4.0f - dddx;
+                ddy = ddy / 4.0f - dddy;
+                dx = (dx - ddx) / 2.0f;
+                dy = (dy - ddy) / 2.0f;
 
                 count <<= 1;
                 if (DO_STATS) {
@@ -277,19 +274,16 @@
             }
 
             // double step:
-            // TODO: why use first derivative dX|Y instead of second ddX|Y ?
-            // both scale changes should use speed or acceleration to have the same metric.
-
             // can only do this on even "count" values, because we must divide count by 2
             while (count % 2 == 0
-                   && Math.abs(dx) <= _INC_BND && Math.abs(dy) <= _INC_BND)
+                   && Math.abs(dx) + Math.abs(dy) <= _INC_BND)
             {
-                dx = 2f * dx + ddx;
-                dy = 2f * dy + ddy;
-                ddx = 4f * (ddx + dddx);
-                ddy = 4f * (ddy + dddy);
-                dddx *= 8f;
-                dddy *= 8f;
+                dx = 2.0f * dx + ddx;
+                dy = 2.0f * dy + ddy;
+                ddx = 4.0f * (ddx + dddx);
+                ddy = 4.0f * (ddy + dddy);
+                dddx *= 8.0f;
+                dddy *= 8.0f;
 
                 count >>= 1;
                 if (DO_STATS) {
@@ -337,7 +331,7 @@
             x1 = tmp;
         }
 
-        // convert subpixel coordinates (float) into pixel positions (int)
+        // convert subpixel coordinates [float] into pixel positions [int]
 
         // The index of the pixel that holds the next HPC is at ceil(trueY - 0.5)
         // Since y1 and y2 are biased by -0.5 in tosubpixy(), this is simply
@@ -361,7 +355,7 @@
             return;
         }
 
-        // edge min/max X/Y are in subpixel space (inclusive) within bounds:
+        // edge min/max X/Y are in subpixel space (half-open interval):
         // note: Use integer crossings to ensure consistent range within
         // edgeBuckets / edgeBucketCounts arrays in case of NaN values (int = 0)
         if (firstCrossing < edgeMinY) {
@@ -376,7 +370,7 @@
         final double y1d   = y1;
         final double slope = (x1d - x2) / (y1d - y2);
 
-        if (slope >= 0.0) { // <==> x1 < x2
+        if (slope >= 0.0d) { // <==> x1 < x2
             if (x1 < edgeMinX) {
                 edgeMinX = x1;
             }
@@ -439,13 +433,13 @@
         // long x1_fixed = x1_intercept * 2^32;  (fixed point 32.32 format)
         // curx = next VPC = fixed_floor(x1_fixed - 2^31 + 2^32 - 1)
         //                 = fixed_floor(x1_fixed + 2^31 - 1)
-        //                 = fixed_floor(x1_fixed + 0x7fffffff)
-        // and error       = fixed_fract(x1_fixed + 0x7fffffff)
+        //                 = fixed_floor(x1_fixed + 0x7FFFFFFF)
+        // and error       = fixed_fract(x1_fixed + 0x7FFFFFFF)
         final double x1_intercept = x1d + (firstCrossing - y1d) * slope;
 
         // inlined scalb(x1_intercept, 32):
         final long x1_fixed_biased = ((long) (POWER_2_TO_32 * x1_intercept))
-                                     + 0x7fffffffL;
+                                     + 0x7FFFFFFFL;
         // curx:
         // last bit corresponds to the orientation
         _unsafe.putInt(addr, (((int) (x1_fixed_biased >> 31L)) & ALL_BUT_LSB) | or);
@@ -474,7 +468,7 @@
         // pointer from bucket
         _unsafe.putInt(addr, _edgeBuckets[bucketIdx]);
         addr += SIZE_INT;
-        // y max (inclusive)
+        // y max (exclusive)
         _unsafe.putInt(addr,  lastCrossing);
 
         // Update buckets:
@@ -484,9 +478,6 @@
         // last bit means edge end
         _edgeBucketCounts[lastCrossing - _boundsMinY] |= 0x1;
 
-        // update sum of delta Y (subpixels):
-        edgeSumDeltaY += (lastCrossing - firstCrossing);
-
         // update free pointer (ie length in bytes)
         _edges.used += _SIZEOF_EDGE_BYTES;
 
@@ -568,8 +559,8 @@
 
     Renderer init(final int pix_boundsX, final int pix_boundsY,
                   final int pix_boundsWidth, final int pix_boundsHeight,
-                  final int windingRule) {
-
+                  final int windingRule)
+    {
         this.windingRule = windingRule;
 
         // bounds as half-open intervals: minX <= x < maxX and minY <= y < maxY
@@ -611,8 +602,6 @@
         activeEdgeMaxUsed = 0;
         edges.used = 0;
 
-        edgeSumDeltaY = 0;
-
         return this; // fluent API
     }
 
@@ -669,15 +658,17 @@
         if (DO_MONITORS) {
             rdrCtx.stats.mon_rdr_endRendering.stop();
         }
+        // recycle the RendererContext instance
+        MarlinRenderingEngine.returnRendererContext(rdrCtx);
     }
 
     private static float tosubpixx(final float pix_x) {
-        return F_SUBPIXEL_POSITIONS_X * pix_x;
+        return SUBPIXEL_SCALE_X * pix_x;
     }
 
     private static float tosubpixy(final float pix_y) {
         // shift y by -0.5 for fast ceil(y - 0.5):
-        return F_SUBPIXEL_POSITIONS_Y * pix_y - 0.5f;
+        return SUBPIXEL_SCALE_Y * pix_y - 0.5f;
     }
 
     @Override
@@ -702,8 +693,8 @@
 
     @Override
     public void curveTo(float x1, float y1,
-            float x2, float y2,
-            float x3, float y3)
+                        float x2, float y2,
+                        float x3, float y3)
     {
         final float xe = tosubpixx(x3);
         final float ye = tosubpixy(y3);
@@ -969,8 +960,8 @@
                         // get the pointer to the edge
                         ecur = _edgePtrs[i];
 
-                        /* convert subpixel coordinates (float) into pixel
-                            positions (int) for coming scanline */
+                        /* convert subpixel coordinates into pixel
+                            positions for coming scanline */
                         /* note: it is faster to always update edges even
                            if it is removed from AEL for coming or last scanline */
 
@@ -1069,8 +1060,8 @@
                         // get the pointer to the edge
                         ecur = _edgePtrs[i];
 
-                        /* convert subpixel coordinates (float) into pixel
-                            positions (int) for coming scanline */
+                        /* convert subpixel coordinates into pixel
+                            positions for coming scanline */
                         /* note: it is faster to always update edges even
                            if it is removed from AEL for coming or last scanline */
 
@@ -1176,7 +1167,14 @@
                             // TODO: perform line clipping on left-right sides
                             // to avoid such bound checks:
                             x0 = (prev > bboxx0) ? prev : bboxx0;
-                            x1 = (curx < bboxx1) ? curx : bboxx1;
+
+                            if (curx < bboxx1) {
+                                x1 = curx;
+                            } else {
+                                x1 = bboxx1;
+                                // skip right side (fast exit loop):
+                                i = numCrossings;
+                            }
 
                             if (x0 < x1) {
                                 x0 -= bboxx0; // turn x0, x1 from coords to indices
@@ -1193,7 +1191,8 @@
 
                                     if (useBlkFlags) {
                                         // flag used blocks:
-                                        _blkFlags[pix_x >> _BLK_SIZE_LG] = 1;
+                                        // note: block processing handles extra pixel:
+                                        _blkFlags[pix_x    >> _BLK_SIZE_LG] = 1;
                                     }
                                 } else {
                                     tmp = (x0 & _SUBPIXEL_MASK_X);
@@ -1212,6 +1211,7 @@
 
                                     if (useBlkFlags) {
                                         // flag used blocks:
+                                        // note: block processing handles extra pixel:
                                         _blkFlags[pix_x    >> _BLK_SIZE_LG] = 1;
                                         _blkFlags[pix_xmax >> _BLK_SIZE_LG] = 1;
                                     }
@@ -1237,7 +1237,14 @@
                             // TODO: perform line clipping on left-right sides
                             // to avoid such bound checks:
                             x0 = (prev > bboxx0) ? prev : bboxx0;
-                            x1 = (curx < bboxx1) ? curx : bboxx1;
+
+                            if (curx < bboxx1) {
+                                x1 = curx;
+                            } else {
+                                x1 = bboxx1;
+                                // skip right side (fast exit loop):
+                                i = numCrossings;
+                            }
 
                             if (x0 < x1) {
                                 x0 -= bboxx0; // turn x0, x1 from coords to indices
@@ -1254,7 +1261,8 @@
 
                                     if (useBlkFlags) {
                                         // flag used blocks:
-                                        _blkFlags[pix_x >> _BLK_SIZE_LG] = 1;
+                                        // note: block processing handles extra pixel:
+                                        _blkFlags[pix_x    >> _BLK_SIZE_LG] = 1;
                                     }
                                 } else {
                                     tmp = (x0 & _SUBPIXEL_MASK_X);
@@ -1273,6 +1281,7 @@
 
                                     if (useBlkFlags) {
                                         // flag used blocks:
+                                        // note: block processing handles extra pixel:
                                         _blkFlags[pix_x    >> _BLK_SIZE_LG] = 1;
                                         _blkFlags[pix_xmax >> _BLK_SIZE_LG] = 1;
                                     }
@@ -1306,9 +1315,12 @@
 
                 if (maxX >= minX) {
                     // note: alpha array will be zeroed by copyAARow()
-                    // +2 because alpha [pix_minX; pix_maxX+1]
+                    // +1 because alpha [pix_minX; pix_maxX[
                     // fix range [x0; x1[
-                    copyAARow(_alpha, lastY, minX, maxX + 2, useBlkFlags);
+                    // note: if x1=bboxx1, then alpha is written up to bboxx1+1
+                    // inclusive: alpha[bboxx1] ignored, alpha[bboxx1+1] == 0
+                    // (normally so never cleared below)
+                    copyAARow(_alpha, lastY, minX, maxX + 1, useBlkFlags);
 
                     // speculative for next pixel row (scanline coherence):
                     if (_enableBlkFlagsHeuristics) {
@@ -1350,9 +1362,12 @@
 
         if (maxX >= minX) {
             // note: alpha array will be zeroed by copyAARow()
-            // +2 because alpha [pix_minX; pix_maxX+1]
+            // +1 because alpha [pix_minX; pix_maxX[
             // fix range [x0; x1[
-            copyAARow(_alpha, y, minX, maxX + 2, useBlkFlags);
+            // note: if x1=bboxx1, then alpha is written up to bboxx1+1
+            // inclusive: alpha[bboxx1] ignored then cleared and
+            // alpha[bboxx1+1] == 0 (normally so never cleared after)
+            copyAARow(_alpha, y, minX, maxX + 1, useBlkFlags);
         } else if (y != lastY) {
             _cache.clearAARow(y);
         }
@@ -1375,36 +1390,26 @@
             return false; // undefined edges bounds
         }
 
-        final int _boundsMinY = boundsMinY;
-        final int _boundsMaxY = boundsMaxY;
-
-        // bounds as inclusive intervals
+        // bounds as half-open intervals
         final int spminX = FloatMath.max(FloatMath.ceil_int(edgeMinX - 0.5f), boundsMinX);
-        final int spmaxX = FloatMath.min(FloatMath.ceil_int(edgeMaxX - 0.5f), boundsMaxX - 1);
+        final int spmaxX = FloatMath.min(FloatMath.ceil_int(edgeMaxX - 0.5f), boundsMaxX);
 
         // edge Min/Max Y are already rounded to subpixels within bounds:
         final int spminY = edgeMinY;
-        final int spmaxY;
-        int maxY = edgeMaxY;
+        final int spmaxY = edgeMaxY;
 
-        if (maxY <= _boundsMaxY - 1) {
-            spmaxY = maxY;
-        } else {
-            spmaxY = _boundsMaxY - 1;
-            maxY   = _boundsMaxY;
-        }
-        buckets_minY = spminY - _boundsMinY;
-        buckets_maxY = maxY   - _boundsMinY;
+        buckets_minY = spminY - boundsMinY;
+        buckets_maxY = spmaxY - boundsMinY;
 
         if (DO_LOG_BOUNDS) {
             MarlinUtils.logInfo("edgesXY = [" + edgeMinX + " ... " + edgeMaxX
-                                + "][" + edgeMinY + " ... " + edgeMaxY + "]");
+                                + "[ [" + edgeMinY + " ... " + edgeMaxY + "[");
             MarlinUtils.logInfo("spXY    = [" + spminX + " ... " + spmaxX
-                                + "][" + spminY + " ... " + spmaxY + "]");
+                                + "[ [" + spminY + " ... " + spmaxY + "[");
         }
 
         // test clipping for shapes out of bounds
-        if ((spminX > spmaxX) || (spminY > spmaxY)) {
+        if ((spminX >= spmaxX) || (spminY >= spmaxY)) {
             return false;
         }
 
@@ -1419,7 +1424,7 @@
         final int pmaxY = (spmaxY + SUBPIXEL_MASK_Y) >> SUBPIXEL_LG_POSITIONS_Y;
 
         // store BBox to answer ptg.getBBox():
-        this.cache.init(pminX, pminY, pmaxX, pmaxY, edgeSumDeltaY);
+        this.cache.init(pminX, pminY, pmaxX, pmaxY);
 
         // Heuristics for using block flags:
         if (ENABLE_BLOCK_FLAGS) {
@@ -1429,9 +1434,9 @@
             if (enableBlkFlags) {
                 // ensure blockFlags array is large enough:
                 // note: +2 to ensure enough space left at end
-                final int nxTiles = ((pmaxX - pminX) >> TILE_SIZE_LG) + 2;
-                if (nxTiles > INITIAL_ARRAY) {
-                    blkFlags = blkFlags_ref.getArray(nxTiles);
+                final int blkLen = ((pmaxX - pminX) >> BLOCK_SIZE_LG) + 2;
+                if (blkLen > INITIAL_ARRAY) {
+                    blkFlags = blkFlags_ref.getArray(blkLen);
                 }
             }
         }
@@ -1446,7 +1451,7 @@
         // inclusive:
         bbox_spminY = spminY;
         // exclusive:
-        bbox_spmaxY = FloatMath.min(spmaxY + 1, pmaxY << SUBPIXEL_LG_POSITIONS_Y);
+        bbox_spmaxY = spmaxY;
 
         if (DO_LOG_BOUNDS) {
             MarlinUtils.logInfo("pXY       = [" + pminX + " ... " + pmaxX
@@ -1504,6 +1509,9 @@
                    final int pix_y, final int pix_from, final int pix_to,
                    final boolean useBlockFlags)
     {
+        if (DO_MONITORS) {
+            rdrCtx.stats.mon_rdr_copyAARow.start();
+        }
         if (useBlockFlags) {
             if (DO_STATS) {
                 rdrCtx.stats.hist_tile_generator_encoding.add(1);
@@ -1515,5 +1523,8 @@
             }
             cache.copyAARowNoRLE(alphaRow, pix_y, pix_from, pix_to);
         }
+        if (DO_MONITORS) {
+            rdrCtx.stats.mon_rdr_copyAARow.stop();
+        }
     }
 }
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererContext.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/RendererContext.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@
 /**
  * This class is a renderer context dedicated to a single thread
  */
-final class RendererContext extends ReentrantContext implements MarlinConst {
+final class RendererContext extends ReentrantContext implements IRendererContext {
 
     // RendererContext creation counter
     private static final AtomicInteger CTX_COUNT = new AtomicInteger(1);
@@ -121,7 +121,7 @@
         // Renderer:
         cache = new MarlinCache(this);
         renderer = new Renderer(this); // needs MarlinCache from rdrCtx.cache
-        ptg = new MarlinTileGenerator(renderer);
+        ptg = new MarlinTileGenerator(stats, renderer, cache);
 
         stroker = new Stroker(this);
         dasher = new Dasher(this);
@@ -174,14 +174,21 @@
         return p2d;
     }
 
-    OffHeapArray newOffHeapArray(final long initialSize) {
+    @Override
+    public RendererStats stats() {
+        return stats;
+    }
+
+    @Override
+    public OffHeapArray newOffHeapArray(final long initialSize) {
         if (DO_STATS) {
             stats.totalOffHeapInitial += initialSize;
         }
         return new OffHeapArray(cleanerObj, initialSize);
     }
 
-    IntArrayCache.Reference newCleanIntArrayRef(final int initialSize) {
+    @Override
+    public IntArrayCache.Reference newCleanIntArrayRef(final int initialSize) {
         return cleanIntCache.createRef(initialSize);
     }
 
--- a/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Stroker.java	Wed Jul 05 23:27:00 2017 +0200
+++ b/jdk/src/java.desktop/share/classes/sun/java2d/marlin/Stroker.java	Wed May 17 22:05:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,12 +26,8 @@
 package sun.java2d.marlin;
 
 import java.util.Arrays;
-import static java.lang.Math.ulp;
-import static java.lang.Math.sqrt;
 
 import sun.awt.geom.PathConsumer2D;
-import sun.java2d.marlin.Curve.BreakPtrIterator;
-
 
 // TODO: some of the arithmetic here is too verbose and prone to hard to
 // debug typos. We should consider making a small Point/Vector class that
@@ -75,7 +71,7 @@
     // pisces used to use fixed point arithmetic with 16 decimal digits. I
     // didn't want to change the values of the constant below when I converted
     // it to floating point, so that's why the divisions by 2^16 are there.
-    private static final float ROUND_JOIN_THRESHOLD = 1000/65536f;
+    private static final float ROUND_JOIN_THRESHOLD = 1000.0f/65536.0f;
 
     private static final float C = 0.5522847498307933f;
 
@@ -112,9 +108,8 @@
     private final PolyStack reverse;
 
     // This is where the curve to be processed is put. We give it
-    // enough room to store 2 curves: one for the current subdivision, the
-    // other for the rest of the curve.
-    private final float[] middle = new float[2 * 8];
+    // enough room to store all curves.
+    private final float[] middle = new float[MAX_N_CURVES * 6 + 2];
     private final float[] lp = new float[8];
     private final float[] rp = new float[8];
     private final float[] subdivTs = new float[MAX_N_CURVES - 1];
@@ -158,8 +153,8 @@
     {
         this.out = pc2d;
 
-        this.lineWidth2 = lineWidth / 2f;
-        this.invHalfLineWidth2Sq = 1f / (2f * lineWidth2 * lineWidth2);
+        this.lineWidth2 = lineWidth / 2.0f;
+        this.invHalfLineWidth2Sq = 1.0f / (2.0f * lineWidth2 * lineWidth2);
         this.capStyle = capStyle;
         this.joinStyle = joinStyle;
 
@@ -182,14 +177,14 @@
 
         if (DO_CLEAN_DIRTY) {
             // Force zero-fill dirty arrays:
-            Arrays.fill(offset0, 0f);
-            Arrays.fill(offset1, 0f);
-            Arrays.fill(offset2, 0f);
-            Arrays.fill(miter, 0f);
-            Arrays.fill(middle, 0f);
-            Arrays.fill(lp, 0f);
-            Arrays.fill(rp, 0f);
-            Arrays.fill(subdivTs, 0f);
+            Arrays.fill(offset0, 0.0f);
+            Arrays.fill(offset1, 0.0f);
+            Arrays.fill(offset2, 0.0f);
+            Arrays.fill(miter, 0.0f);
+            Arrays.fill(middle, 0.0f);
+            Arrays.fill(lp, 0.0f);
+            Arrays.fill(rp, 0.0f);
+            Arrays.fill(subdivTs, 0.0f);
         }
     }
 
@@ -197,11 +192,11 @@
                                       final float w, final float[] m)
     {
         float len = lx*lx + ly*ly;
-        if (len == 0f) {
-            m[0] = 0f;
-            m[1] = 0f;
+        if (len == 0.0f) {
+            m[0] = 0.0f;
+            m[1] = 0.0f;
         } else {
-            len = (float) sqrt(len);
+            len = (float) Math.sqrt(len);
             m[0] =  (ly * w) / len;
             m[1] = -(lx * w) / len;
         }
@@ -226,7 +221,7 @@
                                boolean rev,
                                float threshold)
     {
-        if ((omx == 0f && omy == 0f) || (mx == 0f && my == 0f)) {
+        if ((omx == 0.0f && omy == 0.0f) || (mx == 0.0f && my == 0.0f)) {
             return;
         }
 
@@ -258,7 +253,7 @@
         // If it is >=0, we know that abs(ext) is <= 90 degrees, so we only
         // need 1 curve to approximate the circle section that joins omx,omy
         // and mx,my.
-        final int numCurves = (cosext >= 0f) ? 1 : 2;
+        final int numCurves = (cosext >= 0.0f) ? 1 : 2;
 
         switch (numCurves) {
         case 1:
@@ -280,7 +275,7 @@
             // this normal's length is at least 0.5 and at most sqrt(2)/2 (because
             // we know the angle of the arc is > 90 degrees).
             float nx = my - omy, ny = omx - mx;
-            float nlen = (float) sqrt(nx*nx + ny*ny);
+            float nlen = (float) Math.sqrt(nx*nx + ny*ny);
             float scale = lineWidth2/nlen;
             float mmx = nx * scale, mmy = ny * scale;
 
@@ -318,8 +313,8 @@
         // define the bezier curve we're computing.
         // It is computed using the constraints that P1-P0 and P3-P2 are parallel
         // to the arc tangents at the endpoints, and that |P1-P0|=|P3-P2|.
-        float cv = (float) ((4.0 / 3.0) * sqrt(0.5 - cosext2) /
-                            (1.0 + sqrt(cosext2 + 0.5)));
+        float cv = (float) ((4.0d / 3.0d) * Math.sqrt(0.5d - cosext2) /
+                            (1.0d + Math.sqrt(cosext2 + 0.5d)));
         // if clockwise, we need to negate cv.
         if (rev) { // rev is equivalent to isCW(omx, omy, mx, my)
             cv = -cv;
@@ -348,20 +343,28 @@
                     cx - mx,       cy - my);
     }
 
-    // Put the intersection point of the lines (x0, y0) -> (x1, y1)
-    // and (x0p, y0p) -> (x1p, y1p) in m[off] and m[off+1].
-    // If the lines are parallel, it will put a non finite number in m.
-    private static void computeIntersection(final float x0, final float y0,
-                                            final float x1, final float y1,
-                                            final float x0p, final float y0p,
-                                            final float x1p, final float y1p,
-                                            final float[] m, int off)
+    // Return the intersection point of the lines (x0, y0) -> (x1, y1)
+    // and (x0p, y0p) -> (x1p, y1p) in m[off] and m[off+1]
+    private static void computeMiter(final float x0, final float y0,
+                                     final float x1, final float y1,
+                                     final float x0p, final float y0p,
+                                     final float x1p, final float y1p,
+                                     final float[] m, int off)
     {
         float x10 = x1 - x0;
         float y10 = y1 - y0;
         float x10p = x1p - x0p;
         float y10p = y1p - y0p;
 
+        // if this is 0, the lines are parallel. If they go in opposite
+        // directions, there is no intersection so m[off] and
+        // m[off+1] will contain infinity, so no miter will be drawn.
+        // If they go in the same direction that means that the start of the
+        // current segment and the end of the previous segment have the same
+        // tangent, in which case this method won't even be involved in
+        // miter drawing because it won't be called by drawMiter (because
+        // (mx == omx && my == omy) will be true, and drawMiter will return
+        // immediately).
         float den = x10*y10p - x10p*y10;
         float t = x10p*(y0-y0p) - y10p*(x0-x0p);
         t /= den;
@@ -369,6 +372,40 @@
         m[off]   = y0 + t*y10;
     }
 
+    // Return the intersection point of the lines (x0, y0) -> (x1, y1)
+    // and (x0p, y0p) -> (x1p, y1p) in m[off] and m[off+1]
+    private static void safeComputeMiter(final float x0, final float y0,
+                                         final float x1, final float y1,
+                                         final float x0p, final float y0p,
+                                         final float x1p, final float y1p,
+                                         final float[] m, int off)
+    {
+        float x10 = x1 - x0;
+        float y10 = y1 - y0;
+        float x10p = x1p - x0p;
+        float y10p = y1p - y0p;
+
+        // if this is 0, the lines are parallel. If they go in opposite
+        // directions, there is no intersection: m[off] and m[off+1] then
+        // receive the midpoint of (x0, y0) and (x0p, y0p), not infinity.
+        // If they go in the same direction that means that the start of the
+        // current segment and the end of the previous segment have the same
+        // tangent, in which case this method won't even be involved in
+        // miter drawing because it won't be called by drawMiter (because
+        // (mx == omx && my == omy) will be true, and drawMiter will return
+        // immediately).
+        float den = x10*y10p - x10p*y10;
+        if (den == 0.0f) {
+            m[off++] = (x0 + x0p) / 2.0f;
+            m[off]   = (y0 + y0p) / 2.0f;
+            return;
+        }
+        float t = x10p*(y0-y0p) - y10p*(x0-x0p);
+        t /= den;
+        m[off++] = x0 + t*x10;
+        m[off] = y0 + t*y10;
+    }
+
     private void drawMiter(final float pdx, final float pdy,
                            final float x0, final float y0,
                            final float dx, final float dy,
@@ -376,8 +413,8 @@
                            boolean rev)
     {
         if ((mx == omx && my == omy) ||
-            (pdx == 0f && pdy == 0f) ||
-            (dx == 0f && dy == 0f))
+            (pdx == 0.0f && pdy == 0.0f) ||
+            (dx == 0.0f && dy == 0.0f))
         {
             return;
         }
@@ -389,9 +426,9 @@
             my  = -my;
         }
 
-        computeIntersection((x0 - pdx) + omx, (y0 - pdy) + omy, x0 + omx, y0 + omy,
-                            (dx + x0) + mx, (dy + y0) + my, x0 + mx, y0 + my,
-                            miter, 0);
+        computeMiter((x0 - pdx) + omx, (y0 - pdy) + omy, x0 + omx, y0 + omy,
+                     (dx + x0) + mx, (dy + y0) + my, x0 + mx, y0 + my,
+                     miter, 0);
 
         final float miterX = miter[0];
         final float miterY = miter[1];
@@ -414,8 +451,8 @@
         }
         this.sx0 = this.cx0 = x0;
         this.sy0 = this.cy0 = y0;
-        this.cdx = this.sdx = 1f;
-        this.cdy = this.sdy = 0f;
+        this.cdx = this.sdx = 1.0f;
+        this.cdy = this.sdy = 0.0f;
         this.prev = MOVE_TO;
     }
 
@@ -423,8 +460,8 @@
     public void lineTo(float x1, float y1) {
         float dx = x1 - cx0;
         float dy = y1 - cy0;
-        if (dx == 0f && dy == 0f) {
-            dx = 1f;
+        if (dx == 0.0f && dy == 0.0f) {
+            dx = 1.0f;
         }
         computeOffset(dx, dy, lineWidth2, offset0);
         final float mx = offset0[0];
@@ -454,10 +491,10 @@
                 return;
             }
             emitMoveTo(cx0, cy0 - lineWidth2);
-            this.cmx = this.smx = 0f;
+            this.cmx = this.smx = 0.0f;
             this.cmy = this.smy = -lineWidth2;
-            this.cdx = this.sdx = 1f;
-            this.cdy = this.sdy = 0f;
+            this.cdx = this.sdx = 1.0f;
+            this.cdy = this.sdy = 0.0f;
             finish();
             return;
         }
@@ -640,7 +677,7 @@
     {
         // if p1=p2 or p3=p4 it means that the derivative at the endpoint
         // vanishes, which creates problems with computeOffset. Usually
-        // this happens when this stroker object is trying to winden
+        // this happens when this stroker object is trying to widen
         // a curve with a cusp. What happens is that curveTo splits
         // the input curve at the cusp, and passes it to this function.
         // because of inaccuracies in the splitting, we consider points
@@ -657,8 +694,8 @@
 
         // if p1 == p2 && p3 == p4: draw line from p1->p4, unless p1 == p4,
         // in which case ignore if p1 == p2
-        final bo