changeset 13414:4368832d1991

8178723: Workaround for failure of CRC32C intrinsic on x86 machines without CLMUL support (JDK-8178720) Summary: Disable CRC32C intrinsic on affected machines. Improve tests. Reviewed-by: kvn, simonis, mdoerr, aph Contributed-by: Lutz Schmidt <lutz.schmidt@sap.com>, Zoltan Majo <zoltan.majo@oracle.com>
author zmajo
date Wed, 19 Apr 2017 14:37:11 +0200
parents a92a6f4a454e
children 0cdc61e61d57
files src/cpu/x86/vm/vm_version_x86.cpp test/compiler/intrinsics/zip/TestCRC32.java test/compiler/intrinsics/zip/TestCRC32C.java
diffstat 3 files changed, 218 insertions(+), 96 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Apr 19 13:38:12 2017 +0200
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Wed Apr 19 14:37:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -763,12 +763,11 @@
     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
   }
 
-  if (supports_sse4_2()) {
+  if (supports_sse4_2() && supports_clmul()) {
     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
       UseCRC32CIntrinsics = true;
     }
-  }
-  else if (UseCRC32CIntrinsics) {
+  } else if (UseCRC32CIntrinsics) {
     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
       warning("CRC32C intrinsics are not available on this CPU");
     }
--- a/test/compiler/intrinsics/zip/TestCRC32.java	Wed Apr 19 13:38:12 2017 +0200
+++ b/test/compiler/intrinsics/zip/TestCRC32.java	Wed Apr 19 14:37:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,7 +36,22 @@
 import java.util.zip.Checksum;
 
 public class TestCRC32 {
-    public static void main(String[] args) {
+    // standard CRC32 polynomial
+    // coefficients in different forms
+    // normal:              polyBits = 0x04c11db7   = 0b0000 0100 1100 0001 0001 1101 1011 0111
+    // reversed:            polybits = 0xedb88320   = 0b1110 1101 1011 1000 1000 0011 0010 0000
+    // reversed reciprocal  polybits = 0x82608edb   = 0b1000 0010 0110 0000 1000 1110 1101 1011
+    //
+    //                                                  0      5    9    13   17   21   25   29
+    //                                                  |      |    |    |    |    |    |    |
+    // reversed shiftL 1    polyBits = 0x1db710641L = 0b1 1101 1011 0111 0001 0000 0110 0100 0001
+    final static long polyBits = (1L<<(32-32)) + (1L<<(32-26)) + (1L<<(32-23)) + (1L<<(32-22))
+                               + (1L<<(32-16)) + (1L<<(32-12)) + (1L<<(32-11)) + (1L<<(32-10))
+                               + (1L<<(32-8))  + (1L<<(32-7))  + (1L<<(32-5))  + (1L<<(32-4))
+                               + (1L<<(32-2))  + (1L<<(32-1))  + (1L<<(32-0));
+    final static long polyBitsShifted = polyBits>>1;
+
+    public static void main(String[] args) throws Exception {
         int offset = Integer.getInteger("offset", 0);
         int msgSize = Integer.getInteger("msgSize", 512);
         boolean multi = false;
@@ -65,11 +80,14 @@
 
         byte[] b = initializedBytes(msgSize, offset);
 
+        final long crcReference = update_byteLoop(0, b, offset);
+
         CRC32 crc0 = new CRC32();
         CRC32 crc1 = new CRC32();
         CRC32 crc2 = new CRC32();
 
         crc0.update(b, offset, msgSize);
+        check(crc0, crcReference);
 
         System.out.println("-------------------------------------------------------");
 
@@ -77,27 +95,35 @@
         for (int i = 0; i < warmupIters; i++) {
             crc1.reset();
             crc1.update(b, offset, msgSize);
+            check(crc1, crcReference);
         }
 
-        /* measure performance */
+        /* check correctness
+         * Do that before measuring performance
+         * to even better heat up involved methods.
+         */
+        for (int i = 0; i < iters; i++) {
+            crc1.reset();
+            crc1.update(b, offset, msgSize);
+            check(crc1, crcReference);
+        }
+        report("CRCs", crc1, crcReference);
+
+        /* measure performance
+         * Don't spoil times with error checking.
+         */
         long start = System.nanoTime();
         for (int i = 0; i < iters; i++) {
             crc1.reset();
             crc1.update(b, offset, msgSize);
         }
         long end = System.nanoTime();
+
         double total = (double)(end - start)/1e9;         // in seconds
         double thruput = (double)msgSize*iters/1e6/total; // in MB/s
         System.out.println("CRC32.update(byte[]) runtime = " + total + " seconds");
         System.out.println("CRC32.update(byte[]) throughput = " + thruput + " MB/s");
-
-        /* check correctness */
-        for (int i = 0; i < iters; i++) {
-            crc1.reset();
-            crc1.update(b, offset, msgSize);
-            if (!check(crc0, crc1)) break;
-        }
-        report("CRCs", crc0, crc1);
+        report("CRCs", crc1, crcReference);
 
         System.out.println("-------------------------------------------------------");
 
@@ -110,9 +136,24 @@
             crc2.reset();
             crc2.update(buf);
             buf.rewind();
+            check(crc2, crcReference);
         }
 
-        /* measure performance */
+        /* check correctness
+         * Do that before measuring performance
+         * to even better heat up involved methods.
+         */
+        for (int i = 0; i < iters; i++) {
+            crc2.reset();
+            crc2.update(buf);
+            buf.rewind();
+            check(crc2, crcReference);
+        }
+        report("CRCs", crc2, crcReference);
+
+        /* measure performance
+         * Don't spoil times with error checking.
+         */
         start = System.nanoTime();
         for (int i = 0; i < iters; i++) {
             crc2.reset();
@@ -124,31 +165,57 @@
         thruput = (double)msgSize*iters/1e6/total; // in MB/s
         System.out.println("CRC32.update(ByteBuffer) runtime = " + total + " seconds");
         System.out.println("CRC32.update(ByteBuffer) throughput = " + thruput + " MB/s");
-
-        /* check correctness */
-        for (int i = 0; i < iters; i++) {
-            crc2.reset();
-            crc2.update(buf);
-            buf.rewind();
-            if (!check(crc0, crc2)) break;
-        }
-        report("CRCs", crc0, crc2);
+        report("CRCs", crc2, crcReference);
 
         System.out.println("-------------------------------------------------------");
     }
 
-    private static void report(String s, Checksum crc0, Checksum crc1) {
-        System.out.printf("%s: crc0 = %08x, crc1 = %08x\n",
-                          s, crc0.getValue(), crc1.getValue());
+    // Just a loop over a byte array, updating the CRC byte by byte.
+    public static long update_byteLoop(long crc, byte[] buf, int offset) {
+        return update_byteLoop(crc, buf, offset, buf.length-offset);
     }
 
-    private static boolean check(Checksum crc0, Checksum crc1) {
-        if (crc0.getValue() != crc1.getValue()) {
-            System.err.printf("ERROR: crc0 = %08x, crc1 = %08x\n",
-                              crc0.getValue(), crc1.getValue());
-            return false;
+    // Just a loop over a byte array, with given length, updating the CRC byte by byte.
+    public static long update_byteLoop(long crc, byte[] buf, int offset, int length) {
+        int end = length+offset;
+        for (int i = offset; i < end; i++) {
+            crc = update_singlebyte(crc, polyBitsShifted, buf[i]);
         }
-        return true;
+        return crc;
+    }
+
+    // Straight-forward implementation of CRC update by one byte.
+    // We use this very basic implementation to calculate reference
+    // results. It is necessary to have full control over how the
+    // reference results are calculated. It is not sufficient to rely
+    // on the interpreter (or c1, or c2) to do the right thing.
+    public static long update_singlebyte(long crc, long polynomial, int val) {
+        crc = (crc ^ -1L) & 0x00000000ffffffffL;  // use 1's complement of crc
+        crc =  crc ^ (val&0xff);                  // XOR in next byte from stream
+        for (int i = 0; i <  8; i++) {
+            boolean bitset = (crc & 0x01L) != 0;
+
+            crc = crc>>1;
+            if (bitset) {
+                crc = crc ^ polynomial;
+                crc = crc & 0x00000000ffffffffL;
+            }
+        }
+        crc = (crc ^ -1L) & 0x00000000ffffffffL;  // revert taking 1's complement
+        return crc;
+    }
+
+    private static void report(String s, Checksum crc, long crcReference) {
+        System.out.printf("%s: crc = %08x, crcReference = %08x\n",
+                          s, crc.getValue(), crcReference);
+    }
+
+    private static void check(Checksum crc, long crcReference) throws Exception {
+        if (crc.getValue() != crcReference) {
+            System.err.printf("ERROR: crc = %08x, crcReference = %08x\n",
+                              crc.getValue(), crcReference);
+            throw new Exception("TestCRC32 Error");
+        }
     }
 
     private static byte[] initializedBytes(int M, int offset) {
@@ -162,7 +229,7 @@
         return bytes;
     }
 
-    private static void test_multi(int iters) {
+    private static void test_multi(int iters) throws Exception {
         int len1 = 8;    // the  8B/iteration loop
         int len2 = 32;   // the 32B/iteration loop
         int len3 = 4096; // the 4KB/iteration loop
@@ -185,37 +252,31 @@
                         (len1+len2+len3)*2+5, (len1+len2+len3)*2+7,
                         (len1+len2+len3)*3, (len1+len2+len3)*3-1, (len1+len2+len3)*3-3,
                         (len1+len2+len3)*3-5, (len1+len2+len3)*3-7 };
-        CRC32[] crc0 = new CRC32[offsets.length*sizes.length];
         CRC32[] crc1 = new CRC32[offsets.length*sizes.length];
+        long[] crcReference = new long[offsets.length*sizes.length];
         int i, j, k;
 
         System.out.printf("testing %d cases ...\n", offsets.length*sizes.length);
 
-        /* set the result from interpreter as reference */
+        // Initialize CRC32 result arrays, CRC32 reference array.
+        // Reference is calculated using a very basic Java implementation.
         for (i = 0; i < offsets.length; i++) {
             for (j = 0; j < sizes.length; j++) {
-                crc0[i*sizes.length + j] = new CRC32();
                 crc1[i*sizes.length + j] = new CRC32();
-                crc0[i*sizes.length + j].update(b, offsets[i], sizes[j]);
+                crcReference[i*sizes.length + j] = update_byteLoop(0, b, offsets[i], sizes[j]);
             }
         }
 
-        /* warm up the JIT compiler and get result */
+        // Warm up the JIT compiler. Over time, all methods involved will
+        // be executed by the interpreter, then get compiled by c1 and
+        // finally by c2. Each calculated CRC value must, in each iteration,
+        // be equal to the precalculated reference value for the test to pass.
         for (k = 0; k < iters; k++) {
             for (i = 0; i < offsets.length; i++) {
                 for (j = 0; j < sizes.length; j++) {
                     crc1[i*sizes.length + j].reset();
                     crc1[i*sizes.length + j].update(b, offsets[i], sizes[j]);
-                }
-            }
-        }
-
-        /* check correctness */
-        for (i = 0; i < offsets.length; i++) {
-            for (j = 0; j < sizes.length; j++) {
-                if (!check(crc0[i*sizes.length + j], crc1[i*sizes.length + j])) {
-                    System.out.printf("offsets[%d] = %d", i, offsets[i]);
-                    System.out.printf("\tsizes[%d] = %d\n", j, sizes[j]);
+                    check(crc1[i*sizes.length + j], crcReference[i*sizes.length + j]);
                 }
             }
         }
--- a/test/compiler/intrinsics/zip/TestCRC32C.java	Wed Apr 19 13:38:12 2017 +0200
+++ b/test/compiler/intrinsics/zip/TestCRC32C.java	Wed Apr 19 14:37:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,7 +36,23 @@
 import java.util.zip.Checksum;
 
 public class TestCRC32C {
-    public static void main(String[] args) {
+    // CRC32C (Castagnoli) polynomial
+    // coefficients in different forms
+    // normal:              polyBits = 0x1edc6f41   = 0b0001 1110 1101 1100 0110 1111 0100 0001
+    // reversed:            polybits = 0x82f63b78   = 0b1000 0010 1111 0110 0011 1011 0111 1000
+    // reversed reciprocal  polybits = 0x8f6e37a0   = 0b1000 1111 0110 1110 0011 0111 1010 0000
+    //
+    //                                                  0      5    9    13   17   21   25   29
+    //                                                  |      |    |    |    |    |    |    |
+    // reversed shiftL 1    polyBits = 0x105ec76f1L = 0b1 0000 0101 1110 1100 0111 0110 1111 0001
+    final static long polyBits = (1L<<(32-32)) + (1L<<(32-28)) + (1L<<(32-27))
+                               + (1L<<(32-26)) + (1L<<(32-25)) + (1L<<(32-23)) + (1L<<(32-22))
+                               + (1L<<(32-20)) + (1L<<(32-19)) + (1L<<(32-18)) + (1L<<(32-14))
+                               + (1L<<(32-13)) + (1L<<(32-11)) + (1L<<(32-10)) + (1L<<(32-9))
+                               + (1L<<(32-8))  + (1L<<(32-6))  + (1L<<(32-0));
+    final static long polyBitsShifted = polyBits>>1;
+
+    public static void main(String[] args) throws Exception {
         int offset = Integer.getInteger("offset", 0);
         int msgSize = Integer.getInteger("msgSize", 512);
         boolean multi = false;
@@ -65,11 +81,14 @@
 
         byte[] b = initializedBytes(msgSize, offset);
 
+        final long crcReference = update_byteLoop(0, b, offset);
+
         CRC32C crc0 = new CRC32C();
         CRC32C crc1 = new CRC32C();
         CRC32C crc2 = new CRC32C();
 
         crc0.update(b, offset, msgSize);
+        check(crc0, crcReference);
 
         System.out.println("-------------------------------------------------------");
 
@@ -77,27 +96,35 @@
         for (int i = 0; i < warmupIters; i++) {
             crc1.reset();
             crc1.update(b, offset, msgSize);
+            check(crc1, crcReference);
         }
 
-        /* measure performance */
+        /* check correctness
+         * Do that before measuring performance
+         * to even better heat up involved methods.
+         */
+        for (int i = 0; i < iters; i++) {
+            crc1.reset();
+            crc1.update(b, offset, msgSize);
+            check(crc1, crcReference);
+        }
+        report("CRCs", crc1, crcReference);
+
+        /* measure performance
+         * Don't spoil times with error checking.
+         */
         long start = System.nanoTime();
         for (int i = 0; i < iters; i++) {
             crc1.reset();
             crc1.update(b, offset, msgSize);
         }
         long end = System.nanoTime();
+
         double total = (double)(end - start)/1e9;         // in seconds
         double thruput = (double)msgSize*iters/1e6/total; // in MB/s
         System.out.println("CRC32C.update(byte[]) runtime = " + total + " seconds");
         System.out.println("CRC32C.update(byte[]) throughput = " + thruput + " MB/s");
-
-        /* check correctness */
-        for (int i = 0; i < iters; i++) {
-            crc1.reset();
-            crc1.update(b, offset, msgSize);
-            if (!check(crc0, crc1)) break;
-        }
-        report("CRCs", crc0, crc1);
+        report("CRCs", crc1, crcReference);
 
         System.out.println("-------------------------------------------------------");
 
@@ -110,9 +137,24 @@
             crc2.reset();
             crc2.update(buf);
             buf.rewind();
+            check(crc2, crcReference);
         }
 
-        /* measure performance */
+        /* check correctness
+         * Do that before measuring performance
+         * to even better heat up involved methods.
+         */
+        for (int i = 0; i < iters; i++) {
+            crc2.reset();
+            crc2.update(buf);
+            buf.rewind();
+            check(crc2, crcReference);
+        }
+        report("CRCs", crc2, crcReference);
+
+        /* measure performance
+         * Don't spoil times with error checking.
+         */
         start = System.nanoTime();
         for (int i = 0; i < iters; i++) {
             crc2.reset();
@@ -124,31 +166,57 @@
         thruput = (double)msgSize*iters/1e6/total; // in MB/s
         System.out.println("CRC32C.update(ByteBuffer) runtime = " + total + " seconds");
         System.out.println("CRC32C.update(ByteBuffer) throughput = " + thruput + " MB/s");
-
-        /* check correctness */
-        for (int i = 0; i < iters; i++) {
-            crc2.reset();
-            crc2.update(buf);
-            buf.rewind();
-            if (!check(crc0, crc2)) break;
-        }
-        report("CRCs", crc0, crc2);
+        report("CRCs", crc2, crcReference);
 
         System.out.println("-------------------------------------------------------");
     }
 
-    private static void report(String s, Checksum crc0, Checksum crc1) {
-        System.out.printf("%s: crc0 = %08x, crc1 = %08x\n",
-                          s, crc0.getValue(), crc1.getValue());
+    // Just a loop over a byte array, updating the CRC byte by byte.
+    public static long update_byteLoop(long crc, byte[] buf, int offset) {
+        return update_byteLoop(crc, buf, offset, buf.length-offset);
     }
 
-    private static boolean check(Checksum crc0, Checksum crc1) {
-        if (crc0.getValue() != crc1.getValue()) {
-            System.err.printf("ERROR: crc0 = %08x, crc1 = %08x\n",
-                              crc0.getValue(), crc1.getValue());
-            return false;
+    // Just a loop over a byte array, with given length, updating the CRC byte by byte.
+    public static long update_byteLoop(long crc, byte[] buf, int offset, int length) {
+        int end = length+offset;
+        for (int i = offset; i < end; i++) {
+            crc = update_singlebyte(crc, polyBitsShifted, buf[i]);
         }
-        return true;
+        return crc;
+    }
+
+    // Straight-forward implementation of CRC update by one byte.
+    // We use this very basic implementation to calculate reference
+    // results. It is necessary to have full control over how the
+    // reference results are calculated. It is not sufficient to rely
+    // on the interpreter (or c1, or c2) to do the right thing.
+    public static long update_singlebyte(long crc, long polynomial, int val) {
+        crc = (crc ^ -1L) & 0x00000000ffffffffL;  // use 1's complement of crc
+        crc =  crc ^ (val&0xff);                  // XOR in next byte from stream
+        for (int i = 0; i <  8; i++) {
+            boolean bitset = (crc & 0x01L) != 0;
+
+            crc = crc>>1;
+            if (bitset) {
+                crc = crc ^ polynomial;
+                crc = crc & 0x00000000ffffffffL;
+            }
+        }
+        crc = (crc ^ -1L) & 0x00000000ffffffffL;  // revert taking 1's complement
+        return crc;
+    }
+
+    private static void report(String s, Checksum crc, long crcReference) {
+        System.out.printf("%s: crc = %08x, crcReference = %08x\n",
+                          s, crc.getValue(), crcReference);
+    }
+
+    private static void check(Checksum crc, long crcReference) throws Exception {
+        if (crc.getValue() != crcReference) {
+            System.err.printf("ERROR: crc = %08x, crcReference = %08x\n",
+                              crc.getValue(), crcReference);
+            throw new Exception("TestCRC32C Error");
+        }
     }
 
     private static byte[] initializedBytes(int M, int offset) {
@@ -162,7 +230,7 @@
         return bytes;
     }
 
-    private static void test_multi(int iters) {
+    private static void test_multi(int iters) throws Exception {
         int len1 = 8;    // the  8B/iteration loop
         int len2 = 32;   // the 32B/iteration loop
         int len3 = 4096; // the 4KB/iteration loop
@@ -185,37 +253,31 @@
                         (len1+len2+len3)*2+5, (len1+len2+len3)*2+7,
                         (len1+len2+len3)*3, (len1+len2+len3)*3-1, (len1+len2+len3)*3-3,
                         (len1+len2+len3)*3-5, (len1+len2+len3)*3-7 };
-        CRC32C[] crc0 = new CRC32C[offsets.length*sizes.length];
         CRC32C[] crc1 = new CRC32C[offsets.length*sizes.length];
+        long[] crcReference = new long[offsets.length*sizes.length];
         int i, j, k;
 
         System.out.printf("testing %d cases ...\n", offsets.length*sizes.length);
 
-        /* set the result from interpreter as reference */
+        // Initialize CRC32C result arrays, CRC32C reference array.
+        // Reference is calculated using a very basic Java implementation.
         for (i = 0; i < offsets.length; i++) {
             for (j = 0; j < sizes.length; j++) {
-                crc0[i*sizes.length + j] = new CRC32C();
                 crc1[i*sizes.length + j] = new CRC32C();
-                crc0[i*sizes.length + j].update(b, offsets[i], sizes[j]);
+                crcReference[i*sizes.length + j] = update_byteLoop(0, b, offsets[i], sizes[j]);
             }
         }
 
-        /* warm up the JIT compiler and get result */
+        // Warm up the JIT compiler. Over time, all methods involved will
+        // be executed by the interpreter, then get compiled by c1 and
+        // finally by c2. Each calculated CRC value must, in each iteration,
+        // be equal to the precalculated reference value for the test to pass.
         for (k = 0; k < iters; k++) {
             for (i = 0; i < offsets.length; i++) {
                 for (j = 0; j < sizes.length; j++) {
                     crc1[i*sizes.length + j].reset();
                     crc1[i*sizes.length + j].update(b, offsets[i], sizes[j]);
-                }
-            }
-        }
-
-        /* check correctness */
-        for (i = 0; i < offsets.length; i++) {
-            for (j = 0; j < sizes.length; j++) {
-                if (!check(crc0[i*sizes.length + j], crc1[i*sizes.length + j])) {
-                    System.out.printf("offsets[%d] = %d", i, offsets[i]);
-                    System.out.printf("\tsizes[%d] = %d\n", j, sizes[j]);
+                    check(crc1[i*sizes.length + j], crcReference[i*sizes.length + j]);
                 }
             }
         }