changeset 59213:c9adad6d7055

8235510: java.util.zip.CRC32 performance drop after 8200067 Summary: backout 8200067 optimization Reviewed-by: kvn
author sviswanathan
date Fri, 06 Dec 2019 16:03:44 -0800
parents 4437d58547ce
children 31882abe1494
files src/hotspot/cpu/x86/assembler_x86.cpp src/hotspot/cpu/x86/macroAssembler_x86.cpp src/hotspot/cpu/x86/vm_version_x86.cpp src/hotspot/cpu/x86/vm_version_x86.hpp
diffstat 4 files changed, 7 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Fri Dec 06 15:10:40 2019 -0800
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Fri Dec 06 16:03:44 2019 -0800
@@ -7232,7 +7232,7 @@
 }
 
 void Assembler::evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len) {
-  assert(VM_Version::supports_vpclmulqdq(), "Requires vector carryless multiplication support");
+  assert(VM_Version::supports_avx512_vpclmulqdq(), "Requires vector carryless multiplication support");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Fri Dec 06 15:10:40 2019 -0800
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Fri Dec 06 16:03:44 2019 -0800
@@ -8945,34 +8945,6 @@
   shrl(len, 4);
   jcc(Assembler::zero, L_tail_restore);
 
-  // Fold total 512 bits of polynomial on each iteration
-  if (VM_Version::supports_vpclmulqdq()) {
-    Label Parallel_loop, L_No_Parallel;
-
-    cmpl(len, 8);
-    jccb(Assembler::less, L_No_Parallel);
-
-    movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32));
-    evmovdquq(xmm1, Address(buf, 0), Assembler::AVX_512bit);
-    movdl(xmm5, crc);
-    evpxorq(xmm1, xmm1, xmm5, Assembler::AVX_512bit);
-    addptr(buf, 64);
-    subl(len, 7);
-    evshufi64x2(xmm0, xmm0, xmm0, 0x00, Assembler::AVX_512bit); //propagate the mask from 128 bits to 512 bits
-
-    BIND(Parallel_loop);
-    fold_128bit_crc32_avx512(xmm1, xmm0, xmm5, buf, 0);
-    addptr(buf, 64);
-    subl(len, 4);
-    jcc(Assembler::greater, Parallel_loop);
-
-    vextracti64x2(xmm2, xmm1, 0x01);
-    vextracti64x2(xmm3, xmm1, 0x02);
-    vextracti64x2(xmm4, xmm1, 0x03);
-    jmp(L_fold_512b);
-
-    BIND(L_No_Parallel);
-  }
   // Fold crc into first bytes of vector
   movdqa(xmm1, Address(buf, 0));
   movdl(rax, xmm1);
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp	Fri Dec 06 15:10:40 2019 -0800
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp	Fri Dec 06 16:03:44 2019 -0800
@@ -691,7 +691,7 @@
     _features &= ~CPU_AVX512BW;
     _features &= ~CPU_AVX512VL;
     _features &= ~CPU_AVX512_VPOPCNTDQ;
-    _features &= ~CPU_VPCLMULQDQ;
+    _features &= ~CPU_AVX512_VPCLMULQDQ;
     _features &= ~CPU_VAES;
   }
 
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp	Fri Dec 06 15:10:40 2019 -0800
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp	Fri Dec 06 16:03:44 2019 -0800
@@ -245,7 +245,7 @@
                            : 1,
                       gfni : 1,
                       vaes : 1,
-                vpclmulqdq : 1,
+         avx512_vpclmulqdq : 1,
                avx512_vnni : 1,
              avx512_bitalg : 1,
                            : 1,
@@ -338,7 +338,7 @@
 #define CPU_FMA ((uint64_t)UCONST64(0x800000000))      // FMA instructions
 #define CPU_VZEROUPPER ((uint64_t)UCONST64(0x1000000000))       // Vzeroupper instruction
 #define CPU_AVX512_VPOPCNTDQ ((uint64_t)UCONST64(0x2000000000)) // Vector popcount
-#define CPU_VPCLMULQDQ ((uint64_t)UCONST64(0x4000000000)) //Vector carryless multiplication
+#define CPU_AVX512_VPCLMULQDQ ((uint64_t)UCONST64(0x4000000000)) //Vector carryless multiplication
 #define CPU_VAES ((uint64_t)UCONST64(0x8000000000))    // Vector AES instructions
 #define CPU_VNNI ((uint64_t)UCONST64(0x10000000000))   // Vector Neural Network Instructions
 
@@ -561,8 +561,8 @@
           result |= CPU_AVX512VL;
         if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
           result |= CPU_AVX512_VPOPCNTDQ;
-        if (_cpuid_info.sef_cpuid7_ecx.bits.vpclmulqdq != 0)
-          result |= CPU_VPCLMULQDQ;
+        if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
+          result |= CPU_AVX512_VPCLMULQDQ;
         if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
           result |= CPU_VAES;
         if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
@@ -855,7 +855,7 @@
   static bool supports_fma()        { return (_features & CPU_FMA) != 0 && supports_avx(); }
   static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
   static bool supports_vpopcntdq()  { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
-  static bool supports_vpclmulqdq() { return (_features & CPU_VPCLMULQDQ) != 0; }
+  static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
   static bool supports_vaes()       { return (_features & CPU_VAES) != 0; }
   static bool supports_vnni()       { return (_features & CPU_VNNI) != 0; }