changeset 51459:7339b9e38182

8207746: C2: Lucene crashes on AVX512 instruction Summary: replace add/sub instructions which affect flags with lea instruction. Reviewed-by: kvn, thartmann
author vdeshpande
date Wed, 15 Aug 2018 00:48:12 -0700
parents 97300133cc23
children 97e361fe3433
files src/hotspot/cpu/x86/macroAssembler_x86.cpp src/hotspot/cpu/x86/macroAssembler_x86.hpp
diffstat 2 files changed, 151 insertions(+), 265 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Tue Aug 14 17:21:44 2018 -0700
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Wed Aug 15 00:48:12 2018 -0700
@@ -3123,6 +3123,16 @@
   }
 }
 
+void MacroAssembler::push_zmm(XMMRegister reg) {
+  lea(rsp, Address(rsp, -64)); // Use lea to not affect flags
+  evmovdqul(Address(rsp, 0), reg, Assembler::AVX_512bit);
+}
+
+void MacroAssembler::pop_zmm(XMMRegister reg) {
+  evmovdqul(reg, Address(rsp, 0), Assembler::AVX_512bit);
+  lea(rsp, Address(rsp, 64)); // Use lea to not affect flags
+}
+
 void MacroAssembler::fremr(Register tmp) {
   save_rax(tmp);
   { Label L;
@@ -3848,33 +3858,25 @@
   } else if ((dst_enc < 16) && (src_enc < 16)) {
     Assembler::pcmpeqb(dst, src);
   } else if (src_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::pcmpeqb(xmm0, src);
     movdqu(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   } else if (dst_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, src, Assembler::AVX_512bit);
     Assembler::pcmpeqb(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-  } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    pop_zmm(xmm0);
+  } else {
+    push_zmm(xmm0);
+    push_zmm(xmm1);
     movdqu(xmm0, src);
     movdqu(xmm1, dst);
     Assembler::pcmpeqb(xmm1, xmm0);
     movdqu(dst, xmm1);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
+    pop_zmm(xmm0);
   }
 }
 
@@ -3886,33 +3888,25 @@
   } else if ((dst_enc < 16) && (src_enc < 16)) {
     Assembler::pcmpeqw(dst, src);
   } else if (src_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::pcmpeqw(xmm0, src);
     movdqu(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   } else if (dst_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, src, Assembler::AVX_512bit);
     Assembler::pcmpeqw(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-  } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    pop_zmm(xmm0);
+  } else {
+    push_zmm(xmm0);
+    push_zmm(xmm1);
     movdqu(xmm0, src);
     movdqu(xmm1, dst);
     Assembler::pcmpeqw(xmm1, xmm0);
     movdqu(dst, xmm1);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
+    pop_zmm(xmm0);
   }
 }
 
@@ -3921,13 +3915,11 @@
   if (dst_enc < 16) {
     Assembler::pcmpestri(dst, src, imm8);
   } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::pcmpestri(xmm0, src, imm8);
     movdqu(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   }
 }
 
@@ -3937,33 +3929,25 @@
   if ((dst_enc < 16) && (src_enc < 16)) {
     Assembler::pcmpestri(dst, src, imm8);
   } else if (src_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::pcmpestri(xmm0, src, imm8);
     movdqu(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   } else if (dst_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, src, Assembler::AVX_512bit);
     Assembler::pcmpestri(dst, xmm0, imm8);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-  } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    pop_zmm(xmm0);
+  } else {
+    push_zmm(xmm0);
+    push_zmm(xmm1);
     movdqu(xmm0, src);
     movdqu(xmm1, dst);
     Assembler::pcmpestri(xmm1, xmm0, imm8);
     movdqu(dst, xmm1);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
+    pop_zmm(xmm0);
   }
 }
 
@@ -3975,33 +3959,25 @@
   } else if ((dst_enc < 16) && (src_enc < 16)) {
     Assembler::pmovzxbw(dst, src);
   } else if (src_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::pmovzxbw(xmm0, src);
     movdqu(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   } else if (dst_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, src, Assembler::AVX_512bit);
     Assembler::pmovzxbw(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-  } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    pop_zmm(xmm0);
+  } else {
+    push_zmm(xmm0);
+    push_zmm(xmm1);
     movdqu(xmm0, src);
     movdqu(xmm1, dst);
     Assembler::pmovzxbw(xmm1, xmm0);
     movdqu(dst, xmm1);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
+    pop_zmm(xmm0);
   }
 }
 
@@ -4012,13 +3988,11 @@
   } else if (dst_enc < 16) {
     Assembler::pmovzxbw(dst, src);
   } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::pmovzxbw(xmm0, src);
     movdqu(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   }
 }
 
@@ -4027,12 +4001,10 @@
   if (src_enc < 16) {
     Assembler::pmovmskb(dst, src);
   } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, src, Assembler::AVX_512bit);
     Assembler::pmovmskb(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   }
 }
 
@@ -4042,31 +4014,23 @@
   if ((dst_enc < 16) && (src_enc < 16)) {
     Assembler::ptest(dst, src);
   } else if (src_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::ptest(xmm0, src);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   } else if (dst_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, src, Assembler::AVX_512bit);
     Assembler::ptest(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-  } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    pop_zmm(xmm0);
+  } else {
+    push_zmm(xmm0);
+    push_zmm(xmm1);
     movdqu(xmm0, src);
     movdqu(xmm1, dst);
     Assembler::ptest(xmm1, xmm0);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
+    pop_zmm(xmm0);
   }
 }
 
@@ -4221,13 +4185,11 @@
       evmovdqul(dst, xmm0, Assembler::AVX_512bit);
       evmovdqul(xmm0, src, Assembler::AVX_512bit);
     } else {
-      subptr(rsp, 64);
-      evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+      push_zmm(xmm0);
       evmovdqul(xmm0, nds, Assembler::AVX_512bit);
       vandps(xmm0, xmm0, negate_field, vector_len);
       evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-      evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-      addptr(rsp, 64);
+      pop_zmm(xmm0);
     }
   }
 }
@@ -4258,13 +4220,11 @@
       evmovdqul(dst, xmm0, Assembler::AVX_512bit);
       evmovdqul(xmm0, src, Assembler::AVX_512bit);
     } else {
-      subptr(rsp, 64);
-      evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+      push_zmm(xmm0);
       evmovdqul(xmm0, nds, Assembler::AVX_512bit);
       vandpd(xmm0, xmm0, negate_field, vector_len);
       evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-      evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-      addptr(rsp, 64);
+      pop_zmm(xmm0);
     }
   }
 }
@@ -4294,16 +4254,14 @@
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
   } else {
     // worse case scenario, all regs are in the upper bank
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    push_zmm(xmm1);
     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm1, src, Assembler::AVX_512bit);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::vpaddb(xmm0, xmm0, xmm1, vector_len);
     evmovdqul(dst, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
   }
 }
 
@@ -4353,16 +4311,14 @@
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
   } else {
     // worse case scenario, all regs are in the upper bank
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    push_zmm(xmm1);
     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm1, src, Assembler::AVX_512bit);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::vpaddw(xmm0, xmm0, xmm1, vector_len);
     evmovdqul(dst, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
   }
 }
 
@@ -4404,33 +4360,25 @@
   } else if ((dst_enc < 16) && (src_enc < 16)) {
     Assembler::vpbroadcastw(dst, src);
   } else if (src_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::vpbroadcastw(xmm0, src);
     movdqu(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   } else if (dst_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, src, Assembler::AVX_512bit);
     Assembler::vpbroadcastw(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-  } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    pop_zmm(xmm0);
+  } else {
+    push_zmm(xmm0);
+    push_zmm(xmm1);
     movdqu(xmm0, src);
     movdqu(xmm1, dst);
     Assembler::vpbroadcastw(xmm1, xmm0);
     movdqu(dst, xmm1);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
+    pop_zmm(xmm0);
   }
 }
 
@@ -4442,33 +4390,25 @@
   if ((dst_enc < 16) && (src_enc < 16)) {
     Assembler::vpcmpeqb(dst, nds, src, vector_len);
   } else if (src_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::vpcmpeqb(xmm0, xmm0, src, vector_len);
     movdqu(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   } else if (dst_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, src, Assembler::AVX_512bit);
     Assembler::vpcmpeqb(dst, dst, xmm0, vector_len);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-  } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    pop_zmm(xmm0);
+  } else {
+    push_zmm(xmm0);
+    push_zmm(xmm1);
     movdqu(xmm0, src);
     movdqu(xmm1, dst);
     Assembler::vpcmpeqb(xmm1, xmm1, xmm0, vector_len);
     movdqu(dst, xmm1);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
+    pop_zmm(xmm0);
   }
 }
 
@@ -4480,33 +4420,25 @@
   if ((dst_enc < 16) && (src_enc < 16)) {
     Assembler::vpcmpeqw(dst, nds, src, vector_len);
   } else if (src_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::vpcmpeqw(xmm0, xmm0, src, vector_len);
     movdqu(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   } else if (dst_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, src, Assembler::AVX_512bit);
     Assembler::vpcmpeqw(dst, dst, xmm0, vector_len);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-  } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    pop_zmm(xmm0);
+  } else {
+    push_zmm(xmm0);
+    push_zmm(xmm1);
     movdqu(xmm0, src);
     movdqu(xmm1, dst);
     Assembler::vpcmpeqw(xmm1, xmm1, xmm0, vector_len);
     movdqu(dst, xmm1);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
+    pop_zmm(xmm0);
   }
 }
 
@@ -4517,13 +4449,11 @@
   } else if (dst_enc < 16) {
     Assembler::vpmovzxbw(dst, src, vector_len);
   } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::vpmovzxbw(xmm0, src, vector_len);
     movdqu(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   }
 }
 
@@ -4532,12 +4462,10 @@
   if (src_enc < 16) {
     Assembler::vpmovmskb(dst, src);
   } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, src, Assembler::AVX_512bit);
     Assembler::vpmovmskb(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   }
 }
 
@@ -4566,16 +4494,14 @@
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
   } else {
     // worse case scenario, all regs are in the upper bank
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    push_zmm(xmm1);
     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm1, src, Assembler::AVX_512bit);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
     evmovdqul(dst, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
   }
 }
 
@@ -4625,16 +4551,14 @@
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
   } else {
     // worse case scenario, all regs are in the upper bank
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    push_zmm(xmm1);
     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm1, src, Assembler::AVX_512bit);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len);
     evmovdqul(dst, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
   }
 }
 
@@ -4684,16 +4608,14 @@
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
   } else {
     // worse case scenario, all regs are in the upper bank
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    push_zmm(xmm1);
     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm1, src, Assembler::AVX_512bit);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len);
     evmovdqul(dst, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
   }
 }
 
@@ -4751,8 +4673,7 @@
     evmovdqul(dst, nds, Assembler::AVX_512bit);
   } else {
     // worse case scenario, all regs are in the upper bank
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    push_zmm(xmm1);
     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm1, shift, Assembler::AVX_512bit);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
@@ -4760,8 +4681,7 @@
     evmovdqul(xmm1, dst, Assembler::AVX_512bit);
     evmovdqul(dst, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
   }
 }
 
@@ -4819,8 +4739,7 @@
     evmovdqul(dst, nds, Assembler::AVX_512bit);
   } else {
     // worse case scenario, all regs are in the upper bank
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    push_zmm(xmm1);
     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm1, shift, Assembler::AVX_512bit);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
@@ -4828,8 +4747,7 @@
     evmovdqul(xmm1, dst, Assembler::AVX_512bit);
     evmovdqul(dst, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
   }
 }
 
@@ -4887,8 +4805,7 @@
     evmovdqul(dst, nds, Assembler::AVX_512bit);
   } else {
     // worse case scenario, all regs are in the upper bank
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    push_zmm(xmm1);
     evmovdqul(nds, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm1, shift, Assembler::AVX_512bit);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
@@ -4896,8 +4813,7 @@
     evmovdqul(xmm1, dst, Assembler::AVX_512bit);
     evmovdqul(dst, xmm0, Assembler::AVX_512bit);
     evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
   }
 }
 
@@ -4928,31 +4844,23 @@
   if ((dst_enc < 16) && (src_enc < 16)) {
     Assembler::vptest(dst, src);
   } else if (src_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, dst, Assembler::AVX_512bit);
     Assembler::vptest(xmm0, src);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm0);
   } else if (dst_enc < 16) {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    push_zmm(xmm0);
     evmovdqul(xmm0, src, Assembler::AVX_512bit);
     Assembler::vptest(dst, xmm0);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-  } else {
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
-    subptr(rsp, 64);
-    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    pop_zmm(xmm0);
+  } else {
+    push_zmm(xmm0);
+    push_zmm(xmm1);
     movdqu(xmm0, src);
     movdqu(xmm1, dst);
     Assembler::vptest(xmm1, xmm0);
-    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
-    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-    addptr(rsp, 64);
+    pop_zmm(xmm1);
+    pop_zmm(xmm0);
   }
 }
 
@@ -4966,45 +4874,35 @@
       if (dst_enc < 16) {
         Assembler::punpcklbw(dst, src);
       } else {
-        subptr(rsp, 64);
-        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        push_zmm(xmm0);
         evmovdqul(xmm0, dst, Assembler::AVX_512bit);
         Assembler::punpcklbw(xmm0, xmm0);
         evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-        addptr(rsp, 64);
+        pop_zmm(xmm0);
       }
     } else {
       if ((src_enc < 16) && (dst_enc < 16)) {
         Assembler::punpcklbw(dst, src);
       } else if (src_enc < 16) {
-        subptr(rsp, 64);
-        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        push_zmm(xmm0);
         evmovdqul(xmm0, dst, Assembler::AVX_512bit);
         Assembler::punpcklbw(xmm0, src);
         evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-        addptr(rsp, 64);
+        pop_zmm(xmm0);
       } else if (dst_enc < 16) {
-        subptr(rsp, 64);
-        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        push_zmm(xmm0);
         evmovdqul(xmm0, src, Assembler::AVX_512bit);
         Assembler::punpcklbw(dst, xmm0);
-        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-        addptr(rsp, 64);
+        pop_zmm(xmm0);
       } else {
-        subptr(rsp, 64);
-        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
-        subptr(rsp, 64);
-        evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+        push_zmm(xmm0);
+        push_zmm(xmm1);
         evmovdqul(xmm0, dst, Assembler::AVX_512bit);
         evmovdqul(xmm1, src, Assembler::AVX_512bit);
         Assembler::punpcklbw(xmm0, xmm1);
         evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-        evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-        addptr(rsp, 64);
-        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-        addptr(rsp, 64);
+        pop_zmm(xmm1);
+        pop_zmm(xmm0);
       }
     }
   } else {
@@ -5020,12 +4918,10 @@
     if (dst_enc < 16) {
       Assembler::pshufd(dst, src, mode);
     } else {
-      subptr(rsp, 64);
-      evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+      push_zmm(xmm0);
       Assembler::pshufd(xmm0, src, mode);
       evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-      evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-      addptr(rsp, 64);
+      pop_zmm(xmm0);
     }
   }
 }
@@ -5040,45 +4936,35 @@
       if (dst_enc < 16) {
         Assembler::pshuflw(dst, src, mode);
       } else {
-        subptr(rsp, 64);
-        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        push_zmm(xmm0);
         evmovdqul(xmm0, dst, Assembler::AVX_512bit);
         Assembler::pshuflw(xmm0, xmm0, mode);
         evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-        addptr(rsp, 64);
+        pop_zmm(xmm0);
       }
     } else {
       if ((src_enc < 16) && (dst_enc < 16)) {
         Assembler::pshuflw(dst, src, mode);
       } else if (src_enc < 16) {
-        subptr(rsp, 64);
-        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        push_zmm(xmm0);
         evmovdqul(xmm0, dst, Assembler::AVX_512bit);
         Assembler::pshuflw(xmm0, src, mode);
         evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-        addptr(rsp, 64);
+        pop_zmm(xmm0);
       } else if (dst_enc < 16) {
-        subptr(rsp, 64);
-        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+        push_zmm(xmm0);
         evmovdqul(xmm0, src, Assembler::AVX_512bit);
         Assembler::pshuflw(dst, xmm0, mode);
-        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-        addptr(rsp, 64);
+        pop_zmm(xmm0);
       } else {
-        subptr(rsp, 64);
-        evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
-        subptr(rsp, 64);
-        evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+        push_zmm(xmm0);
+        push_zmm(xmm1);
         evmovdqul(xmm0, dst, Assembler::AVX_512bit);
         evmovdqul(xmm1, src, Assembler::AVX_512bit);
         Assembler::pshuflw(xmm0, xmm1, mode);
         evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-        evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
-        addptr(rsp, 64);
-        evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-        addptr(rsp, 64);
+        pop_zmm(xmm1);
+        pop_zmm(xmm0);
       }
     }
   } else {
@@ -5166,13 +5052,11 @@
   if (VM_Version::supports_avx512novl() &&
       (nds_upper_bank || dst_upper_bank)) {
     if (dst_upper_bank) {
-      subptr(rsp, 64);
-      evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+      push_zmm(xmm0);
       movflt(xmm0, nds);
       vxorps(xmm0, xmm0, src, Assembler::AVX_128bit);
       movflt(dst, xmm0);
-      evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-      addptr(rsp, 64);
+      pop_zmm(xmm0);
     } else {
       movflt(dst, nds);
       vxorps(dst, dst, src, Assembler::AVX_128bit);
@@ -5190,13 +5074,11 @@
   if (VM_Version::supports_avx512novl() &&
       (nds_upper_bank || dst_upper_bank)) {
     if (dst_upper_bank) {
-      subptr(rsp, 64);
-      evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+      push_zmm(xmm0);
       movdbl(xmm0, nds);
       vxorpd(xmm0, xmm0, src, Assembler::AVX_128bit);
       movdbl(dst, xmm0);
-      evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
-      addptr(rsp, 64);
+      pop_zmm(xmm0);
     } else {
       movdbl(dst, nds);
       vxorpd(dst, dst, src, Assembler::AVX_128bit);
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Tue Aug 14 17:21:44 2018 -0700
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Wed Aug 15 00:48:12 2018 -0700
@@ -478,6 +478,10 @@
   // from register xmm0. Otherwise, the value is stored from the FPU stack.
   void store_double(Address dst);
 
+  // Save/restore ZMM (512bit) register on stack.
+  void push_zmm(XMMRegister reg);
+  void pop_zmm(XMMRegister reg);
+
   // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
   void push_fTOS();