changeset 12764:6427ba02ae4b

Merge
author lana
date Thu, 18 May 2017 16:48:24 +0000
parents b3ee8ab233ed 507f8a7678b4
children c2314cb67e28
files
diffstat 2 files changed, 65 insertions(+), 34 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu May 18 14:54:53 2017 +0000
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu May 18 16:48:24 2017 +0000
@@ -4134,28 +4134,33 @@
   if ((dst_enc < 16) && (nds_enc < 16)) {
     vandps(dst, nds, negate_field, vector_len);
   } else if ((src_enc < 16) && (dst_enc < 16)) {
-    movss(src, nds);
+    evmovdqul(src, nds, Assembler::AVX_512bit);
     vandps(dst, src, negate_field, vector_len);
   } else if (src_enc < 16) {
-    movss(src, nds);
+    evmovdqul(src, nds, Assembler::AVX_512bit);
     vandps(src, src, negate_field, vector_len);
-    movss(dst, src);
+    evmovdqul(dst, src, Assembler::AVX_512bit);
   } else if (dst_enc < 16) {
-    movdqu(src, xmm0);
-    movss(xmm0, nds);
+    evmovdqul(src, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
     vandps(dst, xmm0, negate_field, vector_len);
-    movdqu(xmm0, src);
-  } else if (nds_enc < 16) {
-    movdqu(src, xmm0);
-    vandps(xmm0, nds, negate_field, vector_len);
-    movss(dst, xmm0);
-    movdqu(xmm0, src);
-  } else {
-    movdqu(src, xmm0);
-    movss(xmm0, nds);
-    vandps(xmm0, xmm0, negate_field, vector_len);
-    movss(dst, xmm0);
-    movdqu(xmm0, src);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+  } else {
+    if (src_enc != dst_enc) {
+      evmovdqul(src, xmm0, Assembler::AVX_512bit);
+      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+      vandps(xmm0, xmm0, negate_field, vector_len);
+      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+      evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    } else {
+      subptr(rsp, 64);
+      evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+      vandps(xmm0, xmm0, negate_field, vector_len);
+      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+      evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+      addptr(rsp, 64);
+    }
   }
 }
 
@@ -4166,28 +4171,33 @@
   if ((dst_enc < 16) && (nds_enc < 16)) {
     vandpd(dst, nds, negate_field, vector_len);
   } else if ((src_enc < 16) && (dst_enc < 16)) {
-    movsd(src, nds);
+    evmovdqul(src, nds, Assembler::AVX_512bit);
     vandpd(dst, src, negate_field, vector_len);
   } else if (src_enc < 16) {
-    movsd(src, nds);
+    evmovdqul(src, nds, Assembler::AVX_512bit);
     vandpd(src, src, negate_field, vector_len);
-    movsd(dst, src);
+    evmovdqul(dst, src, Assembler::AVX_512bit);
   } else if (dst_enc < 16) {
-    movdqu(src, xmm0);
-    movsd(xmm0, nds);
+    evmovdqul(src, xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
     vandpd(dst, xmm0, negate_field, vector_len);
-    movdqu(xmm0, src);
-  } else if (nds_enc < 16) {
-    movdqu(src, xmm0);
-    vandpd(xmm0, nds, negate_field, vector_len);
-    movsd(dst, xmm0);
-    movdqu(xmm0, src);
-  } else {
-    movdqu(src, xmm0);
-    movsd(xmm0, nds);
-    vandpd(xmm0, xmm0, negate_field, vector_len);
-    movsd(dst, xmm0);
-    movdqu(xmm0, src);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+  } else {
+    if (src_enc != dst_enc) {
+      evmovdqul(src, xmm0, Assembler::AVX_512bit);
+      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+      vandpd(xmm0, xmm0, negate_field, vector_len);
+      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+      evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    } else {
+      subptr(rsp, 64);
+      evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
+      vandpd(xmm0, xmm0, negate_field, vector_len);
+      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+      evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+      addptr(rsp, 64);
+    }
   }
 }
 
@@ -4934,6 +4944,24 @@
   }
 }
 
+void MacroAssembler::pshufd(XMMRegister dst, Address src, int mode) {
+  if (VM_Version::supports_avx512vl()) {
+    Assembler::pshufd(dst, src, mode);
+  } else {
+    int dst_enc = dst->encoding();
+    if (dst_enc < 16) {
+      Assembler::pshufd(dst, src, mode);
+    } else {
+      subptr(rsp, 64);
+      evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+      Assembler::pshufd(xmm0, src, mode);
+      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
+      evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+      addptr(rsp, 64);
+    }
+  }
+}
+
 // This instruction exists within macros, ergo we cannot control its input
 // when emitted through those patterns.
 void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Thu May 18 14:54:53 2017 +0000
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Thu May 18 16:48:24 2017 +0000
@@ -1232,6 +1232,9 @@
   void punpcklbw(XMMRegister dst, XMMRegister src);
   void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
 
+  void pshufd(XMMRegister dst, Address src, int mode);
+  void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); }
+
   void pshuflw(XMMRegister dst, XMMRegister src, int mode);
   void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }