changeset 58375:26b0d25e0cd5 foreign+vector

Automatic merge with vectorIntrinsics
author mcimadamore
date Tue, 04 Sep 2018 22:54:22 +0200
parents 2df4cb9a1cba 77c09ee06154
children 84b78ea0dd83
files src/hotspot/cpu/x86/assembler_x86.cpp src/hotspot/cpu/x86/assembler_x86.hpp src/hotspot/cpu/x86/stubRoutines_x86.hpp src/hotspot/cpu/x86/x86.ad
diffstat 4 files changed, 351 insertions(+), 48 deletions(-)
--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Mon Sep 03 13:24:11 2018 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Tue Sep 04 22:54:22 2018 +0200
@@ -7041,6 +7041,25 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+  assert(UseAVX > 2, "requires AVX512");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
+}
+
+void Assembler::evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 2, "requires AVX512");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8((unsigned char)0xE2);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 //Variable Shift packed integers logically left.
 void Assembler::vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
   assert(UseAVX > 1, "requires AVX2");
@@ -7084,9 +7103,10 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::vpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  assert(UseAVX > 1, "requires AVX2");
+void Assembler::evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 2, "requires AVX512");
   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x46);
   emit_int8((unsigned char)(0xC0 | encode));
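
The two evpsraq overloads added above emit the EVEX-only VPSRAQ instruction (packed 64-bit arithmetic right shift, with an immediate or an XMM-register count), and evpsravq emits VPSRAVQ (an independent count per lane); all three need AVX-512VL, hence the UseAVX > 2 asserts. As a model of the lane semantics these encoders target — a minimal C++ sketch, not HotSpot code — each 64-bit lane is shifted arithmetically, and counts of 64 or more fill the lane with copies of the sign bit:

    #include <cstdint>
    #include <cstdio>

    // One VPSRAQ lane: counts >= 64 saturate to all sign bits,
    // matching the documented SIMD shift behavior.
    static int64_t psraq_lane(int64_t x, uint64_t count) {
      if (count >= 64) return x < 0 ? -1 : 0;
      return x >> count;  // arithmetic on int64_t in practice (and
                          // guaranteed from C++20 onward)
    }

    int main() {
      // 0x8000000000000000 >> 4 keeps the sign: prints f800000000000000.
      printf("%016llx\n", (unsigned long long)psraq_lane(INT64_MIN, 4));
      return 0;
    }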
--- a/src/hotspot/cpu/x86/assembler_x86.hpp	Mon Sep 03 13:24:11 2018 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp	Tue Sep 04 22:54:22 2018 +0200
@@ -2240,6 +2240,8 @@
   void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
   void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
   void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+  void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
+  void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
 
   // Variable shift left packed integers
   void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
@@ -2251,7 +2253,7 @@
 
   // Variable shift right arithmetic packed integers
   void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
-  void vpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+  void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
 
   // And packed integers
   void pand(XMMRegister dst, XMMRegister src);
--- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp	Mon Sep 03 13:24:11 2018 +0200
+++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp	Tue Sep 04 22:54:22 2018 +0200
@@ -157,6 +157,10 @@
     return _vector_64_bit_mask;
   }
 
+  static address vector_all_ones_mask() {
+    return _vector_double_sign_flip;
+  }
+
 #else // !LP64
 
  private:
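
The new vector_all_ones_mask() accessor reuses the existing _vector_double_sign_flip constant; judging from the format strings in the x86.ad rules below ([0x8000000000000000]), each 64-bit lane holds just the sign bit. Despite the accessor's name, it is this sign-bit mask m that lets the pre-AVX-512 rules synthesize an arithmetic shift from logical ones via sra(x,s) = ((x >>> s) ^ (m >>> s)) - (m >>> s): the xor re-inserts the shifted-down sign bit, and the subtraction propagates it through the vacated high bits. A self-contained check of the identity for counts 0..63 (a sketch, not HotSpot code):

    #include <cassert>
    #include <cstdint>

    // Check: sra(x, s) == ((x >>> s) ^ (m >>> s)) - (m >>> s), m = sign bit.
    static int64_t sra_via_srl(int64_t x, unsigned s) {   // s in 0..63
      const uint64_t m = 0x8000000000000000ULL;           // per-lane sign bit
      const uint64_t t = m >> s;                          // psrlq on the mask
      return (int64_t)((((uint64_t)x >> s) ^ t) - t);     // psrlq, pxor, psubq
    }

    int main() {
      const int64_t xs[] = { 0, 1, -1, 42, -42, INT64_MIN, INT64_MAX };
      for (int64_t x : xs)
        for (unsigned s = 0; s < 64; ++s)
          assert(sra_via_srl(x, s) == (x >> s));  // assumes arithmetic >> on int64_t
      return 0;
    }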
--- a/src/hotspot/cpu/x86/x86.ad	Mon Sep 03 13:24:11 2018 +0200
+++ b/src/hotspot/cpu/x86/x86.ad	Tue Sep 04 22:54:22 2018 +0200
@@ -1348,6 +1348,7 @@
   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
+  static address vector_all_ones_mask() { return StubRoutines::x86::vector_all_ones_mask(); }
 #else
   static address float_signmask()  { return (address)float_signmask_pool; }
   static address float_signflip()  { return (address)float_signflip_pool; }
@@ -17425,7 +17426,245 @@
   ins_pipe( pipe_slow );
 %}
 
-// There are no longs vector arithmetic right shift instructions.
+// Long vector arithmetic right shift
+instruct vsra1L(vecD dst, vecD src, vecS shift, vecD tmp) %{
+  predicate(n->as_Vector()->length() == 1);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "movdqu  $dst,$src\n\t"
+            "psrlq   $dst,$shift\n\t"
+            "movdqu  $tmp,[0x8000000000000000]\n\t"
+            "psrlq   $tmp,$shift\n\t"
+            "pxor    $dst,$tmp\n\t"
+            "psubq   $dst,$tmp\t! arithmetic right shift packed1L" %}
+  ins_encode %{
+    __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
+    __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
+    __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra1L_imm(vecD dst, vecD src, immI8 shift, vecD tmp) %{
+  predicate(n->as_Vector()->length() == 1);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "movdqu  $dst,$src\n\t"
+            "psrlq   $dst,$shift\n\t"
+            "movdqu  $tmp,[0x8000000000000000]\n\t"
+            "psrlq   $tmp,$shift\n\t"
+            "pxor    $dst,$tmp\n\t"
+            "psubq   $dst,$tmp\t! arithmetic right shift packed1L" %}
+  ins_encode %{
+    __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
+    __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ psrlq($tmp$$XMMRegister, (int)$shift$$constant);
+    __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra1L_reg(vecD dst, vecD src, vecS shift, vecD tmp) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 1);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlq   $dst,$src,$shift\n\t"
+            "vmovdqu  $tmp,[0x8000000000000000]\n\t"
+            "vpsrlq   $tmp,$tmp,$shift\n\t"
+            "vpxor    $dst,$dst,$tmp\n\t"
+            "vpsubq   $dst,$dst,$tmp\t! arithmetic right shift packed1L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra1L_reg_imm(vecD dst, vecD src, immI8 shift, vecD tmp) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 1);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlq   $dst,$src,$shift\n\t"
+            "vmovdqu  $tmp,[0x8000000000000000]\n\t"
+            "vpsrlq   $tmp,$tmp,$shift\n\t"
+            "vpxor    $dst,$dst,$tmp\n\t"
+            "vpsubq   $dst,$dst,$tmp\t! arithmetic right shift packed1L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra1L_reg_evex(vecD dst, vecD src, vecS shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 1);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq  $dst,$src,$shift\t! arithmetic right shift packed1L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2L_reg_imm(vecX dst, vecX src, immI8 shift, vecX tmp) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlq   $dst,$src,$shift\n\t"
+            "vmovdqu  $tmp,[0x8000000000000000]\n\t"
+            "vpsrlq   $tmp,$tmp,$shift\n\t"
+            "vpxor    $dst,$dst,$tmp\n\t"
+            "vpsubq   $dst,$dst,$tmp\t! arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlq   $dst,$src,$shift\n\t"
+            "vmovdqu  $tmp,[0x8000000000000000]\n\t"
+            "vpsrlq   $tmp,$tmp,$shift\n\t"
+            "vpxor    $dst,$dst,$tmp\n\t"
+            "vpsubq   $dst,$dst,$tmp\t! arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2L_reg_evex_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq  $dst,$src,$shift\t! arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq  $dst,$src,$shift\t! arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4L_reg_imm(vecY dst, vecY src, immI8 shift, vecY tmp) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlq   $dst,$src,$shift\n\t"
+            "vmovdqu  $tmp,[0x8000000000000000]\n\t"
+            "vpsrlq   $tmp,$tmp,$shift\n\t"
+            "vpxor    $dst,$dst,$tmp\n\t"
+            "vpsubq   $dst,$dst,$tmp\t! arithmetic right shift packed4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlq   $dst,$src,$shift\n\t"
+            "vmovdqu  $tmp,[0x8000000000000000]\n\t"
+            "vpsrlq   $tmp,$tmp,$shift\n\t"
+            "vpxor    $dst,$dst,$tmp\n\t"
+            "vpsubq   $dst,$dst,$tmp\t! arithmetic right shift packed4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4L_reg_evex_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq  $dst,$src,$shift\t! arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq  $dst,$src,$shift\t! arithmetic right shift packed4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8L_reg_evex_imm(vecZ dst, vecZ src, immI8 shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq  $dst,$src,$shift\t! arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8L_reg_evex(vecZ dst, vecZ src, vecS shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq  $dst,$src,$shift\t! arithmetic right shift packed8L" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
 
 // ------------------- Variable Bit Shift Left Logical -----------------------------
 //Integer Variable left shift
@@ -17766,46 +18005,84 @@
 %}
 
 //Long Variable right shift arithmetic
-instruct vsrav1L_reg(vecD dst, vecD src, vecD shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV);
+instruct vsrav1L_reg(vecD dst, vecD src, vecD shift, vecD tmp) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV);
   match(Set dst (RShiftVL src shift));
-  format %{ "vpsravq  $dst,$src,$shift\t! variable bit shift right shift packed1L" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vsrav2L_reg(vecX dst, vecX src, vecX shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlvq   $dst,$src,$shift\n\t"
+            "vmovdqu   $tmp,[0x8000000000000000]\n\t"
+            "vpsrlvq   $tmp,$tmp,$shift\n\t"
+            "vpxor     $dst,$dst,$tmp\n\t"
+            "vpsubq    $dst,$dst,$tmp\t! variable arithmetic right shift packed1L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrav1L_reg_evex(vecD dst, vecD src, vecD shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV);
   match(Set dst (RShiftVL src shift));
-  format %{ "vpsravq  $dst,$src,$shift\t! variable bit shift right shift packed2L" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  format %{ "evpsravq  $dst,$src,$shift\t! variable arithmetic right shift packed1L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrav2L_reg(vecX dst, vecX src, vecX shift, vecX tmp) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlvq   $dst,$src,$shift\n\t"
+            "vmovdqu   $tmp,[0x8000000000000000]\n\t"
+            "vpsrlvq   $tmp,$tmp,$shift\n\t"
+            "vpxor     $dst,$dst,$tmp\n\t"
+            "vpsubq    $dst,$dst,$tmp\t! variable arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct vsrav2L_reg_evex(vecX dst, vecX src, vecX shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
   match(Set dst (RShiftVL src shift));
-  format %{ "vpsravq  $dst,$src,$shift\t! variable bit shift right shift packed2L" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vsrav4L_reg(vecY dst, vecY src, vecY shift) %{
+  format %{ "evpsravq  $dst,$src,$shift\t! variable arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrav4L_reg(vecY dst, vecY src, vecY shift, vecY tmp) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
   match(Set dst (RShiftVL src shift));
-  format %{ "vpsravq  $dst,$src,$shift\t! variable bit shift right shift packed4L" %}
-  ins_encode %{
-    int vector_len = 1;
-    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlvq   $dst,$src,$shift\n\t"
+            "vmovdqu   $tmp,[0x8000000000000000]\n\t"
+            "vpsrlvq   $tmp,$tmp,$shift\n\t"
+            "vpxor     $dst,$dst,$tmp\n\t"
+            "vpsubq    $dst,$dst,$tmp\t! variable arithmetic right shift packed4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -17813,21 +18090,21 @@
 instruct vsrav4L_reg_evex(vecY dst, vecY src, vecY shift) %{
   predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
   match(Set dst (RShiftVL src shift));
-  format %{ "vpsravq  $dst,$src,$shift\t! variable bit shift right shift packed4L" %}
-  ins_encode %{
-    int vector_len = 1;
-    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vsrav8L_reg(vecZ dst, vecZ src, vecZ shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
+  format %{ "evpsravq  $dst,$src,$shift\t! variable bit shift right shift packed4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrav8L_reg_evex(vecZ dst, vecZ src, vecZ shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
   match(Set dst (RShiftVL src shift));
-  format %{ "vpsravq  $dst,$src,$shift\t! variable bit shift right shift packed8L" %}
-  ins_encode %{
-    int vector_len = 2;
-    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  format %{ "evpsravq  $dst,$src,$shift\t! variable bit shift right shift packed8L" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
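
VPSRAVQ (independent count per lane) likewise exists only with EVEX, so the rewritten AVX2 rules above fall back to vpsrlvq plus the same sign-mask fix-up, applied lane-wise. A lane-wise C++ model of that five-instruction sequence, assuming counts in 0..63 (Java's long shifts mask the count into that range), sketched rather than lifted from HotSpot:

    #include <cstdint>

    // Models the vpsrlvq / vmovdqu / vpsrlvq / vpxor / vpsubq sequence:
    // each lane gets its own logical shift, then the sign-mask fix-up
    // uses that lane's count to restore arithmetic-shift semantics.
    static void vpsravq_emulated(int64_t dst[], const int64_t src[],
                                 const uint64_t cnt[], int lanes) {
      const uint64_t m = 0x8000000000000000ULL;     // per-lane sign bit
      for (int i = 0; i < lanes; ++i) {
        const uint64_t t = m >> cnt[i];             // vpsrlvq on the mask vector
        dst[i] = (int64_t)((((uint64_t)src[i] >> cnt[i]) ^ t) - t);
      }
    }

    int main() {
      int64_t dst[2];
      const int64_t  src[2] = { -256, 256 };
      const uint64_t cnt[2] = { 4, 1 };
      vpsravq_emulated(dst, src, cnt, 2);           // dst = { -16, 128 }
      return (dst[0] == -16 && dst[1] == 128) ? 0 : 1;
    }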