OpenJDK / panama / dev
changeset 58375:26b0d25e0cd5 foreign+vector
Automatic merge with vectorIntrinsics
author      mcimadamore
date        Tue, 04 Sep 2018 22:54:22 +0200
parents     2df4cb9a1cba 77c09ee06154
children    84b78ea0dd83
diffstat    4 files changed, 351 insertions(+), 48 deletions(-)
--- a/src/hotspot/cpu/x86/assembler_x86.cpp    Mon Sep 03 13:24:11 2018 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp    Tue Sep 04 22:54:22 2018 +0200
@@ -7041,6 +7041,25 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+  assert(UseAVX > 2, "requires AVX512");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift & 0xFF);
+}
+
+void Assembler::evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 2, "requires AVX512");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8((unsigned char)0xE2);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 //Variable Shift packed integers logically left.
 void Assembler::vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
   assert(UseAVX > 1, "requires AVX2");
@@ -7084,9 +7103,10 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::vpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  assert(UseAVX > 1, "requires AVX2");
+void Assembler::evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 2, "requires AVX512");
   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x46);
   emit_int8((unsigned char)(0xC0 | encode));
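Both overloads emit the AVX-512 VPSRAQ instruction (arithmetic right shift of packed 64-bit lanes), for which SSE and AVX2 have no counterpart; that gap is what the x86.ad changes below have to work around on older hardware. A one-lane scalar model of the instruction's semantics (an illustrative sketch, not HotSpot code; the helper name is invented):

#include <cstdint>

// Model of VPSRAQ on a single 64-bit lane. The hardware saturates an
// out-of-range count, so any count >= 64 leaves only copies of the sign
// bit. Assumes '>>' on a signed value is an arithmetic shift (guaranteed
// since C++20; true on all mainstream compilers before that).
int64_t vpsraq_lane(int64_t x, uint64_t count) {
  if (count > 63) count = 63;
  return x >> count;
}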
--- a/src/hotspot/cpu/x86/assembler_x86.hpp    Mon Sep 03 13:24:11 2018 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp    Tue Sep 04 22:54:22 2018 +0200
@@ -2240,6 +2240,8 @@
   void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
   void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
   void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+  void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
+  void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
 
   // Variable shift left packed integers
   void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
@@ -2251,7 +2253,7 @@
 
   // Variable shift right arithmetic packed integers
   void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
-  void vpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+  void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
 
   // And packed integers
   void pand(XMMRegister dst, XMMRegister src);
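The new declarations follow the existing pattern of one immediate-count overload and one register-count overload, with vector_len selecting 128-, 256- or 512-bit operation (Assembler::AVX_128bit/AVX_256bit/AVX_512bit, i.e. 0/1/2). A hypothetical call site, for orientation only (registers and surrounding context invented):

// Illustrative, not part of the changeset:
//   __ evpsraq(xmm0, xmm1, 3, Assembler::AVX_512bit);     // all lanes >> 3
//   __ evpsraq(xmm0, xmm1, xmm2, Assembler::AVX_512bit);  // count taken from xmm2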
--- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp    Mon Sep 03 13:24:11 2018 +0200
+++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp    Tue Sep 04 22:54:22 2018 +0200
@@ -157,6 +157,10 @@
     return _vector_64_bit_mask;
   }
 
+  static address vector_all_ones_mask() {
+    return _vector_double_sign_flip;
+  }
+
 #else // !LP64
 
  private:
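Despite its name, vector_all_ones_mask() does not point at an all-ones pattern: it aliases _vector_double_sign_flip, whose table holds the 64-bit sign bit in every lane, which is exactly the [0x8000000000000000] constant the new shift rules load. A sketch of the assumed layout (illustrative; the real table is emitted by the stub generator):

#include <cstdint>

// Assumed contents behind vector_all_ones_mask(): the sign-bit pattern
// repeated across enough 64-bit lanes to satisfy a 512-bit load.
static const uint64_t sign_bit_lanes[8] = {
  0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL,
  0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL,
  0x8000000000000000ULL, 0x8000000000000000ULL
};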
--- a/src/hotspot/cpu/x86/x86.ad    Mon Sep 03 13:24:11 2018 +0200
+++ b/src/hotspot/cpu/x86/x86.ad    Tue Sep 04 22:54:22 2018 +0200
@@ -1348,6 +1348,7 @@
 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
+static address vector_all_ones_mask() { return StubRoutines::x86::vector_all_ones_mask(); }
 #else
 static address float_signmask() { return (address)float_signmask_pool; }
 static address float_signflip() { return (address)float_signflip_pool; }
@@ -17425,7 +17426,245 @@
   ins_pipe( pipe_slow );
 %}
 
-// There are no longs vector arithmetic right shift instructions.
+// Long vector arithmetic right shift
+instruct vsra1L(vecD dst, vecD src, vecS shift, vecD tmp) %{
+  predicate(n->as_Vector()->length() == 1);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "movdqu $dst,$src\n\t"
+            "psrlq $dst,$shift\n\t"
+            "movdqu $tmp,[0x8000000000000000]\n\t"
+            "psrlq $tmp,$shift\n\t"
+            "pxor $dst,$tmp\n\t"
+            "psubq $dst,$tmp\t! arithmetic right shift packed1L" %}
+  ins_encode %{
+    __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
+    __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
+    __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra1L_imm(vecD dst, vecD src, immI8 shift, vecD tmp) %{
+  predicate(n->as_Vector()->length() == 1);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "movdqu $dst,$src\n\t"
+            "psrlq $dst,$shift\n\t"
+            "movdqu $tmp,[0x8000000000000000]\n\t"
+            "psrlq $tmp,$shift\n\t"
+            "pxor $dst,$tmp\n\t"
+            "psubq $dst,$tmp\t! arithmetic right shift packed1L" %}
+  ins_encode %{
+    __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
+    __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ psrlq($tmp$$XMMRegister, (int)$shift$$constant);
+    __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra1L_reg(vecD dst, vecD src, vecS shift, vecD tmp) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 1);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlq $dst,$src,$shift\n\t"
+            "vmovdqu $tmp,[0x8000000000000000]\n\t"
+            "vpsrlq $tmp,$tmp,$shift\n\t"
+            "vpxor $dst,$dst,$tmp\n\t"
+            "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed1L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra1L_reg_imm(vecD dst, vecD src, immI8 shift, vecD tmp) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 1);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlq $dst,$src,$shift\n\t"
+            "vmovdqu $tmp,[0x8000000000000000]\n\t"
+            "vpsrlq $tmp,$tmp,$shift\n\t"
+            "vpxor $dst,$dst,$tmp\n\t"
+            "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed1L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra1L_reg_evex(vecD dst, vecD src, vecS shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 1);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed1L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2L_reg_imm(vecX dst, vecX src, immI8 shift, vecX tmp) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlq $dst,$src,$shift\n\t"
+            "vmovdqu $tmp,[0x8000000000000000]\n\t"
+            "vpsrlq $tmp,$tmp,$shift\n\t"
+            "vpxor $dst,$dst,$tmp\n\t"
+            "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2L_reg(vecX dst, vecX src, vecS shift, vecX tmp) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlq $dst,$src,$shift\n\t"
+            "vmovdqu $tmp,[0x8000000000000000]\n\t"
+            "vpsrlq $tmp,$tmp,$shift\n\t"
+            "vpxor $dst,$dst,$tmp\n\t"
+            "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2L_reg_evex_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2L_reg_evex(vecX dst, vecX src, vecS shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4L_reg_imm(vecY dst, vecY src, immI8 shift, vecY tmp) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlq $dst,$src,$shift\n\t"
+            "vmovdqu $tmp,[0x8000000000000000]\n\t"
+            "vpsrlq $tmp,$tmp,$shift\n\t"
+            "vpxor $dst,$dst,$tmp\n\t"
+            "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, (int)$shift$$constant, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4L_reg(vecY dst, vecY src, vecS shift, vecY tmp) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlq $dst,$src,$shift\n\t"
+            "vmovdqu $tmp,[0x8000000000000000]\n\t"
+            "vpsrlq $tmp,$tmp,$shift\n\t"
+            "vpxor $dst,$dst,$tmp\n\t"
+            "vpsubq $dst,$dst,$tmp\t! arithmetic right shift packed4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4L_reg_evex_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4L_reg_evex(vecY dst, vecY src, vecS shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8L_reg_evex_imm(vecZ dst, vecZ src, immI8 shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed8L" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8L_reg_evex(vecZ dst, vecZ src, vecS shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVL src shift));
+  format %{ "evpsraq $dst,$src,$shift\t! arithmetic right shift packed8L" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
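All of the five-instruction sequences above lean on one identity: an arithmetic right shift can be synthesized from a logical one by xor-ing in, then subtracting, the shifted sign bit. A standalone check of the identity (a sketch; assumes two's-complement narrowing conversions, which C++20 guarantees and every mainstream compiler provides):

#include <cassert>
#include <cstdint>

// sra(x, n) == ((x >>> n) ^ m) - m,  where m = 0x8000000000000000 >>> n.
// The logical shift zeroes the top n bits; m has a single 1 bit exactly
// where the sign bit landed, so the xor/sub pair propagates the sign
// across the cleared bits precisely when the input was negative.
int64_t emulated_sra(int64_t x, unsigned n) {     // n must be in [0, 63]
  uint64_t m = 0x8000000000000000ULL >> n;        // psrlq on the mask
  uint64_t y = (uint64_t)x >> n;                  // psrlq on the source
  return (int64_t)((y ^ m) - m);                  // pxor + psubq
}

int main() {
  const int64_t samples[] = { 0, 1, -1, INT64_MIN, INT64_MAX, -123456789012345LL };
  for (int64_t x : samples)
    for (unsigned n = 0; n < 64; n++)
      assert(emulated_sra(x, n) == (x >> n));     // '>>' is arithmetic here
  return 0;
}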
 
 // ------------------- Variable Bit Shift Left Logical -----------------------------
 //Integer Variable left shift
@@ -17766,46 +18005,84 @@
 %}
 
 //Long Variable right shift arithmetic
-instruct vsrav1L_reg(vecD dst, vecD src, vecD shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV);
+instruct vsrav1L_reg(vecD dst, vecD src, vecD shift, vecD tmp) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV);
   match(Set dst (RShiftVL src shift));
-  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right shift packed1L" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vsrav2L_reg(vecX dst, vecX src, vecX shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlvq $dst,$src,$shift\n\t"
+            "vmovdqu $tmp,[0x8000000000000000]\n\t"
+            "vpsrlvq $tmp,$tmp,$shift\n\t"
+            "vpxor $dst,$dst,$tmp\n\t"
+            "vpsubq $dst,$dst,$tmp\t! variable arithmetic right shift packed1L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrav1L_reg_evex(vecD dst, vecD src, vecD shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 1 && n->in(2)->Opcode() != Op_RShiftCntV);
   match(Set dst (RShiftVL src shift));
-  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right shift packed2L" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  format %{ "evpsravq $dst,$src,$shift\t! variable arithmetic right shift packed1L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrav2L_reg(vecX dst, vecX src, vecX shift, vecX tmp) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
+  match(Set dst (RShiftVL src shift));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlvq $dst,$src,$shift\n\t"
+            "vmovdqu $tmp,[0x8000000000000000]\n\t"
+            "vpsrlvq $tmp,$tmp,$shift\n\t"
+            "vpxor $dst,$dst,$tmp\n\t"
+            "vpsubq $dst,$dst,$tmp\t! variable arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct vsrav2L_reg_evex(vecX dst, vecX src, vecX shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && n->in(2)->Opcode() != Op_RShiftCntV);
   match(Set dst (RShiftVL src shift));
-  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right shift packed2L" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vsrav4L_reg(vecY dst, vecY src, vecY shift) %{
+  format %{ "evpsravq $dst,$src,$shift\t! variable arithmetic right shift packed2L" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrav4L_reg(vecY dst, vecY src, vecY shift, vecY tmp) %{
   predicate(UseAVX > 1 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
   match(Set dst (RShiftVL src shift));
-  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right shift packed4L" %}
-  ins_encode %{
-    int vector_len = 1;
-    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  effect(TEMP dst, TEMP tmp);
+  format %{ "vpsrlvq $dst,$src,$shift\n\t"
+            "vmovdqu $tmp,[0x8000000000000000]\n\t"
+            "vpsrlvq $tmp,$tmp,$shift\n\t"
+            "vpxor $dst,$dst,$tmp\n\t"
+            "vpsubq $dst,$dst,$tmp\t! variable arithmetic right shift packed4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ vpsrlvq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_all_ones_mask()));
+    __ vpsrlvq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+    __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -17813,21 +18090,21 @@
 instruct vsrav4L_reg_evex(vecY dst, vecY src, vecY shift) %{
   predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && n->in(2)->Opcode() != Op_RShiftCntV);
   match(Set dst (RShiftVL src shift));
-  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right shift packed4L" %}
-  ins_encode %{
-    int vector_len = 1;
-    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vsrav8L_reg(vecZ dst, vecZ src, vecZ shift) %{
-  predicate(UseAVX > 1 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
+  format %{ "evpsravq $dst,$src,$shift\t! variable arithmetic right shift packed4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrav8L_reg_evex(vecZ dst, vecZ src, vecZ shift) %{
+  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && n->in(2)->Opcode() != Op_RShiftCntV);
   match(Set dst (RShiftVL src shift));
-  format %{ "vpsravq $dst,$src,$shift\t! variable bit shift right shift packed8L" %}
-  ins_encode %{
-    int vector_len = 2;
-    __ vpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+  format %{ "evpsravq $dst,$src,$shift\t! variable arithmetic right shift packed8L" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evpsravq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
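The variable-count rules are the same trick with per-lane counts: vpsrlvq shifts both the source and the sign-bit mask by each lane's own count before the xor/sub fix-up, while evpsravq does the whole job in one instruction on AVX-512. One caveat: for a count >= 64, VPSRLVQ yields zero, so the emulation would return 0 where a true VPSRAVQ sign-fills; presumably this never matters here because Java long shifts mask their count to 0..63 before it reaches these rules. A two-lane scalar model (illustrative; the function name is invented):

#include <cstdint>

// Per-lane model of the vpsrlvq-based sequence in vsrav2L_reg. Unlike
// the psrlq forms above, which broadcast one count to every lane, each
// lane here shifts by its own count (assumed < 64).
void emulated_vpsravq2(int64_t dst[2], const int64_t src[2], const uint64_t cnt[2]) {
  for (int i = 0; i < 2; i++) {
    uint64_t m = 0x8000000000000000ULL >> cnt[i];   // vpsrlvq on the mask
    uint64_t y = (uint64_t)src[i] >> cnt[i];        // vpsrlvq on the source
    dst[i] = (int64_t)((y ^ m) - m);                // vpxor + vpsubq
  }
}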