changeset 9374:fa430fa4f577

8135231: aarch64: add support for vectorizing double precision sqrt
Reviewed-by: roland, aph
author enevill
date Wed, 23 Sep 2015 12:39:30 -0400
parents 5ee8eccf7900
children f244d455e4dd
files src/cpu/aarch64/vm/aarch64.ad src/cpu/aarch64/vm/assembler_aarch64.hpp test/compiler/loopopts/superword/SumRedSqrt_Double.java
diffstat 3 files changed, 19 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/aarch64/vm/aarch64.ad	Mon Sep 28 16:18:15 2015 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Wed Sep 23 12:39:30 2015 -0400
@@ -15209,6 +15209,20 @@
   ins_pipe(pipe_class_default);
 %}
 
+// --------------------------------- SQRT -------------------------------------
+
+instruct vsqrt2D(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 2);  // rule applies only when the vector node has exactly 2 elements
+  match(Set dst (SqrtVD src));  // dst = SqrtVD(src); both operands are vecX
+  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
+  ins_encode %{
+    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,  // T2D arrangement, matching the "(2D)" tag in the format string
+             as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // --------------------------------- AND --------------------------------------
 
 instruct vand8B(vecD dst, vecD src1, vecD src2)
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Mon Sep 28 16:18:15 2015 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Wed Sep 23 12:39:30 2015 -0400
@@ -2311,6 +2311,10 @@
 
 #define MSG "invalid arrangement"
 
+#define ASSERTION (T == T2S || T == T4S || T == T2D)
+  INSN(fsqrt, 1, 0b11111);  // NOTE(review): presumably INSN checks ASSERTION (reporting MSG) so fsqrt accepts only T2S/T4S/T2D -- confirm against the INSN definition
+#undef ASSERTION
+
 #define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
   INSN(rev64, 0, 0b00000);
 #undef ASSERTION
--- a/test/compiler/loopopts/superword/SumRedSqrt_Double.java	Mon Sep 28 16:18:15 2015 +0000
+++ b/test/compiler/loopopts/superword/SumRedSqrt_Double.java	Wed Sep 23 12:39:30 2015 -0400
@@ -26,7 +26,7 @@
 * @test
 * @bug 8135028
 * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test
-* @requires os.arch=="x86" | os.arch=="amd64" | os.arch=="x86_64"
+* @requires os.arch=="x86" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
 *
 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedSqrt_Double
 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedSqrt_Double