changeset 60212:ff1f4b5e0c9a

8239549: AArch64: Backend support for MulAddVS2VI node
Reviewed-by: aph
author pli
date Wed, 26 Feb 2020 09:33:29 +0800
parents 16973c5b27be
children cf96533fd215
files src/hotspot/cpu/aarch64/aarch64.ad src/hotspot/cpu/aarch64/assembler_aarch64.hpp src/hotspot/cpu/aarch64/vm_version_aarch64.cpp test/hotspot/jtreg/compiler/loopopts/superword/Vec_MulAddS2I.java
diffstat 4 files changed, 66 insertions(+), 12 deletions(-)
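For context: MulAddS2I is C2's scalar ideal node for the "multiply two adjacent short pairs and accumulate the products into an int" idiom, and MulAddVS2VI is the packed form SuperWord emits when it vectorizes such a loop; this change teaches the AArch64 backend to match both. As a rough illustration only (the class and method names below are invented, and this is not the jtreg test itself), the source-level pattern being targeted looks like:

    public class MulAddS2IDemo {
        // Each adjacent pair of shorts is multiplied element-wise and the two
        // products are summed into one int: out[i/2] = a[i]*b[i] + a[i+1]*b[i+1]
        static void mulAdd(short[] a, short[] b, int[] out) {
            for (int i = 0; i < a.length - 1; i += 2) {
                out[i / 2] = a[i] * b[i] + a[i + 1] * b[i + 1];
            }
        }

        public static void main(String[] args) {
            short[] a = new short[1024], b = new short[1024];
            for (int i = 0; i < a.length; i++) { a[i] = (short) i; b[i] = (short) (i + 1); }
            int[] out = new int[512];
            mulAdd(a, b, out);   // loops of this shape are what the new match rules target
            System.out.println(out[0] + " " + out[255]);
        }
    }
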
--- a/src/hotspot/cpu/aarch64/aarch64.ad	Tue Feb 25 15:49:24 2020 -0800
+++ b/src/hotspot/cpu/aarch64/aarch64.ad	Wed Feb 26 09:33:29 2020 +0800
@@ -2074,15 +2074,24 @@
   return ret_value; // Per default match rules are supported.
 }
 
+// Identify extra cases that we might want to provide match rules for vector nodes and
+// other intrinsics guarded with vector length (vlen) and element type (bt).
 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
-
-  // TODO
-  // identify extra cases that we might want to provide match rules for
-  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
-  bool ret_value = match_rule_supported(opcode);
-  // Add rules here.
-
-  return ret_value;  // Per default match rules are supported.
+  if (!match_rule_supported(opcode)) {
+    return false;
+  }
+
+  // Special cases which require vector length
+  switch (opcode) {
+    case Op_MulAddVS2VI: {
+      if (vlen != 4) {
+        return false;
+      }
+      break;
+    }
+  }
+
+  return true; // Per default match rules are supported.
 }
 
 const bool Matcher::has_predicated_vectors(void) {
@@ -10555,6 +10564,22 @@
   ins_pipe(imac_reg_reg);
 %}
 
+// Combined Multiply-Add Shorts into Integer (dst = src1 * src2 + src3 * src4)
+
+instruct muladdS2I(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3, iRegIorL2I src4) %{
+  match(Set dst (MulAddS2I (Binary src1 src2) (Binary src3 src4)));
+
+  ins_cost(INSN_COST * 5);
+  format %{ "mulw  rscratch1, $src1, $src2\n\t"
+            "maddw $dst, $src3, $src4, rscratch1" %}
+
+  ins_encode %{
+    __ mulw(rscratch1, as_Register($src1$$reg), as_Register($src2$$reg));
+    __ maddw(as_Register($dst$$reg), as_Register($src3$$reg), as_Register($src4$$reg), rscratch1); %}
+
+  ins_pipe(imac_reg_reg);
+%}
+
 // Integer Divide
 
 instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
@@ -16935,6 +16960,30 @@
   ins_pipe(vmuldiv_fp128);
 %}
 
+// --------------- Vector Multiply-Add Shorts into Integer --------------------
+
+instruct vmuladdS2I(vecX dst, vecX src1, vecX src2, vecX tmp) %{
+  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  match(Set dst (MulAddVS2VI src1 src2));
+  ins_cost(INSN_COST);
+  effect(TEMP tmp);
+  format %{ "smullv  $tmp, $src1, $src2\t# vector (4H)\n\t"
+            "smullv  $dst, $src1, $src2\t# vector (8H)\n\t"
+            "addpv   $dst, $tmp, $dst\t# vector (4S)\n\t" %}
+  ins_encode %{
+    __ smullv(as_FloatRegister($tmp$$reg), __ T4H,
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+    __ smullv(as_FloatRegister($dst$$reg), __ T8H,
+              as_FloatRegister($src1$$reg),
+              as_FloatRegister($src2$$reg));
+    __ addpv(as_FloatRegister($dst$$reg), __ T4S,
+             as_FloatRegister($tmp$$reg),
+             as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(vmuldiv_fp128);
+%}
+
 // --------------------------------- DIV --------------------------------------
 
 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
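
For reference, the vmuladdS2I rule above lowers one 128-bit MulAddVS2VI (8 short lanes in, 4 int lanes out, i.e. the vlen == 4 case admitted by match_rule_supported_vector) to an smull of the low halves, an smull2 of the high halves (the T8H form of smullv), and a pairwise addp. A loose scalar model of what that three-instruction sequence computes is sketched below; the helper name is invented for illustration:

    // Sketch: models smullv(T4H) into tmp, smullv(T8H)/smull2 into dst, then addpv(T4S).
    static int[] mulAddVS2VIModel(short[] src1, short[] src2) { // both of length 8
        int[] lo = new int[4], hi = new int[4];
        for (int j = 0; j < 4; j++) {
            lo[j] = src1[j] * src2[j];          // smull:  widening multiply of the low 4 lanes
            hi[j] = src1[4 + j] * src2[4 + j];  // smull2: widening multiply of the high 4 lanes
        }
        int[] dst = new int[4];
        for (int j = 0; j < 2; j++) {
            dst[j]     = lo[2 * j] + lo[2 * j + 1]; // addp: adjacent pairs of the first operand...
            dst[2 + j] = hi[2 * j] + hi[2 * j + 1]; // ...then adjacent pairs of the second operand
        }
        return dst; // dst[j] == src1[2j]*src2[2j] + src1[2j+1]*src2[2j+1]
    }
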
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp	Tue Feb 25 15:49:24 2020 -0800
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp	Wed Feb 26 09:33:29 2020 +0800
@@ -2259,6 +2259,8 @@
   INSN(mlsv,   1, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
   INSN(sshl,   0, 0b010001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
   INSN(ushl,   1, 0b010001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+  INSN(addpv,  0, 0b101111, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+  INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
   INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
   INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
 
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp	Tue Feb 25 15:49:24 2020 -0800
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp	Wed Feb 26 09:33:29 2020 +0800
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2015, 2019, Red Hat Inc. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2020, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -451,6 +451,10 @@
   if (FLAG_IS_DEFAULT(OptoScheduling)) {
     OptoScheduling = true;
   }
+
+  if (FLAG_IS_DEFAULT(AlignVector)) {
+    AlignVector = AvoidUnalignedAccesses;
+  }
 #endif
 }
 
--- a/test/hotspot/jtreg/compiler/loopopts/superword/Vec_MulAddS2I.java	Tue Feb 25 15:49:24 2020 -0800
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/Vec_MulAddS2I.java	Wed Feb 26 09:33:29 2020 +0800
@@ -24,8 +24,7 @@
 /**
  * @test
  * @bug 8214751
- * @summary Add C2 x86 Superword support for VNNI VPDPWSSD Instruction
- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64"
+ * @summary Test operations in C2 MulAddS2I and MulAddVS2VI nodes.
  *
  * @run main/othervm -XX:LoopUnrollLimit=250
  *      -XX:CompileThresholdScaling=0.1