changeset 8618:69fad5109885

8129426: aarch64: add support for PopCount in C2 Summary: Add support for PopCount using SIMD cnt and addv inst Reviewed-by: kvn, aph Contributed-by: alexander.alexeev@caviumnetworks.com
author enevill
date Thu, 25 Jun 2015 13:41:29 +0000
parents 03498f2b20a8
children a88735af82a4
files src/cpu/aarch64/vm/aarch64.ad src/cpu/aarch64/vm/assembler_aarch64.hpp src/cpu/aarch64/vm/macroAssembler_aarch64.hpp src/cpu/aarch64/vm/vm_version_aarch64.cpp
diffstat 4 files changed, 98 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/aarch64/vm/aarch64.ad	Thu Jun 25 08:52:12 2015 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Thu Jun 25 13:41:29 2015 +0000
@@ -7464,6 +7464,96 @@
   ins_pipe(ialu_reg);
 %}
 
+//---------- Population Count Instructions -------------------------------------
+//
+
+instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+  effect(TEMP tmp);
+  ins_cost(INSN_COST * 13);
+
+  format %{ "movw   $src, $src\n\t"
+            "mov    $tmp, $src\t# vector (1D)\n\t"
+            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
+            "addv   $tmp, $tmp\t# vector (8B)\n\t"
+            "mov    $dst, $tmp\t# vector (1D)" %}
+  ins_encode %{
+    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
+    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
+    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI (LoadI mem)));
+  effect(TEMP tmp);
+  ins_cost(INSN_COST * 13);
+
+  format %{ "ldrs   $tmp, $mem\n\t"
+            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
+            "addv   $tmp, $tmp\t# vector (8B)\n\t"
+            "mov    $dst, $tmp\t# vector (1D)" %}
+  ins_encode %{
+    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+  effect(TEMP tmp);
+  ins_cost(INSN_COST * 13);
+
+  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
+            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
+            "addv   $tmp, $tmp\t# vector (8B)\n\t"
+            "mov    $dst, $tmp\t# vector (1D)" %}
+  ins_encode %{
+    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
+    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL (LoadL mem)));
+  effect(TEMP tmp);
+  ins_cost(INSN_COST * 13);
+
+  format %{ "ldrd   $tmp, $mem\n\t"
+            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
+            "addv   $tmp, $tmp\t# vector (8B)\n\t"
+            "mov    $dst, $tmp\t# vector (1D)" %}
+  ins_encode %{
+    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
+    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
+               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
 // ============================================================================
 // MemBar Instruction
 
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu Jun 25 08:52:12 2015 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu Jun 25 13:41:29 2015 +0000
@@ -2055,6 +2055,9 @@
   INSN(negr,  1, 0b100000101110);
   INSN(notr,  1, 0b100000010110);
   INSN(addv,  0, 0b110001101110);
+  INSN(cls,   0, 0b100000010010);
+  INSN(clz,   1, 0b100000010010);
+  INSN(cnt,   0, 0b100000010110);
 
 #undef INSN
 
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu Jun 25 08:52:12 2015 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu Jun 25 13:41:29 2015 +0000
@@ -36,6 +36,7 @@
 class MacroAssembler: public Assembler {
   friend class LIR_Assembler;
 
+ public:
   using Assembler::mov;
   using Assembler::movi;
 
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Thu Jun 25 08:52:12 2015 +0000
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Thu Jun 25 13:41:29 2015 +0000
@@ -257,6 +257,10 @@
     UseBarriersForVolatile = (_cpuFeatures & CPU_DMB_ATOMICS) != 0;
   }
 
+  if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
+    UsePopCountInstruction = true;
+  }
+
 #ifdef COMPILER2
   if (FLAG_IS_DEFAULT(OptoScheduling)) {
     OptoScheduling = true;