changeset 10436:87e72c51ec69

8150394: aarch64: add support for 8.1 LSE CAS instructions Reviewed-by: aph Contributed-by: ananth.jasty@caviumnetworks.com, edward.nevill@linaro.org
author enevill
date Tue, 08 Mar 2016 14:39:50 +0000
parents 0edd74a48586
children 9e7c906e3208
files src/cpu/aarch64/vm/aarch64.ad src/cpu/aarch64/vm/assembler_aarch64.hpp src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp src/cpu/aarch64/vm/globals_aarch64.hpp src/cpu/aarch64/vm/macroAssembler_aarch64.cpp src/cpu/aarch64/vm/macroAssembler_aarch64.hpp src/cpu/aarch64/vm/register_aarch64.hpp src/cpu/aarch64/vm/vm_version_aarch64.cpp
diffstat 8 files changed, 225 insertions(+), 93 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/aarch64/vm/aarch64.ad	Mon Mar 07 15:03:48 2016 -0800
+++ b/src/cpu/aarch64/vm/aarch64.ad	Tue Mar 08 14:39:50 2016 +0000
@@ -4132,14 +4132,14 @@
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               &Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr);
+               Assembler::xword, /*acquire*/ false, /*release*/ true);
   %}
 
   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               &Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
+               Assembler::word, /*acquire*/ false, /*release*/ true);
   %}
 
 
@@ -4151,14 +4151,14 @@
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               &Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);
+               Assembler::xword, /*acquire*/ true, /*release*/ true);
   %}
 
   enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               &Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
+               Assembler::word, /*acquire*/ true, /*release*/ true);
   %}
 
 
@@ -4676,7 +4676,12 @@
 
     // Compare object markOop with mark and if equal exchange scratch1
     // with object markOop.
-    {
+    if (UseLSE) {
+      __ mov(tmp, disp_hdr);
+      __ casal(Assembler::xword, tmp, box, oop);
+      __ cmp(tmp, disp_hdr);
+      __ br(Assembler::EQ, cont);
+    } else {
       Label retry_load;
       __ bind(retry_load);
       __ ldaxr(tmp, oop);
@@ -4726,7 +4731,11 @@
       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
       __ mov(disp_hdr, zr);
 
-      {
+      if (UseLSE) {
+        __ mov(rscratch1, disp_hdr);
+        __ casal(Assembler::xword, rscratch1, rthread, tmp);
+        __ cmp(rscratch1, disp_hdr);
+      } else {
         Label retry_load, fail;
         __ bind(retry_load);
         __ ldaxr(rscratch1, tmp);
@@ -4815,7 +4824,11 @@
     // see the stack address of the basicLock in the markOop of the
     // object.
 
-      {
+      if (UseLSE) {
+        __ mov(tmp, box);
+        __ casl(Assembler::xword, tmp, disp_hdr, oop);
+        __ cmp(tmp, box);
+      } else {
         Label retry_load;
         __ bind(retry_load);
         __ ldxr(tmp, oop);
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Mon Mar 07 15:03:48 2016 -0800
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Tue Mar 08 14:39:50 2016 +0000
@@ -972,7 +972,7 @@
 
   // System
   void system(int op0, int op1, int CRn, int CRm, int op2,
-              Register rt = (Register)0b11111)
+              Register rt = dummy_reg)
   {
     starti;
     f(0b11010101000, 31, 21);
@@ -1082,7 +1082,7 @@
 
 #define INSN(NAME, opc)                         \
   void NAME() {                 \
-    branch_reg((Register)0b11111, opc);         \
+    branch_reg(dummy_reg, opc);         \
   }
 
   INSN(eret, 0b0100);
@@ -1094,10 +1094,22 @@
   enum operand_size { byte, halfword, word, xword };
 
   void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
-    Register Rn, enum operand_size sz, int op, int o0) {
+    Register Rn, enum operand_size sz, int op, bool ordered) {
     starti;
     f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
-    rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
+    rf(Rs, 16), f(ordered, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
+  }
+
+  void load_exclusive(Register dst, Register addr,
+                      enum operand_size sz, bool ordered) {
+    load_store_exclusive(dummy_reg, dst, dummy_reg, addr,
+                         sz, 0b010, ordered);
+  }
+
+  void store_exclusive(Register status, Register new_val, Register addr,
+                       enum operand_size sz, bool ordered) {
+    load_store_exclusive(status, new_val, dummy_reg, addr,
+                         sz, 0b000, ordered);
   }
 
 #define INSN4(NAME, sz, op, o0) /* Four registers */                    \
@@ -1109,19 +1121,19 @@
 #define INSN3(NAME, sz, op, o0) /* Three registers */                   \
   void NAME(Register Rs, Register Rt, Register Rn) {                    \
     guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction");       \
-    load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0);    \
+    load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0); \
   }
 
 #define INSN2(NAME, sz, op, o0) /* Two registers */                     \
   void NAME(Register Rt, Register Rn) {                                 \
-    load_store_exclusive((Register)0b11111, Rt, (Register)0b11111,      \
+    load_store_exclusive(dummy_reg, Rt, dummy_reg, \
                          Rn, sz, op, o0);                               \
   }
 
 #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
   void NAME(Register Rt1, Register Rt2, Register Rn) {                  \
     guarantee(Rt1 != Rt2, "unpredictable instruction");                 \
-    load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0);  \
+    load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0);          \
   }
 
   // bytes
@@ -1169,6 +1181,46 @@
 #undef INSN4
 #undef INSN_FOO
 
+  // 8.1 Compare and swap extensions
+  // Encode CAS (not_pair: bit 23 set) or CASP (pair: bit 23 clear); a/r = acquire/release.
+  void lse_cas(Register Rs, Register Rt, Register Rn,
+                        enum operand_size sz, bool a, bool r, bool not_pair) {
+    starti;
+    if (! not_pair) { // Pair: the size bit is in bit 30, not 31
+      assert(sz == word || sz == xword, "invalid size");
+      sz = (operand_size)(sz == word ? 0b00:0b01);
+    }
+    f(sz, 31, 30), f(0b001000, 29, 24), f(not_pair ? 1 : 0, 23), f(a, 22), f(1, 21);
+    rf(Rs, 16), f(r, 15), f(0b11111, 14, 10), rf(Rn, 5), rf(Rt, 0);
+  }
+
+  // CAS
+#define INSN(NAME, a, r)                                                \
+  void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) {   \
+    assert(Rs != Rn && Rs != Rt, "unpredictable instruction");          \
+    lse_cas(Rs, Rt, Rn, sz, a, r, true);                                \
+  }
+  INSN(cas,    false, false)
+  INSN(casa,   true,  false)
+  INSN(casl,   false, true)
+  INSN(casal,  true,  true)
+#undef INSN
+
+  // CASP
+#define INSN(NAME, a, r)                                                \
+  void NAME(operand_size sz, Register Rs, Register Rs1,                 \
+            Register Rt, Register Rt1, Register Rn) {                   \
+    assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 &&    \
+           Rs->successor() == Rs1 && Rt->successor() == Rt1 &&          \
+           Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers");     \
+    lse_cas(Rs, Rt, Rn, sz, a, r, false);                               \
+  }
+  INSN(casp,    false, false)
+  INSN(caspa,   true,  false)
+  INSN(caspl,   false, true)
+  INSN(caspal,  true,  true)
+#undef INSN
+
   // Load register (literal)
 #define INSN(NAME, opc, V)                                              \
   void NAME(Register Rt, address dest) {                                \
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Mon Mar 07 15:03:48 2016 -0800
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Tue Mar 08 14:39:50 2016 +0000
@@ -1556,38 +1556,52 @@
 }
 
 void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
-  Label retry_load, nope;
-  // flush and load exclusive from the memory location
-  // and fail if it is not what we expect
-  __ bind(retry_load);
-  __ ldaxrw(rscratch1, addr);
-  __ cmpw(rscratch1, cmpval);
-  __ cset(rscratch1, Assembler::NE);
-  __ br(Assembler::NE, nope);
-  // if we store+flush with no intervening write rscratch1 wil be zero
-  __ stlxrw(rscratch1, newval, addr);
-  // retry so we only ever return after a load fails to compare
-  // ensures we don't return a stale value after a failed write.
-  __ cbnzw(rscratch1, retry_load);
-  __ bind(nope);
+  if (UseLSE) {
+    __ mov(rscratch1, cmpval);  // work on a copy so cmpval survives for the compare below
+    __ casal(Assembler::word, rscratch1, newval, addr);
+    __ cmpw(rscratch1, cmpval);
+    __ cset(rscratch1, Assembler::NE);
+  } else {
+    Label retry_load, nope;
+    // flush and load exclusive from the memory location
+    // and fail if it is not what we expect
+    __ bind(retry_load);
+    __ ldaxrw(rscratch1, addr);
+    __ cmpw(rscratch1, cmpval);
+    __ cset(rscratch1, Assembler::NE);
+    __ br(Assembler::NE, nope);
+    // if we store+flush with no intervening write rscratch1 will be zero
+    __ stlxrw(rscratch1, newval, addr);
+    // retry so we only ever return after a load fails to compare
+    // ensures we don't return a stale value after a failed write.
+    __ cbnzw(rscratch1, retry_load);
+    __ bind(nope);
+  }
   __ membar(__ AnyAny);
 }
 
 void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
-  Label retry_load, nope;
-  // flush and load exclusive from the memory location
-  // and fail if it is not what we expect
-  __ bind(retry_load);
-  __ ldaxr(rscratch1, addr);
-  __ cmp(rscratch1, cmpval);
-  __ cset(rscratch1, Assembler::NE);
-  __ br(Assembler::NE, nope);
-  // if we store+flush with no intervening write rscratch1 wil be zero
-  __ stlxr(rscratch1, newval, addr);
-  // retry so we only ever return after a load fails to compare
-  // ensures we don't return a stale value after a failed write.
-  __ cbnz(rscratch1, retry_load);
-  __ bind(nope);
+  if (UseLSE) {
+    __ mov(rscratch1, cmpval);  // work on a copy so cmpval survives for the compare below
+    __ casal(Assembler::xword, rscratch1, newval, addr);
+    __ cmp(rscratch1, cmpval);
+    __ cset(rscratch1, Assembler::NE);
+  } else {
+    Label retry_load, nope;
+    // flush and load exclusive from the memory location
+    // and fail if it is not what we expect
+    __ bind(retry_load);
+    __ ldaxr(rscratch1, addr);
+    __ cmp(rscratch1, cmpval);
+    __ cset(rscratch1, Assembler::NE);
+    __ br(Assembler::NE, nope);
+    // if we store+flush with no intervening write rscratch1 will be zero
+    __ stlxr(rscratch1, newval, addr);
+    // retry so we only ever return after a load fails to compare
+    // ensures we don't return a stale value after a failed write.
+    __ cbnz(rscratch1, retry_load);
+    __ bind(nope);
+  }
   __ membar(__ AnyAny);
 }
 
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp	Mon Mar 07 15:03:48 2016 -0800
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp	Tue Mar 08 14:39:50 2016 +0000
@@ -103,6 +103,9 @@
                                                                         \
   product(bool, UseCRC32, false,                                        \
           "Use CRC32 instructions for CRC32 computation")               \
+                                                                        \
+  product(bool, UseLSE, false,                                          \
+          "Use LSE instructions")                                       \
 
 // Don't attempt to use Neon on builtin sim until builtin sim supports it
 #define UseCRC32 false
@@ -123,6 +126,8 @@
           "Use Neon for CRC32 computation")                             \
   product(bool, UseCRC32, false,                                        \
           "Use CRC32 instructions for CRC32 computation")               \
+  product(bool, UseLSE, false,                                          \
+          "Use LSE instructions")                                       \
   product(bool, TraceTraps, false, "Trace all traps the signal handler")
 
 #endif
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Mon Mar 07 15:03:48 2016 -0800
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Mar 08 14:39:50 2016 +0000
@@ -2070,25 +2070,32 @@
   // oldv holds comparison value
   // newv holds value to write in exchange
   // addr identifies memory word to compare against/update
-  // tmp returns 0/1 for success/failure
-  Label retry_load, nope;
-
-  bind(retry_load);
-  // flush and load exclusive from the memory location
-  // and fail if it is not what we expect
-  ldaxr(tmp, addr);
-  cmp(tmp, oldv);
-  br(Assembler::NE, nope);
-  // if we store+flush with no intervening write tmp wil be zero
-  stlxr(tmp, newv, addr);
-  cbzw(tmp, succeed);
-  // retry so we only ever return after a load fails to compare
-  // ensures we don't return a stale value after a failed write.
-  b(retry_load);
-  // if the memory word differs we return it in oldv and signal a fail
-  bind(nope);
-  membar(AnyAny);
-  mov(oldv, tmp);
+  if (UseLSE) {
+    mov(tmp, oldv);
+    casal(Assembler::xword, oldv, newv, addr);
+    cmp(tmp, oldv);
+    br(Assembler::EQ, succeed);
+    membar(AnyAny);
+  } else {
+    Label retry_load, nope;
+
+    bind(retry_load);
+    // flush and load exclusive from the memory location
+    // and fail if it is not what we expect
+    ldaxr(tmp, addr);
+    cmp(tmp, oldv);
+    br(Assembler::NE, nope);
+    // if we store+flush with no intervening write tmp wil be zero
+    stlxr(tmp, newv, addr);
+    cbzw(tmp, succeed);
+    // retry so we only ever return after a load fails to compare
+    // ensures we don't return a stale value after a failed write.
+    b(retry_load);
+    // if the memory word differs we return it in oldv and signal a fail
+    bind(nope);
+    membar(AnyAny);
+    mov(oldv, tmp);
+  }
   if (fail)
     b(*fail);
 }
@@ -2099,28 +2106,63 @@
   // newv holds value to write in exchange
   // addr identifies memory word to compare against/update
   // tmp returns 0/1 for success/failure
-  Label retry_load, nope;
-
-  bind(retry_load);
-  // flush and load exclusive from the memory location
-  // and fail if it is not what we expect
-  ldaxrw(tmp, addr);
-  cmp(tmp, oldv);
-  br(Assembler::NE, nope);
-  // if we store+flush with no intervening write tmp wil be zero
-  stlxrw(tmp, newv, addr);
-  cbzw(tmp, succeed);
-  // retry so we only ever return after a load fails to compare
-  // ensures we don't return a stale value after a failed write.
-  b(retry_load);
-  // if the memory word differs we return it in oldv and signal a fail
-  bind(nope);
-  membar(AnyAny);
-  mov(oldv, tmp);
+  if (UseLSE) {
+    mov(tmp, oldv);
+    casal(Assembler::word, oldv, newv, addr);
+    cmp(tmp, oldv);
+    br(Assembler::EQ, succeed);
+    membar(AnyAny);
+  } else {
+    Label retry_load, nope;
+
+    bind(retry_load);
+    // flush and load exclusive from the memory location
+    // and fail if it is not what we expect
+    ldaxrw(tmp, addr);
+    cmp(tmp, oldv);
+    br(Assembler::NE, nope);
+    // if we store+flush with no intervening write tmp wil be zero
+    stlxrw(tmp, newv, addr);
+    cbzw(tmp, succeed);
+    // retry so we only ever return after a load fails to compare
+    // ensures we don't return a stale value after a failed write.
+    b(retry_load);
+    // if the memory word differs we return it in oldv and signal a fail
+    bind(nope);
+    membar(AnyAny);
+    mov(oldv, tmp);
+  }
   if (fail)
     b(*fail);
 }
 
+// A generic CAS; success or failure is in the EQ flag.
+// size selects the operand width; tmp is used as scratch and is clobbered.
+void MacroAssembler::cmpxchg(Register addr, Register expected,
+                             Register new_val, enum operand_size size,
+                             bool acquire, bool release, Register tmp) {
+  if (UseLSE) {
+    mov(tmp, expected);
+    lse_cas(tmp, new_val, addr, size, acquire, release, /*not_pair*/ true);
+    // Compare at the operand width, exactly as the LL/SC path below does.
+    if (size == xword) cmp(tmp, expected); else cmpw(tmp, expected);
+  } else {
+    BLOCK_COMMENT("cmpxchg {");
+    Label retry_load, done;
+    bind(retry_load);
+    load_exclusive(tmp, addr, size, acquire);
+    if (size == xword)
+      cmp(tmp, expected);
+    else
+      cmpw(tmp, expected);
+    br(Assembler::NE, done);
+    store_exclusive(tmp, new_val, addr, size, release);
+    cbnzw(tmp, retry_load);
+    bind(done);
+    BLOCK_COMMENT("} cmpxchg");
+  }
+}
+
 static bool different(Register a, RegisterOrConstant b, Register c) {
   if (b.is_constant())
     return a != c;
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Mon Mar 07 15:03:48 2016 -0800
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Mar 08 14:39:50 2016 +0000
@@ -971,21 +971,10 @@
   }
 
   // A generic CAS; success or failure is in the EQ flag.
-  template <typename T1, typename T2>
   void cmpxchg(Register addr, Register expected, Register new_val,
-               T1 load_insn,
-               void (MacroAssembler::*cmp_insn)(Register, Register),
-               T2 store_insn,
-               Register tmp = rscratch1) {
-    Label retry_load, done;
-    bind(retry_load);
-    (this->*load_insn)(tmp, addr);
-    (this->*cmp_insn)(tmp, expected);
-    br(Assembler::NE, done);
-    (this->*store_insn)(tmp, new_val, addr);
-    cbnzw(tmp, retry_load);
-    bind(done);
-  }
+               enum operand_size size,
+               bool acquire, bool release,
+               Register tmp = rscratch1);
 
   // Calls
 
--- a/src/cpu/aarch64/vm/register_aarch64.hpp	Mon Mar 07 15:03:48 2016 -0800
+++ b/src/cpu/aarch64/vm/register_aarch64.hpp	Tue Mar 08 14:39:50 2016 +0000
@@ -107,6 +107,9 @@
 CONSTANT_REGISTER_DECLARATION(Register, zr,  (32));
 CONSTANT_REGISTER_DECLARATION(Register, sp,  (33));
 
+// Used as a filler in instructions where a register field is unused.
+const Register dummy_reg = r31_sp;
+
 // Use FloatRegister as shortcut
 class FloatRegisterImpl;
 typedef FloatRegisterImpl* FloatRegister;
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Mon Mar 07 15:03:48 2016 -0800
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Tue Mar 08 14:39:50 2016 +0000
@@ -61,6 +61,10 @@
 #define HWCAP_CRC32 (1<<7)
 #endif
 
+#ifndef HWCAP_ATOMICS
+#define HWCAP_ATOMICS (1<<8)
+#endif
+
 int VM_Version::_cpu;
 int VM_Version::_model;
 int VM_Version::_model2;
@@ -172,6 +176,7 @@
   if (auxv & HWCAP_AES)   strcat(buf, ", aes");
   if (auxv & HWCAP_SHA1)  strcat(buf, ", sha1");
   if (auxv & HWCAP_SHA2)  strcat(buf, ", sha256");
+  if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse");
 
   _features_string = os::strdup(buf);
 
@@ -191,6 +196,15 @@
     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
   }
 
+  if (auxv & HWCAP_ATOMICS) {
+    if (FLAG_IS_DEFAULT(UseLSE))
+      FLAG_SET_DEFAULT(UseLSE, true);
+  } else if (UseLSE) {
+    // Warn, then switch the flag off so no LSE code is emitted for this CPU.
+    warning("UseLSE specified, but not supported on this CPU");
+    FLAG_SET_DEFAULT(UseLSE, false);
+  }
+
   if (auxv & HWCAP_AES) {
     UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
     UseAESIntrinsics =