changeset 39444:2eae9b74c1f3

8159976: PPC64: Add missing intrinsics for sub-word atomics Reviewed-by: shade, goetz
author mdoerr
date Thu, 23 Jun 2016 17:38:29 +0200
parents ca6dfb34e46c
children 7ba3b2a8e48e
files hotspot/src/cpu/ppc/vm/assembler_ppc.hpp hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp hotspot/src/cpu/ppc/vm/ppc.ad
diffstat 5 files changed, 864 insertions(+), 974 deletions(-) [+]
line wrap: on
line diff
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp	Thu Jun 23 05:13:55 2016 +0000
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp	Thu Jun 23 17:38:29 2016 +0200
@@ -706,9 +706,13 @@
     TW_OPCODE      = (31u << OPCODE_SHIFT |    4u << 1),
 
     // Atomics.
+    LBARX_OPCODE   = (31u << OPCODE_SHIFT |   52u << 1),
+    LHARX_OPCODE   = (31u << OPCODE_SHIFT |  116u << 1),
     LWARX_OPCODE   = (31u << OPCODE_SHIFT |   20u << 1),
     LDARX_OPCODE   = (31u << OPCODE_SHIFT |   84u << 1),
     LQARX_OPCODE   = (31u << OPCODE_SHIFT |  276u << 1),
+    STBCX_OPCODE   = (31u << OPCODE_SHIFT |  694u << 1),
+    STHCX_OPCODE   = (31u << OPCODE_SHIFT |  726u << 1),
     STWCX_OPCODE   = (31u << OPCODE_SHIFT |  150u << 1),
     STDCX_OPCODE   = (31u << OPCODE_SHIFT |  214u << 1),
     STQCX_OPCODE   = (31u << OPCODE_SHIFT |  182u << 1)
@@ -1796,13 +1800,19 @@
   inline void waitrsv(); // >=Power7
 
   // atomics
+  inline void lbarx_unchecked(Register d, Register a, Register b, int eh1 = 0); // >=Power 8
+  inline void lharx_unchecked(Register d, Register a, Register b, int eh1 = 0); // >=Power 8
   inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
   inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
-  inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
+  inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0); // >=Power 8
   inline bool lxarx_hint_exclusive_access();
+  inline void lbarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
+  inline void lharx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
   inline void lwarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
   inline void ldarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
   inline void lqarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
+  inline void stbcx_( Register s, Register a, Register b);
+  inline void sthcx_( Register s, Register a, Register b);
   inline void stwcx_( Register s, Register a, Register b);
   inline void stdcx_( Register s, Register a, Register b);
   inline void stqcx_( Register s, Register a, Register b);
@@ -2169,12 +2179,18 @@
   inline void dcbtstct(Register s2, int ct);
 
   // Atomics: use ra0mem to disallow R0 as base.
+  inline void lbarx_unchecked(Register d, Register b, int eh1);
+  inline void lharx_unchecked(Register d, Register b, int eh1);
   inline void lwarx_unchecked(Register d, Register b, int eh1);
   inline void ldarx_unchecked(Register d, Register b, int eh1);
   inline void lqarx_unchecked(Register d, Register b, int eh1);
+  inline void lbarx( Register d, Register b, bool hint_exclusive_access);
+  inline void lharx( Register d, Register b, bool hint_exclusive_access);
   inline void lwarx( Register d, Register b, bool hint_exclusive_access);
   inline void ldarx( Register d, Register b, bool hint_exclusive_access);
   inline void lqarx( Register d, Register b, bool hint_exclusive_access);
+  inline void stbcx_(Register s, Register b);
+  inline void sthcx_(Register s, Register b);
   inline void stwcx_(Register s, Register b);
   inline void stdcx_(Register s, Register b);
   inline void stqcx_(Register s, Register b);
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Thu Jun 23 05:13:55 2016 +0000
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Thu Jun 23 17:38:29 2016 +0200
@@ -594,13 +594,19 @@
 
 // atomics
 // Use ra0mem to disallow R0 as base.
+inline void Assembler::lbarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LBARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
+inline void Assembler::lharx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LHARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
 inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
 inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
 inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
 inline bool Assembler::lxarx_hint_exclusive_access()                                          { return VM_Version::has_lxarxeh(); }
+inline void Assembler::lbarx( Register d, Register a, Register b, bool hint_exclusive_access) { lbarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::lharx( Register d, Register a, Register b, bool hint_exclusive_access) { lharx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::stbcx_(Register s, Register a, Register b)                             { emit_int32( STBCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
+inline void Assembler::sthcx_(Register s, Register a, Register b)                             { emit_int32( STHCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
 inline void Assembler::stwcx_(Register s, Register a, Register b)                             { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
 inline void Assembler::stdcx_(Register s, Register a, Register b)                             { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
 inline void Assembler::stqcx_(Register s, Register a, Register b)                             { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
@@ -933,12 +939,18 @@
 inline void Assembler::dcbtstct(Register s2, int ct)  { emit_int32( DCBTST_OPCODE | rb(s2) | thct(ct)); }
 
 // ra0 version
+inline void Assembler::lbarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LBARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
+inline void Assembler::lharx_unchecked(Register d, Register b, int eh1)          { emit_int32( LHARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
 inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
 inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
 inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
+inline void Assembler::lbarx( Register d, Register b, bool hint_exclusive_access){ lbarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::lharx( Register d, Register b, bool hint_exclusive_access){ lharx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::stbcx_(Register s, Register b)                            { emit_int32( STBCX_OPCODE | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::sthcx_(Register s, Register b)                            { emit_int32( STHCX_OPCODE | rs(s) | rb(b) | rc(1)); }
 inline void Assembler::stwcx_(Register s, Register b)                            { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); }
 inline void Assembler::stdcx_(Register s, Register b)                            { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); }
 inline void Assembler::stqcx_(Register s, Register b)                            { emit_int32( STQCX_OPCODE | rs(s) | rb(b) | rc(1)); }
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Thu Jun 23 05:13:55 2016 +0000
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Thu Jun 23 17:38:29 2016 +0200
@@ -1422,42 +1422,168 @@
   bind(no_reserved_zone_enabling);
 }
 
-// CmpxchgX sets condition register to cmpX(current, compare).
-void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_value,
-                              Register compare_value, Register exchange_value,
-                              Register addr_base, int semantics, bool cmpxchgx_hint,
-                              Register int_flag_success, bool contention_hint, bool weak) {
+void MacroAssembler::getandsetd(Register dest_current_value, Register exchange_value, Register addr_base,
+                                bool cmpxchgx_hint) {
   Label retry;
-  Label failed;
-  Label done;
-
-  // Save one branch if result is returned via register and
-  // result register is different from the other ones.
-  bool use_result_reg    = (int_flag_success != noreg);
-  bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
-                            int_flag_success != exchange_value && int_flag_success != addr_base);
-  assert(!weak || flag == CCR0, "weak only supported with CCR0");
-
-  if (use_result_reg && preset_result_reg) {
-    li(int_flag_success, 0); // preset (assume cas failed)
+  bind(retry);
+  ldarx(dest_current_value, addr_base, cmpxchgx_hint);
+  stdcx_(exchange_value, addr_base);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
+  } else {
+    bne(                  CCR0, retry); // StXcx_ sets CCR0.
   }
-
-  // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
-  if (contention_hint) { // Don't try to reserve if cmp fails.
-    lwz(dest_current_value, 0, addr_base);
-    cmpw(flag, dest_current_value, compare_value);
-    bne(flag, failed);
+}
+
+void MacroAssembler::getandaddd(Register dest_current_value, Register inc_value, Register addr_base,
+                                Register tmp, bool cmpxchgx_hint) {
+  Label retry;
+  bind(retry);
+  ldarx(dest_current_value, addr_base, cmpxchgx_hint);
+  add(tmp, dest_current_value, inc_value);
+  stdcx_(tmp, addr_base);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
+  } else {
+    bne(                  CCR0, retry); // StXcx_ sets CCR0.
   }
-
-  // release/fence semantics
-  if (semantics & MemBarRel) {
-    release();
+}
+
+// Word/sub-word atomic helper functions
+
+// Temps and addr_base are killed if size < 4 and processor does not support respective instructions.
+// Only signed types are supported with size < 4.
+// Atomic add always kills tmp1.
+void MacroAssembler::atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value,
+                                                   Register addr_base, Register tmp1, Register tmp2, Register tmp3,
+                                                   bool cmpxchgx_hint, bool is_add, int size) {
+  // Sub-word instructions are available since Power 8.
+  // For older processors, instruction_type != size holds, and we
+  // emulate the sub-word instructions by constructing a 4-byte value
+  // that leaves the other bytes unchanged.
+  const int instruction_type = VM_Version::has_lqarx() ? size : 4;
+
+  Label retry;
+  Register shift_amount = noreg,
+           val32 = dest_current_value,
+           modval = is_add ? tmp1 : exchange_value;
+
+  if (instruction_type != size) {
+    assert_different_registers(tmp1, tmp2, tmp3, dest_current_value, exchange_value, addr_base);
+    modval = tmp1;
+    shift_amount = tmp2;
+    val32 = tmp3;
+    // Need some preparation: Compute shift amount, align address. Note: shorts must be 2 byte aligned.
+#ifdef VM_LITTLE_ENDIAN
+    rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8;
+    clrrdi(addr_base, addr_base, 2);
+#else
+    xori(shift_amount, addr_base, (size == 1) ? 3 : 2);
+    clrrdi(addr_base, addr_base, 2);
+    rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16;
+#endif
   }
 
   // atomic emulation loop
   bind(retry);
 
-  lwarx(dest_current_value, addr_base, cmpxchgx_hint);
+  switch (instruction_type) {
+    case 4: lwarx(val32, addr_base, cmpxchgx_hint); break;
+    case 2: lharx(val32, addr_base, cmpxchgx_hint); break;
+    case 1: lbarx(val32, addr_base, cmpxchgx_hint); break;
+    default: ShouldNotReachHere();
+  }
+
+  if (instruction_type != size) {
+    srw(dest_current_value, val32, shift_amount);
+  }
+
+  if (is_add) { add(modval, dest_current_value, exchange_value); }
+
+  if (instruction_type != size) {
+    // Transform exchange value such that the replacement can be done by one xor instruction.
+    xorr(modval, dest_current_value, is_add ? modval : exchange_value);
+    clrldi(modval, modval, (size == 1) ? 56 : 48);
+    slw(modval, modval, shift_amount);
+    xorr(modval, val32, modval);
+  }
+
+  switch (instruction_type) {
+    case 4: stwcx_(modval, addr_base); break;
+    case 2: sthcx_(modval, addr_base); break;
+    case 1: stbcx_(modval, addr_base); break;
+    default: ShouldNotReachHere();
+  }
+
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // StXcx_ sets CCR0.
+  } else {
+    bne(                  CCR0, retry); // StXcx_ sets CCR0.
+  }
+
+  // l?arx zero-extends, but Java wants byte/short values sign-extended.
+  if (size == 1) {
+    extsb(dest_current_value, dest_current_value);
+  } else if (size == 2) {
+    extsh(dest_current_value, dest_current_value);
+  };
+}
+
+// Temps, addr_base and exchange_value are killed if size < 4 and processor does not support respective instructions.
+// Only signed types are supported with size < 4.
+void MacroAssembler::cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value,
+                                       Register compare_value, Register exchange_value,
+                                       Register addr_base, Register tmp1, Register tmp2,
+                                       Label &retry, Label &failed, bool cmpxchgx_hint, int size) {
+  // Sub-word instructions are available since Power 8.
+  // For older processors, instruction_type != size holds, and we
+  // emulate the sub-word instructions by constructing a 4-byte value
+  // that leaves the other bytes unchanged.
+  const int instruction_type = VM_Version::has_lqarx() ? size : 4;
+
+  Register shift_amount = noreg,
+           val32 = dest_current_value,
+           modval = exchange_value;
+
+  if (instruction_type != size) {
+    assert_different_registers(tmp1, tmp2, dest_current_value, compare_value, exchange_value, addr_base);
+    shift_amount = tmp1;
+    val32 = tmp2;
+    modval = tmp2;
+    // Need some preparation: Compute shift amount, align address. Note: shorts must be 2 byte aligned.
+#ifdef VM_LITTLE_ENDIAN
+    rldic(shift_amount, addr_base, 3, 64-5); // (dest & 3) * 8;
+    clrrdi(addr_base, addr_base, 2);
+#else
+    xori(shift_amount, addr_base, (size == 1) ? 3 : 2);
+    clrrdi(addr_base, addr_base, 2);
+    rldic(shift_amount, shift_amount, 3, 64-5); // byte: ((3-dest) & 3) * 8; short: ((1-dest/2) & 1) * 16;
+#endif
+    // Transform exchange value such that the replacement can be done by one xor instruction.
+    xorr(exchange_value, compare_value, exchange_value);
+    clrldi(exchange_value, exchange_value, (size == 1) ? 56 : 48);
+    slw(exchange_value, exchange_value, shift_amount);
+  }
+
+  // atomic emulation loop
+  bind(retry);
+
+  switch (instruction_type) {
+    case 4: lwarx(val32, addr_base, cmpxchgx_hint); break;
+    case 2: lharx(val32, addr_base, cmpxchgx_hint); break;
+    case 1: lbarx(val32, addr_base, cmpxchgx_hint); break;
+    default: ShouldNotReachHere();
+  }
+
+  if (instruction_type != size) {
+    srw(dest_current_value, val32, shift_amount);
+  }
+  if (size == 1) {
+    extsb(dest_current_value, dest_current_value);
+  } else if (size == 2) {
+    extsh(dest_current_value, dest_current_value);
+  };
+
   cmpw(flag, dest_current_value, compare_value);
   if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
     bne_predict_not_taken(flag, failed);
@@ -1467,7 +1593,60 @@
   // branch to done  => (flag == ne), (dest_current_value != compare_value)
   // fall through    => (flag == eq), (dest_current_value == compare_value)
 
-  stwcx_(exchange_value, addr_base);
+  if (instruction_type != size) {
+    xorr(modval, val32, exchange_value);
+  }
+
+  switch (instruction_type) {
+    case 4: stwcx_(modval, addr_base); break;
+    case 2: sthcx_(modval, addr_base); break;
+    case 1: stbcx_(modval, addr_base); break;
+    default: ShouldNotReachHere();
+  }
+}
+
+// CmpxchgX sets condition register to cmpX(current, compare).
+void MacroAssembler::cmpxchg_generic(ConditionRegister flag, Register dest_current_value,
+                                     Register compare_value, Register exchange_value,
+                                     Register addr_base, Register tmp1, Register tmp2,
+                                     int semantics, bool cmpxchgx_hint,
+                                     Register int_flag_success, bool contention_hint, bool weak, int size) {
+  Label retry;
+  Label failed;
+  Label done;
+
+  // Save one branch if result is returned via register and
+  // result register is different from the other ones.
+  bool use_result_reg    = (int_flag_success != noreg);
+  bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
+                            int_flag_success != exchange_value && int_flag_success != addr_base &&
+                            int_flag_success != tmp1 && int_flag_success != tmp2);
+  assert(!weak || flag == CCR0, "weak only supported with CCR0");
+  assert(size == 1 || size == 2 || size == 4, "unsupported");
+
+  if (use_result_reg && preset_result_reg) {
+    li(int_flag_success, 0); // preset (assume cas failed)
+  }
+
+  // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
+  if (contention_hint) { // Don't try to reserve if cmp fails.
+    switch (size) {
+      case 1: lbz(dest_current_value, 0, addr_base); extsb(dest_current_value, dest_current_value); break;
+      case 2: lha(dest_current_value, 0, addr_base); break;
+      case 4: lwz(dest_current_value, 0, addr_base); break;
+      default: ShouldNotReachHere();
+    }
+    cmpw(flag, dest_current_value, compare_value);
+    bne(flag, failed);
+  }
+
+  // release/fence semantics
+  if (semantics & MemBarRel) {
+    release();
+  }
+
+  cmpxchg_loop_body(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2,
+                    retry, failed, cmpxchgx_hint, size);
   if (!weak || use_result_reg) {
     if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
       bne_predict_not_taken(CCR0, weak ? failed : retry); // StXcx_ sets CCR0.
@@ -3751,454 +3930,6 @@
   bind(Ldone);
 }
 
-
-// Intrinsics for non-CompactStrings
-
-// Search for a single jchar in an jchar[].
-//
-// Assumes that result differs from all other registers.
-//
-// 'haystack' is the addresses of a jchar-array.
-// 'needle' is either the character to search for or R0.
-// 'needleChar' is the character to search for if 'needle' == R0..
-// 'haycnt' is the length of the haystack. We assume 'haycnt' >=1.
-//
-// Preserves haystack, haycnt, needle and kills all other registers.
-//
-// If needle == R0, we search for the constant needleChar.
-void MacroAssembler::string_indexof_1(Register result, Register haystack, Register haycnt,
-                                      Register needle, jchar needleChar,
-                                      Register tmp1, Register tmp2) {
-
-  assert_different_registers(result, haystack, haycnt, needle, tmp1, tmp2);
-
-  Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_Found3, L_NotFound, L_End;
-  Register addr = tmp1,
-           ch1 = tmp2,
-           ch2 = R0;
-
-//3:
-   dcbtct(haystack, 0x00);                        // Indicate R/O access to haystack.
-
-   srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR).
-   mr(addr, haystack);
-   beq(CCR0, L_FinalCheck);
-   mtctr(tmp2);              // Move to count register.
-//8:
-  bind(L_InnerLoop);             // Main work horse (2x unrolled search loop).
-   lhz(ch1, 0, addr);        // Load characters from haystack.
-   lhz(ch2, 2, addr);
-   (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, needleChar);
-   (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, needleChar);
-   beq(CCR0, L_Found1);   // Did we find the needle?
-   beq(CCR1, L_Found2);
-   addi(addr, addr, 4);
-   bdnz(L_InnerLoop);
-//16:
-  bind(L_FinalCheck);
-   andi_(R0, haycnt, 1);
-   beq(CCR0, L_NotFound);
-   lhz(ch1, 0, addr);        // One position left at which we have to compare.
-   (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, needleChar);
-   beq(CCR1, L_Found3);
-//21:
-  bind(L_NotFound);
-   li(result, -1);           // Not found.
-   b(L_End);
-
-  bind(L_Found2);
-   addi(addr, addr, 2);
-//24:
-  bind(L_Found1);
-  bind(L_Found3);                  // Return index ...
-   subf(addr, haystack, addr); // relative to haystack,
-   srdi(result, addr, 1);      // in characters.
-  bind(L_End);
-}
-
-
-// Implementation of IndexOf for jchar arrays.
-//
-// The length of haystack and needle are not constant, i.e. passed in a register.
-//
-// Preserves registers haystack, needle.
-// Kills registers haycnt, needlecnt.
-// Assumes that result differs from all other registers.
-// Haystack, needle are the addresses of jchar-arrays.
-// Haycnt, needlecnt are the lengths of them, respectively.
-//
-// Needlecntval must be zero or 15-bit unsigned immediate and > 1.
-void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
-                                    Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
-                                    Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
-
-  // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
-  Label L_TooShort, L_Found, L_NotFound, L_End;
-  Register last_addr = haycnt, // Kill haycnt at the beginning.
-           addr      = tmp1,
-           n_start   = tmp2,
-           ch1       = tmp3,
-           ch2       = R0;
-
-  // **************************************************************************************************
-  // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
-  // **************************************************************************************************
-
-//1 (variable) or 3 (const):
-   dcbtct(needle, 0x00);    // Indicate R/O access to str1.
-   dcbtct(haystack, 0x00);  // Indicate R/O access to str2.
-
-  // Compute last haystack addr to use if no match gets found.
-  if (needlecntval == 0) { // variable needlecnt
-//3:
-   subf(ch1, needlecnt, haycnt);      // Last character index to compare is haycnt-needlecnt.
-   addi(addr, haystack, -2);          // Accesses use pre-increment.
-   cmpwi(CCR6, needlecnt, 2);
-   blt(CCR6, L_TooShort);          // Variable needlecnt: handle short needle separately.
-   slwi(ch1, ch1, 1);                 // Scale to number of bytes.
-   lwz(n_start, 0, needle);           // Load first 2 characters of needle.
-   add(last_addr, haystack, ch1);     // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
-   addi(needlecnt, needlecnt, -2);    // Rest of needle.
-  } else { // constant needlecnt
-  guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
-  assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
-//5:
-   addi(ch1, haycnt, -needlecntval);  // Last character index to compare is haycnt-needlecnt.
-   lwz(n_start, 0, needle);           // Load first 2 characters of needle.
-   addi(addr, haystack, -2);          // Accesses use pre-increment.
-   slwi(ch1, ch1, 1);                 // Scale to number of bytes.
-   add(last_addr, haystack, ch1);     // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
-   li(needlecnt, needlecntval-2);     // Rest of needle.
-  }
-
-  // Main Loop (now we have at least 3 characters).
-//11:
-  Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2, L_Comp3;
-  bind(L_OuterLoop); // Search for 1st 2 characters.
-  Register addr_diff = tmp4;
-   subf(addr_diff, addr, last_addr); // Difference between already checked address and last address to check.
-   addi(addr, addr, 2);              // This is the new address we want to use for comparing.
-   srdi_(ch2, addr_diff, 2);
-   beq(CCR0, L_FinalCheck);       // 2 characters left?
-   mtctr(ch2);                       // addr_diff/4
-//16:
-  bind(L_InnerLoop);                // Main work horse (2x unrolled search loop)
-   lwz(ch1, 0, addr);           // Load 2 characters of haystack (ignore alignment).
-   lwz(ch2, 2, addr);
-   cmpw(CCR0, ch1, n_start); // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
-   cmpw(CCR1, ch2, n_start);
-   beq(CCR0, L_Comp1);       // Did we find the needle start?
-   beq(CCR1, L_Comp2);
-   addi(addr, addr, 4);
-   bdnz(L_InnerLoop);
-//24:
-  bind(L_FinalCheck);
-   rldicl_(addr_diff, addr_diff, 64-1, 63); // Remaining characters not covered by InnerLoop: (addr_diff>>1)&1.
-   beq(CCR0, L_NotFound);
-   lwz(ch1, 0, addr);                       // One position left at which we have to compare.
-   cmpw(CCR1, ch1, n_start);
-   beq(CCR1, L_Comp3);
-//29:
-  bind(L_NotFound);
-   li(result, -1); // not found
-   b(L_End);
-
-
-   // **************************************************************************************************
-   // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
-   // **************************************************************************************************
-//31:
- if ((needlecntval>>1) !=1 ) { // Const needlecnt is 2 or 3? Reduce code size.
-  int nopcnt = 5;
-  if (needlecntval !=0 ) ++nopcnt; // Balance alignment (other case: see below).
-  if (needlecntval == 0) {         // We have to handle these cases separately.
-  Label L_OneCharLoop;
-  bind(L_TooShort);
-   mtctr(haycnt);
-   lhz(n_start, 0, needle);    // First character of needle
-  bind(L_OneCharLoop);
-   lhzu(ch1, 2, addr);
-   cmpw(CCR1, ch1, n_start);
-   beq(CCR1, L_Found);      // Did we find the one character needle?
-   bdnz(L_OneCharLoop);
-   li(result, -1);             // Not found.
-   b(L_End);
-  } // 8 instructions, so no impact on alignment.
-  for (int x = 0; x < nopcnt; ++x) nop();
- }
-
-  // **************************************************************************************************
-  // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
-  // **************************************************************************************************
-
-  // Compare the rest
-//36 if needlecntval==0, else 37:
-  bind(L_Comp2);
-   addi(addr, addr, 2); // First comparison has failed, 2nd one hit.
-  bind(L_Comp1);            // Addr points to possible needle start.
-  bind(L_Comp3);            // Could have created a copy and use a different return address but saving code size here.
-  if (needlecntval != 2) {  // Const needlecnt==2?
-   if (needlecntval != 3) {
-    if (needlecntval == 0) beq(CCR6, L_Found); // Variable needlecnt==2?
-    Register ind_reg = tmp4;
-    li(ind_reg, 2*2);   // First 2 characters are already compared, use index 2.
-    mtctr(needlecnt);   // Decremented by 2, still > 0.
-//40:
-   Label L_CompLoop;
-   bind(L_CompLoop);
-    lhzx(ch2, needle, ind_reg);
-    lhzx(ch1, addr, ind_reg);
-    cmpw(CCR1, ch1, ch2);
-    bne(CCR1, L_OuterLoop);
-    addi(ind_reg, ind_reg, 2);
-    bdnz(L_CompLoop);
-   } else { // No loop required if there's only one needle character left.
-    lhz(ch2, 2*2, needle);
-    lhz(ch1, 2*2, addr);
-    cmpw(CCR1, ch1, ch2);
-    bne(CCR1, L_OuterLoop);
-   }
-  }
-  // Return index ...
-//46:
-  bind(L_Found);
-   subf(addr, haystack, addr); // relative to haystack, ...
-   srdi(result, addr, 1);      // in characters.
-//48:
-  bind(L_End);
-}
-
-// Implementation of Compare for jchar arrays.
-//
-// Kills the registers str1, str2, cnt1, cnt2.
-// Kills cr0, ctr.
-// Assumes that result differes from the input registers.
-void MacroAssembler::string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg,
-                                    Register result_reg, Register tmp_reg) {
-   assert_different_registers(result_reg, str1_reg, str2_reg, cnt1_reg, cnt2_reg, tmp_reg);
-
-   Label Ldone, Lslow_case, Lslow_loop, Lfast_loop;
-   Register cnt_diff = R0,
-            limit_reg = cnt1_reg,
-            chr1_reg = result_reg,
-            chr2_reg = cnt2_reg,
-            addr_diff = str2_reg;
-
-   // 'cnt_reg' contains the number of characters in the string's character array for the
-   // pre-CompactStrings strings implementation and the number of bytes in the string's
-   // byte array for the CompactStrings strings implementation.
-   const int HAS_COMPACT_STRING = java_lang_String::has_coder_field() ? 1 : 0; // '1' = byte array, '0' = char array
-
-   // Offset 0 should be 32 byte aligned.
-//-6:
-    srawi(cnt1_reg, cnt1_reg, HAS_COMPACT_STRING);
-    srawi(cnt2_reg, cnt2_reg, HAS_COMPACT_STRING);
-//-4:
-    dcbtct(str1_reg, 0x00);  // Indicate R/O access to str1.
-    dcbtct(str2_reg, 0x00);  // Indicate R/O access to str2.
-//-2:
-   // Compute min(cnt1, cnt2) and check if 0 (bail out if we don't need to compare characters).
-    subf(result_reg, cnt2_reg, cnt1_reg);  // difference between cnt1/2
-    subf_(addr_diff, str1_reg, str2_reg);  // alias?
-    beq(CCR0, Ldone);                   // return cnt difference if both ones are identical
-    srawi(limit_reg, result_reg, 31);      // generate signmask (cnt1/2 must be non-negative so cnt_diff can't overflow)
-    mr(cnt_diff, result_reg);
-    andr(limit_reg, result_reg, limit_reg); // difference or zero (negative): cnt1<cnt2 ? cnt1-cnt2 : 0
-    add_(limit_reg, cnt2_reg, limit_reg);  // min(cnt1, cnt2)==0?
-    beq(CCR0, Ldone);                   // return cnt difference if one has 0 length
-
-    lhz(chr1_reg, 0, str1_reg);            // optional: early out if first characters mismatch
-    lhzx(chr2_reg, str1_reg, addr_diff);   // optional: early out if first characters mismatch
-    addi(tmp_reg, limit_reg, -1);          // min(cnt1, cnt2)-1
-    subf_(result_reg, chr2_reg, chr1_reg); // optional: early out if first characters mismatch
-    bne(CCR0, Ldone);                   // optional: early out if first characters mismatch
-
-   // Set loop counter by scaling down tmp_reg
-    srawi_(chr2_reg, tmp_reg, exact_log2(4)); // (min(cnt1, cnt2)-1)/4
-    ble(CCR0, Lslow_case);                 // need >4 characters for fast loop
-    andi(limit_reg, tmp_reg, 4-1);            // remaining characters
-
-   // Adapt str1_reg str2_reg for the first loop iteration
-    mtctr(chr2_reg);                 // (min(cnt1, cnt2)-1)/4
-    addi(limit_reg, limit_reg, 4+1); // compare last 5-8 characters in slow_case if mismatch found in fast_loop
-//16:
-   // Compare the rest of the characters
-   bind(Lfast_loop);
-    ld(chr1_reg, 0, str1_reg);
-    ldx(chr2_reg, str1_reg, addr_diff);
-    cmpd(CCR0, chr2_reg, chr1_reg);
-    bne(CCR0, Lslow_case); // return chr1_reg
-    addi(str1_reg, str1_reg, 4*2);
-    bdnz(Lfast_loop);
-    addi(limit_reg, limit_reg, -4); // no mismatch found in fast_loop, only 1-4 characters missing
-//23:
-   bind(Lslow_case);
-    mtctr(limit_reg);
-//24:
-   bind(Lslow_loop);
-    lhz(chr1_reg, 0, str1_reg);
-    lhzx(chr2_reg, str1_reg, addr_diff);
-    subf_(result_reg, chr2_reg, chr1_reg);
-    bne(CCR0, Ldone); // return chr1_reg
-    addi(str1_reg, str1_reg, 1*2);
-    bdnz(Lslow_loop);
-//30:
-   // If strings are equal up to min length, return the length difference.
-    mr(result_reg, cnt_diff);
-    nop(); // alignment
-//32:
-   // Otherwise, return the difference between the first mismatched chars.
-   bind(Ldone);
-}
-
-
-// Compare char[] arrays.
-//
-// str1_reg   USE only
-// str2_reg   USE only
-// cnt_reg    USE_DEF, due to tmp reg shortage
-// result_reg DEF only, might compromise USE only registers
-void MacroAssembler::char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg,
-                                        Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg,
-                                        Register tmp5_reg) {
-
-  // Str1 may be the same register as str2 which can occur e.g. after scalar replacement.
-  assert_different_registers(result_reg, str1_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
-  assert_different_registers(result_reg, str2_reg, cnt_reg, tmp1_reg, tmp2_reg, tmp3_reg, tmp4_reg, tmp5_reg);
-
-  // Offset 0 should be 32 byte aligned.
-  Label Linit_cbc, Lcbc, Lloop, Ldone_true, Ldone_false;
-  Register index_reg = tmp5_reg;
-  Register cbc_iter  = tmp4_reg;
-
-  // 'cnt_reg' contains the number of characters in the string's character array for the
-  // pre-CompactStrings strings implementation and the number of bytes in the string's
-  // byte array for the CompactStrings strings implementation.
-  const int HAS_COMPACT_STRING = java_lang_String::has_coder_field() ? 1 : 0; // '1' = byte array, '0' = char array
-
-//-1:
-  dcbtct(str1_reg, 0x00);  // Indicate R/O access to str1.
-  dcbtct(str2_reg, 0x00);  // Indicate R/O access to str2.
-//1:
-  // cbc_iter: remaining characters after the '4 java characters per iteration' loop.
-  rlwinm(cbc_iter, cnt_reg, 32 - HAS_COMPACT_STRING, 30, 31); // (cnt_reg % (HAS_COMPACT_STRING ? 8 : 4)) >> HAS_COMPACT_STRING
-  li(index_reg, 0); // init
-  li(result_reg, 0); // assume false
-  // tmp2_reg: units of 4 java characters (i.e. 8 bytes) per iteration (main loop).
-  srwi_(tmp2_reg, cnt_reg, exact_log2(4 << HAS_COMPACT_STRING)); // cnt_reg / (HAS_COMPACT_STRING ? 8 : 4)
-
-  cmpwi(CCR1, cbc_iter, 0);             // CCR1 = (cbc_iter==0)
-  beq(CCR0, Linit_cbc);                 // too short
-    mtctr(tmp2_reg);
-//8:
-    bind(Lloop);
-      ldx(tmp1_reg, str1_reg, index_reg);
-      ldx(tmp2_reg, str2_reg, index_reg);
-      cmpd(CCR0, tmp1_reg, tmp2_reg);
-      bne(CCR0, Ldone_false);  // Unequal char pair found -> done.
-      addi(index_reg, index_reg, 4*sizeof(jchar));
-      bdnz(Lloop);
-//14:
-  bind(Linit_cbc);
-  beq(CCR1, Ldone_true);
-    mtctr(cbc_iter);
-//16:
-    bind(Lcbc);
-      lhzx(tmp1_reg, str1_reg, index_reg);
-      lhzx(tmp2_reg, str2_reg, index_reg);
-      cmpw(CCR0, tmp1_reg, tmp2_reg);
-      bne(CCR0, Ldone_false);  // Unequal char pair found -> done.
-      addi(index_reg, index_reg, 1*sizeof(jchar));
-      bdnz(Lcbc);
-    nop();
-  bind(Ldone_true);
-  li(result_reg, 1);
-//24:
-  bind(Ldone_false);
-}
-
-
-void MacroAssembler::char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg,
-                                           Register tmp1_reg, Register tmp2_reg) {
-  // Str1 may be the same register as str2 which can occur e.g. after scalar replacement.
-  assert_different_registers(result_reg, str1_reg, tmp1_reg, tmp2_reg);
-  assert_different_registers(result_reg, str2_reg, tmp1_reg, tmp2_reg);
-  assert(sizeof(jchar) == 2, "must be");
-  assert(cntval >= 0 && ((cntval & 0x7fff) == cntval), "wrong immediate");
-
-  // 'cntval' contains the number of characters in the string's character array for the
-  // pre-CompactStrings strings implementation and the number of bytes in the string's
-  // byte array for the CompactStrings strings implementation.
-  cntval >>= (java_lang_String::has_coder_field() ? 1 : 0); // '1' = byte array strings, '0' = char array strings
-
-  Label Ldone_false;
-
-  if (cntval < 16) { // short case
-    if (cntval != 0) li(result_reg, 0); // assume false
-
-    const int num_bytes = cntval*sizeof(jchar);
-    int index = 0;
-    for (int next_index; (next_index = index + 8) <= num_bytes; index = next_index) {
-      ld(tmp1_reg, index, str1_reg);
-      ld(tmp2_reg, index, str2_reg);
-      cmpd(CCR0, tmp1_reg, tmp2_reg);
-      bne(CCR0, Ldone_false);
-    }
-    if (cntval & 2) {
-      lwz(tmp1_reg, index, str1_reg);
-      lwz(tmp2_reg, index, str2_reg);
-      cmpw(CCR0, tmp1_reg, tmp2_reg);
-      bne(CCR0, Ldone_false);
-      index += 4;
-    }
-    if (cntval & 1) {
-      lhz(tmp1_reg, index, str1_reg);
-      lhz(tmp2_reg, index, str2_reg);
-      cmpw(CCR0, tmp1_reg, tmp2_reg);
-      bne(CCR0, Ldone_false);
-    }
-    // fallthrough: true
-  } else {
-    Label Lloop;
-    Register index_reg = tmp1_reg;
-    const int loopcnt = cntval/4;
-    assert(loopcnt > 0, "must be");
-    // Offset 0 should be 32 byte aligned.
-    //2:
-    dcbtct(str1_reg, 0x00);  // Indicate R/O access to str1.
-    dcbtct(str2_reg, 0x00);  // Indicate R/O access to str2.
-    li(tmp2_reg, loopcnt);
-    li(index_reg, 0); // init
-    li(result_reg, 0); // assume false
-    mtctr(tmp2_reg);
-    //8:
-    bind(Lloop);
-    ldx(R0, str1_reg, index_reg);
-    ldx(tmp2_reg, str2_reg, index_reg);
-    cmpd(CCR0, R0, tmp2_reg);
-    bne(CCR0, Ldone_false);  // Unequal char pair found -> done.
-    addi(index_reg, index_reg, 4*sizeof(jchar));
-    bdnz(Lloop);
-    //14:
-    if (cntval & 2) {
-      lwzx(R0, str1_reg, index_reg);
-      lwzx(tmp2_reg, str2_reg, index_reg);
-      cmpw(CCR0, R0, tmp2_reg);
-      bne(CCR0, Ldone_false);
-      if (cntval & 1) addi(index_reg, index_reg, 2*sizeof(jchar));
-    }
-    if (cntval & 1) {
-      lhzx(R0, str1_reg, index_reg);
-      lhzx(tmp2_reg, str2_reg, index_reg);
-      cmpw(CCR0, R0, tmp2_reg);
-      bne(CCR0, Ldone_false);
-    }
-    // fallthru: true
-  }
-  li(result_reg, 1);
-  bind(Ldone_false);
-}
-
 #endif // Compiler2
 
 // Helpers for Intrinsic Emitters
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Thu Jun 23 05:13:55 2016 +0000
+++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Thu Jun 23 17:38:29 2016 +0200
@@ -431,10 +431,81 @@
     MemBarAcq  = 2,
     MemBarFenceAfter = 4 // use powers of 2
   };
+ private:
+  // Helper functions for word/sub-word atomics; the public getand*/cmpxchg{b,h,w} wrappers in this header forward to them.
+  void atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value,
+                                     Register addr_base, Register tmp1, Register tmp2, Register tmp3,
+                                     bool cmpxchgx_hint, bool is_add, int size); // is_add: true for getandadd*, false for getandset*; size: 1, 2 or 4 as passed by the b/h/w wrappers
+  void cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value,
+                         Register compare_value, Register exchange_value,
+                         Register addr_base, Register tmp1, Register tmp2,
+                         Label &retry, Label &failed, bool cmpxchgx_hint, int size); // NOTE(review): no caller is visible in this header; size presumably has the same meaning as in cmpxchg_generic - confirm against the definition
+  void cmpxchg_generic(ConditionRegister flag,
+                       Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
+                       Register tmp1, Register tmp2,
+                       int semantics, bool cmpxchgx_hint, Register int_flag_success, bool contention_hint, bool weak, int size); // size: 1, 2 or 4 as passed by cmpxchgb/h/w; cmpxchgw passes noreg for tmp1/tmp2
+ public:
+  // Get-and-set with size 1. Temps (tmp1-tmp3) and addr_base are killed if the processor does not support Power 8 instructions.
+  // Result will be sign extended.
+  void getandsetb(Register dest_current_value, Register exchange_value, Register addr_base,
+                  Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
+    atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 1); // is_add = false, size = 1
+  }
+  // Get-and-set with size 2. Temps (tmp1-tmp3) and addr_base are killed if the processor does not support Power 8 instructions.
+  // Result will be sign extended.
+  void getandseth(Register dest_current_value, Register exchange_value, Register addr_base,
+                  Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
+    atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 2); // is_add = false, size = 2
+  }
+  void getandsetw(Register dest_current_value, Register exchange_value, Register addr_base,
+                  bool cmpxchgx_hint) {
+    atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, noreg, noreg, noreg, cmpxchgx_hint, false, 4); // is_add = false, size = 4; no temps are supplied (noreg for tmp1-tmp3)
+  }
+  void getandsetd(Register dest_current_value, Register exchange_value, Register addr_base,
+                  bool cmpxchgx_hint); // declaration only: unlike getandsetb/h/w there is no inline forwarding body in this header
+  // Get-and-add with size 1. tmp2/3 and addr_base are killed if the processor does not support Power 8 instructions (tmp1 is always needed).
+  // Result will be sign extended.
+  void getandaddb(Register dest_current_value, Register inc_value, Register addr_base,
+                  Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
+    atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 1); // inc_value goes in the exchange_value slot; is_add = true, size = 1
+  }
+  // Get-and-add with size 2. tmp2/3 and addr_base are killed if the processor does not support Power 8 instructions (tmp1 is always needed).
+  // Result will be sign extended.
+  void getandaddh(Register dest_current_value, Register inc_value, Register addr_base,
+                  Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
+    atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 2); // inc_value goes in the exchange_value slot; is_add = true, size = 2
+  }
+  void getandaddw(Register dest_current_value, Register inc_value, Register addr_base,
+                  Register tmp1, bool cmpxchgx_hint) {
+    atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, noreg, noreg, cmpxchgx_hint, true, 4); // is_add = true, size = 4; only tmp1 is supplied (tmp2/tmp3 are noreg)
+  }
+  void getandaddd(Register dest_current_value, Register inc_value, Register addr_base,
+                  Register tmp, bool cmpxchgx_hint); // declaration only; the addend is named inc_value to match getandaddb/h/w
+  // Compare-and-exchange with size 1. Temps, addr_base and exchange_value are killed if the processor does not support Power 8 instructions.
+  // compare_value must be at least 32 bit sign extended. Result will be sign extended.
+  void cmpxchgb(ConditionRegister flag,
+                Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
+                Register tmp1, Register tmp2, int semantics, bool cmpxchgx_hint = false,
+                Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) {
+    cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2,
+                    semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 1); // size = 1
+  }
+  // Compare-and-exchange with size 2. Temps, addr_base and exchange_value are killed if the processor does not support Power 8 instructions.
+  // compare_value must be at least 32 bit sign extended. Result will be sign extended.
+  void cmpxchgh(ConditionRegister flag,
+                Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
+                Register tmp1, Register tmp2, int semantics, bool cmpxchgx_hint = false,
+                Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) {
+    cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2,
+                    semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 2); // size = 2
+  }
   void cmpxchgw(ConditionRegister flag,
                 Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
                 int semantics, bool cmpxchgx_hint = false,
-                Register int_flag_success = noreg, bool contention_hint = false, bool weak = false);
+                Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) {
+    cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, noreg, noreg,
+                    semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 4); // size = 4; no temps are supplied (noreg for tmp1/tmp2)
+  }
   void cmpxchgd(ConditionRegister flag,
                 Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
                 Register addr_base, int semantics, bool cmpxchgx_hint = false,
@@ -717,23 +788,6 @@
                            Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte);
 
   void has_negatives(Register src, Register cnt, Register result, Register tmp1, Register tmp2);
-
-  // Intrinsics for non-CompactStrings
-  // Needle of length 1.
-  void string_indexof_1(Register result, Register haystack, Register haycnt,
-                        Register needle, jchar needleChar,
-                        Register tmp1, Register tmp2);
-  // General indexof, eventually with constant needle length.
-  void string_indexof(Register result, Register haystack, Register haycnt,
-                      Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
-                      Register tmp1, Register tmp2, Register tmp3, Register tmp4);
-  void string_compare(Register str1_reg, Register str2_reg, Register cnt1_reg, Register cnt2_reg,
-                      Register result_reg, Register tmp_reg);
-  void char_arrays_equals(Register str1_reg, Register str2_reg, Register cnt_reg, Register result_reg,
-                          Register tmp1_reg, Register tmp2_reg, Register tmp3_reg, Register tmp4_reg,
-                          Register tmp5_reg);
-  void char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg,
-                             Register tmp1_reg, Register tmp2_reg);
 #endif
 
   // Emitters for BigInteger.multiplyToLen intrinsic.
--- a/hotspot/src/cpu/ppc/vm/ppc.ad	Thu Jun 23 05:13:55 2016 +0000
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad	Thu Jun 23 17:38:29 2016 +0200
@@ -965,41 +965,9 @@
 // is the number of bytes (not instructions) which will be inserted before
 // the instruction. The padding must match the size of a NOP instruction.
 
-int string_indexOf_imm1_charNode::compute_padding(int current_offset) const {
-  return (3*4-current_offset)&31;  // see MacroAssembler::string_indexof_1
-}
-
-int string_indexOf_imm1Node::compute_padding(int current_offset) const {
-  return (3*4-current_offset)&31;  // see MacroAssembler::string_indexof_1
-}
-
-int string_indexOfCharNode::compute_padding(int current_offset) const {
-  return (3*4-current_offset)&31;  // see MacroAssembler::string_indexof_1
-}
-
-int string_indexOf_immNode::compute_padding(int current_offset) const {
-  return (3*4-current_offset)&31;  // see MacroAssembler::string_indexof(constant needlecount)
-}
-
-int string_indexOfNode::compute_padding(int current_offset) const {
-  return (1*4-current_offset)&31;  // see MacroAssembler::string_indexof(variable needlecount)
-}
-
-int string_compareNode::compute_padding(int current_offset) const {
-  return (2*4-current_offset)&31;  // see MacroAssembler::string_compare
-}
-
-int string_equals_immNode::compute_padding(int current_offset) const {
-  if (opnd_array(3)->constant() < 16) return 0; // For strlen < 16 no nops because loop completely unrolled
-  return (2*4-current_offset)&31;               // Genral case - see MacroAssembler::char_arrays_equalsImm
-}
-
-int string_equalsNode::compute_padding(int current_offset) const {
-  return (7*4-current_offset)&31;  // see MacroAssembler::char_arrays_equals
-}
-
 int inlineCallClearArrayNode::compute_padding(int current_offset) const {
-  return (2*4-current_offset)&31;  // see MacroAssembler::clear_memory_doubleword
+  int desired_padding = (2*4-current_offset)&31; // see MacroAssembler::clear_memory_doubleword
+  return (desired_padding <= 3*4) ? desired_padding : 0; // pad only when it costs at most 3*4 bytes; otherwise request no padding at all
 }
 
 //=============================================================================
@@ -3064,121 +3032,6 @@
     __ bind(done);
   %}
 
-  // New atomics.
-  enc_class enc_GetAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-
-    MacroAssembler _masm(&cbuf);
-    Register Rtmp   = R0;
-    Register Rres   = $res$$Register;
-    Register Rsrc   = $src$$Register;
-    Register Rptr   = $mem_ptr$$Register;
-    bool RegCollision = (Rres == Rsrc) || (Rres == Rptr);
-    Register Rold   = RegCollision ? Rtmp : Rres;
-
-    Label Lretry;
-    __ bind(Lretry);
-    __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
-    __ add(Rtmp, Rsrc, Rold);
-    __ stwcx_(Rtmp, Rptr);
-    if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
-      __ bne_predict_not_taken(CCR0, Lretry);
-    } else {
-      __ bne(                  CCR0, Lretry);
-    }
-    if (RegCollision) __ subf(Rres, Rsrc, Rtmp);
-    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
-      __ isync();
-    } else {
-      __ sync();
-    }
-  %}
-
-  enc_class enc_GetAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-
-    MacroAssembler _masm(&cbuf);
-    Register Rtmp   = R0;
-    Register Rres   = $res$$Register;
-    Register Rsrc   = $src$$Register;
-    Register Rptr   = $mem_ptr$$Register;
-    bool RegCollision = (Rres == Rsrc) || (Rres == Rptr);
-    Register Rold   = RegCollision ? Rtmp : Rres;
-
-    Label Lretry;
-    __ bind(Lretry);
-    __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
-    __ add(Rtmp, Rsrc, Rold);
-    __ stdcx_(Rtmp, Rptr);
-    if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
-      __ bne_predict_not_taken(CCR0, Lretry);
-    } else {
-      __ bne(                  CCR0, Lretry);
-    }
-    if (RegCollision) __ subf(Rres, Rsrc, Rtmp);
-    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
-      __ isync();
-    } else {
-      __ sync();
-    }
-  %}
-
-  enc_class enc_GetAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-
-    MacroAssembler _masm(&cbuf);
-    Register Rtmp   = R0;
-    Register Rres   = $res$$Register;
-    Register Rsrc   = $src$$Register;
-    Register Rptr   = $mem_ptr$$Register;
-    bool RegCollision = (Rres == Rsrc) || (Rres == Rptr);
-    Register Rold   = RegCollision ? Rtmp : Rres;
-
-    Label Lretry;
-    __ bind(Lretry);
-    __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
-    __ stwcx_(Rsrc, Rptr);
-    if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
-      __ bne_predict_not_taken(CCR0, Lretry);
-    } else {
-      __ bne(                  CCR0, Lretry);
-    }
-    if (RegCollision) __ mr(Rres, Rtmp);
-    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
-      __ isync();
-    } else {
-      __ sync();
-    }
-  %}
-
-  enc_class enc_GetAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-
-    MacroAssembler _masm(&cbuf);
-    Register Rtmp   = R0;
-    Register Rres   = $res$$Register;
-    Register Rsrc   = $src$$Register;
-    Register Rptr   = $mem_ptr$$Register;
-    bool RegCollision = (Rres == Rsrc) || (Rres == Rptr);
-    Register Rold   = RegCollision ? Rtmp : Rres;
-
-    Label Lretry;
-    __ bind(Lretry);
-    __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
-    __ stdcx_(Rsrc, Rptr);
-    if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
-      __ bne_predict_not_taken(CCR0, Lretry);
-    } else {
-      __ bne(                  CCR0, Lretry);
-    }
-    if (RegCollision) __ mr(Rres, Rtmp);
-    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
-      __ isync();
-    } else {
-      __ sync();
-    }
-  %}
-
   // This enc_class is needed so that scheduler gets proper
   // input mapping for latency computation.
   enc_class enc_andc(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
@@ -7575,11 +7428,90 @@
 
 // Strong versions:
 
+instruct compareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2)));
+  predicate(VM_Version::has_lqarx());
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
+  format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. src1 is the compare value, src2 the exchange value; R0 is passed as dest_current_value and no temps are supplied (noreg).
+    __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                $res$$Register, true); // int_flag_success = res, contention_hint = true
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // cmpxchgb was called with MemBarNone, so the trailing barrier is emitted here explicitly
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndSwapB mem_ptr (Binary src1 src2)));
+  predicate(!VM_Version::has_lqarx());
+  effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
+  format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. Variant for !has_lqarx(): real temps are supplied, and src2/mem_ptr are USE_KILL because cmpxchgb documents exchange_value and addr_base as killed without Power 8 instructions.
+    __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                $res$$Register, true); // int_flag_success = res, contention_hint = true
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // cmpxchgb was called with MemBarNone, so the trailing barrier is emitted here explicitly
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2)));
+  predicate(VM_Version::has_lqarx());
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
+  format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. src1 is the compare value, src2 the exchange value; R0 is passed as dest_current_value and no temps are supplied (noreg).
+    __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                $res$$Register, true); // int_flag_success = res, contention_hint = true
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // cmpxchgh was called with MemBarNone, so the trailing barrier is emitted here explicitly
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndSwapS mem_ptr (Binary src1 src2)));
+  predicate(!VM_Version::has_lqarx());
+  effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump
+  format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. Variant for !has_lqarx(): real temps are supplied, and src2/mem_ptr are USE_KILL because cmpxchgh documents exchange_value and addr_base as killed without Power 8 instructions.
+    __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                $res$$Register, true); // int_flag_success = res, contention_hint = true
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // cmpxchgh was called with MemBarNone, so the trailing barrier is emitted here explicitly
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7597,9 +7529,8 @@
 
 instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7617,9 +7548,8 @@
 
 instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7637,9 +7567,8 @@
 
 instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7657,12 +7586,131 @@
 
 // Weak versions:
 
+instruct weakCompareAndSwapB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx()); // not acquire/seqcst; has_lqarx() presumably doubles as the >=Power8 (lbarx/stbcx) check - TODO confirm
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
+  format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. No temp registers (noreg, noreg) and no barrier (MemBarNone) are passed.
+    __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
+                MacroAssembler::MemBarNone,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx()); // not acquire/seqcst; fallback used when has_lqarx() is false
+  effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump; src2 and mem_ptr are declared clobbered
+  format %{ "weak CMPXCHGB $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. tmp1/tmp2 are handed to the macro as scratch registers; no barrier (MemBarNone).
+    __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
+                MacroAssembler::MemBarNone,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
+  predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx()); // acquire/seqcst; has_lqarx() presumably doubles as the >=Power8 (lbarx/stbcx) check - TODO confirm
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
+  format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. The barrier argument is chosen by the IRIW setting; no temp registers are passed.
+    __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
+                support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapB mem_ptr (Binary src1 src2)));
+  predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx()); // acquire/seqcst; fallback used when has_lqarx() is false
+  effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump; src2 and mem_ptr are declared clobbered
+  format %{ "weak CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. tmp1/tmp2 are scratch registers; the barrier argument is chosen by the IRIW setting.
+    __ cmpxchgb(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
+                support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx()); // not acquire/seqcst; has_lqarx() presumably doubles as the >=Power8 (lharx/sthcx) check - TODO confirm
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
+  format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. No temp registers (noreg, noreg) and no barrier (MemBarNone) are passed.
+    __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
+                MacroAssembler::MemBarNone,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx()); // not acquire/seqcst; fallback used when has_lqarx() is false
+  effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump; src2 and mem_ptr are declared clobbered
+  format %{ "weak CMPXCHGH $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. tmp1/tmp2 are handed to the macro as scratch registers; no barrier (MemBarNone).
+    __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
+                MacroAssembler::MemBarNone,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
+  predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx()); // acquire/seqcst; has_lqarx() presumably doubles as the >=Power8 (lharx/sthcx) check - TODO confirm
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
+  format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. The barrier argument is chosen by the IRIW setting; no temp registers are passed.
+    __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
+                support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct weakCompareAndSwapS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
+  match(Set res (WeakCompareAndSwapS mem_ptr (Binary src1 src2)));
+  predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx()); // acquire/seqcst; fallback used when has_lqarx() is false
+  effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // TEMP_DEF to avoid jump; src2 and mem_ptr are declared clobbered
+  format %{ "weak CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'. tmp1/tmp2 are scratch registers; the barrier argument is chosen by the IRIW setting.
+    __ cmpxchgh(CCR0, R0, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, $tmp2$$Register,
+                support_IRIW_for_not_multiple_copy_atomic_cpu ? MacroAssembler::MemBarAcq : MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::cmpxchgx_hint_atomic_update(), $res$$Register, true, /*weak*/ true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 instruct weakCompareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
   match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7676,9 +7724,8 @@
 instruct weakCompareAndSwapI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
   match(Set res (WeakCompareAndSwapI mem_ptr (Binary src1 src2)));
   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7694,9 +7741,8 @@
 instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7710,9 +7756,8 @@
 instruct weakCompareAndSwapN_acq_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
   match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2)));
   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7728,9 +7773,8 @@
 instruct weakCompareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
   match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7745,9 +7789,8 @@
 instruct weakCompareAndSwapL_acq_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
   match(Set res (WeakCompareAndSwapL mem_ptr (Binary src1 src2)));
   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7763,9 +7806,8 @@
 instruct weakCompareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
   match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "weak CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7779,9 +7821,8 @@
 instruct weakCompareAndSwapP_acq_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
   match(Set res (WeakCompareAndSwapP mem_ptr (Binary src1 src2)));
   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump
   format %{ "weak CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7796,12 +7837,155 @@
 
 // CompareAndExchange
 
+instruct compareAndExchangeB_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx()); // not acquire/seqcst; has_lqarx() presumably doubles as the >=Power8 (lbarx/stbcx) check - TODO confirm
+  effect(TEMP_DEF res, TEMP cr0);
+  format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2). Here Rres is the 2nd argument and the flag register is noreg, so Rres presumably receives the value found in memory ("as int"), not 'true'/'false'.
+    __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndExchangeB4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx()); // not acquire/seqcst; fallback used when has_lqarx() is false
+  effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0); // src2 and mem_ptr are declared clobbered
+  format %{ "CMPXCHGB $res, $mem_ptr, $src1, $src2; as int" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2). Rres is the 2nd argument and the flag register is noreg, so Rres presumably receives the value found in memory ("as int"); tmp1 and R0 serve as scratch registers.
+    __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndExchangeB_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
+  predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx()); // acquire/seqcst; has_lqarx() presumably doubles as the >=Power8 (lbarx/stbcx) check - TODO confirm
+  effect(TEMP_DEF res, TEMP cr0);
+  format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2). Rres is the 2nd argument and the flag register is noreg, so Rres presumably receives the value found in memory ("as int"). The barrier is emitted separately below (MemBarNone here).
+    __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, true);
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndExchangeB4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeB mem_ptr (Binary src1 src2)));
+  predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx()); // acquire/seqcst; fallback used when has_lqarx() is false
+  effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0); // src2 and mem_ptr are declared clobbered
+  format %{ "CMPXCHGB acq $res, $mem_ptr, $src1, $src2; as int" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2). Rres is the 2nd argument and the flag register is noreg, so Rres presumably receives the value found in memory ("as int"); tmp1 and R0 serve as scratch registers. The barrier is emitted separately below.
+    __ cmpxchgb(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, true);
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndExchangeS_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && VM_Version::has_lqarx()); // not acquire/seqcst; has_lqarx() presumably doubles as the >=Power8 (lharx/sthcx) check - TODO confirm
+  effect(TEMP_DEF res, TEMP cr0);
+  format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2). Here Rres is the 2nd argument and the flag register is noreg, so Rres presumably receives the value found in memory ("as int"), not 'true'/'false'.
+    __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndExchangeS4_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
+  predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && !VM_Version::has_lqarx()); // not acquire/seqcst; fallback used when has_lqarx() is false
+  effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0); // src2 and mem_ptr are declared clobbered
+  format %{ "CMPXCHGH $res, $mem_ptr, $src1, $src2; as int" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2). Rres is the 2nd argument and the flag register is noreg, so Rres presumably receives the value found in memory ("as int"); tmp1 and R0 serve as scratch registers.
+    __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, true);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndExchangeS_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
+  predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && VM_Version::has_lqarx()); // acquire/seqcst; has_lqarx() presumably doubles as the >=Power8 (lharx/sthcx) check - TODO confirm
+  effect(TEMP_DEF res, TEMP cr0);
+  format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2). Rres is the 2nd argument and the flag register is noreg, so Rres presumably receives the value found in memory ("as int"). The barrier is emitted separately below (MemBarNone here).
+    __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, noreg, noreg,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, true);
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct compareAndExchangeS4_acq_regP_regI_regI(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src1, rarg4RegI src2, iRegIdst tmp1, flagsRegCR0 cr0) %{
+  match(Set res (CompareAndExchangeS mem_ptr (Binary src1 src2)));
+  predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && !VM_Version::has_lqarx()); // acquire/seqcst; fallback used when has_lqarx() is false
+  effect(TEMP_DEF res, USE_KILL src2, USE_KILL mem_ptr, TEMP tmp1, TEMP cr0); // src2 and mem_ptr are declared clobbered
+  format %{ "CMPXCHGH acq $res, $mem_ptr, $src1, $src2; as int" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    // CmpxchgX sets CCR0 to cmpX(src1, src2). Rres is the 2nd argument and the flag register is noreg, so Rres presumably receives the value found in memory ("as int"); tmp1 and R0 serve as scratch registers. The barrier is emitted separately below.
+    __ cmpxchgh(CCR0, $res$$Register, $src1$$Register, $src2$$Register, $mem_ptr$$Register, $tmp1$$Register, R0,
+                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, true);
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      __ isync();
+    } else {
+      // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that.
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 instruct compareAndExchangeI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndExchangeI mem_ptr (Binary src1 src2)));
   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
   effect(TEMP_DEF res, TEMP cr0);
   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as int" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7817,7 +8001,6 @@
   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
   effect(TEMP_DEF res, TEMP cr0);
   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as int" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7839,7 +8022,6 @@
   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
   effect(TEMP_DEF res, TEMP cr0);
   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as narrow oop" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7855,7 +8037,6 @@
   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
   effect(TEMP_DEF res, TEMP cr0);
   format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as narrow oop" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7877,7 +8058,6 @@
   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
   effect(TEMP_DEF res, TEMP cr0);
   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as long" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7893,7 +8073,6 @@
   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
   effect(TEMP_DEF res, TEMP cr0);
   format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as long" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7915,7 +8094,6 @@
   predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst);
   effect(TEMP_DEF res, TEMP cr0);
   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7931,7 +8109,6 @@
   predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst);
   effect(TEMP_DEF res, TEMP cr0);
   format %{ "CMPXCHGD acq $res, $mem_ptr, $src1, $src2; as ptr; ptr" %}
-  // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // CmpxchgX sets CCR0 to cmpX(src1, src2) and Rres to 'true'/'false'.
@@ -7950,57 +8127,235 @@
 
 // Special RMW
 
+instruct getAndAddB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
+  match(Set res (GetAndAddB mem_ptr src));
+  predicate(VM_Version::has_lqarx()); // has_lqarx() presumably doubles as the >=Power8 (lbarx/stbcx) check - TODO confirm
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF: res is also treated as a temp, i.e. kept apart from the input registers
+  format %{ "GetAndAddB $res, $mem_ptr, $src" %}
+  ins_encode %{
+    __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update()); // R0 plus two unused (noreg) temp slots
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct getAndAddB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
+  match(Set res (GetAndAddB mem_ptr src));
+  predicate(!VM_Version::has_lqarx()); // fallback used when has_lqarx() is false
+  effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // tmp1/tmp2 get overwritten, hence dst register classes; mem_ptr is declared clobbered
+  format %{ "GetAndAddB $res, $mem_ptr, $src" %}
+  ins_encode %{
+    __ getandaddb($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); // R0, tmp1, tmp2 are scratch for the macro
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct getAndAddS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
+  match(Set res (GetAndAddS mem_ptr src));
+  predicate(VM_Version::has_lqarx()); // has_lqarx() presumably doubles as the >=Power8 (lharx/sthcx) check - TODO confirm
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF: res is also treated as a temp, i.e. kept apart from the input registers
+  format %{ "GetAndAddS $res, $mem_ptr, $src" %}
+  ins_encode %{
+    __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  R0, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update()); // R0 plus two unused (noreg) temp slots
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct getAndAddS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
+  match(Set res (GetAndAddS mem_ptr src));
+  predicate(!VM_Version::has_lqarx()); // fallback used when has_lqarx() is false
+  effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // tmp1/tmp2 get overwritten, hence dst register classes; mem_ptr is declared clobbered
+  format %{ "GetAndAddS $res, $mem_ptr, $src" %}
+  ins_encode %{
+    __ getandaddh($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); // R0, tmp1, tmp2 are scratch for the macro
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndAddI mem_ptr src));
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF: res is also treated as a temp, i.e. kept apart from the input registers
   format %{ "GetAndAddI $res, $mem_ptr, $src" %}
-  // Variable size: instruction count smaller if regs are disjoint.
-  ins_encode( enc_GetAndAddI(res, mem_ptr, src) );
+  ins_encode %{
+    __ getandaddw($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  R0, MacroAssembler::cmpxchgx_hint_atomic_update()); // R0 is handed to the macro as scratch register
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
   ins_pipe(pipe_class_default);
 %}
 
 instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndAddL mem_ptr src));
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF: res is also treated as a temp, i.e. kept apart from the input registers
   format %{ "GetAndAddL $res, $mem_ptr, $src" %}
-  // Variable size: instruction count smaller if regs are disjoint.
-  ins_encode( enc_GetAndAddL(res, mem_ptr, src) );
+  ins_encode %{
+    __ getandaddd($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  R0, MacroAssembler::cmpxchgx_hint_atomic_update()); // R0 is handed to the macro as scratch register
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct getAndSetB(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
+  match(Set res (GetAndSetB mem_ptr src));
+  predicate(VM_Version::has_lqarx()); // has_lqarx() presumably doubles as the >=Power8 (lbarx/stbcx) check - TODO confirm
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF: res is also treated as a temp, i.e. kept apart from the input registers
+  format %{ "GetAndSetB $res, $mem_ptr, $src" %}
+  ins_encode %{
+    __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update()); // no temp registers are passed in this variant
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct getAndSetB4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
+  match(Set res (GetAndSetB mem_ptr src));
+  predicate(!VM_Version::has_lqarx()); // fallback used when has_lqarx() is false
+  effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // tmp1/tmp2 get overwritten, hence dst register classes; mem_ptr is declared clobbered
+  format %{ "GetAndSetB $res, $mem_ptr, $src" %}
+  ins_encode %{
+    __ getandsetb($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); // R0, tmp1, tmp2 are scratch for the macro
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct getAndSetS(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
+  match(Set res (GetAndSetS mem_ptr src));
+  predicate(VM_Version::has_lqarx()); // has_lqarx() presumably doubles as the >=Power8 (lharx/sthcx) check - TODO confirm
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF: res is also treated as a temp, i.e. kept apart from the input registers
+  format %{ "GetAndSetS $res, $mem_ptr, $src" %}
+  ins_encode %{
+    __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  noreg, noreg, noreg, MacroAssembler::cmpxchgx_hint_atomic_update()); // no temp registers are passed in this variant
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct getAndSetS4(iRegIdst res, rarg3RegP mem_ptr, iRegIsrc src, iRegIdst tmp1, iRegIdst tmp2, flagsRegCR0 cr0) %{
+  match(Set res (GetAndSetS mem_ptr src));
+  predicate(!VM_Version::has_lqarx()); // fallback used when has_lqarx() is false
+  effect(TEMP_DEF res, USE_KILL mem_ptr, TEMP tmp1, TEMP tmp2, TEMP cr0); // tmp1/tmp2 get overwritten, hence dst register classes; mem_ptr is declared clobbered
+  format %{ "GetAndSetS $res, $mem_ptr, $src" %}
+  ins_encode %{
+    __ getandseth($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  R0, $tmp1$$Register, $tmp2$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); // R0, tmp1, tmp2 are scratch for the macro
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
   ins_pipe(pipe_class_default);
 %}
 
 instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetI mem_ptr src));
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF: res is also treated as a temp, i.e. kept apart from the input registers
   format %{ "GetAndSetI $res, $mem_ptr, $src" %}
-  // Variable size: instruction count smaller if regs are disjoint.
-  ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
+  ins_encode %{
+    __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  MacroAssembler::cmpxchgx_hint_atomic_update());
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
   ins_pipe(pipe_class_default);
 %}
 
 instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetL mem_ptr src));
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF: res is also treated as a temp, i.e. kept apart from the input registers
   format %{ "GetAndSetL $res, $mem_ptr, $src" %}
-  // Variable size: instruction count smaller if regs are disjoint.
-  ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
+  ins_encode %{
+    __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  MacroAssembler::cmpxchgx_hint_atomic_update());
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
   ins_pipe(pipe_class_default);
 %}
 
 instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetP mem_ptr src));
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF: res is also treated as a temp, i.e. kept apart from the input registers
   format %{ "GetAndSetP $res, $mem_ptr, $src" %}
-  // Variable size: instruction count smaller if regs are disjoint.
-  ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
+  ins_encode %{
+    __ getandsetd($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  MacroAssembler::cmpxchgx_hint_atomic_update()); // same doubleword macro as getAndSetL
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
   ins_pipe(pipe_class_default);
 %}
 
 instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetN mem_ptr src));
-  effect(TEMP cr0);
+  effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF: res is also treated as a temp, i.e. kept apart from the input registers
   format %{ "GetAndSetN $res, $mem_ptr, $src" %}
-  // Variable size: instruction count smaller if regs are disjoint.
-  ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
+  ins_encode %{
+    __ getandsetw($res$$Register, $src$$Register, $mem_ptr$$Register,
+                  MacroAssembler::cmpxchgx_hint_atomic_update()); // same word macro as getAndSetI
+    if (support_IRIW_for_not_multiple_copy_atomic_cpu) { // selects the barrier emitted after the atomic update
+      __ isync();
+    } else {
+      __ sync();
+    }
+  %}
   ins_pipe(pipe_class_default);
 %}
 
@@ -11360,7 +11715,7 @@
   effect(USE_KILL cnt, USE_KILL base, KILL ctr);
   ins_cost(MEMORY_REF_COST);
 
-  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
+  ins_alignment(4); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
 
   format %{ "ClearArray $cnt, $base" %}
   ins_encode %{
@@ -11686,7 +12041,6 @@
                        flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
   match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
   effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
-  predicate(CompactStrings);
   ins_cost(180);
 
   format %{ "String IndexOfChar $haystack[0..$haycnt], $ch"
@@ -11948,283 +12302,6 @@
 %}
 
 
-// String_IndexOf for needle of length 1.
-//
-// Match needle into immediate operands: no loadConP node needed. Saves one
-// register and two instructions over string_indexOf_imm1Node.
-//
-// Assumes register result differs from all input registers.
-//
-// Preserves registers haystack, haycnt
-// Kills     registers tmp1, tmp2
-// Defines   registers result
-//
-// Use dst register classes if register gets killed, as it is the case for tmp registers!
-//
-// Unfortunately this does not match too often. In many situations the AddP is used
-// by several nodes, even several StrIndexOf nodes, breaking the match tree.
-instruct string_indexOf_imm1_char(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
-                                  immP needleImm, immL offsetImm, immI_1 needlecntImm,
-                                  iRegIdst tmp1, iRegIdst tmp2,
-                                  flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
-  predicate(SpecialStringIndexOf && !CompactStrings);  // type check implicit by parameter type, See Matcher::match_rule_supported
-  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
-
-  effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
-
-  ins_cost(150);
-  format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
-            "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
-
-  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    immPOper *needleOper = (immPOper *)$needleImm;
-    const TypeOopPtr *t = needleOper->type()->isa_oopptr();
-    ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
-    jchar chr;
-    if (java_lang_String::has_coder_field()) {
-      // New compact strings byte array strings
-#ifdef VM_LITTLE_ENDIAN
-    chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
-           ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
-#else
-    chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
-           ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
-#endif
-    } else {
-      // Old char array strings
-      chr = needle_values->char_at(0);
-    }
-    __ string_indexof_1($result$$Register,
-                        $haystack$$Register, $haycnt$$Register,
-                        R0, chr,
-                        $tmp1$$Register, $tmp2$$Register);
-  %}
-  ins_pipe(pipe_class_compare);
-%}
-
-// String_IndexOf for needle of length 1.
-//
-// Special case requires less registers and emits less instructions.
-//
-// Assumes register result differs from all input registers.
-//
-// Preserves registers haystack, haycnt
-// Kills     registers tmp1, tmp2, needle
-// Defines   registers result
-//
-// Use dst register classes if register gets killed, as it is the case for tmp registers!
-instruct string_indexOf_imm1(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
-                             rscratch2RegP needle, immI_1 needlecntImm,
-                             iRegIdst tmp1, iRegIdst tmp2,
-                             flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
-  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
-  effect(USE_KILL needle, /* TDEF needle, */ TEMP_DEF result,
-         TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
-  // Required for EA: check if it is still a type_array.
-  predicate(SpecialStringIndexOf && !CompactStrings &&
-            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
-            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
-  ins_cost(180);
-
-  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
-
-  format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
-            " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    Node *ndl = in(operand_index($needle));  // The node that defines needle.
-    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
-    guarantee(needle_values, "sanity");
-    jchar chr;
-    if (java_lang_String::has_coder_field()) {
-      // New compact strings byte array strings
-#ifdef VM_LITTLE_ENDIAN
-    chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
-           ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
-#else
-    chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
-           ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
-#endif
-    } else {
-      // Old char array strings
-      chr = needle_values->char_at(0);
-    }
-    __ string_indexof_1($result$$Register,
-                        $haystack$$Register, $haycnt$$Register,
-                        R0, chr,
-                        $tmp1$$Register, $tmp2$$Register);
-  %}
-  ins_pipe(pipe_class_compare);
-%}
-
-// String_IndexOfChar
-//
-// Assumes register result differs from all input registers.
-//
-// Preserves registers haystack, haycnt
-// Kills     registers tmp1, tmp2
-// Defines   registers result
-//
-// Use dst register classes if register gets killed, as it is the case for tmp registers!
-instruct string_indexOfChar(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
-                            iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
-                            flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
-  match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
-  effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
-  predicate(SpecialStringIndexOf && !CompactStrings);
-  ins_cost(180);
-
-  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
-
-  format %{ "String IndexOfChar $haystack[0..$haycnt], $ch"
-            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
-  ins_encode %{
-    __ string_indexof_1($result$$Register,
-                        $haystack$$Register, $haycnt$$Register,
-                        $ch$$Register, 0 /* this is not used if the character is already in a register */,
-                        $tmp1$$Register, $tmp2$$Register);
-  %}
-  ins_pipe(pipe_class_compare);
-%}
-
-// String_IndexOf.
-//
-// Length of needle as immediate. This saves instruction loading constant needle
-// length.
-// @@@ TODO Specify rules for length < 8 or so, and roll out comparison of needle
-// completely or do it in vector instruction. This should save registers for
-// needlecnt and needle.
-//
-// Assumes register result differs from all input registers.
-// Overwrites haycnt, needlecnt.
-// Use dst register classes if register gets killed, as it is the case for tmp registers!
-instruct string_indexOf_imm(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
-                            iRegPsrc needle, uimmI15 needlecntImm,
-                            iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
-                            flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
-  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
-  effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
-         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
-  // Required for EA: check if it is still a type_array.
-  predicate(SpecialStringIndexOf && !CompactStrings && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
-            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
-  ins_cost(250);
-
-  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
-
-  format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
-            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    Node *ndl = in(operand_index($needle));  // The node that defines needle.
-    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
-
-    __ string_indexof($result$$Register,
-                      $haystack$$Register, $haycnt$$Register,
-                      $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
-                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register);
-  %}
-  ins_pipe(pipe_class_compare);
-%}
-
-// StrIndexOf node.
-//
-// Assumes register result differs from all input registers.
-// Overwrites haycnt, needlecnt.
-// Use dst register classes if register gets killed, as it is the case for tmp registers!
-instruct string_indexOf(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
-                        iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
-                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
-  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
-  effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
-         TEMP_DEF result,
-         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
-  predicate(SpecialStringIndexOf && !CompactStrings);  // See Matcher::match_rule_supported.
-  ins_cost(300);
-
-  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
-
-  format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
-             " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ string_indexof($result$$Register,
-                      $haystack$$Register, $haycnt$$Register,
-                      $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
-                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register);
-  %}
-  ins_pipe(pipe_class_compare);
-%}
-
-// String equals with immediate.
-instruct string_equals_imm(iRegPsrc str1, iRegPsrc str2, uimmI15 cntImm, iRegIdst result,
-                           iRegPdst tmp1, iRegPdst tmp2,
-                           flagsRegCR0 cr0, flagsRegCR6 cr6, regCTR ctr) %{
-  match(Set result (StrEquals (Binary str1 str2) cntImm));
-  effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2,
-         KILL cr0, KILL cr6, KILL ctr);
-  predicate(SpecialStringEquals && !CompactStrings);  // See Matcher::match_rule_supported.
-  ins_cost(250);
-
-  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
-
-  format %{ "String Equals SCL [0..$cntImm]($str1),[0..$cntImm]($str2)"
-            " -> $result \t// KILL $cr0, $cr6, $ctr, TEMP $result, $tmp1, $tmp2" %}
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ char_arrays_equalsImm($str1$$Register, $str2$$Register, $cntImm$$constant,
-                             $result$$Register, $tmp1$$Register, $tmp2$$Register);
-  %}
-  ins_pipe(pipe_class_compare);
-%}
-
-// String equals.
-// Use dst register classes if register gets killed, as it is the case for TEMP operands!
-instruct string_equals(iRegPsrc str1, iRegPsrc str2, iRegIsrc cnt, iRegIdst result,
-                       iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3, iRegPdst tmp4, iRegPdst tmp5,
-                       flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
-  match(Set result (StrEquals (Binary str1 str2) cnt));
-  effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
-         KILL cr0, KILL cr1, KILL cr6, KILL ctr);
-  predicate(SpecialStringEquals && !CompactStrings);  // See Matcher::match_rule_supported.
-  ins_cost(300);
-
-  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
-
-  format %{ "String Equals [0..$cnt]($str1),[0..$cnt]($str2) -> $result"
-            " \t// KILL $cr0, $cr1, $cr6, $ctr, TEMP $result, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ char_arrays_equals($str1$$Register, $str2$$Register, $cnt$$Register, $result$$Register,
-                          $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register);
-  %}
-  ins_pipe(pipe_class_compare);
-%}
-
-// String compare.
-// Char[] pointers are passed in.
-// Use dst register classes if register gets killed, as it is the case for TEMP operands!
-instruct string_compare(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
-                        iRegPdst tmp, flagsRegCR0 cr0, regCTR ctr) %{
-  predicate(!CompactStrings);
-  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
-  effect(USE_KILL cnt1, USE_KILL cnt2, USE_KILL str1, USE_KILL str2, TEMP_DEF result, TEMP tmp, KILL cr0, KILL ctr);
-  ins_cost(300);
-
-  ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
-
-  format %{ "String Compare $str1[0..$cnt1], $str2[0..$cnt2] -> $result"
-            " \t// TEMP $tmp, $result KILLs $str1, $cnt1, $str2, $cnt2, $cr0, $ctr" %}
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ string_compare($str1$$Register, $str2$$Register, $cnt1$$Register, $cnt2$$Register,
-                      $result$$Register, $tmp$$Register);
-  %}
-  ins_pipe(pipe_class_compare);
-%}
-
 //---------- Min/Max Instructions ---------------------------------------------
 
 instruct minI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{