changeset 13041:70c6fae64754

Merge
author jwilhelm
date Mon, 19 Dec 2016 00:49:34 +0100
parents 33252c0b0ab4 66e2100be052
children 98fe046473c9
files
diffstat 10 files changed, 133 insertions(+), 127 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/ppc/vm/c1_MacroAssembler_ppc.cpp	Sun Dec 18 15:37:50 2016 +0100
+++ b/src/cpu/ppc/vm/c1_MacroAssembler_ppc.cpp	Mon Dec 19 00:49:34 2016 +0100
@@ -238,72 +238,15 @@
                                         int obj_size_in_bytes, int hdr_size_in_bytes) {
   const int index = (obj_size_in_bytes - hdr_size_in_bytes) / HeapWordSize;
 
-  const int cl_size         = VM_Version::L1_data_cache_line_size(),
-            cl_dwords       = cl_size>>3,
-            cl_dw_addr_bits = exact_log2(cl_dwords);
+  // 2x unrolled loop is shorter with more than 9 HeapWords.
+  if (index <= 9) {
+    clear_memory_unrolled(obj, index, R0, hdr_size_in_bytes);
+  } else {
+    const Register base_ptr = tmp1,
+                   cnt_dwords = tmp2;
 
-  const Register tmp = R0,
-                 base_ptr = tmp1,
-                 cnt_dwords = tmp2;
-
-  if (index <= 6) {
-    // Use explicit NULL stores.
-    if (index > 0) { li(tmp, 0); }
-    for (int i = 0; i < index; ++i) { std(tmp, hdr_size_in_bytes + i * HeapWordSize, obj); }
-
-  } else if (index < (2<<cl_dw_addr_bits)-1) {
-    // simple loop
-    Label loop;
-
-    li(cnt_dwords, index);
     addi(base_ptr, obj, hdr_size_in_bytes); // Compute address of first element.
-    li(tmp, 0);
-    mtctr(cnt_dwords);                      // Load counter.
-  bind(loop);
-    std(tmp, 0, base_ptr);                  // Clear 8byte aligned block.
-    addi(base_ptr, base_ptr, 8);
-    bdnz(loop);
-
-  } else {
-    // like clear_memory_doubleword
-    Label startloop, fast, fastloop, restloop, done;
-
-    addi(base_ptr, obj, hdr_size_in_bytes);           // Compute address of first element.
-    load_const_optimized(cnt_dwords, index);
-    rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
-    beq(CCR0, fast);                                  // Already 128byte aligned.
-
-    subfic(tmp, tmp, cl_dwords);
-    mtctr(tmp);                        // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
-    subf(cnt_dwords, tmp, cnt_dwords); // rest.
-    li(tmp, 0);
-
-  bind(startloop);                     // Clear at the beginning to reach 128byte boundary.
-    std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
-    addi(base_ptr, base_ptr, 8);
-    bdnz(startloop);
-
-  bind(fast);                                  // Clear 128byte blocks.
-    srdi(tmp, cnt_dwords, cl_dw_addr_bits);    // Loop count for 128byte loop (>0).
-    andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
-    mtctr(tmp);                                // Load counter.
-
-  bind(fastloop);
-    dcbz(base_ptr);                    // Clear 128byte aligned block.
-    addi(base_ptr, base_ptr, cl_size);
-    bdnz(fastloop);
-
-    cmpdi(CCR0, cnt_dwords, 0);        // size 0?
-    beq(CCR0, done);                   // rest == 0
-    li(tmp, 0);
-    mtctr(cnt_dwords);                 // Load counter.
-
-  bind(restloop);                      // Clear rest.
-    std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
-    addi(base_ptr, base_ptr, 8);
-    bdnz(restloop);
-
-  bind(done);
+    clear_memory_doubleword(base_ptr, cnt_dwords, R0, index);
   }
 }
 
--- a/src/cpu/ppc/vm/globals_ppc.hpp	Sun Dec 18 15:37:50 2016 +0100
+++ b/src/cpu/ppc/vm/globals_ppc.hpp	Mon Dec 19 00:49:34 2016 +0100
@@ -77,7 +77,8 @@
 
 define_pd_global(bool, CompactStrings, true);
 
-define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
+// 2x unrolled loop is shorter with more than 9 HeapWords.
+define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
 
 // Platform dependent flag handling: flags only defined on this platform.
 #define ARCH_FLAGS(develop, \
--- a/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Sun Dec 18 15:37:50 2016 +0100
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Mon Dec 19 00:49:34 2016 +0100
@@ -3332,53 +3332,90 @@
 }
 
 // Clear Array
+// For very short arrays: emits one 8-byte store of zero per doubleword, fully unrolled. tmp == R0 is allowed.
+void MacroAssembler::clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp, int offset) {
+  if (cnt_dwords > 0) { li(tmp, 0); } // Zero source for the stores; emits nothing when cnt_dwords <= 0.
+  for (int i = 0; i < cnt_dwords; ++i) { std(tmp, offset + i * 8, base_ptr); } // Stores at offset, offset+8, ...
+}
+
+// Version for constant short array length. Kills base_ptr. tmp == R0 is allowed.
+void MacroAssembler::clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp) {
+  if (cnt_dwords < 8) { // Fewer than 8 doublewords: emit straight-line stores instead of a loop.
+    clear_memory_unrolled(base_ptr, cnt_dwords, tmp);
+    return;
+  }
+
+  Label loop;
+  const long loopcnt   = cnt_dwords >> 1, // Iterations of the 2x unrolled loop.
+             remainder = cnt_dwords & 1;  // 1 if a single trailing doubleword is left over.
+
+  li(tmp, loopcnt); // NOTE(review): li presumably takes a 16-bit immediate - confirm callers keep cnt_dwords small.
+  mtctr(tmp);       // Iteration count goes to CTR for bdnz.
+  li(tmp, 0);       // tmp is reused as the zero source from here on.
+  bind(loop);       // 2x unrolled: clears 16 bytes per iteration.
+    std(tmp, 0, base_ptr);
+    std(tmp, 8, base_ptr);
+    addi(base_ptr, base_ptr, 16);
+    bdnz(loop);
+  if (remainder) { std(tmp, 0, base_ptr); } // Odd count: clear the last doubleword.
+}
+
 // Kills both input registers. tmp == R0 is allowed.
-void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
+void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp, long const_cnt) {
   // Procedure for large arrays (uses data cache block zero instruction).
     Label startloop, fast, fastloop, small_rest, restloop, done;
     const int cl_size         = VM_Version::L1_data_cache_line_size(),
-              cl_dwords       = cl_size>>3,
+              cl_dwords       = cl_size >> 3,
               cl_dw_addr_bits = exact_log2(cl_dwords),
-              dcbz_min        = 1;                     // Min count of dcbz executions, needs to be >0.
-
-//2:
-    cmpdi(CCR1, cnt_dwords, ((dcbz_min+1)<<cl_dw_addr_bits)-1); // Big enough? (ensure >=dcbz_min lines included).
-    blt(CCR1, small_rest);                                      // Too small.
-    rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits);           // Extract dword offset within first cache line.
-    beq(CCR0, fast);                                            // Already 128byte aligned.
+              dcbz_min        = 1,  // Min count of dcbz executions, needs to be >0.
+              min_cnt         = ((dcbz_min + 1) << cl_dw_addr_bits) - 1; // Smallest count that takes the dcbz path.
+
+  if (const_cnt >= 0) {
+    // Constant case: the count is known while generating code, so the size check is done here, not emitted.
+    if (const_cnt < min_cnt) {
+      clear_memory_constlen(base_ptr, const_cnt, tmp); // Too small for dcbz: emit the constant-length stores.
+      return;
+    }
+    load_const_optimized(cnt_dwords, const_cnt, tmp); // Put the constant count into cnt_dwords for the code below.
+  } else {
+    // cnt_dwords already loaded in register. Need to check size.
+    cmpdi(CCR1, cnt_dwords, min_cnt); // Big enough? (ensure >= dcbz_min lines included).
+    blt(CCR1, small_rest);            // Too small: go straight to the rest loop.
+  }
+    rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
+    beq(CCR0, fast);                                  // Already 128byte aligned.
 
     subfic(tmp, tmp, cl_dwords);
     mtctr(tmp);                        // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
     subf(cnt_dwords, tmp, cnt_dwords); // rest.
     li(tmp, 0);
-//10:
+
   bind(startloop);                     // Clear at the beginning to reach 128byte boundary.
     std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
     addi(base_ptr, base_ptr, 8);
     bdnz(startloop);
-//13:
+
   bind(fast);                                  // Clear 128byte blocks.
     srdi(tmp, cnt_dwords, cl_dw_addr_bits);    // Loop count for 128byte loop (>0).
     andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
     mtctr(tmp);                                // Load counter.
-//16:
+
   bind(fastloop);
     dcbz(base_ptr);                    // Clear 128byte aligned block.
     addi(base_ptr, base_ptr, cl_size);
     bdnz(fastloop);
-    if (InsertEndGroupPPC64) { endgroup(); } else { nop(); }
-//20:
+
   bind(small_rest);
     cmpdi(CCR0, cnt_dwords, 0);        // size 0?
     beq(CCR0, done);                   // rest == 0
     li(tmp, 0);
     mtctr(cnt_dwords);                 // Load counter.
-//24:
+
   bind(restloop);                      // Clear rest.
     std(tmp, 0, base_ptr);             // Clear 8byte aligned block.
     addi(base_ptr, base_ptr, 8);
     bdnz(restloop);
-//27:
+
   bind(done);
 }
 
--- a/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Sun Dec 18 15:37:50 2016 +0100
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Mon Dec 19 00:49:34 2016 +0100
@@ -755,7 +755,9 @@
            is_trap_range_check_g(x) || is_trap_range_check_ge(x);
   }
 
-  void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0);
+  void clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp = R0, int offset = 0); // Unrolled zero stores; for very short arrays.
+  void clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp = R0); // Constant short length; kills base_ptr.
+  void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0, long const_cnt = -1); // const_cnt < 0 (default): count is taken from cnt_dwords.
 
 #ifdef COMPILER2
   // Intrinsics for CompactStrings
--- a/src/cpu/ppc/vm/ppc.ad	Sun Dec 18 15:37:50 2016 +0100
+++ b/src/cpu/ppc/vm/ppc.ad	Mon Dec 19 00:49:34 2016 +0100
@@ -965,10 +965,7 @@
 // is the number of bytes (not instructions) which will be inserted before
 // the instruction. The padding must match the size of a NOP instruction.
 
-int inlineCallClearArrayNode::compute_padding(int current_offset) const {
-  int desired_padding = (2*4-current_offset)&31; // see MacroAssembler::clear_memory_doubleword
-  return (desired_padding <= 3*4) ? desired_padding : 0;
-}
+// Currently not used on this platform.
 
 //=============================================================================
 
@@ -4066,6 +4063,14 @@
   interface(CONST_INTER);
 %}
 
+operand immLmax30() %{
+  predicate((n->get_long() <= 30)); // Upper bound only; no lower bound is checked here.
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 // Long Immediate: 16-bit
 operand immL16() %{
   predicate(Assembler::is_simm(n->get_long(), 16));
@@ -11735,18 +11740,44 @@
   ins_pipe(pipe_class_default);
 %}
 
-// Clear-array with dynamic array-size.
+// Clear-array with constant short array length (cnt <= 30, see immLmax30). The versions below can use dcbz with cnt > 30.
+instruct inlineCallClearArrayShort(immLmax30 cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL base, KILL ctr); // CTR may be clobbered by the store loop (mtctr/bdnz).
+  ins_cost(2 * MEMORY_REF_COST); // Lowest cost of the three ClearArray rules.
+
+  format %{ "ClearArray $cnt, $base" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ clear_memory_constlen($base$$Register, $cnt$$constant, R0); // kills base, R0
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Clear-array with constant large array length. clear_memory_doubleword loads the constant count into tmp.
+instruct inlineCallClearArrayLarge(immL cnt, rarg2RegP base, Universe dummy, iRegLdst tmp, regCTR ctr) %{
+  match(Set dummy (ClearArray cnt base));
+  effect(USE_KILL base, TEMP tmp, KILL ctr);
+  ins_cost(3 * MEMORY_REF_COST); // NOTE(review): presumably the higher cost steers constants <= 30 to the Short rule - confirm.
+
+  format %{ "ClearArray $cnt, $base \t// KILL $tmp" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ clear_memory_doubleword($base$$Register, $tmp$$Register, R0, $cnt$$constant); // kills base, tmp, R0
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// Clear-array with dynamic array length.
 instruct inlineCallClearArray(rarg1RegL cnt, rarg2RegP base, Universe dummy, regCTR ctr) %{
   match(Set dummy (ClearArray cnt base));
   effect(USE_KILL cnt, USE_KILL base, KILL ctr);
-  ins_cost(MEMORY_REF_COST);
-
-  ins_alignment(4); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
+  ins_cost(4 * MEMORY_REF_COST);
 
   format %{ "ClearArray $cnt, $base" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ clear_memory_doubleword($base$$Register, $cnt$$Register); // kills cnt, base, R0
+    __ clear_memory_doubleword($base$$Register, $cnt$$Register, R0); // kills cnt, base, R0
   %}
   ins_pipe(pipe_class_default);
 %}
--- a/src/share/vm/aot/aotLoader.cpp	Sun Dec 18 15:37:50 2016 +0100
+++ b/src/share/vm/aot/aotLoader.cpp	Mon Dec 19 00:49:34 2016 +0100
@@ -125,14 +125,18 @@
   if (UseAOT) {
     // EagerInitialization is not compatible with AOT
     if (EagerInitialization) {
-      warning("EagerInitialization is not compatible with AOT (switching AOT off)");
+      if (PrintAOT) {
+        warning("EagerInitialization is not compatible with AOT (switching AOT off)");
+      }
       FLAG_SET_DEFAULT(UseAOT, false);
       return;
     }
 
     // -Xint is not compatible with AOT
     if (Arguments::is_interpreter_only()) {
-      warning("-Xint is not compatible with AOT (switching AOT off)");
+      if (PrintAOT) {
+        warning("-Xint is not compatible with AOT (switching AOT off)");
+      }
       FLAG_SET_DEFAULT(UseAOT, false);
       return;
     }
--- a/test/Makefile	Sun Dec 18 15:37:50 2016 +0100
+++ b/test/Makefile	Mon Dec 19 00:49:34 2016 +0100
@@ -95,29 +95,6 @@
 ALT_MAKE ?= closed
 -include $(ALT_MAKE)/Makefile
 
-# Make sure jtreg exists
-$(JTREG): $(JT_HOME)
-
-jtreg_tests: prep $(PRODUCT_HOME) $(JTREG)
-	(                                                                    \
-	  ( JT_HOME=$(shell $(GETMIXEDPATH) "$(JT_HOME)");                   \
-            export JT_HOME;                                                  \
-            $(shell $(GETMIXEDPATH) "$(JTREG)")                              \
-              $(JTREG_BASIC_OPTIONS)                                         \
-              -r:$(shell $(GETMIXEDPATH) "$(ABS_TEST_OUTPUT_DIR)/JTreport")  \
-              -w:$(shell $(GETMIXEDPATH) "$(ABS_TEST_OUTPUT_DIR)/JTwork")    \
-              -jdk:$(shell $(GETMIXEDPATH) "$(PRODUCT_HOME)")                \
-              $(JTREG_NATIVE_PATH)                                           \
-              $(JTREG_FAILURE_HANDLER_OPTIONS)                               \
-              $(JTREG_EXCLUSIONS)                                            \
-              $(JTREG_TEST_OPTIONS)                                          \
-              $(TEST_SELECTION)                                              \
-	  ) ;                                                                \
-	  $(BUNDLE_UP_AND_EXIT)                                              \
-	) 2>&1 | $(TEE) $(ABS_TEST_OUTPUT_DIR)/output.txt ; $(TESTEXIT)
-
-PHONY_LIST += jtreg_tests
-
 # flags used to execute java in test targets
 TEST_FLAGS += -version -Xinternalversion -X -help
 
--- a/test/compiler/aot/DeoptimizationTest.java	Sun Dec 18 15:37:50 2016 +0100
+++ b/test/compiler/aot/DeoptimizationTest.java	Mon Dec 19 00:49:34 2016 +0100
@@ -33,7 +33,9 @@
  * @run main compiler.aot.AotCompiler -libname libDeoptimizationTest.so
  *     -class compiler.aot.DeoptimizationTest
  *     -compile compiler.aot.DeoptimizationTest.testMethod()D
+ *     -extraopt -XX:-UseCompressedOops
  * @run main/othervm -Xmixed -XX:+UseAOT -XX:+TieredCompilation
+ *     -XX:-UseCompressedOops
  *     -XX:CompileCommand=dontinline,compiler.aot.DeoptimizationTest::*
  *     -XX:AOTLibrary=./libDeoptimizationTest.so -Xbootclasspath/a:.
  *     -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
--- a/test/compiler/aot/RecompilationTest.java	Sun Dec 18 15:37:50 2016 +0100
+++ b/test/compiler/aot/RecompilationTest.java	Mon Dec 19 00:49:34 2016 +0100
@@ -30,29 +30,36 @@
  *        compiler.aot.AotCompiler
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *     sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main compiler.aot.AotCompiler -libname libRecompilationTest.so
+ * @run main compiler.aot.AotCompiler -libname libRecompilationTest1.so
  *     -class compiler.whitebox.SimpleTestCaseHelper
  *     -extraopt -Dgraal.TieredAOT=true -extraopt -Dgraal.ProfileSimpleMethods=true
  *     -extraopt -XX:+UnlockDiagnosticVMOptions -extraopt -XX:+WhiteBoxAPI -extraopt -Xbootclasspath/a:.
- * @run main/othervm -Xmixed -Xbatch -XX:+UseAOT -XX:+TieredCompilation -XX:-UseCounterDecay
- *     -XX:CompileCommand=dontinline,*.*
- *     -XX:AOTLibrary=./libRecompilationTest.so -Xbootclasspath/a:.
+ *     -extraopt -XX:-UseCompressedOops
+ *     -extraopt -XX:CompileCommand=dontinline,compiler.whitebox.SimpleTestCaseHelper::*
+ * @run main/othervm -Xmixed -Xbatch -XX:+UseAOT -XX:+TieredCompilation
+ *     -XX:-UseCounterDecay -XX:-UseCompressedOops
+ *     -XX:-Inline
+ *     -XX:AOTLibrary=./libRecompilationTest1.so -Xbootclasspath/a:.
  *     -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
  *     -Dcompiler.aot.RecompilationTest.check_level=1
  *     compiler.aot.RecompilationTest
- * @run main compiler.aot.AotCompiler -libname libRecompilationTest.so
+ * @run main compiler.aot.AotCompiler -libname libRecompilationTest2.so
  *     -class compiler.whitebox.SimpleTestCaseHelper
  *     -extraopt -Dgraal.TieredAOT=false
  *     -extraopt -XX:+UnlockDiagnosticVMOptions -extraopt -XX:+WhiteBoxAPI -extraopt -Xbootclasspath/a:.
- * @run main/othervm -Xmixed -Xbatch -XX:+UseAOT -XX:+TieredCompilation -XX:-UseCounterDecay
- *     -XX:CompileCommand=dontinline,*.*
- *     -XX:AOTLibrary=./libRecompilationTest.so -Xbootclasspath/a:.
+ *     -extraopt -XX:-UseCompressedOops
+ *     -extraopt -XX:CompileCommand=dontinline,compiler.whitebox.SimpleTestCaseHelper::*
+ * @run main/othervm -Xmixed -Xbatch -XX:+UseAOT -XX:+TieredCompilation
+ *     -XX:-UseCounterDecay -XX:-UseCompressedOops
+ *     -XX:-Inline
+ *     -XX:AOTLibrary=./libRecompilationTest2.so -Xbootclasspath/a:.
  *     -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
  *     -Dcompiler.aot.RecompilationTest.check_level=-1
  *     compiler.aot.RecompilationTest
- * @run main/othervm -Xmixed -Xbatch -XX:+UseAOT -XX:-TieredCompilation -XX:-UseCounterDecay
- *     -XX:CompileCommand=dontinline,*.*
- *     -XX:AOTLibrary=./libRecompilationTest.so -Xbootclasspath/a:.
+ * @run main/othervm -Xmixed -Xbatch -XX:+UseAOT -XX:-TieredCompilation
+ *     -XX:-UseCounterDecay -XX:-UseCompressedOops
+ *     -XX:-Inline
+ *     -XX:AOTLibrary=./libRecompilationTest2.so -Xbootclasspath/a:.
  *     -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
  *     -Dcompiler.aot.RecompilationTest.check_level=-1
  *     compiler.aot.RecompilationTest
--- a/test/compiler/aot/SharedUsageTest.java	Sun Dec 18 15:37:50 2016 +0100
+++ b/test/compiler/aot/SharedUsageTest.java	Mon Dec 19 00:49:34 2016 +0100
@@ -30,7 +30,9 @@
  *        compiler.aot.AotCompiler
  * @run main compiler.aot.AotCompiler -libname libSharedUsageTest.so
  *      -class compiler.aot.SharedUsageTest
+ *      -extraopt -XX:-UseCompressedOops
  * @run main/othervm -XX:+UseAOT -XX:AOTLibrary=./libSharedUsageTest.so
+ *      -XX:-UseCompressedOops
  *      -Dcompiler.aot.SharedUsageTest.parent=true
  *      compiler.aot.SharedUsageTest
  * @summary check if .so can be successfully shared with 2 java processes