changeset 3451:6f8f439e247d

7177923: SIGBUS on sparc in compiled code for java.util.Calendar.clear()
Summary: Disable vectorization of a memory access that has more elements per vector than the access used for alignment on sparc.
Reviewed-by: twisti
author kvn
date Tue, 19 Jun 2012 15:12:56 -0700
parents 765ee2d1674b
children 40782a131183
files src/cpu/x86/vm/x86.ad src/share/vm/opto/c2_globals.hpp src/share/vm/opto/superword.cpp src/share/vm/opto/superword.hpp
diffstat 4 files changed, 39 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/x86/vm/x86.ad	Mon Jun 18 15:17:30 2012 -0700
+++ b/src/cpu/x86/vm/x86.ad	Tue Jun 19 15:12:56 2012 -0700
@@ -2061,7 +2061,7 @@
 // Integer could be loaded into xmm register directly from memory.
 instruct Repl2I_mem(vecD dst, memory mem) %{
   predicate(n->as_Vector()->length() == 2);
-  match(Set dst (ReplicateI mem));
+  match(Set dst (ReplicateI (LoadVector mem)));
   format %{ "movd    $dst,$mem\n\t"
             "pshufd  $dst,$dst,0x00\t! replicate2I" %}
   ins_encode %{
@@ -2073,7 +2073,7 @@
 
 instruct Repl4I_mem(vecX dst, memory mem) %{
   predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateI mem));
+  match(Set dst (ReplicateI (LoadVector mem)));
   format %{ "movd    $dst,$mem\n\t"
             "pshufd  $dst,$dst,0x00\t! replicate4I" %}
   ins_encode %{
@@ -2085,7 +2085,7 @@
 
 instruct Repl8I_mem(vecY dst, memory mem) %{
   predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateI mem));
+  match(Set dst (ReplicateI (LoadVector mem)));
   format %{ "movd    $dst,$mem\n\t"
             "pshufd  $dst,$dst,0x00\n\t"
             "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
@@ -2225,7 +2225,7 @@
 // Long could be loaded into xmm register directly from memory.
 instruct Repl2L_mem(vecX dst, memory mem) %{
   predicate(n->as_Vector()->length() == 2);
-  match(Set dst (ReplicateL mem));
+  match(Set dst (ReplicateL (LoadVector mem)));
   format %{ "movq    $dst,$mem\n\t"
             "movlhps $dst,$dst\t! replicate2L" %}
   ins_encode %{
@@ -2237,7 +2237,7 @@
 
 instruct Repl4L_mem(vecY dst, memory mem) %{
   predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateL mem));
+  match(Set dst (ReplicateL (LoadVector mem)));
   format %{ "movq    $dst,$mem\n\t"
             "movlhps $dst,$dst\n\t"
             "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
--- a/src/share/vm/opto/c2_globals.hpp	Mon Jun 18 15:17:30 2012 -0700
+++ b/src/share/vm/opto/c2_globals.hpp	Tue Jun 19 15:12:56 2012 -0700
@@ -299,9 +299,12 @@
   develop(bool, SuperWordRTDepCheck, false,                                 \
           "Enable runtime dependency checks.")                              \
                                                                             \
-  product(bool, TraceSuperWord, false,                                      \
+  notproduct(bool, TraceSuperWord, false,                                   \
           "Trace superword transforms")                                     \
                                                                             \
+  notproduct(bool, TraceNewVectors, false,                                  \
+          "Trace creation of Vector nodes")                                 \
+                                                                            \
   product_pd(bool, OptoBundling,                                            \
           "Generate nops to fill i-cache lines")                            \
                                                                             \
--- a/src/share/vm/opto/superword.cpp	Mon Jun 18 15:17:30 2012 -0700
+++ b/src/share/vm/opto/superword.cpp	Tue Jun 19 15:12:56 2012 -0700
@@ -222,7 +222,18 @@
     // Create initial pack pairs of memory operations for which
     // alignment is set and vectors will be aligned.
     bool create_pack = true;
-    if (memory_alignment(mem_ref, best_iv_adjustment) != 0) {
+    if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {
+      if (!Matcher::misaligned_vectors_ok()) {
+        int vw = vector_width(mem_ref);
+        int vw_best = vector_width(best_align_to_mem_ref);
+        if (vw > vw_best) {
+          // Do not vectorize a memory access with more elements per vector
+          // if unaligned memory access is not allowed because number of
+          // iterations in pre-loop will be not enough to align it.
+          create_pack = false;
+        }
+      }
+    } else {
       if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
         // Can't allow vectorization of unaligned memory accesses with the
         // same type since it could be overlapped accesses to the same array.
@@ -357,7 +368,7 @@
   for (uint j = 0; j < memops.size(); j++) {
     MemNode* s = memops.at(j)->as_Mem();
     if (s->is_Store()) {
-      int vw = vector_width_in_bytes(velt_basic_type(s));
+      int vw = vector_width_in_bytes(s);
       assert(vw > 1, "sanity");
       SWPointer p(s, this);
       if (cmp_ct.at(j) >  max_ct ||
@@ -380,7 +391,7 @@
     for (uint j = 0; j < memops.size(); j++) {
       MemNode* s = memops.at(j)->as_Mem();
       if (s->is_Load()) {
-        int vw = vector_width_in_bytes(velt_basic_type(s));
+        int vw = vector_width_in_bytes(s);
         assert(vw > 1, "sanity");
         SWPointer p(s, this);
         if (cmp_ct.at(j) >  max_ct ||
@@ -440,8 +451,7 @@
 
   // If initial offset from start of object is computable,
   // compute alignment within the vector.
-  BasicType bt = velt_basic_type(p.mem());
-  int vw = vector_width_in_bytes(bt);
+  int vw = vector_width_in_bytes(p.mem());
   assert(vw > 1, "sanity");
   if (vw % span == 0) {
     Node* init_nd = pre_end->init_trip();
@@ -468,8 +478,7 @@
   SWPointer align_to_ref_p(mem_ref, this);
   int offset = align_to_ref_p.offset_in_bytes();
   int scale  = align_to_ref_p.scale_in_bytes();
-  BasicType bt = velt_basic_type(mem_ref);
-  int vw       = vector_width_in_bytes(bt);
+  int vw       = vector_width_in_bytes(mem_ref);
   assert(vw > 1, "sanity");
   int stride_sign   = (scale * iv_stride()) > 0 ? 1 : -1;
   int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
@@ -1361,7 +1370,7 @@
       }
       _igvn._worklist.push(vn);
 #ifdef ASSERT
-      if (TraceSuperWord) {
+      if (TraceNewVectors) {
         tty->print("new Vector node: ");
         vn->dump();
       }
@@ -1401,7 +1410,7 @@
     _phase->_igvn.register_new_node_with_optimizer(vn);
     _phase->set_ctrl(vn, _phase->get_ctrl(opd));
 #ifdef ASSERT
-    if (TraceSuperWord) {
+    if (TraceNewVectors) {
       tty->print("new Vector node: ");
       vn->dump();
     }
@@ -1424,8 +1433,8 @@
   _phase->_igvn.register_new_node_with_optimizer(pk);
   _phase->set_ctrl(pk, _phase->get_ctrl(opd));
 #ifdef ASSERT
-    if (TraceSuperWord) {
-      tty->print("new Pack node: ");
+    if (TraceNewVectors) {
+      tty->print("new Vector node: ");
       pk->dump();
     }
 #endif
@@ -1764,7 +1773,7 @@
   if (!p.valid()) {
     return bottom_align;
   }
-  int vw = vector_width_in_bytes(velt_basic_type(s));
+  int vw = vector_width_in_bytes(s);
   if (vw < 2) {
     return bottom_align; // No vectors for this type
   }
@@ -1978,12 +1987,12 @@
   //     N = (V - (e - lim0)) % V
   //     lim = lim0 - (V - (e - lim0)) % V
 
-  int vw = vector_width_in_bytes(velt_basic_type(align_to_ref));
-  assert(vw > 1, "sanity");
+  int vw = vector_width_in_bytes(align_to_ref);
   int stride   = iv_stride();
   int scale    = align_to_ref_p.scale_in_bytes();
   int elt_size = align_to_ref_p.memory_size();
   int v_align  = vw / elt_size;
+  assert(v_align > 1, "sanity");
   int k        = align_to_ref_p.offset_in_bytes() / elt_size;
 
   Node *kn   = _igvn.intcon(k);
--- a/src/share/vm/opto/superword.hpp	Mon Jun 18 15:17:30 2012 -0700
+++ b/src/share/vm/opto/superword.hpp	Tue Jun 19 15:12:56 2012 -0700
@@ -264,11 +264,14 @@
                                      _iv = lp->as_CountedLoop()->phi()->as_Phi(); }
   int      iv_stride()             { return lp()->as_CountedLoop()->stride_con(); }
 
-  int vector_width_in_bytes(BasicType bt) {
-    return MIN2(ABS(iv_stride())*type2aelembytes(bt),
-                Matcher::vector_width_in_bytes(bt));
+  int vector_width(Node* n) {
+    BasicType bt = velt_basic_type(n);
+    return MIN2(ABS(iv_stride()), Matcher::max_vector_size(bt));
   }
-
+  int vector_width_in_bytes(Node* n) {
+    BasicType bt = velt_basic_type(n);
+    return vector_width(n)*type2aelembytes(bt);
+  }
   MemNode* align_to_ref()            { return _align_to_ref; }
   void  set_align_to_ref(MemNode* m) { _align_to_ref = m; }