annotate src/hotspot/share/opto/superword.cpp @ 54048:744dc9c33676

8217417: Decorator name typo: C2_TIGHLY_COUPLED_ALLOC Summary: Fixed typo in decorator name, variables, and comments. Reviewed-by: tschatzl
author kbarrett
date Mon, 11 Mar 2019 02:05:07 -0400
parents 6ffb8d7fe1e4
children 3ebf58dbf5d8
rev   line source
duke@1 1 /*
dlong@48603 2 * Copyright (c) 2007, 2018, Oracle and/or its affiliates. All rights reserved.
duke@1 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@1 4 *
duke@1 5 * This code is free software; you can redistribute it and/or modify it
duke@1 6 * under the terms of the GNU General Public License version 2 only, as
duke@1 7 * published by the Free Software Foundation.
duke@1 8 *
duke@1 9 * This code is distributed in the hope that it will be useful, but WITHOUT
duke@1 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@1 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@1 12 * version 2 for more details (a copy is included in the LICENSE file that
duke@1 13 * accompanied this code).
duke@1 14 *
duke@1 15 * You should have received a copy of the GNU General Public License version
duke@1 16 * 2 along with this work; if not, write to the Free Software Foundation,
duke@1 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@1 18 *
trims@5547 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
trims@5547 20 * or visit www.oracle.com if you need additional information or have any
trims@5547 21 * questions.
duke@1 22 */
duke@1 23
stefank@7397 24 #include "precompiled.hpp"
stefank@7397 25 #include "compiler/compileLog.hpp"
stefank@7397 26 #include "libadt/vectset.hpp"
stefank@7397 27 #include "memory/allocation.inline.hpp"
jprovino@37248 28 #include "memory/resourceArea.hpp"
stefank@7397 29 #include "opto/addnode.hpp"
stefank@7397 30 #include "opto/callnode.hpp"
morris@23528 31 #include "opto/castnode.hpp"
morris@23528 32 #include "opto/convertnode.hpp"
stefank@7397 33 #include "opto/divnode.hpp"
stefank@7397 34 #include "opto/matcher.hpp"
stefank@7397 35 #include "opto/memnode.hpp"
stefank@7397 36 #include "opto/mulnode.hpp"
stefank@7397 37 #include "opto/opcodes.hpp"
morris@23528 38 #include "opto/opaquenode.hpp"
stefank@7397 39 #include "opto/superword.hpp"
stefank@7397 40 #include "opto/vectornode.hpp"
iveresov@33469 41 #include "opto/movenode.hpp"
duke@1 42
duke@1 43 //
duke@1 44 // S U P E R W O R D T R A N S F O R M
duke@1 45 //=============================================================================
duke@1 46
duke@1 47 //------------------------------SuperWord---------------------------
duke@1 48 SuperWord::SuperWord(PhaseIdealLoop* phase) :
duke@1 49 _phase(phase),
tschatzl@51333 50 _arena(phase->C->comp_arena()),
duke@1 51 _igvn(phase->_igvn),
duke@1 52 _packset(arena(), 8, 0, NULL), // packs for the current block
duke@1 53 _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
duke@1 54 _block(arena(), 8, 0, NULL), // nodes in current block
mcberg@38049 55 _post_block(arena(), 8, 0, NULL), // nodes common to current block which are marked as post loop vectorizable
duke@1 56 _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside
duke@1 57 _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads
duke@1 58 _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails
duke@1 59 _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
kvn@30593 60 _clone_map(phase->C->clone_map()), // map of nodes created in cloning
kvn@48309 61 _cmovev_kit(_arena, this), // map to facilitate CMoveV creation
duke@1 62 _align_to_ref(NULL), // memory reference to align vectors to
duke@1 63 _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
duke@1 64 _dg(_arena), // dependence graph
duke@1 65 _visited(arena()), // visited node set
duke@1 66 _post_visited(arena()), // post visited node set
duke@1 67 _n_idx_list(arena(), 8), // scratch list of (node,index) pairs
tschatzl@51333 68 _nlist(arena(), 8, 0, NULL), // scratch list of nodes
duke@1 69 _stk(arena(), 8, 0, NULL), // scratch stack of nodes
duke@1 70 _lpt(NULL), // loop tree node
duke@1 71 _lp(NULL), // LoopNode
duke@1 72 _bb(NULL), // basic block
kvn@30211 73 _iv(NULL), // induction var
kvn@30588 74 _race_possible(false), // cases where SDMU is true
mcberg@31403 75 _early_return(true), // analysis evaluations routine
tschatzl@51333 76 _do_vector_loop(phase->C->do_vector_loop()), // whether to do vectorization/simd style
tschatzl@51333 77 _do_reserve_copy(DoReserveCopyInSuperWord),
kvn@30588 78 _num_work_vecs(0), // amount of vector work we have
kvn@30593 79 _num_reductions(0), // amount of reduction work we have
kvn@30593 80 _ii_first(-1), // first loop generation index - only if do_vector_loop()
kvn@30593 81 _ii_last(-1), // last loop generation index - only if do_vector_loop()
kvn@31858 82 _ii_order(arena(), 8, 0, 0)
kvn@31858 83 {
kvn@31858 84 #ifndef PRODUCT
kvn@31858 85 _vector_loop_debug = 0;
kvn@31858 86 if (_phase->C->method() != NULL) {
neliasso@33451 87 _vector_loop_debug = phase->C->directive()->VectorizeDebugOption;
kvn@31858 88 }
neliasso@33451 89
kvn@31858 90 #endif
kvn@31858 91 }
duke@1 92
duke@1 93 //------------------------------transform_loop---------------------------
mcberg@31403 94 void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
kvn@13104 95 assert(UseSuperWord, "should be");
kvn@13104 96 // Do vectors exist on this architecture?
kvn@13104 97 if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
kvn@13104 98
duke@1 99 assert(lpt->_head->is_CountedLoop(), "must be");
duke@1 100 CountedLoopNode *cl = lpt->_head->as_CountedLoop();
duke@1 101
kvn@10263 102 if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop
kvn@10263 103
mcberg@38049 104 bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
mcberg@38049 105 if (post_loop_allowed) {
mcberg@38049 106 if (cl->is_reduction_loop()) return; // no predication mapping
mcberg@38049 107 Node *limit = cl->limit();
mcberg@38049 108 if (limit->is_Con()) return; // non constant limits only
mcberg@38049 109 // Now check the limit for expressions we do not handle
mcberg@38049 110 if (limit->is_Add()) {
mcberg@38049 111 Node *in2 = limit->in(2);
mcberg@38049 112 if (in2->is_Con()) {
mcberg@38049 113 int val = in2->get_int();
mcberg@38049 114 // should not try to program these cases
mcberg@38049 115 if (val < 0) return;
mcberg@38049 116 }
mcberg@38049 117 }
mcberg@38049 118 }
mcberg@38049 119
mcberg@38049 120 // skip any loop that has not been assigned max unroll by analysis
mcberg@38049 121 if (do_optimization) {
roland@38136 122 if (SuperWordLoopUnrollAnalysis && cl->slp_max_unroll() == 0) return;
mcberg@38049 123 }
mcberg@38049 124
duke@1 125 // Check for no control flow in body (other than exit)
duke@1 126 Node *cl_exit = cl->loopexit();
mcberg@38049 127 if (cl->is_main_loop() && (cl_exit->in(0) != lpt->_head)) {
iveresov@33469 128 #ifndef PRODUCT
iveresov@33469 129 if (TraceSuperWord) {
iveresov@33469 130 tty->print_cr("SuperWord::transform_loop: loop too complicated, cl_exit->in(0) != lpt->_head");
iveresov@33469 131 tty->print("cl_exit %d", cl_exit->_idx); cl_exit->dump();
iveresov@33469 132 tty->print("cl_exit->in(0) %d", cl_exit->in(0)->_idx); cl_exit->in(0)->dump();
iveresov@33469 133 tty->print("lpt->_head %d", lpt->_head->_idx); lpt->_head->dump();
iveresov@33469 134 lpt->dump_head();
iveresov@33469 135 }
iveresov@33469 136 #endif
iveresov@33469 137 return;
iveresov@33469 138 }
duke@1 139
never@352 140 // Make sure there are no extra control users of the loop backedge
never@352 141 if (cl->back_control()->outcnt() != 1) {
never@352 142 return;
never@352 143 }
never@352 144
mcberg@38049 145 // Skip any loops already optimized by slp
mcberg@38049 146 if (cl->is_vectorized_loop()) return;
mcberg@38049 147
zyao@47591 148 if (cl->do_unroll_only()) return;
zyao@47591 149
mcberg@38049 150 if (cl->is_main_loop()) {
mcberg@38049 151 // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
mcberg@38049 152 CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
mcberg@38049 153 if (pre_end == NULL) return;
mcberg@38049 154 Node *pre_opaq1 = pre_end->limit();
mcberg@38049 155 if (pre_opaq1->Opcode() != Op_Opaque1) return;
mcberg@38049 156 }
duke@1 157
duke@1 158 init(); // initialize data structures
duke@1 159
duke@1 160 set_lpt(lpt);
duke@1 161 set_lp(cl);
duke@1 162
kvn@13104 163 // For now, define one block which is the entire loop body
duke@1 164 set_bb(cl);
duke@1 165
mcberg@31403 166 if (do_optimization) {
mcberg@31403 167 assert(_packset.length() == 0, "packset must be empty");
mcberg@31403 168 SLP_extract();
mcberg@38049 169 if (PostLoopMultiversioning && Matcher::has_predicated_vectors()) {
mcberg@38049 170 if (cl->is_vectorized_loop() && cl->is_main_loop() && !cl->is_reduction_loop()) {
mcberg@38049 171 IdealLoopTree *lpt_next = lpt->_next;
mcberg@38049 172 CountedLoopNode *cl_next = lpt_next->_head->as_CountedLoop();
mcberg@38049 173 _phase->has_range_checks(lpt_next);
mcberg@38049 174 if (cl_next->is_post_loop() && !cl_next->range_checks_present()) {
mcberg@38049 175 if (!cl_next->is_vectorized_loop()) {
mcberg@38049 176 int slp_max_unroll_factor = cl->slp_max_unroll();
mcberg@38049 177 cl_next->set_slp_max_unroll(slp_max_unroll_factor);
mcberg@38049 178 }
mcberg@38049 179 }
mcberg@38049 180 }
mcberg@38049 181 }
mcberg@31403 182 }
mcberg@31403 183 }
mcberg@31403 184
mcberg@31403 185 //------------------------------early unrolling analysis------------------------------
kvn@31772 186 void SuperWord::unrolling_analysis(int &local_loop_unroll_factor) {
mcberg@31403 187 bool is_slp = true;
mcberg@31403 188 ResourceMark rm;
mcberg@31403 189 size_t ignored_size = lpt()->_body.size();
mcberg@31403 190 int *ignored_loop_nodes = NEW_RESOURCE_ARRAY(int, ignored_size);
mcberg@31403 191 Node_Stack nstack((int)ignored_size);
kvn@31772 192 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
dlong@48603 193 Node *cl_exit = cl->loopexit_or_null();
mcberg@38049 194 int rpo_idx = _post_block.length();
mcberg@38049 195
mcberg@38049 196 assert(rpo_idx == 0, "post loop block is empty");
mcberg@31403 197
mcberg@31403 198 // First clear the entries
mcberg@31403 199 for (uint i = 0; i < lpt()->_body.size(); i++) {
mcberg@31403 200 ignored_loop_nodes[i] = -1;
mcberg@31403 201 }
mcberg@31403 202
roland@38235 203 int max_vector = Matcher::max_vector_size(T_BYTE);
mcberg@38049 204 bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
mcberg@31403 205
mcberg@31403 206 // Process the loop, some/all of the stack entries will not be in order, ergo
mcberg@31403 207 // need to preprocess the ignored initial state before we process the loop
mcberg@31403 208 for (uint i = 0; i < lpt()->_body.size(); i++) {
mcberg@31403 209 Node* n = lpt()->_body.at(i);
mcberg@31403 210 if (n == cl->incr() ||
mcberg@31403 211 n->is_reduction() ||
mcberg@31403 212 n->is_AddP() ||
mcberg@31403 213 n->is_Cmp() ||
mcberg@31403 214 n->is_IfTrue() ||
mcberg@31403 215 n->is_CountedLoop() ||
mcberg@31403 216 (n == cl_exit)) {
mcberg@31403 217 ignored_loop_nodes[i] = n->_idx;
mcberg@31403 218 continue;
mcberg@31403 219 }
mcberg@31403 220
mcberg@31403 221 if (n->is_If()) {
mcberg@31403 222 IfNode *iff = n->as_If();
mcberg@31403 223 if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) {
mcberg@31403 224 if (lpt()->is_loop_exit(iff)) {
mcberg@31403 225 ignored_loop_nodes[i] = n->_idx;
mcberg@31403 226 continue;
mcberg@31403 227 }
mcberg@31403 228 }
mcberg@31403 229 }
mcberg@31403 230
mcberg@31403 231 if (n->is_Phi() && (n->bottom_type() == Type::MEMORY)) {
mcberg@31403 232 Node* n_tail = n->in(LoopNode::LoopBackControl);
mcberg@31403 233 if (n_tail != n->in(LoopNode::EntryControl)) {
mcberg@31403 234 if (!n_tail->is_Mem()) {
mcberg@31403 235 is_slp = false;
mcberg@31403 236 break;
mcberg@31403 237 }
mcberg@31403 238 }
mcberg@31403 239 }
mcberg@31403 240
mcberg@31403 241 // This must happen after check of phi/if
mcberg@31403 242 if (n->is_Phi() || n->is_If()) {
mcberg@31403 243 ignored_loop_nodes[i] = n->_idx;
mcberg@31403 244 continue;
mcberg@31403 245 }
mcberg@31403 246
mcberg@31403 247 if (n->is_LoadStore() || n->is_MergeMem() ||
mcberg@31403 248 (n->is_Proj() && !n->as_Proj()->is_CFG())) {
mcberg@31403 249 is_slp = false;
mcberg@31403 250 break;
mcberg@31403 251 }
mcberg@31403 252
kvn@31520 253 // Ignore nodes with non-primitive type.
kvn@31520 254 BasicType bt;
kvn@31520 255 if (n->is_Mem()) {
kvn@31520 256 bt = n->as_Mem()->memory_type();
kvn@31520 257 } else {
kvn@31520 258 bt = n->bottom_type()->basic_type();
kvn@31520 259 }
kvn@31520 260 if (is_java_primitive(bt) == false) {
kvn@31520 261 ignored_loop_nodes[i] = n->_idx;
kvn@31520 262 continue;
kvn@31520 263 }
kvn@31520 264
mcberg@31403 265 if (n->is_Mem()) {
kvn@31405 266 MemNode* current = n->as_Mem();
mcberg@31403 267 Node* adr = n->in(MemNode::Address);
mcberg@31403 268 Node* n_ctrl = _phase->get_ctrl(adr);
mcberg@31403 269
mcberg@31403 270 // save a queue of post process nodes
mcberg@31403 271 if (n_ctrl != NULL && lpt()->is_member(_phase->get_loop(n_ctrl))) {
mcberg@31403 272 // Process the memory expression
mcberg@31403 273 int stack_idx = 0;
mcberg@31403 274 bool have_side_effects = true;
mcberg@31403 275 if (adr->is_AddP() == false) {
mcberg@31403 276 nstack.push(adr, stack_idx++);
mcberg@31403 277 } else {
mcberg@31403 278 // Mark the components of the memory operation in nstack
mcberg@31403 279 SWPointer p1(current, this, &nstack, true);
mcberg@31403 280 have_side_effects = p1.node_stack()->is_nonempty();
mcberg@31403 281 }
mcberg@31403 282
mcberg@31403 283 // Process the pointer stack
mcberg@31403 284 while (have_side_effects) {
mcberg@31403 285 Node* pointer_node = nstack.node();
mcberg@31403 286 for (uint j = 0; j < lpt()->_body.size(); j++) {
mcberg@31403 287 Node* cur_node = lpt()->_body.at(j);
mcberg@31403 288 if (cur_node == pointer_node) {
mcberg@31403 289 ignored_loop_nodes[j] = cur_node->_idx;
mcberg@31403 290 break;
mcberg@31403 291 }
mcberg@31403 292 }
mcberg@31403 293 nstack.pop();
mcberg@31403 294 have_side_effects = nstack.is_nonempty();
mcberg@31403 295 }
mcberg@31403 296 }
mcberg@31403 297 }
mcberg@31403 298 }
mcberg@31403 299
mcberg@31403 300 if (is_slp) {
mcberg@31403 301 // Now we try to find the maximum supported consistent vector which the machine
mcberg@31403 302 // description can use
mcberg@38049 303 bool small_basic_type = false;
vdeshpande@46692 304 bool flag_small_bt = false;
mcberg@31403 305 for (uint i = 0; i < lpt()->_body.size(); i++) {
mcberg@31403 306 if (ignored_loop_nodes[i] != -1) continue;
mcberg@31403 307
mcberg@31403 308 BasicType bt;
mcberg@31403 309 Node* n = lpt()->_body.at(i);
kvn@31520 310 if (n->is_Mem()) {
mcberg@31403 311 bt = n->as_Mem()->memory_type();
kvn@31405 312 } else {
mcberg@31403 313 bt = n->bottom_type()->basic_type();
mcberg@31403 314 }
mcberg@38049 315
mcberg@38049 316 if (post_loop_allowed) {
mcberg@38049 317 if (!small_basic_type) {
mcberg@38049 318 switch (bt) {
mcberg@38049 319 case T_CHAR:
mcberg@38049 320 case T_BYTE:
mcberg@38049 321 case T_SHORT:
mcberg@38049 322 small_basic_type = true;
mcberg@38049 323 break;
mcberg@38049 324
mcberg@38049 325 case T_LONG:
mcberg@38049 326 // TODO: Remove when support completed for mask context with LONG.
mcberg@38049 327 // Support needs to be augmented for logical qword operations, currently we map to dword
mcberg@38049 328 // buckets for vectors on logicals as these were legacy.
mcberg@38049 329 small_basic_type = true;
mcberg@38049 330 break;
jwilhelm@46630 331
jwilhelm@46630 332 default:
jwilhelm@46630 333 break;
mcberg@38049 334 }
mcberg@38049 335 }
mcberg@38049 336 }
mcberg@38049 337
kvn@31520 338 if (is_java_primitive(bt) == false) continue;
mcberg@31403 339
vdeshpande@46692 340 int cur_max_vector = Matcher::max_vector_size(bt);
mcberg@31403 341
mcberg@31403 342 // If a max vector exists which is not larger than _local_loop_unroll_factor
mcberg@31403 343 // stop looking, we already have the max vector to map to.
kvn@31772 344 if (cur_max_vector < local_loop_unroll_factor) {
mcberg@31403 345 is_slp = false;
twisti@34174 346 if (TraceSuperWordLoopUnrollAnalysis) {
twisti@34174 347 tty->print_cr("slp analysis fails: unroll limit greater than max vector\n");
twisti@34174 348 }
mcberg@31403 349 break;
mcberg@31403 350 }
mcberg@31403 351
mcberg@31403 352 // Map the maximal common vector
mcberg@31403 353 if (VectorNode::implemented(n->Opcode(), cur_max_vector, bt)) {
vdeshpande@46692 354 if (cur_max_vector < max_vector && !flag_small_bt) {
mcberg@31403 355 max_vector = cur_max_vector;
vdeshpande@46692 356 } else if (cur_max_vector > max_vector && UseSubwordForMaxVector) {
vdeshpande@46692 357 // Analyse subword in the loop to set maximum vector size to take advantage of full vector width for subword types.
vdeshpande@46692 358 // Here we analyze if narrowing is likely to happen and if it is we set vector size more aggressively.
vdeshpande@46692 359 // We check for possibility of narrowing by looking through chain operations using subword types.
vdeshpande@46692 360 if (is_subword_type(bt)) {
vdeshpande@46692 361 uint start, end;
vdeshpande@46692 362 VectorNode::vector_operands(n, &start, &end);
vdeshpande@46692 363
vdeshpande@46692 364 for (uint j = start; j < end; j++) {
vdeshpande@46692 365 Node* in = n->in(j);
vdeshpande@46692 366 // Don't propagate through a memory
vdeshpande@46692 367 if (!in->is_Mem() && in_bb(in) && in->bottom_type()->basic_type() == T_INT) {
vdeshpande@46692 368 bool same_type = true;
vdeshpande@46692 369 for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
vdeshpande@46692 370 Node *use = in->fast_out(k);
vdeshpande@46692 371 if (!in_bb(use) && use->bottom_type()->basic_type() != bt) {
vdeshpande@46692 372 same_type = false;
vdeshpande@46692 373 break;
vdeshpande@46692 374 }
vdeshpande@46692 375 }
vdeshpande@46692 376 if (same_type) {
vdeshpande@46692 377 max_vector = cur_max_vector;
vdeshpande@46692 378 flag_small_bt = true;
vdeshpande@51017 379 cl->mark_subword_loop();
vdeshpande@46692 380 }
vdeshpande@46692 381 }
vdeshpande@46692 382 }
vdeshpande@46692 383 }
mcberg@31403 384 }
mcberg@38049 385 // We only process post loops on predicated targets where we want to
mcberg@38049 386 // mask map the loop to a single iteration
mcberg@38049 387 if (post_loop_allowed) {
mcberg@38049 388 _post_block.at_put_grow(rpo_idx++, n);
mcberg@38049 389 }
mcberg@31403 390 }
mcberg@31403 391 }
mcberg@31403 392 if (is_slp) {
mcberg@31403 393 local_loop_unroll_factor = max_vector;
kvn@31772 394 cl->mark_passed_slp();
mcberg@31403 395 }
kvn@31772 396 cl->mark_was_slp();
mcberg@38049 397 if (cl->is_main_loop()) {
mcberg@38049 398 cl->set_slp_max_unroll(local_loop_unroll_factor);
mcberg@38049 399 } else if (post_loop_allowed) {
mcberg@38049 400 if (!small_basic_type) {
mcberg@38049 401 // avoid replication context for small basic types in programmable masked loops
mcberg@38049 402 cl->set_slp_max_unroll(local_loop_unroll_factor);
mcberg@38049 403 }
mcberg@38049 404 }
mcberg@31403 405 }
duke@1 406 }
duke@1 407
duke@1 408 //------------------------------SLP_extract---------------------------
duke@1 409 // Extract the superword level parallelism
duke@1 410 //
duke@1 411 // 1) A reverse post-order of nodes in the block is constructed. By scanning
duke@1 412 // this list from first to last, all definitions are visited before their uses.
duke@1 413 //
duke@1 414 // 2) A point-to-point dependence graph is constructed between memory references.
duke@1 415 // This simplifies the upcoming "independence" checker.
duke@1 416 //
duke@1 417 // 3) The maximum depth in the node graph from the beginning of the block
duke@1 418 // to each node is computed. This is used to prune the graph search
duke@1 419 // in the independence checker.
duke@1 420 //
duke@1 421 // 4) For integer types, the necessary bit width is propagated backwards
duke@1 422 // from stores to allow packed operations on byte, char, and short
duke@1 423 // integers. This reverses the promotion to type "int" that javac
duke@1 424 // did for operations like: char c1,c2,c3; c1 = c2 + c3.
duke@1 425 //
duke@1 426 // 5) One of the memory references is picked to be an aligned vector reference.
duke@1 427 // The pre-loop trip count is adjusted to align this reference in the
duke@1 428 // unrolled body.
duke@1 429 //
duke@1 430 // 6) The initial set of pack pairs is seeded with memory references.
duke@1 431 //
duke@1 432 // 7) The set of pack pairs is extended by following use->def and def->use links.
duke@1 433 //
duke@1 434 // 8) The pairs are combined into vector sized packs.
duke@1 435 //
duke@1 436 // 9) Reorder the memory slices to co-locate members of the memory packs.
duke@1 437 //
duke@1 438 // 10) Generate ideal vector nodes for the final set of packs and where necessary,
duke@1 439 // inserting scalar promotion, vector creation from multiple scalars, and
duke@1 440 // extraction of scalar values from vectors.
duke@1 441 //
duke@1 442 void SuperWord::SLP_extract() {
duke@1 443
kvn@30593 444 #ifndef PRODUCT
kvn@30593 445 if (_do_vector_loop && TraceSuperWord) {
kvn@30593 446 tty->print("SuperWord::SLP_extract\n");
kvn@30593 447 tty->print("input loop\n");
kvn@30593 448 _lpt->dump_head();
kvn@30593 449 _lpt->dump();
kvn@30593 450 for (uint i = 0; i < _lpt->_body.size(); i++) {
kvn@30593 451 _lpt->_body.at(i)->dump();
kvn@30593 452 }
kvn@30593 453 }
kvn@30593 454 #endif
duke@1 455 // Ready the block
kvn@30593 456 if (!construct_bb()) {
kvn@15755 457 return; // Exit if no interesting nodes or complex graph.
kvn@30593 458 }
mcberg@38049 459
kvn@30593 460 // build _dg, _disjoint_ptrs
duke@1 461 dependence_graph();
duke@1 462
kvn@30593 463 // compute function depth(Node*)
duke@1 464 compute_max_depth();
duke@1 465
mcberg@38049 466 CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
mcberg@38049 467 bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
mcberg@38049 468 if (cl->is_main_loop()) {
mcberg@38049 469 if (_do_vector_loop) {
mcberg@38049 470 if (mark_generations() != -1) {
mcberg@38049 471 hoist_loads_in_graph(); // this only rebuild the graph; all basic structs need rebuild explicitly
mcberg@38049 472
mcberg@38049 473 if (!construct_bb()) {
mcberg@38049 474 return; // Exit if no interesting nodes or complex graph.
mcberg@38049 475 }
mcberg@38049 476 dependence_graph();
mcberg@38049 477 compute_max_depth();
kvn@30593 478 }
mcberg@38049 479
mcberg@38049 480 #ifndef PRODUCT
mcberg@38049 481 if (TraceSuperWord) {
mcberg@38049 482 tty->print_cr("\nSuperWord::_do_vector_loop: graph after hoist_loads_in_graph");
mcberg@38049 483 _lpt->dump_head();
mcberg@38049 484 for (int j = 0; j < _block.length(); j++) {
mcberg@38049 485 Node* n = _block.at(j);
mcberg@38049 486 int d = depth(n);
mcberg@38049 487 for (int i = 0; i < d; i++) tty->print("%s", " ");
mcberg@38049 488 tty->print("%d :", d);
mcberg@38049 489 n->dump();
mcberg@38049 490 }
mcberg@38049 491 }
mcberg@38049 492 #endif
kvn@30593 493 }
kvn@30593 494
mcberg@38049 495 compute_vector_element_type();
mcberg@38049 496
mcberg@38049 497 // Attempt vectorization
mcberg@38049 498
mcberg@38049 499 find_adjacent_refs();
mcberg@38049 500
mcberg@38049 501 extend_packlist();
mcberg@38049 502
mcberg@38049 503 if (_do_vector_loop) {
mcberg@38049 504 if (_packset.length() == 0) {
mcberg@38049 505 if (TraceSuperWord) {
mcberg@38049 506 tty->print_cr("\nSuperWord::_do_vector_loop DFA could not build packset, now trying to build anyway");
mcberg@38049 507 }
mcberg@38049 508 pack_parallel();
kvn@30593 509 }
kvn@30593 510 }
mcberg@38049 511
mcberg@38049 512 combine_packs();
mcberg@38049 513
mcberg@38049 514 construct_my_pack_map();
kvn@48309 515 if (UseVectorCmov) {
mcberg@38049 516 merge_packs_to_cmovd();
mcberg@38049 517 }
mcberg@38049 518
mcberg@38049 519 filter_packs();
mcberg@38049 520
mcberg@38049 521 schedule();
mcberg@38049 522 } else if (post_loop_allowed) {
mcberg@38049 523 int saved_mapped_unroll_factor = cl->slp_max_unroll();
mcberg@38049 524 if (saved_mapped_unroll_factor) {
mcberg@38049 525 int vector_mapped_unroll_factor = saved_mapped_unroll_factor;
mcberg@38049 526
mcberg@38049 527 // now reset the slp_unroll_factor so that we can check the analysis mapped
mcberg@38049 528 // what the vector loop was mapped to
mcberg@38049 529 cl->set_slp_max_unroll(0);
mcberg@38049 530
mcberg@38049 531 // do the analysis on the post loop
mcberg@38049 532 unrolling_analysis(vector_mapped_unroll_factor);
mcberg@38049 533
mcberg@38049 534 // if our analyzed loop is a canonical fit, start processing it
mcberg@38049 535 if (vector_mapped_unroll_factor == saved_mapped_unroll_factor) {
mcberg@38049 536 // now add the vector nodes to packsets
mcberg@38049 537 for (int i = 0; i < _post_block.length(); i++) {
mcberg@38049 538 Node* n = _post_block.at(i);
mcberg@38049 539 Node_List* singleton = new Node_List();
mcberg@38049 540 singleton->push(n);
mcberg@38049 541 _packset.append(singleton);
mcberg@38049 542 set_my_pack(n, singleton);
mcberg@38049 543 }
mcberg@38049 544
mcberg@38049 545 // map base types for vector usage
mcberg@38049 546 compute_vector_element_type();
mcberg@38049 547 } else {
mcberg@38049 548 return;
kvn@30593 549 }
mcberg@38049 550 } else {
mcberg@38049 551 // for some reason we could not map the slp analysis state of the vectorized loop
mcberg@38049 552 return;
kvn@30593 553 }
kvn@30593 554 }
kvn@30593 555
duke@1 556 output();
duke@1 557 }
duke@1 558
duke@1 559 //------------------------------find_adjacent_refs---------------------------
duke@1 560 // Find the adjacent memory references and create pack pairs for them.
duke@1 561 // This is the initial set of packs that will then be extended by
duke@1 562 // following use->def and def->use links. The align positions are
duke@1 563 // assigned relative to the reference "align_to_ref"
duke@1 564 void SuperWord::find_adjacent_refs() {
duke@1 565 // Get list of memory operations
duke@1 566 Node_List memops;
duke@1 567 for (int i = 0; i < _block.length(); i++) {
duke@1 568 Node* n = _block.at(i);
kvn@13104 569 if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&
kvn@202 570 is_java_primitive(n->as_Mem()->memory_type())) {
duke@1 571 int align = memory_alignment(n->as_Mem(), 0);
duke@1 572 if (align != bottom_align) {
duke@1 573 memops.push(n);
duke@1 574 }
duke@1 575 }
duke@1 576 }
duke@1 577
kvn@13104 578 Node_List align_to_refs;
kvn@13104 579 int best_iv_adjustment = 0;
kvn@13104 580 MemNode* best_align_to_mem_ref = NULL;
duke@1 581
kvn@13104 582 while (memops.size() != 0) {
kvn@13104 583 // Find a memory reference to align to.
kvn@13104 584 MemNode* mem_ref = find_align_to_ref(memops);
kvn@13104 585 if (mem_ref == NULL) break;
kvn@13104 586 align_to_refs.push(mem_ref);
kvn@13104 587 int iv_adjustment = get_iv_adjustment(mem_ref);
duke@1 588
kvn@13104 589 if (best_align_to_mem_ref == NULL) {
kvn@13104 590 // Set memory reference which is the best from all memory operations
kvn@13104 591 // to be used for alignment. The pre-loop trip count is modified to align
kvn@13104 592 // this reference to a vector-aligned address.
kvn@13104 593 best_align_to_mem_ref = mem_ref;
kvn@13104 594 best_iv_adjustment = iv_adjustment;
kvn@31858 595 NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);)
kvn@13104 596 }
duke@1 597
mcberg@31403 598 SWPointer align_to_ref_p(mem_ref, this, NULL, false);
kvn@13104 599 // Set alignment relative to "align_to_ref" for all related memory operations.
kvn@13104 600 for (int i = memops.size() - 1; i >= 0; i--) {
kvn@13104 601 MemNode* s = memops.at(i)->as_Mem();
kvn@31858 602 if (isomorphic(s, mem_ref) &&
kvn@31858 603 (!_do_vector_loop || same_origin_idx(s, mem_ref))) {
mcberg@31403 604 SWPointer p2(s, this, NULL, false);
kvn@13104 605 if (p2.comparable(align_to_ref_p)) {
kvn@13104 606 int align = memory_alignment(s, iv_adjustment);
kvn@13104 607 set_alignment(s, align);
duke@1 608 }
duke@1 609 }
duke@1 610 }
kvn@13104 611
kvn@13104 612 // Create initial pack pairs of memory operations for which
kvn@13104 613 // alignment is set and vectors will be aligned.
kvn@13104 614 bool create_pack = true;
kvn@30593 615 if (memory_alignment(mem_ref, best_iv_adjustment) == 0 || _do_vector_loop) {
roland@53639 616 if (!Matcher::misaligned_vectors_ok() || AlignVector) {
kvn@13108 617 int vw = vector_width(mem_ref);
kvn@13108 618 int vw_best = vector_width(best_align_to_mem_ref);
kvn@13108 619 if (vw > vw_best) {
kvn@13108 620 // Do not vectorize a memory access with more elements per vector
kvn@13108 621 // if unaligned memory access is not allowed because number of
kvn@13108 622 // iterations in pre-loop will be not enough to align it.
kvn@13108 623 create_pack = false;
thartmann@30625 624 } else {
mcberg@31403 625 SWPointer p2(best_align_to_mem_ref, this, NULL, false);
thartmann@30625 626 if (align_to_ref_p.invar() != p2.invar()) {
thartmann@30625 627 // Do not vectorize memory accesses with different invariants
thartmann@30625 628 // if unaligned memory accesses are not allowed.
thartmann@30625 629 create_pack = false;
thartmann@30625 630 }
kvn@13108 631 }
kvn@13108 632 }
kvn@13108 633 } else {
kvn@13104 634 if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
kvn@13104 635 // Can't allow vectorization of unaligned memory accesses with the
kvn@13104 636 // same type since it could be overlapped accesses to the same array.
kvn@13104 637 create_pack = false;
kvn@13104 638 } else {
kvn@13104 639 // Allow independent (different type) unaligned memory operations
kvn@13104 640 // if HW supports them.
roland@53639 641 if (!Matcher::misaligned_vectors_ok() || AlignVector) {
kvn@13104 642 create_pack = false;
kvn@13104 643 } else {
kvn@13104 644 // Check if packs of the same memory type but
kvn@13104 645 // with a different alignment were created before.
kvn@13104 646 for (uint i = 0; i < align_to_refs.size(); i++) {
kvn@13104 647 MemNode* mr = align_to_refs.at(i)->as_Mem();
vdeshpande@52992 648 if (mr == mem_ref) {
vdeshpande@52992 649 // Skip when we are looking at same memory operation.
vdeshpande@52992 650 continue;
vdeshpande@52992 651 }
kvn@13104 652 if (same_velt_type(mr, mem_ref) &&
kvn@13104 653 memory_alignment(mr, iv_adjustment) != 0)
kvn@13104 654 create_pack = false;
kvn@13104 655 }
kvn@13104 656 }
kvn@13104 657 }
kvn@13104 658 }
kvn@13104 659 if (create_pack) {
kvn@13104 660 for (uint i = 0; i < memops.size(); i++) {
kvn@13104 661 Node* s1 = memops.at(i);
kvn@13104 662 int align = alignment(s1);
kvn@13104 663 if (align == top_align) continue;
kvn@13104 664 for (uint j = 0; j < memops.size(); j++) {
kvn@13104 665 Node* s2 = memops.at(j);
kvn@13104 666 if (alignment(s2) == top_align) continue;
kvn@13104 667 if (s1 != s2 && are_adjacent_refs(s1, s2)) {
kvn@13104 668 if (stmts_can_pack(s1, s2, align)) {
kvn@13104 669 Node_List* pair = new Node_List();
kvn@13104 670 pair->push(s1);
kvn@13104 671 pair->push(s2);
kvn@31858 672 if (!_do_vector_loop || same_origin_idx(s1, s2)) {
kvn@30593 673 _packset.append(pair);
kvn@30593 674 }
kvn@13104 675 }
kvn@13104 676 }
kvn@13104 677 }
kvn@13104 678 }
kvn@13104 679 } else { // Don't create unaligned pack
kvn@13104 680 // First, remove remaining memory ops of the same type from the list.
kvn@13104 681 for (int i = memops.size() - 1; i >= 0; i--) {
kvn@13104 682 MemNode* s = memops.at(i)->as_Mem();
kvn@13104 683 if (same_velt_type(s, mem_ref)) {
kvn@13104 684 memops.remove(i);
kvn@13104 685 }
kvn@13104 686 }
kvn@13104 687
kvn@13104 688 // Second, remove already constructed packs of the same type.
kvn@13104 689 for (int i = _packset.length() - 1; i >= 0; i--) {
kvn@13104 690 Node_List* p = _packset.at(i);
kvn@13104 691 MemNode* s = p->at(0)->as_Mem();
kvn@13104 692 if (same_velt_type(s, mem_ref)) {
kvn@13104 693 remove_pack_at(i);
kvn@13104 694 }
kvn@13104 695 }
kvn@13104 696
kvn@13104 697 // If needed find the best memory reference for loop alignment again.
kvn@13104 698 if (same_velt_type(mem_ref, best_align_to_mem_ref)) {
kvn@13104 699 // Put memory ops from remaining packs back on memops list for
kvn@13104 700 // the best alignment search.
kvn@13104 701 uint orig_msize = memops.size();
kvn@13104 702 for (int i = 0; i < _packset.length(); i++) {
kvn@13104 703 Node_List* p = _packset.at(i);
kvn@13104 704 MemNode* s = p->at(0)->as_Mem();
kvn@13104 705 assert(!same_velt_type(s, mem_ref), "sanity");
kvn@13104 706 memops.push(s);
kvn@13104 707 }
kvn@34168 708 best_align_to_mem_ref = find_align_to_ref(memops);
kvn@31858 709 if (best_align_to_mem_ref == NULL) {
twisti@34174 710 if (TraceSuperWord) {
twisti@34174 711 tty->print_cr("SuperWord::find_adjacent_refs(): best_align_to_mem_ref == NULL");
twisti@34174 712 }
kvn@31858 713 break;
kvn@31858 714 }
kvn@13104 715 best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);
kvn@31858 716 NOT_PRODUCT(find_adjacent_refs_trace_1(best_align_to_mem_ref, best_iv_adjustment);)
kvn@13104 717 // Restore list.
kvn@13104 718 while (memops.size() > orig_msize)
kvn@13104 719 (void)memops.pop();
kvn@13104 720 }
kvn@13104 721 } // unaligned memory accesses
kvn@13104 722
kvn@13104 723 // Remove used mem nodes.
kvn@13104 724 for (int i = memops.size() - 1; i >= 0; i--) {
kvn@13104 725 MemNode* m = memops.at(i)->as_Mem();
kvn@13104 726 if (alignment(m) != top_align) {
kvn@13104 727 memops.remove(i);
kvn@13104 728 }
kvn@13104 729 }
kvn@13104 730
kvn@13104 731 } // while (memops.size() != 0
kvn@13104 732 set_align_to_ref(best_align_to_mem_ref);
duke@1 733
duke@1 734 if (TraceSuperWord) {
duke@1 735 tty->print_cr("\nAfter find_adjacent_refs");
duke@1 736 print_packset();
duke@1 737 }
duke@1 738 }
duke@1 739
kvn@31858 740 #ifndef PRODUCT
kvn@31858 741 void SuperWord::find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment) {
kvn@31858 742 if (is_trace_adjacent()) {
kvn@31858 743 tty->print("SuperWord::find_adjacent_refs best_align_to_mem_ref = %d, best_iv_adjustment = %d",
kvn@31858 744 best_align_to_mem_ref->_idx, best_iv_adjustment);
kvn@31858 745 best_align_to_mem_ref->dump();
kvn@31858 746 }
kvn@31858 747 }
kvn@31858 748 #endif
kvn@31858 749
//------------------------------find_align_to_ref---------------------------
// Find a memory reference to align the loop induction variable to.
// Looks first at stores then at loads, looking for a memory reference
// with the largest number of references similar to it.
MemNode* SuperWord::find_align_to_ref(Node_List &memops) {
  // cmp_ct[i] counts how many other memops are "comparable" to memops[i],
  // i.e. isomorphic and with a comparable address expression.
  GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);

  // Count number of comparable memory ops
  for (uint i = 0; i < memops.size(); i++) {
    MemNode* s1 = memops.at(i)->as_Mem();
    SWPointer p1(s1, this, NULL, false);
    // Discard if pre loop can't align this reference
    if (!ref_is_alignable(p1)) {
      *cmp_ct.adr_at(i) = 0;
      continue;
    }
    // Pairwise comparison; each comparable pair bumps both counters.
    for (uint j = i+1; j < memops.size(); j++) {
      MemNode* s2 = memops.at(j)->as_Mem();
      if (isomorphic(s1, s2)) {
        SWPointer p2(s2, this, NULL, false);
        if (p1.comparable(p2)) {
          (*cmp_ct.adr_at(i))++;
          (*cmp_ct.adr_at(j))++;
        }
      }
    }
  }

  // Find Store (or Load) with the greatest number of "comparable" references,
  // biggest vector size, smallest data size and smallest iv offset.
  int max_ct = 0;
  int max_vw = 0;
  int max_idx = -1;
  int min_size = max_jint;
  int min_iv_offset = max_jint;
  // Stores are preferred: scan them first.
  for (uint j = 0; j < memops.size(); j++) {
    MemNode* s = memops.at(j)->as_Mem();
    if (s->is_Store()) {
      int vw = vector_width_in_bytes(s);
      assert(vw > 1, "sanity");
      SWPointer p(s, this, NULL, false);
      // Lexicographic preference: comparable count, then vector width,
      // then (smaller) data size, then (smaller) iv offset.
      if ( cmp_ct.at(j) > max_ct ||
          (cmp_ct.at(j) == max_ct &&
            ( vw > max_vw ||
             (vw == max_vw &&
              ( data_size(s) < min_size ||
               (data_size(s) == min_size &&
                p.offset_in_bytes() < min_iv_offset)))))) {
        max_ct = cmp_ct.at(j);
        max_vw = vw;
        max_idx = j;
        min_size = data_size(s);
        min_iv_offset = p.offset_in_bytes();
      }
    }
  }
  // If no stores, look at loads
  if (max_ct == 0) {
    for (uint j = 0; j < memops.size(); j++) {
      MemNode* s = memops.at(j)->as_Mem();
      if (s->is_Load()) {
        int vw = vector_width_in_bytes(s);
        assert(vw > 1, "sanity");
        SWPointer p(s, this, NULL, false);
        // Same lexicographic preference as for stores above.
        if ( cmp_ct.at(j) > max_ct ||
            (cmp_ct.at(j) == max_ct &&
              ( vw > max_vw ||
               (vw == max_vw &&
                ( data_size(s) < min_size ||
                 (data_size(s) == min_size &&
                  p.offset_in_bytes() < min_iv_offset)))))) {
          max_ct = cmp_ct.at(j);
          max_vw = vw;
          max_idx = j;
          min_size = data_size(s);
          min_iv_offset = p.offset_in_bytes();
        }
      }
    }
  }

#ifdef ASSERT
  if (TraceSuperWord && Verbose) {
    tty->print_cr("\nVector memops after find_align_to_ref");
    for (uint i = 0; i < memops.size(); i++) {
      MemNode* s = memops.at(i)->as_Mem();
      s->dump();
    }
  }
#endif

  // Return the winner, or NULL when no alignable, comparable reference exists.
  if (max_ct > 0) {
#ifdef ASSERT
    if (TraceSuperWord) {
      tty->print("\nVector align to node: ");
      memops.at(max_idx)->as_Mem()->dump();
    }
#endif
    return memops.at(max_idx)->as_Mem();
  }
  return NULL;
}
duke@1 852
vdeshpande@52992 853 //------------------span_works_for_memory_size-----------------------------
vdeshpande@52992 854 static bool span_works_for_memory_size(MemNode* mem, int span, int mem_size, int offset) {
vdeshpande@52992 855 bool span_matches_memory = false;
vdeshpande@52992 856 if ((mem_size == type2aelembytes(T_BYTE) || mem_size == type2aelembytes(T_SHORT))
vdeshpande@52992 857 && ABS(span) == type2aelembytes(T_INT)) {
vdeshpande@52992 858 // There is a mismatch on span size compared to memory.
vdeshpande@52992 859 for (DUIterator_Fast jmax, j = mem->fast_outs(jmax); j < jmax; j++) {
vdeshpande@52992 860 Node* use = mem->fast_out(j);
vdeshpande@52992 861 if (!VectorNode::is_type_transition_to_int(use)) {
vdeshpande@52992 862 return false;
vdeshpande@52992 863 }
vdeshpande@52992 864 }
vdeshpande@52992 865 // If all uses transition to integer, it means that we can successfully align even on mismatch.
vdeshpande@52992 866 return true;
vdeshpande@52992 867 }
vdeshpande@52992 868 else {
vdeshpande@52992 869 span_matches_memory = ABS(span) == mem_size;
vdeshpande@52992 870 }
vdeshpande@52992 871 return span_matches_memory && (ABS(offset) % mem_size) == 0;
vdeshpande@52992 872 }
vdeshpande@52992 873
//------------------------------ref_is_alignable---------------------------
// Can the preloop align the reference to position zero in the vector?
bool SuperWord::ref_is_alignable(SWPointer& p) {
  if (!p.has_iv()) {
    return true; // no induction variable
  }
  CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
  assert(pre_end != NULL, "we must have a correct pre-loop");
  assert(pre_end->stride_is_con(), "pre loop stride is constant");
  int preloop_stride = pre_end->stride_con();

  // 'span' is the number of bytes this reference advances per pre-loop iteration.
  int span = preloop_stride * p.scale_in_bytes();
  int mem_size = p.memory_size();
  int offset = p.offset_in_bytes();
  // Stride one accesses are alignable if offset is aligned to memory operation size.
  // Offset can be unaligned when UseUnalignedAccesses is used.
  if (span_works_for_memory_size(p.mem(), span, mem_size, offset)) {
    return true;
  }
  // If the initial offset from start of the object is computable,
  // check if the pre-loop can align the final offset accordingly.
  //
  // In other words: Can we find an i such that the offset
  // after i pre-loop iterations is aligned to vw?
  //   (init_offset + pre_loop) % vw == 0              (1)
  // where
  //   pre_loop = i * span
  // is the number of bytes added to the offset by i pre-loop iterations.
  //
  // For this to hold we need pre_loop to increase init_offset by
  //   pre_loop = vw - (init_offset % vw)
  //
  // This is only possible if pre_loop is divisible by span because each
  // pre-loop iteration increases the initial offset by 'span' bytes:
  //   (vw - (init_offset % vw)) % span == 0
  //
  int vw = vector_width_in_bytes(p.mem());
  assert(vw > 1, "sanity");
  Node* init_nd = pre_end->init_trip();
  // Provable only for a constant initial trip and no invariant offset term.
  if (init_nd->is_Con() && p.invar() == NULL) {
    int init = init_nd->bottom_type()->is_int()->get_con();
    int init_offset = init * p.scale_in_bytes() + offset;
    if (init_offset < 0) { // negative offset from object start?
      return false;        // may happen in dead loop
    }
    if (vw % span == 0) {
      // If vm is a multiple of span, we use formula (1).
      if (span > 0) {
        return (vw - (init_offset % vw)) % span == 0;
      } else {
        assert(span < 0, "nonzero stride * scale");
        return (init_offset % vw) % -span == 0;
      }
    } else if (span % vw == 0) {
      // If span is a multiple of vw, we can simplify formula (1) to:
      //   (init_offset + i * span) % vw == 0
      //     =>
      //   (init_offset % vw) + ((i * span) % vw) == 0
      //     =>
      //   init_offset % vw == 0
      //
      // Because we add a multiple of vw to the initial offset, the final
      // offset is a multiple of vw if and only if init_offset is a multiple.
      //
      return (init_offset % vw) == 0;
    }
  }
  // Cannot prove alignment: non-constant init, invariant offset,
  // or span and vw do not divide each other.
  return false;
}
vdeshpande@52992 943 //---------------------------get_vw_bytes_special------------------------
vdeshpande@52992 944 int SuperWord::get_vw_bytes_special(MemNode* s) {
vdeshpande@52992 945 // Get the vector width in bytes.
vdeshpande@52992 946 int vw = vector_width_in_bytes(s);
vdeshpande@52992 947
vdeshpande@52992 948 // Check for special case where there is an MulAddS2I usage where short vectors are going to need combined.
vdeshpande@52992 949 BasicType btype = velt_basic_type(s);
vdeshpande@52992 950 if (type2aelembytes(btype) == 2) {
vdeshpande@52992 951 bool should_combine_adjacent = true;
vdeshpande@52992 952 for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
vdeshpande@52992 953 Node* user = s->fast_out(i);
vdeshpande@52992 954 if (!VectorNode::is_muladds2i(user)) {
vdeshpande@52992 955 should_combine_adjacent = false;
vdeshpande@52992 956 }
vdeshpande@52992 957 }
vdeshpande@52992 958 if (should_combine_adjacent) {
vdeshpande@52992 959 vw = MIN2(Matcher::max_vector_size(btype)*type2aelembytes(btype), vw * 2);
vdeshpande@52992 960 }
vdeshpande@52992 961 }
vdeshpande@52992 962
vdeshpande@52992 963 return vw;
vdeshpande@52992 964 }
duke@1 965
//---------------------------get_iv_adjustment---------------------------
// Calculate loop's iv adjustment for this memory ops.
int SuperWord::get_iv_adjustment(MemNode* mem_ref) {
  SWPointer align_to_ref_p(mem_ref, this, NULL, false);
  int offset = align_to_ref_p.offset_in_bytes();
  int scale = align_to_ref_p.scale_in_bytes();
  int elt_size = align_to_ref_p.memory_size();
  int vw = get_vw_bytes_special(mem_ref);
  assert(vw > 1, "sanity");
  int iv_adjustment;
  if (scale != 0) {
    // Direction in which this reference moves through memory per iteration.
    int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
    // At least one iteration is executed in pre-loop by default. As result
    // several iterations are needed to align memory operations in main-loop even
    // if offset is 0.
    int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));
    assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),
           "(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size);
    // Convert from bytes to element-sized iv increments.
    iv_adjustment = iv_adjustment_in_bytes/elt_size;
  } else {
    // This memory op is not dependent on iv (scale == 0)
    iv_adjustment = 0;
  }

#ifndef PRODUCT
  if (TraceSuperWord) {
    tty->print("SuperWord::get_iv_adjustment: n = %d, noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d: ",
      mem_ref->_idx, offset, iv_adjustment, elt_size, scale, iv_stride(), vw);
    mem_ref->dump();
  }
#endif
  return iv_adjustment;
}
kvn@13104 999
//---------------------------dependence_graph---------------------------
// Construct dependency graph.
// Add dependence edges to load/store nodes for memory dependence
//    A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
void SuperWord::dependence_graph() {
  CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
  // First, assign a dependence node to each memory node
  for (int i = 0; i < _block.length(); i++ ) {
    Node *n = _block.at(i);
    if (n->is_Mem() || (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
      _dg.make_node(n);
    }
  }

  // For each memory slice, create the dependences
  for (int i = 0; i < _mem_slice_head.length(); i++) {
    Node* n = _mem_slice_head.at(i);
    Node* n_tail = _mem_slice_tail.at(i);

    // Get slice in predecessor order (last is first)
    // Only done for the main loop; otherwise _nlist stays empty below.
    if (cl->is_main_loop()) {
      mem_slice_preds(n_tail, n, _nlist);
    }

#ifndef PRODUCT
    if(TraceSuperWord && Verbose) {
      tty->print_cr("SuperWord::dependence_graph: built a new mem slice");
      for (int j = _nlist.length() - 1; j >= 0 ; j--) {
        _nlist.at(j)->dump();
      }
    }
#endif
    // Make the slice dependent on the root
    DepMem* slice = _dg.dep(n);
    _dg.make_edge(_dg.root(), slice);

    // Create a sink for the slice
    DepMem* slice_sink = _dg.make_node(NULL);
    _dg.make_edge(slice_sink, _dg.tail());

    // Now visit each pair of memory ops, creating the edges
    for (int j = _nlist.length() - 1; j >= 0 ; j--) {
      Node* s1 = _nlist.at(j);

      // If no dependency yet, use slice
      if (_dg.dep(s1)->in_cnt() == 0) {
        _dg.make_edge(slice, s1);
      }
      SWPointer p1(s1->as_Mem(), this, NULL, false);
      bool sink_dependent = true;
      for (int k = j - 1; k >= 0; k--) {
        Node* s2 = _nlist.at(k);
        // Two loads never conflict with each other.
        if (s1->is_Load() && s2->is_Load())
          continue;
        SWPointer p2(s2->as_Mem(), this, NULL, false);

        int cmp = p1.cmp(p2);
        if (SuperWordRTDepCheck &&
            p1.base() != p2.base() && p1.valid() && p2.valid()) {
          // Create a runtime check to disambiguate
          OrderedPair pp(p1.base(), p2.base());
          _disjoint_ptrs.append_if_missing(pp);
        } else if (!SWPointer::not_equal(cmp)) {
          // Possibly same address
          _dg.make_edge(s1, s2);
          sink_dependent = false;
        }
      }
      // Ops with no later conflicting op depend directly on the slice sink.
      if (sink_dependent) {
        _dg.make_edge(s1, slice_sink);
      }
    }

    if (TraceSuperWord) {
      tty->print_cr("\nDependence graph for slice: %d", n->_idx);
      for (int q = 0; q < _nlist.length(); q++) {
        _dg.print(_nlist.at(q));
      }
      tty->cr();
    }

    _nlist.clear();
  }

  if (TraceSuperWord) {
    tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE");
    for (int r = 0; r < _disjoint_ptrs.length(); r++) {
      _disjoint_ptrs.at(r).print();
      tty->cr();
    }
    tty->cr();
  }

}
duke@1 1094
//---------------------------mem_slice_preds---------------------------
// Return a memory slice (node list) in predecessor order starting at "start"
void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) {
  assert(preds.length() == 0, "start empty");
  Node* n = start;
  Node* prev = NULL;
  // Walk the memory input chain from 'start' back to 'stop', collecting
  // the in-block loads hanging off each node along the way.
  while (true) {
    NOT_PRODUCT( if(is_trace_mem_slice()) tty->print_cr("SuperWord::mem_slice_preds: n %d", n->_idx);)
    assert(in_bb(n), "must be in block");
    for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
      Node* out = n->fast_out(i);
      if (out->is_Load()) {
        if (in_bb(out)) {
          preds.push(out);
          if (TraceSuperWord && Verbose) {
            tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", out->_idx);
          }
        }
      } else {
        // FIXME
        if (out->is_MergeMem() && !in_bb(out)) {
          // Either unrolling is causing a memory edge not to disappear,
          // or need to run igvn.optimize() again before SLP
        } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) {
          // Ditto.  Not sure what else to check further.
        } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) {
          // StoreCM has an input edge used as a precedence edge.
          // Maybe an issue when oop stores are vectorized.
        } else {
          assert(out == prev || prev == NULL, "no branches off of store slice");
        }
      }//else
    }//for
    if (n == stop) break;
    preds.push(n);
    if (TraceSuperWord && Verbose) {
      tty->print_cr("SuperWord::mem_slice_preds: added pred(%d)", n->_idx);
    }
    prev = n;
    assert(n->is_Mem(), "unexpected node %s", n->Name());
    n = n->in(MemNode::Memory);
  }
}
duke@1 1138
//------------------------------stmts_can_pack---------------------------
// Can s1 and s2 be in a pack with s1 immediately preceding s2 and
// s1 aligned at "align"
bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {

  // Do not use superword for non-primitives
  BasicType bt1 = velt_basic_type(s1);
  BasicType bt2 = velt_basic_type(s2);
  if(!is_java_primitive(bt1) || !is_java_primitive(bt2))
    return false;
  if (Matcher::max_vector_size(bt1) < 2) {
    return false; // No vectors for this type
  }

  // A pair must be: isomorphic; either independent with similar inputs or
  // a reduction chain; not already in a pack at these positions; adjacent
  // if memory ops; and consistent with the requested alignment (top_align
  // means "not yet assigned").
  if (isomorphic(s1, s2)) {
    if ((independent(s1, s2) && have_similar_inputs(s1, s2)) || reduction(s1, s2)) {
      if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
        if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
          int s1_align = alignment(s1);
          int s2_align = alignment(s2);
          if (s1_align == top_align || s1_align == align) {
            if (s2_align == top_align || s2_align == align + data_size(s1)) {
              return true;
            }
          }
        }
      }
    }
  }
  return false;
}
duke@1 1170
duke@1 1171 //------------------------------exists_at---------------------------
duke@1 1172 // Does s exist in a pack at position pos?
duke@1 1173 bool SuperWord::exists_at(Node* s, uint pos) {
duke@1 1174 for (int i = 0; i < _packset.length(); i++) {
duke@1 1175 Node_List* p = _packset.at(i);
duke@1 1176 if (p->at(pos) == s) {
duke@1 1177 return true;
duke@1 1178 }
duke@1 1179 }
duke@1 1180 return false;
duke@1 1181 }
duke@1 1182
duke@1 1183 //------------------------------are_adjacent_refs---------------------------
duke@1 1184 // Is s1 immediately before s2 in memory?
duke@1 1185 bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) {
duke@1 1186 if (!s1->is_Mem() || !s2->is_Mem()) return false;
duke@1 1187 if (!in_bb(s1) || !in_bb(s2)) return false;
never@5708 1188
never@5708 1189 // Do not use superword for non-primitives
never@5708 1190 if (!is_java_primitive(s1->as_Mem()->memory_type()) ||
never@5708 1191 !is_java_primitive(s2->as_Mem()->memory_type())) {
never@5708 1192 return false;
never@5708 1193 }
never@5708 1194
duke@1 1195 // FIXME - co_locate_pack fails on Stores in different mem-slices, so
duke@1 1196 // only pack memops that are in the same alias set until that's fixed.
duke@1 1197 if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
duke@1 1198 _phase->C->get_alias_index(s2->as_Mem()->adr_type()))
duke@1 1199 return false;
mcberg@31403 1200 SWPointer p1(s1->as_Mem(), this, NULL, false);
mcberg@31403 1201 SWPointer p2(s2->as_Mem(), this, NULL, false);
duke@1 1202 if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
duke@1 1203 int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
duke@1 1204 return diff == data_size(s1);
duke@1 1205 }
duke@1 1206
//------------------------------isomorphic---------------------------
// Are s1 and s2 similar?
bool SuperWord::isomorphic(Node* s1, Node* s2) {
  if (s1->Opcode() != s2->Opcode()) return false;
  if (s1->req() != s2->req()) return false;
  if (!same_velt_type(s1, s2)) return false;
  Node* s1_ctrl = s1->in(0);
  Node* s2_ctrl = s2->in(0);
  // If the control nodes are equivalent, no further checks are required to test for isomorphism.
  if (s1_ctrl == s2_ctrl) {
    return true;
  } else {
    // A NULL control counts as loop-invariant.
    bool s1_ctrl_inv = ((s1_ctrl == NULL) ? true : lpt()->is_invariant(s1_ctrl));
    bool s2_ctrl_inv = ((s2_ctrl == NULL) ? true : lpt()->is_invariant(s2_ctrl));
    // If the control nodes are not invariant for the loop, fail isomorphism test.
    if (!s1_ctrl_inv || !s2_ctrl_inv) {
      return false;
    }
    if(s1_ctrl != NULL && s2_ctrl != NULL) {
      // Look through projections to the underlying control node.
      if (s1_ctrl->is_Proj()) {
        s1_ctrl = s1_ctrl->in(0);
        assert(lpt()->is_invariant(s1_ctrl), "must be invariant");
      }
      if (s2_ctrl->is_Proj()) {
        s2_ctrl = s2_ctrl->in(0);
        assert(lpt()->is_invariant(s2_ctrl), "must be invariant");
      }
      if (!s1_ctrl->is_RangeCheck() || !s2_ctrl->is_RangeCheck()) {
        return false;
      }
    }
    // Control nodes are invariant. However, we have no way of checking whether they resolve
    // in an equivalent manner. But, we know that invariant range checks are guaranteed to
    // throw before the loop (if they would have thrown). Thus, the loop would not have been reached.
    // Therefore, if the control nodes for both are range checks, we accept them to be isomorphic.
    // Additionally require that both nodes feed a MulAddS2I use before
    // accepting the pair.
    for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
      Node* t1 = s1->fast_out(i);
      for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
        Node* t2 = s2->fast_out(j);
        if (VectorNode::is_muladds2i(t1) && VectorNode::is_muladds2i(t2)) {
          return true;
        }
      }
    }
  }
  return false;
}
duke@1 1254
duke@1 1255 //------------------------------independent---------------------------
duke@1 1256 // Is there no data path from s1 to s2 or s2 to s1?
duke@1 1257 bool SuperWord::independent(Node* s1, Node* s2) {
duke@1 1258 // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
duke@1 1259 int d1 = depth(s1);
duke@1 1260 int d2 = depth(s2);
duke@1 1261 if (d1 == d2) return s1 != s2;
duke@1 1262 Node* deep = d1 > d2 ? s1 : s2;
duke@1 1263 Node* shallow = d1 > d2 ? s2 : s1;
duke@1 1264
duke@1 1265 visited_clear();
duke@1 1266
duke@1 1267 return independent_path(shallow, deep);
duke@1 1268 }
duke@1 1269
njian@48136 1270 //--------------------------have_similar_inputs-----------------------
njian@48136 1271 // For a node pair (s1, s2) which is isomorphic and independent,
njian@48136 1272 // do s1 and s2 have similar input edges?
njian@48136 1273 bool SuperWord::have_similar_inputs(Node* s1, Node* s2) {
njian@48136 1274 // assert(isomorphic(s1, s2) == true, "check isomorphic");
njian@48136 1275 // assert(independent(s1, s2) == true, "check independent");
njian@48136 1276 if (s1->req() > 1 && !s1->is_Store() && !s1->is_Load()) {
njian@48136 1277 for (uint i = 1; i < s1->req(); i++) {
njian@48136 1278 if (s1->in(i)->Opcode() != s2->in(i)->Opcode()) return false;
njian@48136 1279 }
njian@48136 1280 }
njian@48136 1281 return true;
njian@48136 1282 }
njian@48136 1283
kvn@30211 1284 //------------------------------reduction---------------------------
kvn@30211 1285 // Is there a data path between s1 and s2 and the nodes reductions?
kvn@30211 1286 bool SuperWord::reduction(Node* s1, Node* s2) {
kvn@30211 1287 bool retValue = false;
kvn@30211 1288 int d1 = depth(s1);
kvn@30211 1289 int d2 = depth(s2);
kvn@30211 1290 if (d1 + 1 == d2) {
kvn@30211 1291 if (s1->is_reduction() && s2->is_reduction()) {
kvn@30211 1292 // This is an ordered set, so s1 should define s2
kvn@30211 1293 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
kvn@30211 1294 Node* t1 = s1->fast_out(i);
kvn@30211 1295 if (t1 == s2) {
kvn@30211 1296 // both nodes are reductions and connected
kvn@30211 1297 retValue = true;
kvn@30211 1298 }
kvn@30211 1299 }
kvn@30211 1300 }
kvn@30211 1301 }
kvn@30211 1302
kvn@30211 1303 return retValue;
kvn@30211 1304 }
kvn@30211 1305
duke@1 1306 //------------------------------independent_path------------------------------
duke@1 1307 // Helper for independent
duke@1 1308 bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {
duke@1 1309 if (dp >= 1000) return false; // stop deep recursion
duke@1 1310 visited_set(deep);
duke@1 1311 int shal_depth = depth(shallow);
duke@1 1312 assert(shal_depth <= depth(deep), "must be");
duke@1 1313 for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) {
duke@1 1314 Node* pred = preds.current();
duke@1 1315 if (in_bb(pred) && !visited_test(pred)) {
duke@1 1316 if (shallow == pred) {
duke@1 1317 return false;
duke@1 1318 }
duke@1 1319 if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
duke@1 1320 return false;
duke@1 1321 }
duke@1 1322 }
duke@1 1323 }
duke@1 1324 return true;
duke@1 1325 }
duke@1 1326
duke@1 1327 //------------------------------set_alignment---------------------------
duke@1 1328 void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
duke@1 1329 set_alignment(s1, align);
kvn@13104 1330 if (align == top_align || align == bottom_align) {
kvn@13104 1331 set_alignment(s2, align);
kvn@13104 1332 } else {
kvn@13104 1333 set_alignment(s2, align + data_size(s1));
kvn@13104 1334 }
duke@1 1335 }
duke@1 1336
duke@1 1337 //------------------------------data_size---------------------------
duke@1 1338 int SuperWord::data_size(Node* s) {
kvn@48309 1339 Node* use = NULL; //test if the node is a candidate for CMoveV optimization, then return the size of CMov
kvn@48309 1340 if (UseVectorCmov) {
iveresov@33469 1341 use = _cmovev_kit.is_Bool_candidate(s);
iveresov@33469 1342 if (use != NULL) {
iveresov@33469 1343 return data_size(use);
iveresov@33469 1344 }
iveresov@33469 1345 use = _cmovev_kit.is_CmpD_candidate(s);
iveresov@33469 1346 if (use != NULL) {
iveresov@33469 1347 return data_size(use);
iveresov@33469 1348 }
iveresov@33469 1349 }
kvn@48309 1350
kvn@13104 1351 int bsize = type2aelembytes(velt_basic_type(s));
duke@1 1352 assert(bsize != 0, "valid size");
duke@1 1353 return bsize;
duke@1 1354 }
duke@1 1355
//------------------------------extend_packlist---------------------------
// Extend packset by following use->def and def->use links from pack members.
void SuperWord::extend_packlist() {
  bool changed;
  // Iterate to a fixed point: each new pair created in one pass can seed
  // further extension on the next pass.
  do {
    packset_sort(_packset.length());
    changed = false;
    for (int i = 0; i < _packset.length(); i++) {
      Node_List* p = _packset.at(i);
      changed |= follow_use_defs(p);
      changed |= follow_def_uses(p);
    }
  } while (changed);

  if (_race_possible) {
    // Impose an ordering on the def-uses of each pack.
    for (int i = 0; i < _packset.length(); i++) {
      Node_List* p = _packset.at(i);
      order_def_uses(p);
    }
  }

  if (TraceSuperWord) {
    tty->print_cr("\nAfter extend_packlist");
    print_packset();
  }
}
duke@1 1382
//------------------------------follow_use_defs---------------------------
// Extend the packset by visiting operand definitions of nodes in pack p
bool SuperWord::follow_use_defs(Node_List* p) {
  assert(p->size() == 2, "just checking");
  Node* s1 = p->at(0);
  Node* s2 = p->at(1);
  assert(s1->req() == s2->req(), "just checking");
  assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");

  // Loads have no packable operand definitions.
  if (s1->is_Load()) return false;

  int align = alignment(s1);
  NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: s1 %d, align %d", s1->_idx, align);)
  bool changed = false;
  // For stores, only the stored value is a candidate; otherwise all inputs.
  int start = s1->is_Store() ? MemNode::ValueIn : 1;
  int end   = s1->is_Store() ? MemNode::ValueIn+1 : s1->req();
  for (int j = start; j < end; j++) {
    Node* t1 = s1->in(j);
    Node* t2 = s2->in(j);
    if (!in_bb(t1) || !in_bb(t2))
      continue;
    if (stmts_can_pack(t1, t2, align)) {
      // Pair the defs only when the estimated cost savings are non-negative.
      if (est_savings(t1, t2) >= 0) {
        Node_List* pair = new Node_List();
        pair->push(t1);
        pair->push(t2);
        _packset.append(pair);
        NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_use_defs: set_alignment(%d, %d, %d)", t1->_idx, t2->_idx, align);)
        set_alignment(t1, t2, align);
        changed = true;
      }
    }
  }
  return changed;
}
duke@1 1418
duke@1 1419 //------------------------------follow_def_uses---------------------------
duke@1 1420 // Extend the packset by visiting uses of nodes in pack p
duke@1 1421 bool SuperWord::follow_def_uses(Node_List* p) {
duke@1 1422 bool changed = false;
duke@1 1423 Node* s1 = p->at(0);
duke@1 1424 Node* s2 = p->at(1);
duke@1 1425 assert(p->size() == 2, "just checking");
duke@1 1426 assert(s1->req() == s2->req(), "just checking");
duke@1 1427 assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
duke@1 1428
duke@1 1429 if (s1->is_Store()) return false;
duke@1 1430
duke@1 1431 int align = alignment(s1);
iveresov@33469 1432 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: s1 %d, align %d", s1->_idx, align);)
duke@1 1433 int savings = -1;
kvn@30211 1434 int num_s1_uses = 0;
duke@1 1435 Node* u1 = NULL;
duke@1 1436 Node* u2 = NULL;
duke@1 1437 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
duke@1 1438 Node* t1 = s1->fast_out(i);
kvn@30211 1439 num_s1_uses++;
duke@1 1440 if (!in_bb(t1)) continue;
duke@1 1441 for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
duke@1 1442 Node* t2 = s2->fast_out(j);
duke@1 1443 if (!in_bb(t2)) continue;
roland@48145 1444 if (t2->Opcode() == Op_AddI && t2 == _lp->as_CountedLoop()->incr()) continue; // don't mess with the iv
duke@1 1445 if (!opnd_positions_match(s1, t1, s2, t2))
duke@1 1446 continue;
duke@1 1447 if (stmts_can_pack(t1, t2, align)) {
duke@1 1448 int my_savings = est_savings(t1, t2);
duke@1 1449 if (my_savings > savings) {
duke@1 1450 savings = my_savings;
duke@1 1451 u1 = t1;
duke@1 1452 u2 = t2;
duke@1 1453 }
duke@1 1454 }
duke@1 1455 }
duke@1 1456 }
kvn@30211 1457 if (num_s1_uses > 1) {
kvn@30211 1458 _race_possible = true;
kvn@30211 1459 }
duke@1 1460 if (savings >= 0) {
duke@1 1461 Node_List* pair = new Node_List();
duke@1 1462 pair->push(u1);
duke@1 1463 pair->push(u2);
duke@1 1464 _packset.append(pair);
iveresov@33469 1465 NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SuperWord::follow_def_uses: set_alignment(%d, %d, %d)", u1->_idx, u2->_idx, align);)
duke@1 1466 set_alignment(u1, u2, align);
duke@1 1467 changed = true;
duke@1 1468 }
duke@1 1469 return changed;
duke@1 1470 }
duke@1 1471
kvn@30211 1472 //------------------------------order_def_uses---------------------------
kvn@30211 1473 // For extended packsets, ordinally arrange uses packset by major component
kvn@30211 1474 void SuperWord::order_def_uses(Node_List* p) {
kvn@30211 1475 Node* s1 = p->at(0);
kvn@30211 1476
kvn@30211 1477 if (s1->is_Store()) return;
kvn@30211 1478
kvn@30211 1479 // reductions are always managed beforehand
kvn@30211 1480 if (s1->is_reduction()) return;
kvn@30211 1481
kvn@30211 1482 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
kvn@30211 1483 Node* t1 = s1->fast_out(i);
kvn@30211 1484
kvn@30211 1485 // Only allow operand swap on commuting operations
vdeshpande@53336 1486 if (!t1->is_Add() && !t1->is_Mul() && !VectorNode::is_muladds2i(t1)) {
kvn@30211 1487 break;
kvn@30211 1488 }
kvn@30211 1489
kvn@30211 1490 // Now find t1's packset
kvn@30211 1491 Node_List* p2 = NULL;
kvn@30211 1492 for (int j = 0; j < _packset.length(); j++) {
kvn@30211 1493 p2 = _packset.at(j);
kvn@30211 1494 Node* first = p2->at(0);
kvn@30211 1495 if (t1 == first) {
kvn@30211 1496 break;
kvn@30211 1497 }
kvn@30211 1498 p2 = NULL;
kvn@30211 1499 }
kvn@30211 1500 // Arrange all sub components by the major component
kvn@30211 1501 if (p2 != NULL) {
kvn@30211 1502 for (uint j = 1; j < p->size(); j++) {
kvn@30211 1503 Node* d1 = p->at(j);
kvn@30211 1504 Node* u1 = p2->at(j);
kvn@30211 1505 opnd_positions_match(s1, t1, d1, u1);
kvn@30211 1506 }
kvn@30211 1507 }
kvn@30211 1508 }
kvn@30211 1509 }
kvn@30211 1510
duke@1 1511 //---------------------------opnd_positions_match-------------------------
duke@1 1512 // Is the use of d1 in u1 at the same operand position as d2 in u2?
duke@1 1513 bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
kvn@30211 1514 // check reductions to see if they are marshalled to represent the reduction
kvn@30211 1515 // operator in a specified opnd
kvn@30211 1516 if (u1->is_reduction() && u2->is_reduction()) {
kvn@30211 1517 // ensure reductions have phis and reduction definitions feeding the 1st operand
kvn@30211 1518 Node* first = u1->in(2);
kvn@30211 1519 if (first->is_Phi() || first->is_reduction()) {
kvn@30211 1520 u1->swap_edges(1, 2);
kvn@30211 1521 }
kvn@30211 1522 // ensure reductions have phis and reduction definitions feeding the 1st operand
kvn@30211 1523 first = u2->in(2);
kvn@30211 1524 if (first->is_Phi() || first->is_reduction()) {
kvn@30211 1525 u2->swap_edges(1, 2);
kvn@30211 1526 }
kvn@30211 1527 return true;
kvn@30211 1528 }
kvn@30211 1529
duke@1 1530 uint ct = u1->req();
duke@1 1531 if (ct != u2->req()) return false;
duke@1 1532 uint i1 = 0;
duke@1 1533 uint i2 = 0;
duke@1 1534 do {
duke@1 1535 for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
duke@1 1536 for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
duke@1 1537 if (i1 != i2) {
kvn@13104 1538 if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
kvn@13104 1539 // Further analysis relies on operands position matching.
kvn@13104 1540 u2->swap_edges(i1, i2);
vdeshpande@53336 1541 } else if (VectorNode::is_muladds2i(u2) && u1 != u2) {
vdeshpande@53336 1542 if (i1 == 5 - i2) { // ((i1 == 3 && i2 == 2) || (i1 == 2 && i2 == 3) || (i1 == 1 && i2 == 4) || (i1 == 4 && i2 == 1))
vdeshpande@53336 1543 u2->swap_edges(1, 2);
vdeshpande@53336 1544 u2->swap_edges(3, 4);
vdeshpande@53336 1545 }
vdeshpande@53336 1546 if (i1 == 3 - i2 || i1 == 7 - i2) { // ((i1 == 1 && i2 == 2) || (i1 == 2 && i2 == 1) || (i1 == 3 && i2 == 4) || (i1 == 4 && i2 == 3))
vdeshpande@53336 1547 u2->swap_edges(2, 3);
vdeshpande@53336 1548 u2->swap_edges(1, 4);
vdeshpande@53336 1549 }
vdeshpande@53336 1550 return false; // Just swap the edges, the muladds2i nodes get packed in follow_use_defs
kvn@13104 1551 } else {
kvn@13104 1552 return false;
kvn@13104 1553 }
vdeshpande@53336 1554 } else if (i1 == i2 && VectorNode::is_muladds2i(u2) && u1 != u2) {
vdeshpande@53336 1555 u2->swap_edges(1, 3);
vdeshpande@53336 1556 u2->swap_edges(2, 4);
vdeshpande@53336 1557 return false; // Just swap the edges, the muladds2i nodes get packed in follow_use_defs
duke@1 1558 }
duke@1 1559 } while (i1 < ct);
duke@1 1560 return true;
duke@1 1561 }
duke@1 1562
duke@1 1563 //------------------------------est_savings---------------------------
duke@1 1564 // Estimate the savings from executing s1 and s2 as a pack
duke@1 1565 int SuperWord::est_savings(Node* s1, Node* s2) {
kvn@13104 1566 int save_in = 2 - 1; // 2 operations per instruction in packed form
duke@1 1567
duke@1 1568 // inputs
duke@1 1569 for (uint i = 1; i < s1->req(); i++) {
duke@1 1570 Node* x1 = s1->in(i);
duke@1 1571 Node* x2 = s2->in(i);
duke@1 1572 if (x1 != x2) {
duke@1 1573 if (are_adjacent_refs(x1, x2)) {
kvn@13104 1574 save_in += adjacent_profit(x1, x2);
duke@1 1575 } else if (!in_packset(x1, x2)) {
kvn@13104 1576 save_in -= pack_cost(2);
duke@1 1577 } else {
kvn@13104 1578 save_in += unpack_cost(2);
duke@1 1579 }
duke@1 1580 }
duke@1 1581 }
duke@1 1582
duke@1 1583 // uses of result
duke@1 1584 uint ct = 0;
kvn@13104 1585 int save_use = 0;
duke@1 1586 for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
duke@1 1587 Node* s1_use = s1->fast_out(i);
duke@1 1588 for (int j = 0; j < _packset.length(); j++) {
duke@1 1589 Node_List* p = _packset.at(j);
duke@1 1590 if (p->at(0) == s1_use) {
duke@1 1591 for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) {
duke@1 1592 Node* s2_use = s2->fast_out(k);
duke@1 1593 if (p->at(p->size()-1) == s2_use) {
duke@1 1594 ct++;
duke@1 1595 if (are_adjacent_refs(s1_use, s2_use)) {
kvn@13104 1596 save_use += adjacent_profit(s1_use, s2_use);
duke@1 1597 }
duke@1 1598 }
duke@1 1599 }
duke@1 1600 }
duke@1 1601 }
duke@1 1602 }
duke@1 1603
kvn@13104 1604 if (ct < s1->outcnt()) save_use += unpack_cost(1);
kvn@13104 1605 if (ct < s2->outcnt()) save_use += unpack_cost(1);
duke@1 1606
kvn@13104 1607 return MAX2(save_in, save_use);
duke@1 1608 }
duke@1 1609
duke@1 1610 //------------------------------costs---------------------------
duke@1 1611 int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; }
duke@1 1612 int SuperWord::pack_cost(int ct) { return ct; }
duke@1 1613 int SuperWord::unpack_cost(int ct) { return ct; }
duke@1 1614
duke@1 1615 //------------------------------combine_packs---------------------------
duke@1 1616 // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
duke@1 1617 void SuperWord::combine_packs() {
kvn@13104 1618 bool changed = true;
kvn@13104 1619 // Combine packs regardless max vector size.
kvn@13104 1620 while (changed) {
duke@1 1621 changed = false;
duke@1 1622 for (int i = 0; i < _packset.length(); i++) {
duke@1 1623 Node_List* p1 = _packset.at(i);
duke@1 1624 if (p1 == NULL) continue;
kvn@30211 1625 // Because of sorting we can start at i + 1
kvn@30211 1626 for (int j = i + 1; j < _packset.length(); j++) {
duke@1 1627 Node_List* p2 = _packset.at(j);
duke@1 1628 if (p2 == NULL) continue;
kvn@13104 1629 if (i == j) continue;
duke@1 1630 if (p1->at(p1->size()-1) == p2->at(0)) {
duke@1 1631 for (uint k = 1; k < p2->size(); k++) {
duke@1 1632 p1->push(p2->at(k));
duke@1 1633 }
duke@1 1634 _packset.at_put(j, NULL);
duke@1 1635 changed = true;
duke@1 1636 }
duke@1 1637 }
duke@1 1638 }
kvn@13104 1639 }
duke@1 1640
kvn@13104 1641 // Split packs which have size greater then max vector size.
kvn@13104 1642 for (int i = 0; i < _packset.length(); i++) {
kvn@13104 1643 Node_List* p1 = _packset.at(i);
kvn@13104 1644 if (p1 != NULL) {
kvn@13104 1645 BasicType bt = velt_basic_type(p1->at(0));
kvn@13104 1646 uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector
kvn@13104 1647 assert(is_power_of_2(max_vlen), "sanity");
kvn@13104 1648 uint psize = p1->size();
kvn@13104 1649 if (!is_power_of_2(psize)) {
kvn@13104 1650 // Skip pack which can't be vector.
kvn@13104 1651 // case1: for(...) { a[i] = i; } elements values are different (i+x)
kvn@13104 1652 // case2: for(...) { a[i] = b[i+1]; } can't align both, load and store
kvn@13104 1653 _packset.at_put(i, NULL);
kvn@13104 1654 continue;
kvn@13104 1655 }
kvn@13104 1656 if (psize > max_vlen) {
kvn@13104 1657 Node_List* pack = new Node_List();
kvn@13104 1658 for (uint j = 0; j < psize; j++) {
kvn@13104 1659 pack->push(p1->at(j));
kvn@13104 1660 if (pack->size() >= max_vlen) {
kvn@13104 1661 assert(is_power_of_2(pack->size()), "sanity");
kvn@13104 1662 _packset.append(pack);
kvn@13104 1663 pack = new Node_List();
kvn@13104 1664 }
kvn@13104 1665 }
kvn@13104 1666 _packset.at_put(i, NULL);
kvn@13104 1667 }
kvn@13104 1668 }
kvn@13104 1669 }
kvn@13104 1670
kvn@13104 1671 // Compress list.
duke@1 1672 for (int i = _packset.length() - 1; i >= 0; i--) {
duke@1 1673 Node_List* p1 = _packset.at(i);
duke@1 1674 if (p1 == NULL) {
duke@1 1675 _packset.remove_at(i);
duke@1 1676 }
duke@1 1677 }
duke@1 1678
duke@1 1679 if (TraceSuperWord) {
duke@1 1680 tty->print_cr("\nAfter combine_packs");
duke@1 1681 print_packset();
duke@1 1682 }
duke@1 1683 }
duke@1 1684
duke@1 1685 //-----------------------------construct_my_pack_map--------------------------
duke@1 1686 // Construct the map from nodes to packs. Only valid after the
duke@1 1687 // point where a node is only in one pack (after combine_packs).
duke@1 1688 void SuperWord::construct_my_pack_map() {
duke@1 1689 Node_List* rslt = NULL;
duke@1 1690 for (int i = 0; i < _packset.length(); i++) {
duke@1 1691 Node_List* p = _packset.at(i);
duke@1 1692 for (uint j = 0; j < p->size(); j++) {
duke@1 1693 Node* s = p->at(j);
duke@1 1694 assert(my_pack(s) == NULL, "only in one pack");
duke@1 1695 set_my_pack(s, p);
duke@1 1696 }
duke@1 1697 }
duke@1 1698 }
duke@1 1699
duke@1 1700 //------------------------------filter_packs---------------------------
duke@1 1701 // Remove packs that are not implemented or not profitable.
duke@1 1702 void SuperWord::filter_packs() {
duke@1 1703 // Remove packs that are not implemented
duke@1 1704 for (int i = _packset.length() - 1; i >= 0; i--) {
duke@1 1705 Node_List* pk = _packset.at(i);
duke@1 1706 bool impl = implemented(pk);
duke@1 1707 if (!impl) {
duke@1 1708 #ifndef PRODUCT
duke@1 1709 if (TraceSuperWord && Verbose) {
duke@1 1710 tty->print_cr("Unimplemented");
duke@1 1711 pk->at(0)->dump();
duke@1 1712 }
duke@1 1713 #endif
duke@1 1714 remove_pack_at(i);
duke@1 1715 }
kvn@30588 1716 Node *n = pk->at(0);
kvn@30588 1717 if (n->is_reduction()) {
kvn@30588 1718 _num_reductions++;
kvn@30588 1719 } else {
kvn@30588 1720 _num_work_vecs++;
kvn@30588 1721 }
duke@1 1722 }
duke@1 1723
duke@1 1724 // Remove packs that are not profitable
duke@1 1725 bool changed;
duke@1 1726 do {
duke@1 1727 changed = false;
duke@1 1728 for (int i = _packset.length() - 1; i >= 0; i--) {
duke@1 1729 Node_List* pk = _packset.at(i);
duke@1 1730 bool prof = profitable(pk);
duke@1 1731 if (!prof) {
duke@1 1732 #ifndef PRODUCT
duke@1 1733 if (TraceSuperWord && Verbose) {
duke@1 1734 tty->print_cr("Unprofitable");
duke@1 1735 pk->at(0)->dump();
duke@1 1736 }
duke@1 1737 #endif
duke@1 1738 remove_pack_at(i);
duke@1 1739 changed = true;
duke@1 1740 }
duke@1 1741 }
duke@1 1742 } while (changed);
duke@1 1743
duke@1 1744 #ifndef PRODUCT
duke@1 1745 if (TraceSuperWord) {
duke@1 1746 tty->print_cr("\nAfter filter_packs");
duke@1 1747 print_packset();
duke@1 1748 tty->cr();
duke@1 1749 }
duke@1 1750 #endif
duke@1 1751 }
duke@1 1752
iveresov@33469 1753 //------------------------------merge_packs_to_cmovd---------------------------
iveresov@33469 1754 // Merge CMoveD into new vector-nodes
iveresov@33469 1755 // We want to catch this pattern and subsume CmpD and Bool into CMoveD
iveresov@33469 1756 //
iveresov@33469 1757 // SubD ConD
iveresov@33469 1758 // / | /
iveresov@33469 1759 // / | / /
iveresov@33469 1760 // / | / /
iveresov@33469 1761 // / | / /
iveresov@33469 1762 // / / /
iveresov@33469 1763 // / / | /
iveresov@33469 1764 // v / | /
iveresov@33469 1765 // CmpD | /
iveresov@33469 1766 // | | /
iveresov@33469 1767 // v | /
iveresov@33469 1768 // Bool | /
iveresov@33469 1769 // \ | /
iveresov@33469 1770 // \ | /
iveresov@33469 1771 // \ | /
iveresov@33469 1772 // \ | /
iveresov@33469 1773 // \ v /
iveresov@33469 1774 // CMoveD
iveresov@33469 1775 //
iveresov@33469 1776
iveresov@33469 1777 void SuperWord::merge_packs_to_cmovd() {
iveresov@33469 1778 for (int i = _packset.length() - 1; i >= 0; i--) {
iveresov@33469 1779 _cmovev_kit.make_cmovevd_pack(_packset.at(i));
iveresov@33469 1780 }
iveresov@33469 1781 #ifndef PRODUCT
iveresov@33469 1782 if (TraceSuperWord) {
iveresov@33469 1783 tty->print_cr("\nSuperWord::merge_packs_to_cmovd(): After merge");
iveresov@33469 1784 print_packset();
iveresov@33469 1785 tty->cr();
iveresov@33469 1786 }
iveresov@33469 1787 #endif
iveresov@33469 1788 }
iveresov@33469 1789
iveresov@33469 1790 Node* CMoveKit::is_Bool_candidate(Node* def) const {
iveresov@33469 1791 Node* use = NULL;
iveresov@33469 1792 if (!def->is_Bool() || def->in(0) != NULL || def->outcnt() != 1) {
iveresov@33469 1793 return NULL;
iveresov@33469 1794 }
iveresov@33469 1795 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
iveresov@33469 1796 use = def->fast_out(j);
iveresov@33469 1797 if (!_sw->same_generation(def, use) || !use->is_CMove()) {
iveresov@33469 1798 return NULL;
iveresov@33469 1799 }
iveresov@33469 1800 }
iveresov@33469 1801 return use;
iveresov@33469 1802 }
iveresov@33469 1803
iveresov@33469 1804 Node* CMoveKit::is_CmpD_candidate(Node* def) const {
iveresov@33469 1805 Node* use = NULL;
iveresov@33469 1806 if (!def->is_Cmp() || def->in(0) != NULL || def->outcnt() != 1) {
iveresov@33469 1807 return NULL;
iveresov@33469 1808 }
iveresov@33469 1809 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
iveresov@33469 1810 use = def->fast_out(j);
iveresov@33469 1811 if (!_sw->same_generation(def, use) || (use = is_Bool_candidate(use)) == NULL || !_sw->same_generation(def, use)) {
iveresov@33469 1812 return NULL;
iveresov@33469 1813 }
iveresov@33469 1814 }
iveresov@33469 1815 return use;
iveresov@33469 1816 }
iveresov@33469 1817
iveresov@33469 1818 Node_List* CMoveKit::make_cmovevd_pack(Node_List* cmovd_pk) {
iveresov@33469 1819 Node *cmovd = cmovd_pk->at(0);
iveresov@33469 1820 if (!cmovd->is_CMove()) {
iveresov@33469 1821 return NULL;
iveresov@33469 1822 }
kvn@48309 1823 if (cmovd->Opcode() != Op_CMoveF && cmovd->Opcode() != Op_CMoveD) {
kvn@48309 1824 return NULL;
kvn@48309 1825 }
iveresov@33469 1826 if (pack(cmovd) != NULL) { // already in the cmov pack
iveresov@33469 1827 return NULL;
iveresov@33469 1828 }
iveresov@33469 1829 if (cmovd->in(0) != NULL) {
iveresov@33469 1830 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CMoveD %d has control flow, escaping...", cmovd->_idx); cmovd->dump();})
iveresov@33469 1831 return NULL;
iveresov@33469 1832 }
iveresov@33469 1833
iveresov@33469 1834 Node* bol = cmovd->as_CMove()->in(CMoveNode::Condition);
iveresov@33469 1835 if (!bol->is_Bool()
iveresov@33469 1836 || bol->outcnt() != 1
iveresov@33469 1837 || !_sw->same_generation(bol, cmovd)
iveresov@33469 1838 || bol->in(0) != NULL // BoolNode has control flow!!
iveresov@33469 1839 || _sw->my_pack(bol) == NULL) {
iveresov@33469 1840 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: Bool %d does not fit CMoveD %d for building vector, escaping...", bol->_idx, cmovd->_idx); bol->dump();})
iveresov@33469 1841 return NULL;
iveresov@33469 1842 }
iveresov@33469 1843 Node_List* bool_pk = _sw->my_pack(bol);
iveresov@33469 1844 if (bool_pk->size() != cmovd_pk->size() ) {
iveresov@33469 1845 return NULL;
iveresov@33469 1846 }
iveresov@33469 1847
iveresov@33469 1848 Node* cmpd = bol->in(1);
iveresov@33469 1849 if (!cmpd->is_Cmp()
iveresov@33469 1850 || cmpd->outcnt() != 1
iveresov@33469 1851 || !_sw->same_generation(cmpd, cmovd)
iveresov@33469 1852 || cmpd->in(0) != NULL // CmpDNode has control flow!!
iveresov@33469 1853 || _sw->my_pack(cmpd) == NULL) {
iveresov@33469 1854 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: CmpD %d does not fit CMoveD %d for building vector, escaping...", cmpd->_idx, cmovd->_idx); cmpd->dump();})
iveresov@33469 1855 return NULL;
iveresov@33469 1856 }
iveresov@33469 1857 Node_List* cmpd_pk = _sw->my_pack(cmpd);
iveresov@33469 1858 if (cmpd_pk->size() != cmovd_pk->size() ) {
iveresov@33469 1859 return NULL;
iveresov@33469 1860 }
iveresov@33469 1861
iveresov@33469 1862 if (!test_cmpd_pack(cmpd_pk, cmovd_pk)) {
iveresov@33469 1863 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print("CMoveKit::make_cmovevd_pack: cmpd pack for CmpD %d failed vectorization test", cmpd->_idx); cmpd->dump();})
iveresov@33469 1864 return NULL;
iveresov@33469 1865 }
iveresov@33469 1866
iveresov@33469 1867 Node_List* new_cmpd_pk = new Node_List();
iveresov@33469 1868 uint sz = cmovd_pk->size() - 1;
iveresov@33469 1869 for (uint i = 0; i <= sz; ++i) {
iveresov@33469 1870 Node* cmov = cmovd_pk->at(i);
iveresov@33469 1871 Node* bol = bool_pk->at(i);
iveresov@33469 1872 Node* cmp = cmpd_pk->at(i);
iveresov@33469 1873
iveresov@33469 1874 new_cmpd_pk->insert(i, cmov);
iveresov@33469 1875
iveresov@33469 1876 map(cmov, new_cmpd_pk);
iveresov@33469 1877 map(bol, new_cmpd_pk);
iveresov@33469 1878 map(cmp, new_cmpd_pk);
iveresov@33469 1879
iveresov@33469 1880 _sw->set_my_pack(cmov, new_cmpd_pk); // and keep old packs for cmp and bool
iveresov@33469 1881 }
iveresov@33469 1882 _sw->_packset.remove(cmovd_pk);
iveresov@33469 1883 _sw->_packset.remove(bool_pk);
iveresov@33469 1884 _sw->_packset.remove(cmpd_pk);
iveresov@33469 1885 _sw->_packset.append(new_cmpd_pk);
iveresov@33469 1886 NOT_PRODUCT(if(_sw->is_trace_cmov()) {tty->print_cr("CMoveKit::make_cmovevd_pack: added syntactic CMoveD pack"); _sw->print_pack(new_cmpd_pk);})
iveresov@33469 1887 return new_cmpd_pk;
iveresov@33469 1888 }
iveresov@33469 1889
iveresov@33469 1890 bool CMoveKit::test_cmpd_pack(Node_List* cmpd_pk, Node_List* cmovd_pk) {
iveresov@33469 1891 Node* cmpd0 = cmpd_pk->at(0);
iveresov@33469 1892 assert(cmpd0->is_Cmp(), "CMoveKit::test_cmpd_pack: should be CmpDNode");
iveresov@33469 1893 assert(cmovd_pk->at(0)->is_CMove(), "CMoveKit::test_cmpd_pack: should be CMoveD");
iveresov@33469 1894 assert(cmpd_pk->size() == cmovd_pk->size(), "CMoveKit::test_cmpd_pack: should be same size");
iveresov@33469 1895 Node* in1 = cmpd0->in(1);
iveresov@33469 1896 Node* in2 = cmpd0->in(2);
iveresov@33469 1897 Node_List* in1_pk = _sw->my_pack(in1);
iveresov@33469 1898 Node_List* in2_pk = _sw->my_pack(in2);
iveresov@33469 1899
jwilhelm@46630 1900 if ( (in1_pk != NULL && in1_pk->size() != cmpd_pk->size())
jwilhelm@46630 1901 || (in2_pk != NULL && in2_pk->size() != cmpd_pk->size()) ) {
iveresov@33469 1902 return false;
iveresov@33469 1903 }
iveresov@33469 1904
iveresov@33469 1905 // test if "all" in1 are in the same pack or the same node
iveresov@33469 1906 if (in1_pk == NULL) {
iveresov@33469 1907 for (uint j = 1; j < cmpd_pk->size(); j++) {
iveresov@33469 1908 if (cmpd_pk->at(j)->in(1) != in1) {
iveresov@33469 1909 return false;
iveresov@33469 1910 }
iveresov@33469 1911 }//for: in1_pk is not pack but all CmpD nodes in the pack have the same in(1)
iveresov@33469 1912 }
iveresov@33469 1913 // test if "all" in2 are in the same pack or the same node
iveresov@33469 1914 if (in2_pk == NULL) {
iveresov@33469 1915 for (uint j = 1; j < cmpd_pk->size(); j++) {
iveresov@33469 1916 if (cmpd_pk->at(j)->in(2) != in2) {
iveresov@33469 1917 return false;
iveresov@33469 1918 }
iveresov@33469 1919 }//for: in2_pk is not pack but all CmpD nodes in the pack have the same in(2)
iveresov@33469 1920 }
iveresov@33469 1921 //now check if cmpd_pk may be subsumed in vector built for cmovd_pk
iveresov@33469 1922 int cmovd_ind1, cmovd_ind2;
iveresov@33469 1923 if (cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse)
iveresov@33469 1924 && cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) {
iveresov@33469 1925 cmovd_ind1 = CMoveNode::IfFalse;
iveresov@33469 1926 cmovd_ind2 = CMoveNode::IfTrue;
iveresov@33469 1927 } else if (cmpd_pk->at(0)->in(2) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfFalse)
iveresov@33469 1928 && cmpd_pk->at(0)->in(1) == cmovd_pk->at(0)->as_CMove()->in(CMoveNode::IfTrue)) {
iveresov@33469 1929 cmovd_ind2 = CMoveNode::IfFalse;
iveresov@33469 1930 cmovd_ind1 = CMoveNode::IfTrue;
iveresov@33469 1931 }
iveresov@33469 1932 else {
iveresov@33469 1933 return false;
iveresov@33469 1934 }
iveresov@33469 1935
iveresov@33469 1936 for (uint j = 1; j < cmpd_pk->size(); j++) {
iveresov@33469 1937 if (cmpd_pk->at(j)->in(1) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind1)
iveresov@33469 1938 || cmpd_pk->at(j)->in(2) != cmovd_pk->at(j)->as_CMove()->in(cmovd_ind2)) {
iveresov@33469 1939 return false;
iveresov@33469 1940 }//if
iveresov@33469 1941 }
iveresov@33469 1942 NOT_PRODUCT(if(_sw->is_trace_cmov()) { tty->print("CMoveKit::test_cmpd_pack: cmpd pack for 1st CmpD %d is OK for vectorization: ", cmpd0->_idx); cmpd0->dump(); })
iveresov@33469 1943 return true;
iveresov@33469 1944 }
iveresov@33469 1945
duke@1 1946 //------------------------------implemented---------------------------
duke@1 1947 // Can code be generated for pack p?
duke@1 1948 bool SuperWord::implemented(Node_List* p) {
kvn@30211 1949 bool retValue = false;
duke@1 1950 Node* p0 = p->at(0);
kvn@30211 1951 if (p0 != NULL) {
kvn@30211 1952 int opc = p0->Opcode();
kvn@30211 1953 uint size = p->size();
kvn@30211 1954 if (p0->is_reduction()) {
kvn@30211 1955 const Type *arith_type = p0->bottom_type();
kvn@30588 1956 // Length 2 reductions of INT/LONG do not offer performance benefits
kvn@30588 1957 if (((arith_type->basic_type() == T_INT) || (arith_type->basic_type() == T_LONG)) && (size == 2)) {
kvn@30588 1958 retValue = false;
kvn@30588 1959 } else {
kvn@30588 1960 retValue = ReductionNode::implemented(opc, size, arith_type->basic_type());
kvn@30588 1961 }
kvn@30211 1962 } else {
kvn@30211 1963 retValue = VectorNode::implemented(opc, size, velt_basic_type(p0));
kvn@30211 1964 }
iveresov@33469 1965 if (!retValue) {
iveresov@33469 1966 if (is_cmov_pack(p)) {
iveresov@33469 1967 NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmpd pack"); print_pack(p);})
iveresov@33469 1968 return true;
iveresov@33469 1969 }
iveresov@33469 1970 }
kvn@30211 1971 }
kvn@30211 1972 return retValue;
kvn@13490 1973 }
kvn@13490 1974
iveresov@33469 1975 bool SuperWord::is_cmov_pack(Node_List* p) {
iveresov@33469 1976 return _cmovev_kit.pack(p->at(0)) != NULL;
iveresov@33469 1977 }
kvn@13490 1978 //------------------------------same_inputs--------------------------
kvn@13490 1979 // For pack p, are all idx operands the same?
iveresov@33469 1980 bool SuperWord::same_inputs(Node_List* p, int idx) {
kvn@13490 1981 Node* p0 = p->at(0);
kvn@13490 1982 uint vlen = p->size();
kvn@13490 1983 Node* p0_def = p0->in(idx);
kvn@13490 1984 for (uint i = 1; i < vlen; i++) {
kvn@13490 1985 Node* pi = p->at(i);
kvn@13490 1986 Node* pi_def = pi->in(idx);
iveresov@33469 1987 if (p0_def != pi_def) {
kvn@13490 1988 return false;
iveresov@33469 1989 }
kvn@13488 1990 }
kvn@13490 1991 return true;
duke@1 1992 }
duke@1 1993
duke@1 1994 //------------------------------profitable---------------------------
duke@1 1995 // For pack p, are all operands and all uses (with in the block) vector?
duke@1 1996 bool SuperWord::profitable(Node_List* p) {
duke@1 1997 Node* p0 = p->at(0);
duke@1 1998 uint start, end;
kvn@13490 1999 VectorNode::vector_operands(p0, &start, &end);
duke@1 2000
kvn@13894 2001 // Return false if some inputs are not vectors or vectors with different
kvn@13894 2002 // size or alignment.
kvn@13894 2003 // Also, for now, return false if not scalar promotion case when inputs are
kvn@13894 2004 // the same. Later, implement PackNode and allow differing, non-vector inputs
kvn@13894 2005 // (maybe just the ones from outside the block.)
duke@1 2006 for (uint i = start; i < end; i++) {
iveresov@33469 2007 if (!is_vector_use(p0, i)) {
kvn@13894 2008 return false;
iveresov@33469 2009 }
duke@1 2010 }
kvn@30211 2011 // Check if reductions are connected
kvn@30211 2012 if (p0->is_reduction()) {
kvn@30211 2013 Node* second_in = p0->in(2);
kvn@30211 2014 Node_List* second_pk = my_pack(second_in);
kvn@30588 2015 if ((second_pk == NULL) || (_num_work_vecs == _num_reductions)) {
kvn@30588 2016 // Remove reduction flag if no parent pack or if not enough work
kvn@30588 2017 // to cover reduction expansion overhead
kvn@30211 2018 p0->remove_flag(Node::Flag_is_reduction);
kvn@30211 2019 return false;
kvn@30211 2020 } else if (second_pk->size() != p->size()) {
kvn@30211 2021 return false;
kvn@30211 2022 }
kvn@30211 2023 }
kvn@13490 2024 if (VectorNode::is_shift(p0)) {
kvn@13894 2025 // For now, return false if shift count is vector or not scalar promotion
kvn@13894 2026 // case (different shift counts) because it is not supported yet.
kvn@13894 2027 Node* cnt = p0->in(2);
kvn@13894 2028 Node_List* cnt_pk = my_pack(cnt);
kvn@13894 2029 if (cnt_pk != NULL)
kvn@13490 2030 return false;
kvn@13490 2031 if (!same_inputs(p, 2))
kvn@13490 2032 return false;
kvn@13490 2033 }
duke@1 2034 if (!p0->is_Store()) {
duke@1 2035 // For now, return false if not all uses are vector.
duke@1 2036 // Later, implement ExtractNode and allow non-vector uses (maybe
duke@1 2037 // just the ones outside the block.)
duke@1 2038 for (uint i = 0; i < p->size(); i++) {
duke@1 2039 Node* def = p->at(i);
iveresov@33469 2040 if (is_cmov_pack_internal_node(p, def)) {
iveresov@33469 2041 continue;
iveresov@33469 2042 }
duke@1 2043 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
duke@1 2044 Node* use = def->fast_out(j);
duke@1 2045 for (uint k = 0; k < use->req(); k++) {
duke@1 2046 Node* n = use->in(k);
duke@1 2047 if (def == n) {
roland@49870 2048 // reductions should only have a Phi use at the the loop
roland@49870 2049 // head and out of loop uses
roland@49870 2050 if (def->is_reduction() &&
roland@49870 2051 ((use->is_Phi() && use->in(0) == _lpt->_head) ||
roland@49870 2052 !_lpt->is_member(_phase->get_loop(_phase->ctrl_or_self(use))))) {
roland@49870 2053 assert(i == p->size()-1, "must be last element of the pack");
kvn@30211 2054 continue;
roland@49870 2055 }
duke@1 2056 if (!is_vector_use(use, k)) {
duke@1 2057 return false;
duke@1 2058 }
duke@1 2059 }
duke@1 2060 }
duke@1 2061 }
duke@1 2062 }
duke@1 2063 }
duke@1 2064 return true;
duke@1 2065 }
duke@1 2066
duke@1 2067 //------------------------------schedule---------------------------
duke@1 2068 // Adjust the memory graph for the packed operations
duke@1 2069 void SuperWord::schedule() {
duke@1 2070
duke@1 2071 // Co-locate in the memory graph the members of each memory pack
duke@1 2072 for (int i = 0; i < _packset.length(); i++) {
duke@1 2073 co_locate_pack(_packset.at(i));
duke@1 2074 }
duke@1 2075 }
duke@1 2076
cfang@2334 2077 //-------------------------------remove_and_insert-------------------
kvn@13104 2078 // Remove "current" from its current position in the memory graph and insert
kvn@13104 2079 // it after the appropriate insertion point (lip or uip).
cfang@2334 2080 void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
cfang@2334 2081 Node *uip, Unique_Node_List &sched_before) {
cfang@2334 2082 Node* my_mem = current->in(MemNode::Memory);
kvn@13104 2083 bool sched_up = sched_before.member(current);
cfang@2334 2084
kvn@13104 2085 // remove current_store from its current position in the memmory graph
cfang@2334 2086 for (DUIterator i = current->outs(); current->has_out(i); i++) {
cfang@2334 2087 Node* use = current->out(i);
cfang@2334 2088 if (use->is_Mem()) {
cfang@2334 2089 assert(use->in(MemNode::Memory) == current, "must be");
cfang@2334 2090 if (use == prev) { // connect prev to my_mem
kvn@13104 2091 _igvn.replace_input_of(use, MemNode::Memory, my_mem);
kvn@13104 2092 --i; //deleted this edge; rescan position
cfang@2334 2093 } else if (sched_before.member(use)) {
kvn@13104 2094 if (!sched_up) { // Will be moved together with current
kvn@13104 2095 _igvn.replace_input_of(use, MemNode::Memory, uip);
kvn@13104 2096 --i; //deleted this edge; rescan position
kvn@13104 2097 }
cfang@2334 2098 } else {
kvn@13104 2099 if (sched_up) { // Will be moved together with current
kvn@13104 2100 _igvn.replace_input_of(use, MemNode::Memory, lip);
kvn@13104 2101 --i; //deleted this edge; rescan position
kvn@13104 2102 }
cfang@2334 2103 }
cfang@2334 2104 }
cfang@2334 2105 }
cfang@2334 2106
cfang@2334 2107 Node *insert_pt = sched_up ? uip : lip;
cfang@2334 2108
cfang@2334 2109 // all uses of insert_pt's memory state should use current's instead
cfang@2334 2110 for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) {
cfang@2334 2111 Node* use = insert_pt->out(i);
cfang@2334 2112 if (use->is_Mem()) {
cfang@2334 2113 assert(use->in(MemNode::Memory) == insert_pt, "must be");
kvn@12958 2114 _igvn.replace_input_of(use, MemNode::Memory, current);
cfang@2334 2115 --i; //deleted this edge; rescan position
cfang@2334 2116 } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) {
cfang@2334 2117 uint pos; //lip (lower insert point) must be the last one in the memory slice
cfang@2334 2118 for (pos=1; pos < use->req(); pos++) {
cfang@2334 2119 if (use->in(pos) == insert_pt) break;
cfang@2334 2120 }
kvn@12958 2121 _igvn.replace_input_of(use, pos, current);
cfang@2334 2122 --i;
cfang@2334 2123 }
cfang@2334 2124 }
cfang@2334 2125
cfang@2334 2126 //connect current to insert_pt
kvn@13104 2127 _igvn.replace_input_of(current, MemNode::Memory, insert_pt);
cfang@2334 2128 }
cfang@2334 2129
//------------------------------co_locate_pack----------------------------------
// To schedule a store pack, we need to move any sandwiched memory ops either before
// or after the pack, based upon dependence information:
// (1) If any store in the pack depends on the sandwiched memory op, the
//     sandwiched memory op must be scheduled BEFORE the pack;
// (2) If a sandwiched memory op depends on any store in the pack, the
//     sandwiched memory op must be scheduled AFTER the pack;
// (3) If a sandwiched memory op (say, memA) depends on another sandwiched
//     memory op (say memB), memB must be scheduled before memA. So, if memA is
//     scheduled before the pack, memB must also be scheduled before the pack;
// (4) If there is no dependence restriction for a sandwiched memory op, we simply
//     schedule this store AFTER the pack
// (5) We know there is no dependence cycle, so there is no other case;
// (6) Finally, all memory ops in another single pack should be moved in the same direction.
//
// To schedule a load pack, we use the memory state of either the first or the last load in
// the pack, based on the dependence constraint.
void SuperWord::co_locate_pack(Node_List* pk) {
  if (pk->at(0)->is_Store()) {
    MemNode* first = executed_first(pk)->as_Mem();
    MemNode* last = executed_last(pk)->as_Mem();
    Unique_Node_List schedule_before_pack;
    Unique_Node_List memops;

    // Walk the memory slice from "last" up to "first" and collect all
    // memory ops on it (pack members and sandwiched ops) into 'memops'.
    MemNode* current   = last->in(MemNode::Memory)->as_Mem();
    MemNode* previous  = last;
    while (true) {
      assert(in_bb(current), "stay in block");
      memops.push(previous);
      for (DUIterator i = current->outs(); current->has_out(i); i++) {
        Node* use = current->out(i);
        if (use->is_Mem() && use != previous)
          memops.push(use);
      }
      if (current == first) break;
      previous = current;
      current  = current->in(MemNode::Memory)->as_Mem();
    }

    // determine which memory operations should be scheduled before the pack
    // (rules (1) and (3) above; rule (6) via my_pack partners)
    for (uint i = 1; i < memops.size(); i++) {
      Node *s1 = memops.at(i);
      if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) {
        for (uint j = 0; j< i; j++) {
          Node *s2 = memops.at(j);
          if (!independent(s1, s2)) {
            if (in_pack(s2, pk) || schedule_before_pack.member(s2)) {
              schedule_before_pack.push(s1); // s1 must be scheduled before
              Node_List* mem_pk = my_pack(s1);
              if (mem_pk != NULL) {
                for (uint ii = 0; ii < mem_pk->size(); ii++) {
                  Node* s = mem_pk->at(ii);  // follow partner
                  if (memops.member(s) && !schedule_before_pack.member(s))
                    schedule_before_pack.push(s);
                }
              }
              break;
            }
          }
        }
      }
    }

    Node*    upper_insert_pt = first->in(MemNode::Memory);
    // Following code moves loads connected to upper_insert_pt below aliased stores.
    // Collect such loads here and reconnect them back to upper_insert_pt later.
    memops.clear();
    for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) {
      Node* use = upper_insert_pt->out(i);
      if (use->is_Mem() && !use->is_Store()) {
        memops.push(use);
      }
    }

    MemNode* lower_insert_pt = last;
    previous                 = last; //previous store in pk
    current                  = last->in(MemNode::Memory)->as_Mem();

    // start scheduling from "last" to "first"
    while (true) {
      assert(in_bb(current), "stay in block");
      assert(in_pack(previous, pk), "previous stays in pack");
      Node* my_mem = current->in(MemNode::Memory);

      if (in_pack(current, pk)) {
        // Forward users of my memory state (except "previous") to my input memory state
        for (DUIterator i = current->outs(); current->has_out(i); i++) {
          Node* use = current->out(i);
          if (use->is_Mem() && use != previous) {
            assert(use->in(MemNode::Memory) == current, "must be");
            if (schedule_before_pack.member(use)) {
              _igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt);
            } else {
              _igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt);
            }
            --i; // deleted this edge; rescan position
          }
        }
        previous = current;
      } else { // !in_pack(current, pk) ==> a sandwiched store
        remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack);
      }

      if (current == first) break;
      current = my_mem->as_Mem();
    } // end while

    // Reconnect loads back to upper_insert_pt.
    for (uint i = 0; i < memops.size(); i++) {
      Node *ld = memops.at(i);
      if (ld->in(MemNode::Memory) != upper_insert_pt) {
        _igvn.replace_input_of(ld, MemNode::Memory, upper_insert_pt);
      }
    }
  } else if (pk->at(0)->is_Load()) { //load
    // all loads in the pack should have the same memory state. By default,
    // we use the memory state of the last load. However, if any load could
    // not be moved down due to the dependence constraint, we use the memory
    // state of the first load.
    Node* first_mem = pk->at(0)->in(MemNode::Memory);
    Node* last_mem = first_mem;
    // Find the earliest (first_mem) and latest (last_mem) memory states
    // among the pack members, by basic-block index.
    for (uint i = 1; i < pk->size(); i++) {
      Node* ld = pk->at(i);
      Node* mem = ld->in(MemNode::Memory);
      assert(in_bb(first_mem) || in_bb(mem) || mem == first_mem, "2 different memory state from outside the loop?");
      if (in_bb(mem)) {
        if (in_bb(first_mem) && bb_idx(mem) < bb_idx(first_mem)) {
          first_mem = mem;
        }
        if (!in_bb(last_mem) || bb_idx(mem) > bb_idx(last_mem)) {
          last_mem = mem;
        }
      }
    }
    // Check whether every load can legally be moved down to last_mem:
    // any dependent store between a load's memory state and last_mem
    // forces the use of first_mem instead.
    bool schedule_last = true;
    for (uint i = 0; i < pk->size(); i++) {
      Node* ld = pk->at(i);
      for (Node* current = last_mem; current != ld->in(MemNode::Memory);
           current=current->in(MemNode::Memory)) {
        assert(current != first_mem, "corrupted memory graph");
        if(current->is_Mem() && !independent(current, ld)){
          schedule_last = false; // a later store depends on this load
          break;
        }
      }
    }

    Node* mem_input = schedule_last ? last_mem : first_mem;
    _igvn.hash_delete(mem_input);
    // Give each load the same memory state
    for (uint i = 0; i < pk->size(); i++) {
      LoadNode* ld = pk->at(i)->as_Load();
      _igvn.replace_input_of(ld, MemNode::Memory, mem_input);
    }
  }
}
duke@1 2286
iveresov@33166 2287 #ifndef PRODUCT
iveresov@33166 2288 void SuperWord::print_loop(bool whole) {
iveresov@33166 2289 Node_Stack stack(_arena, _phase->C->unique() >> 2);
iveresov@33166 2290 Node_List rpo_list;
iveresov@33166 2291 VectorSet visited(_arena);
iveresov@33166 2292 visited.set(lpt()->_head->_idx);
iveresov@33166 2293 _phase->rpo(lpt()->_head, stack, visited, rpo_list);
iveresov@33166 2294 _phase->dump(lpt(), rpo_list.size(), rpo_list );
iveresov@33166 2295 if(whole) {
iveresov@33166 2296 tty->print_cr("\n Whole loop tree");
iveresov@33166 2297 _phase->dump();
iveresov@33166 2298 tty->print_cr(" End of whole loop tree\n");
iveresov@33166 2299 }
iveresov@33166 2300 }
iveresov@33166 2301 #endif
iveresov@33166 2302
//------------------------------output---------------------------
// Convert packs into vector node operations
// For each pack in _packset, replace the scalar nodes with a single vector
// node (LoadVector/StoreVector/VectorNode/ReductionNode/CMoveVF/CMoveVD/...),
// registered with IGVN. Bails out early (restoring the reserved loop copy
// when do_reserve_copy() is set) if any operand cannot be vectorized.
void SuperWord::output() {
  CountedLoopNode *cl = lpt()->_head->as_CountedLoop();
  Compile* C = _phase->C;
  if (_packset.length() == 0) {
    if (cl->is_main_loop()) {
      // Instigate more unrolling for optimization when vectorization fails.
      C->set_major_progress();
      cl->set_notpassed_slp();
      cl->mark_do_unroll_only();
    }
    return;
  }

#ifndef PRODUCT
  if (TraceLoopOpts) {
    tty->print("SuperWord::output ");
    lpt()->dump_head();
  }
#endif

  if (cl->is_main_loop()) {
    // MUST ENSURE main loop's initial value is properly aligned:
    //  (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0

    align_initial_loop_index(align_to_ref());

    // Insert extract (unpack) operations for scalar uses
    for (int i = 0; i < _packset.length(); i++) {
      insert_extracts(_packset.at(i));
    }
  }

  uint max_vlen_in_bytes = 0;
  uint max_vlen = 0;
  bool can_process_post_loop = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());

  NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop before create_reserve_version_of_loop"); print_loop(true);})

  // Reserve a copy of the loop so the transformation can be undone if any
  // of the "return; //and reverse to backup IG" bail-outs below is taken.
  CountedLoopReserveKit make_reversable(_phase, _lpt, do_reserve_copy());

  NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("SWPointer::output: print loop after create_reserve_version_of_loop"); print_loop(true);})

  if (do_reserve_copy() && !make_reversable.has_reserved()) {
    NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: loop was not reserved correctly, exiting SuperWord");})
    return;
  }

  for (int i = 0; i < _block.length(); i++) {
    Node* n = _block.at(i);
    Node_List* p = my_pack(n);
    // Emit the vector node at the position of the last-executed pack member.
    if (p && n == executed_last(p)) {
      uint vlen = p->size();
      uint vlen_in_bytes = 0;
      Node* vn = NULL;
      Node* low_adr = p->at(0);
      Node* first = executed_first(p);
      if (can_process_post_loop) {
        // override vlen with the main loops vector length
        vlen = cl->slp_max_unroll();
      }
      NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d executed first, %d executed last in pack", first->_idx, n->_idx); print_pack(p);})
      int   opc = n->Opcode();
      if (n->is_Load()) {
        Node* ctl = n->in(MemNode::Control);
        Node* mem = first->in(MemNode::Memory);
        SWPointer p1(n->as_Mem(), this, NULL, false);
        // Identify the memory dependency for the new loadVector node by
        // walking up through memory chain.
        // This is done to give flexibility to the new loadVector node so that
        // it can move above independent storeVector nodes.
        while (mem->is_StoreVector()) {
          SWPointer p2(mem->as_Mem(), this, NULL, false);
          int cmp = p1.cmp(p2);
          if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
            mem = mem->in(MemNode::Memory);
          } else {
            break; // dependent memory
          }
        }
        Node* adr = low_adr->in(MemNode::Address);
        const TypePtr* atyp = n->adr_type();
        vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n), control_dependency(p));
        vlen_in_bytes = vn->as_LoadVector()->memory_size();
      } else if (n->is_Store()) {
        // Promote value to be stored to vector
        Node* val = vector_opd(p, MemNode::ValueIn);
        if (val == NULL) {
          if (do_reserve_copy()) {
            NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: val should not be NULL, exiting SuperWord");})
            return; //and reverse to backup IG
          }
          ShouldNotReachHere();
        }

        Node* ctl = n->in(MemNode::Control);
        Node* mem = first->in(MemNode::Memory);
        Node* adr = low_adr->in(MemNode::Address);
        const TypePtr* atyp = n->adr_type();
        vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
        vlen_in_bytes = vn->as_StoreVector()->memory_size();
      } else if (VectorNode::is_muladds2i(n)) {
        assert(n->req() == 5u, "MulAddS2I should have 4 operands.");
        Node* in1 = vector_opd(p, 1);
        Node* in2 = vector_opd(p, 2);
        vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
        vlen_in_bytes = vn->as_Vector()->length_in_bytes();
      } else if (n->req() == 3 && !is_cmov_pack(p)) {
        // Promote operands to vector
        Node* in1 = NULL;
        bool node_isa_reduction = n->is_reduction();
        if (node_isa_reduction) {
          // the input to the first reduction operation is retained
          in1 = low_adr->in(1);
        } else {
          in1 = vector_opd(p, 1);
          if (in1 == NULL) {
            if (do_reserve_copy()) {
              NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in1 should not be NULL, exiting SuperWord");})
              return; //and reverse to backup IG
            }
            ShouldNotReachHere();
          }
        }
        Node* in2 = vector_opd(p, 2);
        if (in2 == NULL) {
          if (do_reserve_copy()) {
            NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: in2 should not be NULL, exiting SuperWord");})
            return; //and reverse to backup IG
          }
          ShouldNotReachHere();
        }
        if (VectorNode::is_invariant_vector(in1) && (node_isa_reduction == false) && (n->is_Add() || n->is_Mul())) {
          // Move invariant vector input into second position to avoid register spilling.
          Node* tmp = in1;
          in1 = in2;
          in2 = tmp;
        }
        if (node_isa_reduction) {
          const Type *arith_type = n->bottom_type();
          vn = ReductionNode::make(opc, NULL, in1, in2, arith_type->basic_type());
          if (in2->is_Load()) {
            vlen_in_bytes = in2->as_LoadVector()->memory_size();
          } else {
            vlen_in_bytes = in2->as_Vector()->length_in_bytes();
          }
        } else {
          vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
          vlen_in_bytes = vn->as_Vector()->length_in_bytes();
        }
      } else if (opc == Op_SqrtF || opc == Op_SqrtD ||
                 opc == Op_AbsF || opc == Op_AbsD ||
                 opc == Op_NegF || opc == Op_NegD ||
                 opc == Op_PopCountI) {
        // Unary operations: single vector operand.
        assert(n->req() == 2, "only one input expected");
        Node* in = vector_opd(p, 1);
        vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
        vlen_in_bytes = vn->as_Vector()->length_in_bytes();
      } else if (is_cmov_pack(p)) {
        if (can_process_post_loop) {
          // do not refactor of flow in post loop context
          return;
        }
        if (!n->is_CMove()) {
          continue;
        }
        // place here CMoveVDNode
        NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: print before CMove vectorization"); print_loop(false);})
        Node* bol = n->in(CMoveNode::Condition);
        if (!bol->is_Bool() && bol->Opcode() == Op_ExtractI && bol->req() > 1 ) {
          NOT_PRODUCT(if(is_trace_cmov()) {tty->print_cr("SWPointer::output: %d is not Bool node, trying its in(1) node %d", bol->_idx, bol->in(1)->_idx); bol->dump(); bol->in(1)->dump();})
          bol = bol->in(1); //may be ExtractNode
        }

        assert(bol->is_Bool(), "should be BoolNode - too late to bail out!");
        if (!bol->is_Bool()) {
          if (do_reserve_copy()) {
            NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: expected %d bool node, exiting SuperWord", bol->_idx); bol->dump();})
            return; //and reverse to backup IG
          }
          ShouldNotReachHere();
        }

        // Clone the Bool with the condition code embedded as a constant input,
        // so the vectorized CMove carries the comparison kind.
        int cond = (int)bol->as_Bool()->_test._test;
        Node* in_cc  = _igvn.intcon(cond);
        NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created intcon in_cc node %d", in_cc->_idx); in_cc->dump();})
        Node* cc = bol->clone();
        cc->set_req(1, in_cc);
        NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created bool cc node %d", cc->_idx); cc->dump();})

        Node* src1 = vector_opd(p, 2); //2=CMoveNode::IfFalse
        if (src1 == NULL) {
          if (do_reserve_copy()) {
            NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src1 should not be NULL, exiting SuperWord");})
            return; //and reverse to backup IG
          }
          ShouldNotReachHere();
        }
        Node* src2 = vector_opd(p, 3); //3=CMoveNode::IfTrue
        if (src2 == NULL) {
          if (do_reserve_copy()) {
            NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: src2 should not be NULL, exiting SuperWord");})
            return; //and reverse to backup IG
          }
          ShouldNotReachHere();
        }
        BasicType bt = velt_basic_type(n);
        const TypeVect* vt = TypeVect::make(bt, vlen);
        assert(bt == T_FLOAT || bt == T_DOUBLE, "Only vectorization for FP cmovs is supported");
        if (bt == T_FLOAT) {
          vn = new CMoveVFNode(cc, src1, src2, vt);
        } else {
          assert(bt == T_DOUBLE, "Expected double");
          vn = new CMoveVDNode(cc, src1, src2, vt);
        }
        NOT_PRODUCT(if(is_trace_cmov()) {tty->print("SWPointer::output: created new CMove node %d: ", vn->_idx); vn->dump();})
      } else if (opc == Op_FmaD || opc == Op_FmaF) {
        // Promote operands to vector
        Node* in1 = vector_opd(p, 1);
        Node* in2 = vector_opd(p, 2);
        Node* in3 = vector_opd(p, 3);
        vn = VectorNode::make(opc, in1, in2, in3, vlen, velt_basic_type(n));
        vlen_in_bytes = vn->as_Vector()->length_in_bytes();
      } else {
        if (do_reserve_copy()) {
          NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: ShouldNotReachHere, exiting SuperWord");})
          return; //and reverse to backup IG
        }
        ShouldNotReachHere();
      }

      assert(vn != NULL, "sanity");
      if (vn == NULL) {
        if (do_reserve_copy()){
          NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("SWPointer::output: got NULL node, cannot proceed, exiting SuperWord");})
          return; //and reverse to backup IG
        }
        ShouldNotReachHere();
      }

      // Install the vector node and replace all pack members with it.
      _block.at_put(i, vn);
      _igvn.register_new_node_with_optimizer(vn);
      _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
      for (uint j = 0; j < p->size(); j++) {
        Node* pm = p->at(j);
        _igvn.replace_node(pm, vn);
      }
      _igvn._worklist.push(vn);

      if (can_process_post_loop) {
        // first check if the vector size if the maximum vector which we can use on the machine,
        // other vector size have reduced values for predicated data mapping.
        if (vlen_in_bytes != (uint)MaxVectorSize) {
          return;
        }
      }

      if (vlen_in_bytes >= max_vlen_in_bytes && vlen > max_vlen) {
        max_vlen = vlen;
        max_vlen_in_bytes = vlen_in_bytes;
      }
#ifdef ASSERT
      if (TraceNewVectors) {
        tty->print("new Vector node: ");
        vn->dump();
      }
#endif
    }
  }//for (int i = 0; i < _block.length(); i++)

  if (max_vlen_in_bytes > C->max_vector_size()) {
    C->set_max_vector_size(max_vlen_in_bytes);
  }
  if (max_vlen_in_bytes > 0) {
    cl->mark_loop_vectorized();
  }

  if (SuperWordLoopUnrollAnalysis) {
    if (cl->has_passed_slp()) {
      uint slp_max_unroll_factor = cl->slp_max_unroll();
      if (slp_max_unroll_factor == max_vlen) {
        if (TraceSuperWordLoopUnrollAnalysis) {
          tty->print_cr("vector loop(unroll=%d, len=%d)\n", max_vlen, max_vlen_in_bytes*BitsPerByte);
        }

        // For atomic unrolled loops which are vector mapped, instigate more unrolling
        cl->set_notpassed_slp();
        if (cl->is_main_loop()) {
          // if vector resources are limited, do not allow additional unrolling, also
          // do not unroll more on pure vector loops which were not reduced so that we can
          // program the post loop to single iteration execution.
          if (FLOATPRESSURE > 8) {
            C->set_major_progress();
            cl->mark_do_unroll_only();
          }
        }

        if (do_reserve_copy()) {
          if (can_process_post_loop) {
            // Now create the difference of trip and limit and use it as our mask index.
            // Note: We limited the unroll of the vectorized loop so that
            //       only vlen-1 size iterations can remain to be mask programmed.
            Node *incr = cl->incr();
            SubINode *index = new SubINode(cl->limit(), cl->init_trip());
            _igvn.register_new_node_with_optimizer(index);
            SetVectMaskINode  *mask = new SetVectMaskINode(_phase->get_ctrl(cl->init_trip()), index);
            _igvn.register_new_node_with_optimizer(mask);
            // make this a single iteration loop
            AddINode *new_incr = new AddINode(incr->in(1), mask);
            _igvn.register_new_node_with_optimizer(new_incr);
            _phase->set_ctrl(new_incr, _phase->get_ctrl(incr));
            _igvn.replace_node(incr, new_incr);
            cl->mark_is_multiversioned();
            cl->loopexit()->add_flag(Node::Flag_has_vector_mask_set);
          }
        }
      }
    }
  }

  if (do_reserve_copy()) {
    // Transformation succeeded: keep the vectorized version of the loop.
    make_reversable.use_new();
  }
  NOT_PRODUCT(if(is_trace_loop_reverse()) {tty->print_cr("\n Final loop after SuperWord"); print_loop(true);})
  return;
}
duke@1 2630
//------------------------------vector_opd---------------------------
// Create a vector operand for the nodes in pack p for operand: in(opd_idx)
// Returns an existing matching vector, a (masked) shift count, a
// scalar-replicated vector, or a new Pack node; NULL on failure (callers
// in output() treat NULL as a bail-out).
Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
  Node* p0 = p->at(0);
  uint vlen = p->size();
  Node* opd = p0->in(opd_idx);
  CountedLoopNode *cl = lpt()->_head->as_CountedLoop();

  if (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop()) {
    // override vlen with the main loops vector length
    vlen = cl->slp_max_unroll();
  }

  if (same_inputs(p, opd_idx)) {
    // All pack members share the same input for this operand.
    if (opd->is_Vector() || opd->is_LoadVector()) {
      assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");
      if (opd_idx == 2 && VectorNode::is_shift(p0)) {
        // Product-build counterpart of the assert above: bail out gracefully.
        NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("shift's count can't be vector");})
        return NULL;
      }
      return opd; // input is matching vector
    }
    if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
      Compile* C = _phase->C;
      Node* cnt = opd;
      // Vector instructions do not mask shift count, do it here.
      juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
      const TypeInt* t = opd->find_int_type();
      if (t != NULL && t->is_con()) {
        juint shift = t->get_con();
        if (shift > mask) { // Unsigned cmp
          // Constant count out of range: fold the masking at compile time.
          cnt = ConNode::make(TypeInt::make(shift & mask));
        }
      } else {
        if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
          // Count not provably in range: mask it with an AndI at runtime.
          cnt = ConNode::make(TypeInt::make(mask));
          _igvn.register_new_node_with_optimizer(cnt);
          cnt = new AndINode(opd, cnt);
          _igvn.register_new_node_with_optimizer(cnt);
          _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
        }
        assert(opd->bottom_type()->isa_int(), "int type only");
        if (!opd->bottom_type()->isa_int()) {
          NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should be int type only");})
          return NULL;
        }
        // Move non constant shift count into vector register.
        cnt = VectorNode::shift_count(p0, cnt, vlen, velt_basic_type(p0));
      }
      if (cnt != opd) {
        _igvn.register_new_node_with_optimizer(cnt);
        _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
      }
      return cnt;
    }
    assert(!opd->is_StoreVector(), "such vector is not expected here");
    if (opd->is_StoreVector()) {
      NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("StoreVector is not expected here");})
      return NULL;
    }
    // Convert scalar input to vector with the same number of elements as
    // p0's vector. Use p0's type because size of operand's container in
    // vector should match p0's size regardless operand's size.
    const Type* p0_t = velt_type(p0);
    VectorNode* vn = VectorNode::scalar2vector(opd, vlen, p0_t);

    _igvn.register_new_node_with_optimizer(vn);
    _phase->set_ctrl(vn, _phase->get_ctrl(opd));
#ifdef ASSERT
    if (TraceNewVectors) {
      tty->print("new Vector node: ");
      vn->dump();
    }
#endif
    return vn;
  }

  // Insert pack operation
  // (inputs differ across the pack: gather each member's operand into a Pack)
  BasicType bt = velt_basic_type(p0);
  PackNode* pk = PackNode::make(opd, vlen, bt);
  DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )

  for (uint i = 1; i < vlen; i++) {
    Node* pi = p->at(i);
    Node* in = pi->in(opd_idx);
    assert(my_pack(in) == NULL, "Should already have been unpacked");
    if (my_pack(in) != NULL) {
      NOT_PRODUCT(if(is_trace_loop_reverse() || TraceLoopOpts) {tty->print_cr("Should already have been unpacked");})
      return NULL;
    }
    assert(opd_bt == in->bottom_type()->basic_type(), "all same type");
    pk->add_opd(in);
    if (VectorNode::is_muladds2i(pi)) {
      // MulAddS2I takes operand pairs; also pack the partner operand
      // at opd_idx + 2.
      Node* in2 = pi->in(opd_idx + 2);
      assert(my_pack(in2) == NULL, "Should already have been unpacked");
      if (my_pack(in2) != NULL) {
        NOT_PRODUCT(if (is_trace_loop_reverse() || TraceLoopOpts) { tty->print_cr("Should already have been unpacked"); })
        return NULL;
      }
      assert(opd_bt == in2->bottom_type()->basic_type(), "all same type");
      pk->add_opd(in2);
    }
  }
  _igvn.register_new_node_with_optimizer(pk);
  _phase->set_ctrl(pk, _phase->get_ctrl(opd));
#ifdef ASSERT
  if (TraceNewVectors) {
    tty->print("new Vector node: ");
    pk->dump();
  }
#endif
  return pk;
}
duke@1 2744
//------------------------------insert_extracts---------------------------
// If a use of pack p is not a vector use, then replace the
// use with an extract operation.
void SuperWord::insert_extracts(Node_List* p) {
  if (p->at(0)->is_Store()) return;   // stores produce no values, nothing to extract
  assert(_n_idx_list.is_empty(), "empty (node,index) list");

  // Inspect each use of each pack member. For each use that is
  // not a vector use, replace the use with an extract operation.

  for (uint i = 0; i < p->size(); i++) {
    Node* def = p->at(i);
    for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
      Node* use = def->fast_out(j);
      for (uint k = 0; k < use->req(); k++) {
        Node* n = use->in(k);
        if (def == n) {
          // Members of a cmov pack are handled as a unit and need no scalar
          // extract, except for the CMove node itself.
          Node_List* u_pk = my_pack(use);
          if ((u_pk == NULL || !is_cmov_pack(u_pk) || use->is_CMove()) && !is_vector_use(use, k)) {
            // Record (use, input index) now; mutate the graph only after the
            // DU iteration is complete.
            _n_idx_list.push(use, k);
          }
        }
      }
    }
  }

  // Drain the worklist: insert one Extract node per recorded (use, index).
  while (_n_idx_list.is_nonempty()) {
    Node* use = _n_idx_list.node();
    int idx = _n_idx_list.index();
    _n_idx_list.pop();
    Node* def = use->in(idx);

    if (def->is_reduction()) continue;  // reductions keep their scalar chain

    // Insert extract operation
    _igvn.hash_delete(def);
    // Lane position of def within its vector (alignment is in bytes).
    int def_pos = alignment(def) / data_size(def);

    Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def));
    _igvn.register_new_node_with_optimizer(ex);
    _phase->set_ctrl(ex, _phase->get_ctrl(def));
    _igvn.replace_input_of(use, idx, ex);
    _igvn._worklist.push(def);

    // Keep the block schedule and vector-element-type info consistent.
    bb_insert_after(ex, bb_idx(def));
    set_velt_type(ex, velt_type(def));
  }
}
duke@1 2793
//------------------------------is_vector_use---------------------------
// Is use->in(u_idx) a vector use?
bool SuperWord::is_vector_use(Node* use, int u_idx) {
  Node_List* u_pk = my_pack(use);
  if (u_pk == NULL) return false;       // use itself is not packed
  if (use->is_reduction()) return true; // reductions consume vectors by construction
  Node* def = use->in(u_idx);
  Node_List* d_pk = my_pack(def);
  if (d_pk == NULL) {
    // The def is unpacked: still a vector use if every member of the use's
    // pack reads the same scalar, which can then be broadcast into a vector.
    // check for scalar promotion
    Node* n = u_pk->at(0)->in(u_idx);
    for (uint i = 1; i < u_pk->size(); i++) {
      if (u_pk->at(i)->in(u_idx) != n) return false;
    }
    return true;
  }
  if (VectorNode::is_muladds2i(use)) {
    // MulAddS2I takes shorts and produces ints - hence the special checks
    // on alignment and size.
    if (u_pk->size() * 2 != d_pk->size()) {
      return false;
    }
    for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
      Node* ui = u_pk->at(i);
      Node* di = d_pk->at(i);
      // Each int lane covers two short lanes, so byte alignments relate by 2x.
      if (alignment(ui) != alignment(di) * 2) {
        return false;
      }
    }
    return true;
  }
  // General case: packs must match in size and the members must correspond
  // one-to-one with identical alignment.
  if (u_pk->size() != d_pk->size())
    return false;
  for (uint i = 0; i < u_pk->size(); i++) {
    Node* ui = u_pk->at(i);
    Node* di = d_pk->at(i);
    if (ui->in(u_idx) != di || alignment(ui) != alignment(di))
      return false;
  }
  return true;
}
duke@1 2835
//------------------------------construct_bb---------------------------
// Construct reverse postorder list of block members
// Returns false on bailout (node kinds superword can't handle); returns
// true only when there is something to vectorize (a memory slice, a
// reduction use, or a data entry node).
bool SuperWord::construct_bb() {
  Node* entry = bb();

  assert(_stk.length() == 0, "stk is empty");
  assert(_block.length() == 0, "block is empty");
  assert(_data_entry.length() == 0, "data_entry is empty");
  assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");
  assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty");

  // Find non-control nodes with no inputs from within block,
  // create a temporary map from node _idx to bb_idx for use
  // by the visited and post_visited sets,
  // and count number of nodes in block.
  int bb_ct = 0;
  for (uint i = 0; i < lpt()->_body.size(); i++) {
    Node *n = lpt()->_body.at(i);
    set_bb_idx(n, i); // Create a temporary map
    if (in_bb(n)) {
      if (n->is_LoadStore() || n->is_MergeMem() ||
          (n->is_Proj() && !n->as_Proj()->is_CFG())) {
        // Bailout if the loop has LoadStore, MergeMem or data Proj
        // nodes. Superword optimization does not work with them.
        return false;
      }
      bb_ct++;
      if (!n->is_CFG()) {
        // A non-CFG node with no in-block inputs is a "data entry":
        // it seeds the depth-first walk below.
        bool found = false;
        for (uint j = 0; j < n->req(); j++) {
          Node* def = n->in(j);
          if (def && in_bb(def)) {
            found = true;
            break;
          }
        }
        if (!found) {
          assert(n != entry, "can't be entry");
          _data_entry.push(n);
        }
      }
    }
  }

  // Find memory slices (head and tail)
  for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) {
    Node *n = lp()->fast_out(i);
    if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
      Node* n_tail = n->in(LoopNode::LoopBackControl);
      if (n_tail != n->in(LoopNode::EntryControl)) {
        if (!n_tail->is_Mem()) {
          // Assert in debug builds, bail out gracefully in product builds.
          assert(n_tail->is_Mem(), "unexpected node for memory slice: %s", n_tail->Name());
          return false; // Bailout
        }
        _mem_slice_head.push(n);
        _mem_slice_tail.push(n_tail);
      }
    }
  }

  // Create an RPO list of nodes in block

  visited_clear();
  post_visited_clear();

  // Push all non-control nodes with no inputs from within block, then control entry
  for (int j = 0; j < _data_entry.length(); j++) {
    Node* n = _data_entry.at(j);
    visited_set(n);
    _stk.push(n);
  }
  visited_set(entry);
  _stk.push(entry);

  // Do a depth first walk over out edges
  int rpo_idx = bb_ct - 1;
  int size;
  int reduction_uses = 0;
  while ((size = _stk.length()) > 0) {
    Node* n = _stk.top(); // Leave node on stack
    if (!visited_test_set(n)) {
      // forward arc in graph
    } else if (!post_visited_test(n)) {
      // cross or back arc
      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
        Node *use = n->fast_out(i);
        if (in_bb(use) && !visited_test(use) &&
            // Don't go around backedge
            (!use->is_Phi() || n == entry)) {
          if (use->is_reduction()) {
            // First see if we can map the reduction on the given system we are on, then
            // make a data entry operation for each reduction we see.
            BasicType bt = use->bottom_type()->basic_type();
            if (ReductionNode::implemented(use->Opcode(), Matcher::min_vector_size(bt), bt)) {
              reduction_uses++;
            }
          }
          _stk.push(use);
        }
      }
      if (_stk.length() == size) {
        // There were no additional uses, post visit node now
        _stk.pop(); // Remove node from stack
        assert(rpo_idx >= 0, "");
        // Fill _block from the back so the result is reverse postorder.
        _block.at_put_grow(rpo_idx, n);
        rpo_idx--;
        post_visited_set(n);
        assert(rpo_idx >= 0 || _stk.is_empty(), "");
      }
    } else {
      _stk.pop(); // Remove post-visited node from stack
    }
  }//while

  int ii_current = -1;
  unsigned int load_idx = (unsigned int)-1;
  _ii_order.clear();
  // Create real map of block indices for nodes
  for (int j = 0; j < _block.length(); j++) {
    Node* n = _block.at(j);
    set_bb_idx(n, j);
    if (_do_vector_loop && n->is_Load()) {
      // Track the order of clone generations seen among loads; used by
      // the vector-loop (unrolled-loop) packing heuristics.
      if (ii_current == -1) {
        ii_current = _clone_map.gen(n->_idx);
        _ii_order.push(ii_current);
        load_idx = _clone_map.idx(n->_idx);
      } else if (_clone_map.idx(n->_idx) == load_idx && _clone_map.gen(n->_idx) != ii_current) {
        ii_current = _clone_map.gen(n->_idx);
        _ii_order.push(ii_current);
      }
    }
  }//for

  // Ensure extra info is allocated.
  initialize_bb();

#ifndef PRODUCT
  if (_vector_loop_debug && _ii_order.length() > 0) {
    tty->print("SuperWord::construct_bb: List of generations: ");
    for (int jj = 0; jj < _ii_order.length(); ++jj) {
      tty->print(" %d:%d", jj, _ii_order.at(jj));
    }
    tty->print_cr(" ");
  }
  if (TraceSuperWord) {
    print_bb();
    tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
    for (int m = 0; m < _data_entry.length(); m++) {
      tty->print("%3d ", m);
      _data_entry.at(m)->dump();
    }
    tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE");
    for (int m = 0; m < _mem_slice_head.length(); m++) {
      tty->print("%3d ", m); _mem_slice_head.at(m)->dump();
      tty->print(" "); _mem_slice_tail.at(m)->dump();
    }
  }
#endif
  assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found");
  return (_mem_slice_head.length() > 0) || (reduction_uses > 0) || (_data_entry.length() > 0);
}
duke@1 2997
duke@1 2998 //------------------------------initialize_bb---------------------------
duke@1 2999 // Initialize per node info
duke@1 3000 void SuperWord::initialize_bb() {
duke@1 3001 Node* last = _block.at(_block.length() - 1);
duke@1 3002 grow_node_info(bb_idx(last));
duke@1 3003 }
duke@1 3004
duke@1 3005 //------------------------------bb_insert_after---------------------------
duke@1 3006 // Insert n into block after pos
duke@1 3007 void SuperWord::bb_insert_after(Node* n, int pos) {
duke@1 3008 int n_pos = pos + 1;
duke@1 3009 // Make room
duke@1 3010 for (int i = _block.length() - 1; i >= n_pos; i--) {
duke@1 3011 _block.at_put_grow(i+1, _block.at(i));
duke@1 3012 }
duke@1 3013 for (int j = _node_info.length() - 1; j >= n_pos; j--) {
duke@1 3014 _node_info.at_put_grow(j+1, _node_info.at(j));
duke@1 3015 }
duke@1 3016 // Set value
duke@1 3017 _block.at_put_grow(n_pos, n);
duke@1 3018 _node_info.at_put_grow(n_pos, SWNodeInfo::initial);
duke@1 3019 // Adjust map from node->_idx to _block index
duke@1 3020 for (int i = n_pos; i < _block.length(); i++) {
duke@1 3021 set_bb_idx(_block.at(i), i);
duke@1 3022 }
duke@1 3023 }
duke@1 3024
duke@1 3025 //------------------------------compute_max_depth---------------------------
duke@1 3026 // Compute max depth for expressions from beginning of block
duke@1 3027 // Use to prune search paths during test for independence.
duke@1 3028 void SuperWord::compute_max_depth() {
duke@1 3029 int ct = 0;
duke@1 3030 bool again;
duke@1 3031 do {
duke@1 3032 again = false;
duke@1 3033 for (int i = 0; i < _block.length(); i++) {
duke@1 3034 Node* n = _block.at(i);
duke@1 3035 if (!n->is_Phi()) {
duke@1 3036 int d_orig = depth(n);
duke@1 3037 int d_in = 0;
duke@1 3038 for (DepPreds preds(n, _dg); !preds.done(); preds.next()) {
duke@1 3039 Node* pred = preds.current();
duke@1 3040 if (in_bb(pred)) {
duke@1 3041 d_in = MAX2(d_in, depth(pred));
duke@1 3042 }
duke@1 3043 }
duke@1 3044 if (d_in + 1 != d_orig) {
duke@1 3045 set_depth(n, d_in + 1);
duke@1 3046 again = true;
duke@1 3047 }
duke@1 3048 }
duke@1 3049 }
duke@1 3050 ct++;
duke@1 3051 } while (again);
twisti@34174 3052
twisti@34174 3053 if (TraceSuperWord && Verbose) {
duke@1 3054 tty->print_cr("compute_max_depth iterated: %d times", ct);
twisti@34174 3055 }
duke@1 3056 }
duke@1 3057
//-------------------------compute_vector_element_type-----------------------
// Compute necessary vector element type for expressions
// This propagates backwards a narrower integer type when the
// upper bits of the value are not needed.
// Example: char a,b,c; a = b + c;
// Normally the type of the add is integer, but for packed character
// operations the type of the add needs to be char.
void SuperWord::compute_vector_element_type() {
  if (TraceSuperWord && Verbose) {
    tty->print_cr("\ncompute_velt_type:");
  }

  // Initial type
  for (int i = 0; i < _block.length(); i++) {
    Node* n = _block.at(i);
    set_velt_type(n, container_type(n));
  }

  // Propagate integer narrowed type backwards through operations
  // that don't depend on higher order bits
  for (int i = _block.length() - 1; i >= 0; i--) {
    Node* n = _block.at(i);
    // Only integer types need be examined
    const Type* vtn = velt_type(n);
    if (vtn->basic_type() == T_INT) {
      uint start, end;
      VectorNode::vector_operands(n, &start, &end);

      for (uint j = start; j < end; j++) {
        Node* in = n->in(j);
        // Don't propagate through a memory
        // Narrow 'in' only if it is in the block, int-typed, and wider than n.
        if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT &&
            data_size(n) < data_size(in)) {
          // All in-block uses of 'in' must agree on the narrower type,
          // otherwise leave 'in' alone.
          bool same_type = true;
          for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
            Node *use = in->fast_out(k);
            if (!in_bb(use) || !same_velt_type(use, n)) {
              same_type = false;
              break;
            }
          }
          if (same_type) {
            // For right shifts of small integer types (bool, byte, char, short)
            // we need precise information about sign-ness. Only Load nodes have
            // this information because Store nodes are the same for signed and
            // unsigned values. And any arithmetic operation after a load may
            // expand a value to signed Int so such right shifts can't be used
            // because vector elements do not have upper bits of Int.
            const Type* vt = vtn;
            if (VectorNode::is_shift(in)) {
              Node* load = in->in(1);
              if (load->is_Load() && in_bb(load) && (velt_type(load)->basic_type() == T_INT)) {
                vt = velt_type(load);
              } else if (in->Opcode() != Op_LShiftI) {
                // Widen type to Int to avoid creation of right shift vector
                // (align + data_size(s1) check in stmts_can_pack() will fail).
                // Note, left shifts work regardless type.
                vt = TypeInt::INT;
              }
            }
            set_velt_type(in, vt);
          }
        }
      }
    }
  }
#ifndef PRODUCT
  if (TraceSuperWord && Verbose) {
    for (int i = 0; i < _block.length(); i++) {
      Node* n = _block.at(i);
      velt_type(n)->dump();
      tty->print("\t");
      n->dump();
    }
  }
#endif
}
duke@1 3135
//------------------------------memory_alignment---------------------------
// Alignment within a vector memory reference
// Returns the byte offset of s modulo the vector width, normalized into
// [0, vw), or bottom_align when the address can't be analyzed or the type
// can't be vectorized.
int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {
#ifndef PRODUCT
  if(TraceSuperWord && Verbose) {
    tty->print("SuperWord::memory_alignment within a vector memory reference for %d: ", s->_idx); s->dump();
  }
#endif
  NOT_PRODUCT(SWPointer::Tracer::Depth ddd(0);)
  // Decompose the address into base + invar + scale*iv + offset form.
  SWPointer p(s, this, NULL, false);
  if (!p.valid()) {
    NOT_PRODUCT(if(is_trace_alignment()) tty->print("SWPointer::memory_alignment: SWPointer p invalid, return bottom_align");)
    return bottom_align;
  }
  int vw = get_vw_bytes_special(s);
  if (vw < 2) {
    NOT_PRODUCT(if(is_trace_alignment()) tty->print_cr("SWPointer::memory_alignment: vector_width_in_bytes < 2, return bottom_align");)
    return bottom_align; // No vectors for this type
  }
  int offset = p.offset_in_bytes();
  offset += iv_adjust*p.memory_size();  // account for the iv adjustment, in bytes
  int off_rem = offset % vw;
  // '%' can be negative for negative offsets; normalize into [0, vw).
  int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;
  if (TraceSuperWord && Verbose) {
    tty->print_cr("SWPointer::memory_alignment: off_rem = %d, off_mod = %d", off_rem, off_mod);
  }
  return off_mod;
}
duke@1 3164
duke@1 3165 //---------------------------container_type---------------------------
duke@1 3166 // Smallest type containing range of values
kvn@13104 3167 const Type* SuperWord::container_type(Node* n) {
kvn@13104 3168 if (n->is_Mem()) {
kvn@14131 3169 BasicType bt = n->as_Mem()->memory_type();
kvn@14131 3170 if (n->is_Store() && (bt == T_CHAR)) {
kvn@14131 3171 // Use T_SHORT type instead of T_CHAR for stored values because any
kvn@14131 3172 // preceding arithmetic operation extends values to signed Int.
kvn@14131 3173 bt = T_SHORT;
kvn@14131 3174 }
kvn@14131 3175 if (n->Opcode() == Op_LoadUB) {
kvn@14131 3176 // Adjust type for unsigned byte loads, it is important for right shifts.
kvn@14131 3177 // T_BOOLEAN is used because there is no basic type representing type
kvn@14131 3178 // TypeInt::UBYTE. Use of T_BOOLEAN for vectors is fine because only
kvn@14131 3179 // size (one byte) and sign is important.
kvn@14131 3180 bt = T_BOOLEAN;
kvn@14131 3181 }
kvn@14131 3182 return Type::get_const_basic_type(bt);
duke@1 3183 }
kvn@13104 3184 const Type* t = _igvn.type(n);
duke@1 3185 if (t->basic_type() == T_INT) {
kvn@13485 3186 // A narrow type of arithmetic operations will be determined by
kvn@13485 3187 // propagating the type of memory operations.
duke@1 3188 return TypeInt::INT;
duke@1 3189 }
duke@1 3190 return t;
duke@1 3191 }
duke@1 3192
kvn@13104 3193 bool SuperWord::same_velt_type(Node* n1, Node* n2) {
kvn@13104 3194 const Type* vt1 = velt_type(n1);
kvn@13885 3195 const Type* vt2 = velt_type(n2);
kvn@13104 3196 if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) {
kvn@13104 3197 // Compare vectors element sizes for integer types.
kvn@13104 3198 return data_size(n1) == data_size(n2);
kvn@13104 3199 }
kvn@13104 3200 return vt1 == vt2;
kvn@13104 3201 }
kvn@13104 3202
duke@1 3203 //------------------------------in_packset---------------------------
duke@1 3204 // Are s1 and s2 in a pack pair and ordered as s1,s2?
duke@1 3205 bool SuperWord::in_packset(Node* s1, Node* s2) {
duke@1 3206 for (int i = 0; i < _packset.length(); i++) {
duke@1 3207 Node_List* p = _packset.at(i);
duke@1 3208 assert(p->size() == 2, "must be");
duke@1 3209 if (p->at(0) == s1 && p->at(p->size()-1) == s2) {
duke@1 3210 return true;
duke@1 3211 }
duke@1 3212 }
duke@1 3213 return false;
duke@1 3214 }
duke@1 3215
duke@1 3216 //------------------------------in_pack---------------------------
duke@1 3217 // Is s in pack p?
duke@1 3218 Node_List* SuperWord::in_pack(Node* s, Node_List* p) {
duke@1 3219 for (uint i = 0; i < p->size(); i++) {
duke@1 3220 if (p->at(i) == s) {
duke@1 3221 return p;
duke@1 3222 }
duke@1 3223 }
duke@1 3224 return NULL;
duke@1 3225 }
duke@1 3226
duke@1 3227 //------------------------------remove_pack_at---------------------------
duke@1 3228 // Remove the pack at position pos in the packset
duke@1 3229 void SuperWord::remove_pack_at(int pos) {
duke@1 3230 Node_List* p = _packset.at(pos);
duke@1 3231 for (uint i = 0; i < p->size(); i++) {
duke@1 3232 Node* s = p->at(i);
duke@1 3233 set_my_pack(s, NULL);
duke@1 3234 }
duke@1 3235 _packset.remove_at(pos);
duke@1 3236 }
duke@1 3237
kvn@30211 3238 void SuperWord::packset_sort(int n) {
kvn@30211 3239 // simple bubble sort so that we capitalize with O(n) when its already sorted
kvn@30211 3240 while (n != 0) {
kvn@30211 3241 bool swapped = false;
kvn@30211 3242 for (int i = 1; i < n; i++) {
kvn@30211 3243 Node_List* q_low = _packset.at(i-1);
kvn@30211 3244 Node_List* q_i = _packset.at(i);
kvn@30211 3245
kvn@30211 3246 // only swap when we find something to swap
kvn@30211 3247 if (alignment(q_low->at(0)) > alignment(q_i->at(0))) {
kvn@30211 3248 Node_List* t = q_i;
kvn@30211 3249 *(_packset.adr_at(i)) = q_low;
kvn@30211 3250 *(_packset.adr_at(i-1)) = q_i;
kvn@30211 3251 swapped = true;
kvn@30211 3252 }
kvn@30211 3253 }
kvn@30211 3254 if (swapped == false) break;
kvn@30211 3255 n--;
kvn@30211 3256 }
kvn@30211 3257 }
kvn@30211 3258
duke@1 3259 //------------------------------executed_first---------------------------
duke@1 3260 // Return the node executed first in pack p. Uses the RPO block list
duke@1 3261 // to determine order.
duke@1 3262 Node* SuperWord::executed_first(Node_List* p) {
duke@1 3263 Node* n = p->at(0);
duke@1 3264 int n_rpo = bb_idx(n);
duke@1 3265 for (uint i = 1; i < p->size(); i++) {
duke@1 3266 Node* s = p->at(i);
duke@1 3267 int s_rpo = bb_idx(s);
duke@1 3268 if (s_rpo < n_rpo) {
duke@1 3269 n = s;
duke@1 3270 n_rpo = s_rpo;
duke@1 3271 }
duke@1 3272 }
duke@1 3273 return n;
duke@1 3274 }
duke@1 3275
duke@1 3276 //------------------------------executed_last---------------------------
duke@1 3277 // Return the node executed last in pack p.
duke@1 3278 Node* SuperWord::executed_last(Node_List* p) {
duke@1 3279 Node* n = p->at(0);
duke@1 3280 int n_rpo = bb_idx(n);
duke@1 3281 for (uint i = 1; i < p->size(); i++) {
duke@1 3282 Node* s = p->at(i);
duke@1 3283 int s_rpo = bb_idx(s);
duke@1 3284 if (s_rpo > n_rpo) {
duke@1 3285 n = s;
duke@1 3286 n_rpo = s_rpo;
duke@1 3287 }
duke@1 3288 }
duke@1 3289 return n;
duke@1 3290 }
duke@1 3291
roland@31035 3292 LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) {
roland@31035 3293 LoadNode::ControlDependency dep = LoadNode::DependsOnlyOnTest;
roland@31035 3294 for (uint i = 0; i < p->size(); i++) {
roland@31035 3295 Node* n = p->at(i);
roland@31035 3296 assert(n->is_Load(), "only meaningful for loads");
roland@31035 3297 if (!n->depends_only_on_test()) {
roland@31035 3298 dep = LoadNode::Pinned;
roland@31035 3299 }
roland@31035 3300 }
roland@31035 3301 return dep;
roland@31035 3302 }
roland@31035 3303
roland@31035 3304
duke@1 3305 //----------------------------align_initial_loop_index---------------------------
duke@1 3306 // Adjust pre-loop limit so that in main loop, a load/store reference
duke@1 3307 // to align_to_ref will be a position zero in the vector.
duke@1 3308 // (iv + k) mod vector_align == 0
duke@1 3309 void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
duke@1 3310 CountedLoopNode *main_head = lp()->as_CountedLoop();
duke@1 3311 assert(main_head->is_main_loop(), "");
duke@1 3312 CountedLoopEndNode* pre_end = get_pre_loop_end(main_head);
adlertz@22919 3313 assert(pre_end != NULL, "we must have a correct pre-loop");
duke@1 3314 Node *pre_opaq1 = pre_end->limit();
duke@1 3315 assert(pre_opaq1->Opcode() == Op_Opaque1, "");
duke@1 3316 Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
never@245 3317 Node *lim0 = pre_opaq->in(1);
duke@1 3318
duke@1 3319 // Where we put new limit calculations
duke@1 3320 Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
duke@1 3321
duke@1 3322 // Ensure the original loop limit is available from the
duke@1 3323 // pre-loop Opaque1 node.
duke@1 3324 Node *orig_limit = pre_opaq->original_loop_limit();
duke@1 3325 assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
duke@1 3326
mcberg@31403 3327 SWPointer align_to_ref_p(align_to_ref, this, NULL, false);
kvn@13104 3328 assert(align_to_ref_p.valid(), "sanity");
duke@1 3329
never@245 3330 // Given:
never@245 3331 // lim0 == original pre loop limit
never@245 3332 // V == v_align (power of 2)
never@245 3333 // invar == extra invariant piece of the address expression
kvn@13485 3334 // e == offset [ +/- invar ]
duke@1 3335 //
never@245 3336 // When reassociating expressions involving '%' the basic rules are:
never@245 3337 // (a - b) % k == 0 => a % k == b % k
never@245 3338 // and:
never@245 3339 // (a + b) % k == 0 => a % k == (k - b) % k
never@245 3340 //
never@245 3341 // For stride > 0 && scale > 0,
never@245 3342 // Derive the new pre-loop limit "lim" such that the two constraints:
never@245 3343 // (1) lim = lim0 + N (where N is some positive integer < V)
never@245 3344 // (2) (e + lim) % V == 0
never@245 3345 // are true.
never@245 3346 //
never@245 3347 // Substituting (1) into (2),
never@245 3348 // (e + lim0 + N) % V == 0
never@245 3349 // solve for N:
never@245 3350 // N = (V - (e + lim0)) % V
never@245 3351 // substitute back into (1), so that new limit
never@245 3352 // lim = lim0 + (V - (e + lim0)) % V
never@245 3353 //
never@245 3354 // For stride > 0 && scale < 0
never@245 3355 // Constraints:
never@245 3356 // lim = lim0 + N
never@245 3357 // (e - lim) % V == 0
never@245 3358 // Solving for lim:
never@245 3359 // (e - lim0 - N) % V == 0
never@245 3360 // N = (e - lim0) % V
never@245 3361 // lim = lim0 + (e - lim0) % V
never@245 3362 //
never@245 3363 // For stride < 0 && scale > 0
never@245 3364 // Constraints:
never@245 3365 // lim = lim0 - N
never@245 3366 // (e + lim) % V == 0
never@245 3367 // Solving for lim:
never@245 3368 // (e + lim0 - N) % V == 0
never@245 3369 // N = (e + lim0) % V
never@245 3370 // lim = lim0 - (e + lim0) % V
never@245 3371 //
never@245 3372 // For stride < 0 && scale < 0
never@245 3373 // Constraints:
never@245 3374 // lim = lim0 - N
never@245 3375 // (e - lim) % V == 0
never@245 3376 // Solving for lim:
never@245 3377 // (e - lim0 + N) % V == 0
never@245 3378 // N = (V - (e - lim0)) % V
never@245 3379 // lim = lim0 - (V - (e - lim0)) % V
duke@1 3380
kvn@13108 3381 int vw = vector_width_in_bytes(align_to_ref);
never@245 3382 int stride = iv_stride();
never@245 3383 int scale = align_to_ref_p.scale_in_bytes();
duke@1 3384 int elt_size = align_to_ref_p.memory_size();
kvn@13104 3385 int v_align = vw / elt_size;
kvn@13108 3386 assert(v_align > 1, "sanity");
kvn@13485 3387 int offset = align_to_ref_p.offset_in_bytes() / elt_size;
kvn@13485 3388 Node *offsn = _igvn.intcon(offset);
duke@1 3389
kvn@13485 3390 Node *e = offsn;
duke@1 3391 if (align_to_ref_p.invar() != NULL) {
kvn@13485 3392 // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt)
duke@1 3393 Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
roland@46728 3394 Node* invar = align_to_ref_p.invar();
roland@46728 3395 if (_igvn.type(invar)->isa_long()) {
roland@46728 3396 // Computations are done % (vector width/element size) so it's
roland@46728 3397 // safe to simply convert invar to an int and loose the upper 32
roland@46728 3398 // bit half.
roland@46728 3399 invar = new ConvL2INode(invar);
roland@46728 3400 _igvn.register_new_node_with_optimizer(invar);
roland@46728 3401 }
roland@46728 3402 Node* aref = new URShiftINode(invar, log2_elt);
kvn@13894 3403 _igvn.register_new_node_with_optimizer(aref);