annotate src/share/vm/opto/output.cpp @ 2346:e1162778c1c8

7009266: G1: assert(obj->is_oop_or_null(true )) failed: Error
Summary: A referent object that is only weakly reachable at the start of concurrent marking but is re-attached to the strongly reachable object graph during marking may not be marked as live. This can cause the reference object to be processed prematurely and leave dangling pointers to the referent object. Implement a read barrier for the java.lang.ref.Reference::referent field by intrinsifying the Reference.get() method, and intercepting accesses through JNI, reflection, and Unsafe, so that when a non-null referent object is read it is also logged in an SATB buffer.
Reviewed-by: kvn, iveresov, never, tonyp, dholmes
author johnc
date Thu, 07 Apr 2011 09:53:20 -0700
parents 1c0cf339481b
children 92add02409c9
rev   line source
duke@0 1 /*
trims@1472 2 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
duke@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@0 4 *
duke@0 5 * This code is free software; you can redistribute it and/or modify it
duke@0 6 * under the terms of the GNU General Public License version 2 only, as
duke@0 7 * published by the Free Software Foundation.
duke@0 8 *
duke@0 9 * This code is distributed in the hope that it will be useful, but WITHOUT
duke@0 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@0 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@0 12 * version 2 for more details (a copy is included in the LICENSE file that
duke@0 13 * accompanied this code).
duke@0 14 *
duke@0 15 * You should have received a copy of the GNU General Public License version
duke@0 16 * 2 along with this work; if not, write to the Free Software Foundation,
duke@0 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@0 18 *
trims@1472 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
trims@1472 20 * or visit www.oracle.com if you need additional information or have any
trims@1472 21 * questions.
duke@0 22 *
duke@0 23 */
duke@0 24
stefank@1879 25 #include "precompiled.hpp"
stefank@1879 26 #include "asm/assembler.inline.hpp"
stefank@1879 27 #include "code/debugInfo.hpp"
stefank@1879 28 #include "code/debugInfoRec.hpp"
stefank@1879 29 #include "compiler/compileBroker.hpp"
stefank@1879 30 #include "compiler/oopMap.hpp"
stefank@1879 31 #include "memory/allocation.inline.hpp"
stefank@1879 32 #include "opto/callnode.hpp"
stefank@1879 33 #include "opto/cfgnode.hpp"
stefank@1879 34 #include "opto/locknode.hpp"
stefank@1879 35 #include "opto/machnode.hpp"
stefank@1879 36 #include "opto/output.hpp"
stefank@1879 37 #include "opto/regalloc.hpp"
stefank@1879 38 #include "opto/runtime.hpp"
stefank@1879 39 #include "opto/subnode.hpp"
stefank@1879 40 #include "opto/type.hpp"
stefank@1879 41 #include "runtime/handles.inline.hpp"
stefank@1879 42 #include "utilities/xmlstream.hpp"
duke@0 43
duke@0 44 extern uint size_java_to_interp();
duke@0 45 extern uint reloc_java_to_interp();
duke@0 46 extern uint size_exception_handler();
duke@0 47 extern uint size_deopt_handler();
duke@0 48
duke@0 49 #ifndef PRODUCT
duke@0 50 #define DEBUG_ARG(x) , x
duke@0 51 #else
duke@0 52 #define DEBUG_ARG(x)
duke@0 53 #endif
duke@0 54
duke@0 55 extern int emit_exception_handler(CodeBuffer &cbuf);
duke@0 56 extern int emit_deopt_handler(CodeBuffer &cbuf);
duke@0 57
duke@0 58 //------------------------------Output-----------------------------------------
duke@0 59 // Convert Nodes to instruction bits and pass off to the VM
duke@0 60 void Compile::Output() {
duke@0 61 // RootNode goes
duke@0 62 assert( _cfg->_broot->_nodes.size() == 0, "" );
duke@0 63
kvn@859 64 // The number of new nodes (mostly MachNop) is proportional to
kvn@859 65 // the number of java calls and inner loops which are aligned.
kvn@859 66 if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
kvn@859 67 C->inner_loops()*(OptoLoopAlignment-1)),
kvn@859 68 "out of nodes before code generation" ) ) {
kvn@859 69 return;
kvn@859 70 }
duke@0 71 // Make sure I can find the Start Node
duke@0 72 Block_Array& bbs = _cfg->_bbs;
duke@0 73 Block *entry = _cfg->_blocks[1];
duke@0 74 Block *broot = _cfg->_broot;
duke@0 75
duke@0 76 const StartNode *start = entry->_nodes[0]->as_Start();
duke@0 77
duke@0 78 // Replace StartNode with prolog
duke@0 79 MachPrologNode *prolog = new (this) MachPrologNode();
duke@0 80 entry->_nodes.map( 0, prolog );
duke@0 81 bbs.map( prolog->_idx, entry );
duke@0 82 bbs.map( start->_idx, NULL ); // start is no longer in any block
duke@0 83
duke@0 84 // Virtual methods need an unverified entry point
duke@0 85
duke@0 86 if( is_osr_compilation() ) {
duke@0 87 if( PoisonOSREntry ) {
duke@0 88 // TODO: Should use a ShouldNotReachHereNode...
duke@0 89 _cfg->insert( broot, 0, new (this) MachBreakpointNode() );
duke@0 90 }
duke@0 91 } else {
duke@0 92 if( _method && !_method->flags().is_static() ) {
duke@0 93 // Insert unvalidated entry point
duke@0 94 _cfg->insert( broot, 0, new (this) MachUEPNode() );
duke@0 95 }
duke@0 96
duke@0 97 }
duke@0 98
duke@0 99
duke@0 100 // Break before main entry point
duke@0 101 if( (_method && _method->break_at_execute())
duke@0 102 #ifndef PRODUCT
duke@0 103 ||(OptoBreakpoint && is_method_compilation())
duke@0 104 ||(OptoBreakpointOSR && is_osr_compilation())
duke@0 105 ||(OptoBreakpointC2R && !_method)
duke@0 106 #endif
duke@0 107 ) {
duke@0 108 // checking for _method means that OptoBreakpoint does not apply to
duke@0 109 // runtime stubs or frame converters
duke@0 110 _cfg->insert( entry, 1, new (this) MachBreakpointNode() );
duke@0 111 }
duke@0 112
duke@0 113 // Insert epilogs before every return
duke@0 114 for( uint i=0; i<_cfg->_num_blocks; i++ ) {
duke@0 115 Block *b = _cfg->_blocks[i];
duke@0 116 if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point?
duke@0 117 Node *m = b->end();
duke@0 118 if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) {
duke@0 119 MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
duke@0 120 b->add_inst( epilog );
duke@0 121 bbs.map(epilog->_idx, b);
duke@0 122 //_regalloc->set_bad(epilog->_idx); // Already initialized this way.
duke@0 123 }
duke@0 124 }
duke@0 125 }
duke@0 126
duke@0 127 # ifdef ENABLE_ZAP_DEAD_LOCALS
duke@0 128 if ( ZapDeadCompiledLocals ) Insert_zap_nodes();
duke@0 129 # endif
duke@0 130
duke@0 131 ScheduleAndBundle();
duke@0 132
duke@0 133 #ifndef PRODUCT
duke@0 134 if (trace_opto_output()) {
duke@0 135 tty->print("\n---- After ScheduleAndBundle ----\n");
duke@0 136 for (uint i = 0; i < _cfg->_num_blocks; i++) {
duke@0 137 tty->print("\nBB#%03d:\n", i);
duke@0 138 Block *bb = _cfg->_blocks[i];
duke@0 139 for (uint j = 0; j < bb->_nodes.size(); j++) {
duke@0 140 Node *n = bb->_nodes[j];
duke@0 141 OptoReg::Name reg = _regalloc->get_reg_first(n);
duke@0 142 tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
duke@0 143 n->dump();
duke@0 144 }
duke@0 145 }
duke@0 146 }
duke@0 147 #endif
duke@0 148
duke@0 149 if (failing()) return;
duke@0 150
duke@0 151 BuildOopMaps();
duke@0 152
duke@0 153 if (failing()) return;
duke@0 154
duke@0 155 Fill_buffer();
duke@0 156 }
duke@0 157
duke@0 158 bool Compile::need_stack_bang(int frame_size_in_bytes) const {
duke@0 159 // Determine if we need to generate a stack overflow check.
duke@0 160 // Do it if the method is not a stub function and
duke@0 161 // has java calls or has frame size > vm_page_size/8.
duke@0 162 return (stub_function() == NULL &&
duke@0 163 (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3));
duke@0 164 }
duke@0 165
duke@0 166 bool Compile::need_register_stack_bang() const {
duke@0 167 // Determine if we need to generate a register stack overflow check.
duke@0 168 // This is only used on architectures which have split register
duke@0 169 // and memory stacks (i.e. IA64).
duke@0 170 // Bang if the method is not a stub function and has java calls
duke@0 171 return (stub_function() == NULL && has_java_calls());
duke@0 172 }
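// A self-contained restatement of the stack-bang decision above, assuming a
// 4K page purely for illustration (the real code queries os::vm_page_size()
// at runtime, and the helper name here is hypothetical):
static bool needs_stack_bang_sketch(bool is_stub, bool has_java_calls,
                                    int frame_size_in_bytes) {
  const int assumed_page_size = 4096;   // illustration only
  return !is_stub &&
         (has_java_calls || frame_size_in_bytes > (assumed_page_size >> 3)); // > 512 bytes
}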
duke@0 173
duke@0 174 # ifdef ENABLE_ZAP_DEAD_LOCALS
duke@0 175
duke@0 176
duke@0 177 // In order to catch compiler oop-map bugs, we have implemented
duke@0 178 // a debugging mode called ZapDeadCompiledLocals.
duke@0 179 // This mode causes the compiler to insert a call to a runtime routine,
duke@0 180 // "zap_dead_locals", right before each place in compiled code
duke@0 181 // that could potentially be a gc-point (i.e., a safepoint or oop map point).
duke@0 182 // The runtime routine checks that locations mapped as oops are really
duke@0 183 // oops, that locations mapped as values do not look like oops,
duke@0 184 // and that locations mapped as dead are not used later
duke@0 185 // (by zapping them to an invalid address).
duke@0 186
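// A minimal sketch of the per-slot policy described above; the enum and the
// helper are hypothetical stand-ins for what the zap_dead_locals runtime stub
// checks, not its actual implementation:
enum ZapSlotKindSketch { ZAP_OOP, ZAP_VALUE, ZAP_DEAD };

static void zap_check_slot_sketch(ZapSlotKindSketch kind, intptr_t* slot,
                                  bool slot_looks_like_oop) {
  const intptr_t bad_address = (intptr_t)0xdeadbeef;   // illustrative poison value
  switch (kind) {
  case ZAP_OOP:   assert( slot_looks_like_oop, "oop-mapped slot must hold an oop");            break;
  case ZAP_VALUE: assert(!slot_looks_like_oop, "value-mapped slot must not look like an oop"); break;
  case ZAP_DEAD:  *slot = bad_address;                 break; // later use of a dead slot now faults
  }
}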
duke@0 187 int Compile::_CompiledZap_count = 0;
duke@0 188
duke@0 189 void Compile::Insert_zap_nodes() {
duke@0 190 bool skip = false;
duke@0 191
duke@0 192
duke@0 193 // Dink with static counts because code without the extra
duke@0 194 // runtime calls is MUCH faster for debugging purposes
duke@0 195
duke@0 196 if ( CompileZapFirst == 0 ) ; // nothing special
duke@0 197 else if ( CompileZapFirst > CompiledZap_count() ) skip = true;
duke@0 198 else if ( CompileZapFirst == CompiledZap_count() )
duke@0 199 warning("starting zap compilation after skipping");
duke@0 200
duke@0 201 if ( CompileZapLast == -1 ) ; // nothing special
duke@0 202 else if ( CompileZapLast < CompiledZap_count() ) skip = true;
duke@0 203 else if ( CompileZapLast == CompiledZap_count() )
duke@0 204 warning("about to compile last zap");
duke@0 205
duke@0 206 ++_CompiledZap_count; // counts skipped zaps, too
duke@0 207
duke@0 208 if ( skip ) return;
duke@0 209
duke@0 210
duke@0 211 if ( _method == NULL )
duke@0 212 return; // no safepoints/oopmaps emitted for calls in stubs, so we don't care
duke@0 213
duke@0 214 // Insert call to zap runtime stub before every node with an oop map
duke@0 215 for( uint i=0; i<_cfg->_num_blocks; i++ ) {
duke@0 216 Block *b = _cfg->_blocks[i];
duke@0 217 for ( uint j = 0; j < b->_nodes.size(); ++j ) {
duke@0 218 Node *n = b->_nodes[j];
duke@0 219
duke@0 220 // Determine if we should insert a zap-a-lot node in output.
duke@0 221 // We do that for all nodes that have oopmap info, except for calls
duke@0 222 // to allocation. Calls to allocation pass in the old top-of-eden pointer
duke@0 223 // and expect the C code to reset it. Hence, there can be no safepoints between
duke@0 224 // the inlined-allocation and the call to new_Java, etc.
duke@0 225 // We also cannot zap monitor calls, as they must hold the microlock
duke@0 226 // during the call to Zap, which also wants to grab the microlock.
duke@0 227 bool insert = n->is_MachSafePoint() && (n->as_MachSafePoint()->oop_map() != NULL);
duke@0 228 if ( insert ) { // it is MachSafePoint
duke@0 229 if ( !n->is_MachCall() ) {
duke@0 230 insert = false;
duke@0 231 } else if ( n->is_MachCall() ) {
duke@0 232 MachCallNode* call = n->as_MachCall();
duke@0 233 if (call->entry_point() == OptoRuntime::new_instance_Java() ||
duke@0 234 call->entry_point() == OptoRuntime::new_array_Java() ||
duke@0 235 call->entry_point() == OptoRuntime::multianewarray2_Java() ||
duke@0 236 call->entry_point() == OptoRuntime::multianewarray3_Java() ||
duke@0 237 call->entry_point() == OptoRuntime::multianewarray4_Java() ||
duke@0 238 call->entry_point() == OptoRuntime::multianewarray5_Java() ||
duke@0 239 call->entry_point() == OptoRuntime::slow_arraycopy_Java() ||
duke@0 240 call->entry_point() == OptoRuntime::complete_monitor_locking_Java()
duke@0 241 ) {
duke@0 242 insert = false;
duke@0 243 }
duke@0 244 }
duke@0 245 if (insert) {
duke@0 246 Node *zap = call_zap_node(n->as_MachSafePoint(), i);
duke@0 247 b->_nodes.insert( j, zap );
duke@0 248 _cfg->_bbs.map( zap->_idx, b );
duke@0 249 ++j;
duke@0 250 }
duke@0 251 }
duke@0 252 }
duke@0 253 }
duke@0 254 }
duke@0 255
duke@0 256
duke@0 257 Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) {
duke@0 258 const TypeFunc *tf = OptoRuntime::zap_dead_locals_Type();
duke@0 259 CallStaticJavaNode* ideal_node =
duke@0 260 new (this, tf->domain()->cnt()) CallStaticJavaNode( tf,
duke@0 261 OptoRuntime::zap_dead_locals_stub(_method->flags().is_native()),
duke@0 262 "call zap dead locals stub", 0, TypePtr::BOTTOM);
duke@0 263 // We need to copy the OopMap from the site we're zapping at.
duke@0 264 // We have to make a copy, because the zap site might not be
duke@0 265 // a call site, and zap_dead is a call site.
duke@0 266 OopMap* clone = node_to_check->oop_map()->deep_copy();
duke@0 267
duke@0 268 // Add the cloned OopMap to the zap node
duke@0 269 ideal_node->set_oop_map(clone);
duke@0 270 return _matcher->match_sfpt(ideal_node);
duke@0 271 }
duke@0 272
duke@0 273 //------------------------------is_node_getting_a_safepoint--------------------
duke@0 274 bool Compile::is_node_getting_a_safepoint( Node* n) {
duke@0 275 // This code duplicates the logic prior to the call of add_safepoint
duke@0 276 // below in this file.
duke@0 277 if( n->is_MachSafePoint() ) return true;
duke@0 278 return false;
duke@0 279 }
duke@0 280
duke@0 281 # endif // ENABLE_ZAP_DEAD_LOCALS
duke@0 282
duke@0 283 //------------------------------compute_loop_first_inst_sizes------------------
rasbold@418 284 // Compute the size of first NumberOfLoopInstrToAlign instructions at the top
duke@0 285 // of a loop. When aligning a loop we need to provide enough instructions
duke@0 286 // in the cpu's fetch buffer to feed decoders. The loop alignment could be
duke@0 287 // avoided if we have enough instructions in the fetch buffer at the head of a loop.
duke@0 288 // By default, the size is set to 999999 by Block's constructor so that
duke@0 289 // a loop will be aligned if the size is not reset here.
duke@0 290 //
duke@0 291 // Note: Mach instructions could contain several HW instructions
duke@0 292 // so the size is estimated only.
duke@0 293 //
duke@0 294 void Compile::compute_loop_first_inst_sizes() {
duke@0 295 // The next condition is used to gate the loop alignment optimization.
duke@0 296 // Don't align a loop if there are enough instructions at the head of a loop
duke@0 297 // or alignment padding is larger than MaxLoopPad. By default, MaxLoopPad
duke@0 298 // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is
duke@0 299 // equal to 11 bytes, the size of the largest address NOP instruction.
duke@0 300 if( MaxLoopPad < OptoLoopAlignment-1 ) {
duke@0 301 uint last_block = _cfg->_num_blocks-1;
duke@0 302 for( uint i=1; i <= last_block; i++ ) {
duke@0 303 Block *b = _cfg->_blocks[i];
duke@0 304 // Check the first loop's block which requires an alignment.
rasbold@418 305 if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) {
duke@0 306 uint sum_size = 0;
duke@0 307 uint inst_cnt = NumberOfLoopInstrToAlign;
rasbold@418 308 inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
rasbold@418 309
rasbold@418 310 // Check subsequent fallthrough blocks if the loop's first
rasbold@418 311 // block(s) does not have enough instructions.
rasbold@418 312 Block *nb = b;
rasbold@418 313 while( inst_cnt > 0 &&
rasbold@418 314 i < last_block &&
rasbold@418 315 !_cfg->_blocks[i+1]->has_loop_alignment() &&
rasbold@418 316 !nb->has_successor(b) ) {
rasbold@418 317 i++;
rasbold@418 318 nb = _cfg->_blocks[i];
rasbold@418 319 inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
rasbold@418 320 } // while( inst_cnt > 0 && i < last_block )
rasbold@418 321
duke@0 322 b->set_first_inst_size(sum_size);
duke@0 323 } // if( b->loop_alignment() > (uint)relocInfo::addr_unit() )
duke@0 324 } // for( i <= last_block )
duke@0 325 } // if( MaxLoopPad < OptoLoopAlignment-1 )
duke@0 326 }
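// A minimal sketch of the gating idea above: a loop head is only worth
// aligning when the padding is affordable and the first few instructions do
// not already fill the fetch buffer. The parameter names and the exact test
// are illustrative, not the precise logic in Block::alignment_padding():
static bool worth_aligning_loop_sketch(uint padding_bytes, uint max_loop_pad,
                                       uint first_inst_bytes, uint fetch_buffer_bytes) {
  if (padding_bytes > max_loop_pad)           return false;  // padding too expensive
  if (first_inst_bytes >= fetch_buffer_bytes) return false;  // decoders are already fed
  return true;
}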
duke@0 327
duke@0 328 //----------------------Shorten_branches---------------------------------------
duke@0 329 // The architecture description provides short branch variants for some long
duke@0 330 // branch instructions. Replace eligible long branches with short branches.
twisti@1915 331 void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size) {
duke@0 332
duke@0 333 // fill in the nop array for bundling computations
duke@0 334 MachNode *_nop_list[Bundle::_nop_count];
duke@0 335 Bundle::initialize_nops(_nop_list, this);
duke@0 336
duke@0 337 // ------------------
duke@0 338 // Compute size of each block, method size, and relocation information size
duke@0 339 uint *jmp_end = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks);
duke@0 340 uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
duke@0 341 DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
never@415 342 DEBUG_ONLY( uint *jmp_rule = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
duke@0 343 blk_starts[0] = 0;
duke@0 344
duke@0 345 // Initialize the sizes to 0
duke@0 346 code_size = 0; // Size in bytes of generated code
duke@0 347 stub_size = 0; // Size in bytes of all stub entries
duke@0 348 // Size in bytes of all relocation entries, including those in local stubs.
duke@0 349 // Start with 2 bytes of reloc info for the unvalidated entry point
duke@0 350 reloc_size = 1; // Number of relocation entries
duke@0 351
duke@0 352 // Make three passes. The first computes pessimistic blk_starts,
twisti@1915 353 // relative jmp_end and reloc_size information. The second performs
twisti@1915 354 // short branch substitution using the pessimistic sizing. The
twisti@1915 355 // third inserts nops where needed.
duke@0 356
duke@0 357 Node *nj; // tmp
duke@0 358
duke@0 359 // Step one, perform a pessimistic sizing pass.
duke@0 360 uint i;
duke@0 361 uint min_offset_from_last_call = 1; // init to a positive value
duke@0 362 uint nop_size = (new (this) MachNopNode())->size(_regalloc);
duke@0 363 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
duke@0 364 Block *b = _cfg->_blocks[i];
duke@0 365
duke@0 366 // Sum all instruction sizes to compute block size
duke@0 367 uint last_inst = b->_nodes.size();
duke@0 368 uint blk_size = 0;
duke@0 369 for( uint j = 0; j<last_inst; j++ ) {
duke@0 370 nj = b->_nodes[j];
duke@0 371 uint inst_size = nj->size(_regalloc);
duke@0 372 blk_size += inst_size;
duke@0 373 // Handle machine instruction nodes
duke@0 374 if( nj->is_Mach() ) {
duke@0 375 MachNode *mach = nj->as_Mach();
duke@0 376 blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
duke@0 377 reloc_size += mach->reloc();
duke@0 378 if( mach->is_MachCall() ) {
duke@0 379 MachCallNode *mcall = mach->as_MachCall();
duke@0 380 // This destination address is NOT PC-relative
duke@0 381
duke@0 382 mcall->method_set((intptr_t)mcall->entry_point());
duke@0 383
duke@0 384 if( mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method ) {
duke@0 385 stub_size += size_java_to_interp();
duke@0 386 reloc_size += reloc_java_to_interp();
duke@0 387 }
duke@0 388 } else if (mach->is_MachSafePoint()) {
duke@0 389 // If call/safepoint are adjacent, account for possible
duke@0 390 // nop to disambiguate the two safepoints.
duke@0 391 if (min_offset_from_last_call == 0) {
duke@0 392 blk_size += nop_size;
duke@0 393 }
duke@0 394 }
duke@0 395 }
duke@0 396 min_offset_from_last_call += inst_size;
duke@0 397 // Remember end of call offset
duke@0 398 if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
duke@0 399 min_offset_from_last_call = 0;
duke@0 400 }
duke@0 401 }
duke@0 402
duke@0 403 // During short branch replacement, we store the relative (to blk_starts)
duke@0 404 // end of jump in jmp_end, rather than the absolute end of jump. This
duke@0 405 // is so that we do not need to recompute sizes of all nodes when we compute
duke@0 406 // correct blk_starts in our next sizing pass.
duke@0 407 jmp_end[i] = blk_size;
duke@0 408 DEBUG_ONLY( jmp_target[i] = 0; )
duke@0 409
duke@0 410 // When the next block starts a loop, we may insert pad NOP
duke@0 411 // instructions. Since we cannot know our future alignment,
duke@0 412 // assume the worst.
duke@0 413 if( i<_cfg->_num_blocks-1 ) {
duke@0 414 Block *nb = _cfg->_blocks[i+1];
duke@0 415 int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
duke@0 416 if( max_loop_pad > 0 ) {
duke@0 417 assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), "");
duke@0 418 blk_size += max_loop_pad;
duke@0 419 }
duke@0 420 }
duke@0 421
duke@0 422 // Save block size; update total method size
duke@0 423 blk_starts[i+1] = blk_starts[i]+blk_size;
duke@0 424 }
duke@0 425
duke@0 426 // Step two, replace eligible long jumps.
duke@0 427
duke@0 428 // Note: this will only get the long branches within short branch
duke@0 429 // range. Another pass might detect more branches that became
duke@0 430 // candidates because the shortening in the first pass exposed
duke@0 431 // more opportunities. Unfortunately, this would require
duke@0 432 // recomputing the starting and ending positions for the blocks
duke@0 433 for( i=0; i<_cfg->_num_blocks; i++ ) {
duke@0 434 Block *b = _cfg->_blocks[i];
duke@0 435
duke@0 436 int j;
duke@0 437 // Find the branch; ignore trailing NOPs.
duke@0 438 for( j = b->_nodes.size()-1; j>=0; j-- ) {
duke@0 439 nj = b->_nodes[j];
duke@0 440 if( !nj->is_Mach() || nj->as_Mach()->ideal_Opcode() != Op_Con )
duke@0 441 break;
duke@0 442 }
duke@0 443
duke@0 444 if (j >= 0) {
duke@0 445 if( nj->is_Mach() && nj->as_Mach()->may_be_short_branch() ) {
duke@0 446 MachNode *mach = nj->as_Mach();
duke@0 447 // This requires the TRUE branch target be in succs[0]
duke@0 448 uint bnum = b->non_connector_successor(0)->_pre_order;
duke@0 449 uintptr_t target = blk_starts[bnum];
duke@0 450 if( mach->is_pc_relative() ) {
duke@0 451 int offset = target-(blk_starts[i] + jmp_end[i]);
never@415 452 if (_matcher->is_short_branch_offset(mach->rule(), offset)) {
duke@0 453 // We've got a winner. Replace this branch.
never@415 454 MachNode* replacement = mach->short_branch_version(this);
duke@0 455 b->_nodes.map(j, replacement);
never@222 456 mach->subsume_by(replacement);
duke@0 457
duke@0 458 // Update the jmp_end size to save time in our
duke@0 459 // next pass.
duke@0 460 jmp_end[i] -= (mach->size(_regalloc) - replacement->size(_regalloc));
duke@0 461 DEBUG_ONLY( jmp_target[i] = bnum; );
never@415 462 DEBUG_ONLY( jmp_rule[i] = mach->rule(); );
duke@0 463 }
duke@0 464 } else {
duke@0 465 #ifndef PRODUCT
duke@0 466 mach->dump(3);
duke@0 467 #endif
duke@0 468 Unimplemented();
duke@0 469 }
duke@0 470 }
duke@0 471 }
duke@0 472 }
duke@0 473
duke@0 474 // Compute the size of first NumberOfLoopInstrToAlign instructions at head
duke@0 475 // of a loop. It is used to determine the padding for loop alignment.
duke@0 476 compute_loop_first_inst_sizes();
duke@0 477
duke@0 478 // Step 3, compute the offsets of all the labels
duke@0 479 uint last_call_adr = max_uint;
duke@0 480 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
duke@0 481 // copy the offset of the beginning to the corresponding label
duke@0 482 assert(labels[i].is_unused(), "cannot patch at this point");
duke@0 483 labels[i].bind_loc(blk_starts[i], CodeBuffer::SECT_INSTS);
duke@0 484
duke@0 485 // insert padding for any instructions that need it
duke@0 486 Block *b = _cfg->_blocks[i];
duke@0 487 uint last_inst = b->_nodes.size();
duke@0 488 uint adr = blk_starts[i];
duke@0 489 for( uint j = 0; j<last_inst; j++ ) {
duke@0 490 nj = b->_nodes[j];
duke@0 491 if( nj->is_Mach() ) {
duke@0 492 int padding = nj->as_Mach()->compute_padding(adr);
duke@0 493 // If call/safepoint are adjacent insert a nop (5010568)
duke@0 494 if (padding == 0 && nj->is_MachSafePoint() && !nj->is_MachCall() &&
duke@0 495 adr == last_call_adr ) {
duke@0 496 padding = nop_size;
duke@0 497 }
duke@0 498 if(padding > 0) {
duke@0 499 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
duke@0 500 int nops_cnt = padding / nop_size;
duke@0 501 MachNode *nop = new (this) MachNopNode(nops_cnt);
duke@0 502 b->_nodes.insert(j++, nop);
duke@0 503 _cfg->_bbs.map( nop->_idx, b );
duke@0 504 adr += padding;
duke@0 505 last_inst++;
duke@0 506 }
duke@0 507 }
duke@0 508 adr += nj->size(_regalloc);
duke@0 509
duke@0 510 // Remember end of call offset
duke@0 511 if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
duke@0 512 last_call_adr = adr;
duke@0 513 }
duke@0 514 }
duke@0 515
duke@0 516 if ( i != _cfg->_num_blocks-1) {
duke@0 517 // Get the size of the block
duke@0 518 uint blk_size = adr - blk_starts[i];
duke@0 519
rasbold@418 520 // When the next block is the top of a loop, we may insert pad NOP
duke@0 521 // instructions.
duke@0 522 Block *nb = _cfg->_blocks[i+1];
duke@0 523 int current_offset = blk_starts[i] + blk_size;
duke@0 524 current_offset += nb->alignment_padding(current_offset);
duke@0 525 // Save block size; update total method size
duke@0 526 blk_starts[i+1] = current_offset;
duke@0 527 }
duke@0 528 }
duke@0 529
duke@0 530 #ifdef ASSERT
duke@0 531 for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
duke@0 532 if( jmp_target[i] != 0 ) {
duke@0 533 int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_end[i]);
never@415 534 if (!_matcher->is_short_branch_offset(jmp_rule[i], offset)) {
duke@0 535 tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]);
duke@0 536 }
never@415 537 assert(_matcher->is_short_branch_offset(jmp_rule[i], offset), "Displacement too large for short jmp");
duke@0 538 }
duke@0 539 }
duke@0 540 #endif
duke@0 541
duke@0 542 // ------------------
duke@0 543 // Compute size for code buffer
duke@0 544 code_size = blk_starts[i-1] + jmp_end[i-1];
duke@0 545
duke@0 546 // Relocation records
duke@0 547 reloc_size += 1; // Relo entry for exception handler
duke@0 548
duke@0 549 // Adjust reloc_size to the number of records of relocation info
duke@0 550 // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
duke@0 551 // a relocation index.
duke@0 552 // The CodeBuffer will expand the locs array if this estimate is too low.
duke@0 553 reloc_size *= 10 / sizeof(relocInfo);
duke@0 554 }
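// A self-contained sketch of the kind of test Matcher::is_short_branch_offset()
// performs, assuming (for illustration) a short branch form that takes a
// signed 8-bit displacement:
static bool fits_short_branch_sketch(int offset) {
  return offset >= -128 && offset <= 127;   // signed byte displacement
}
// With blk_starts[] and jmp_end[] as computed above, the offset tested for a
// branch in block i targeting block bnum is
//   blk_starts[bnum] - (blk_starts[i] + jmp_end[i]).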
duke@0 555
duke@0 556 //------------------------------FillLocArray-----------------------------------
duke@0 557 // Create a bit of debug info and append it to the array. The mapping is from
duke@0 558 // Java local or expression stack to constant, register or stack-slot. For
duke@0 559 // doubles, insert 2 mappings and return 1 (to tell the caller that the next
duke@0 560 // entry has been taken care of and caller should skip it).
duke@0 561 static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
duke@0 562 // This should never have accepted Bad before
duke@0 563 assert(OptoReg::is_valid(regnum), "location must be valid");
duke@0 564 return (OptoReg::is_reg(regnum))
duke@0 565 ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
duke@0 566 : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
duke@0 567 }
duke@0 568
kvn@63 569
kvn@63 570 ObjectValue*
kvn@63 571 Compile::sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id) {
kvn@63 572 for (int i = 0; i < objs->length(); i++) {
kvn@63 573 assert(objs->at(i)->is_object(), "corrupt object cache");
kvn@63 574 ObjectValue* sv = (ObjectValue*) objs->at(i);
kvn@63 575 if (sv->id() == id) {
kvn@63 576 return sv;
kvn@63 577 }
kvn@63 578 }
kvn@63 579 // Otherwise..
kvn@63 580 return NULL;
kvn@63 581 }
kvn@63 582
kvn@63 583 void Compile::set_sv_for_object_node(GrowableArray<ScopeValue*> *objs,
kvn@63 584 ObjectValue* sv ) {
kvn@63 585 assert(sv_for_node_id(objs, sv->id()) == NULL, "Precondition");
kvn@63 586 objs->append(sv);
kvn@63 587 }
kvn@63 588
kvn@63 589
kvn@63 590 void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
kvn@63 591 GrowableArray<ScopeValue*> *array,
kvn@63 592 GrowableArray<ScopeValue*> *objs ) {
duke@0 593 assert( local, "use _top instead of null" );
duke@0 594 if (array->length() != idx) {
duke@0 595 assert(array->length() == idx + 1, "Unexpected array count");
duke@0 596 // Old functionality:
duke@0 597 // return
duke@0 598 // New functionality:
duke@0 599 // Assert if the local is not top. In product mode let the new node
duke@0 600 // override the old entry.
duke@0 601 assert(local == top(), "LocArray collision");
duke@0 602 if (local == top()) {
duke@0 603 return;
duke@0 604 }
duke@0 605 array->pop();
duke@0 606 }
duke@0 607 const Type *t = local->bottom_type();
duke@0 608
kvn@63 609 // Is it a safepoint scalar object node?
kvn@63 610 if (local->is_SafePointScalarObject()) {
kvn@63 611 SafePointScalarObjectNode* spobj = local->as_SafePointScalarObject();
kvn@63 612
kvn@63 613 ObjectValue* sv = Compile::sv_for_node_id(objs, spobj->_idx);
kvn@63 614 if (sv == NULL) {
kvn@63 615 ciKlass* cik = t->is_oopptr()->klass();
kvn@63 616 assert(cik->is_instance_klass() ||
kvn@63 617 cik->is_array_klass(), "Not supported allocation.");
kvn@63 618 sv = new ObjectValue(spobj->_idx,
jrose@989 619 new ConstantOopWriteValue(cik->constant_encoding()));
kvn@63 620 Compile::set_sv_for_object_node(objs, sv);
kvn@63 621
kvn@63 622 uint first_ind = spobj->first_index();
kvn@63 623 for (uint i = 0; i < spobj->n_fields(); i++) {
kvn@63 624 Node* fld_node = sfpt->in(first_ind+i);
kvn@63 625 (void)FillLocArray(sv->field_values()->length(), sfpt, fld_node, sv->field_values(), objs);
kvn@63 626 }
kvn@63 627 }
kvn@63 628 array->append(sv);
kvn@63 629 return;
kvn@63 630 }
kvn@63 631
duke@0 632 // Grab the register number for the local
duke@0 633 OptoReg::Name regnum = _regalloc->get_reg_first(local);
duke@0 634 if( OptoReg::is_valid(regnum) ) {// Got a register/stack?
duke@0 635 // Record the double as two float registers.
duke@0 636 // The register mask for such a value always specifies two adjacent
duke@0 637 // float registers, with the lower register number even.
duke@0 638 // Normally, the allocation of high and low words to these registers
duke@0 639 // is irrelevant, because nearly all operations on register pairs
duke@0 640 // (e.g., StoreD) treat them as a single unit.
duke@0 641 // Here, we assume in addition that the words in these two registers are
duke@0 642 // stored "naturally" (by operations like StoreD and double stores
duke@0 643 // within the interpreter) such that the lower-numbered register
duke@0 644 // is written to the lower memory address. This may seem like
duke@0 645 // a machine dependency, but it is not--it is a requirement on
duke@0 646 // the author of the <arch>.ad file to ensure that, for every
duke@0 647 // even/odd double-register pair to which a double may be allocated,
duke@0 648 // the word in the even single-register is stored to the first
duke@0 649 // memory word. (Note that register numbers are completely
duke@0 650 // arbitrary, and are not tied to any machine-level encodings.)
duke@0 651 #ifdef _LP64
duke@0 652 if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon ) {
duke@0 653 array->append(new ConstantIntValue(0));
duke@0 654 array->append(new_loc_value( _regalloc, regnum, Location::dbl ));
duke@0 655 } else if ( t->base() == Type::Long ) {
duke@0 656 array->append(new ConstantIntValue(0));
duke@0 657 array->append(new_loc_value( _regalloc, regnum, Location::lng ));
duke@0 658 } else if ( t->base() == Type::RawPtr ) {
duke@0 659 // jsr/ret return address which must be restored into the full-width
duke@0 660 // 64-bit stack slot.
duke@0 661 array->append(new_loc_value( _regalloc, regnum, Location::lng ));
duke@0 662 }
duke@0 663 #else //_LP64
duke@0 664 #ifdef SPARC
duke@0 665 if (t->base() == Type::Long && OptoReg::is_reg(regnum)) {
duke@0 666 // For SPARC we have to swap high and low words for
duke@0 667 // long values stored in a single-register (g0-g7).
duke@0 668 array->append(new_loc_value( _regalloc, regnum , Location::normal ));
duke@0 669 array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
duke@0 670 } else
duke@0 671 #endif //SPARC
duke@0 672 if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon || t->base() == Type::Long ) {
duke@0 673 // Repack the double/long as two jints.
duke@0 674 // The convention the interpreter uses is that the second local
duke@0 675 // holds the first raw word of the native double representation.
duke@0 676 // This is actually reasonable, since locals and stack arrays
duke@0 677 // grow downwards in all implementations.
duke@0 678 // (If, on some machine, the interpreter's Java locals or stack
duke@0 679 // were to grow upwards, the embedded doubles would be word-swapped.)
duke@0 680 array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
duke@0 681 array->append(new_loc_value( _regalloc, regnum , Location::normal ));
duke@0 682 }
duke@0 683 #endif //_LP64
duke@0 684 else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) &&
duke@0 685 OptoReg::is_reg(regnum) ) {
kvn@1274 686 array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double()
duke@0 687 ? Location::float_in_dbl : Location::normal ));
duke@0 688 } else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) {
duke@0 689 array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long
duke@0 690 ? Location::int_in_long : Location::normal ));
kvn@331 691 } else if( t->base() == Type::NarrowOop ) {
kvn@331 692 array->append(new_loc_value( _regalloc, regnum, Location::narrowoop ));
duke@0 693 } else {
duke@0 694 array->append(new_loc_value( _regalloc, regnum, _regalloc->is_oop(local) ? Location::oop : Location::normal ));
duke@0 695 }
duke@0 696 return;
duke@0 697 }
duke@0 698
duke@0 699 // No register. It must be constant data.
duke@0 700 switch (t->base()) {
duke@0 701 case Type::Half: // Second half of a double
duke@0 702 ShouldNotReachHere(); // Caller should skip 2nd halves
duke@0 703 break;
duke@0 704 case Type::AnyPtr:
duke@0 705 array->append(new ConstantOopWriteValue(NULL));
duke@0 706 break;
duke@0 707 case Type::AryPtr:
duke@0 708 case Type::InstPtr:
duke@0 709 case Type::KlassPtr: // fall through
jrose@989 710 array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding()));
duke@0 711 break;
kvn@331 712 case Type::NarrowOop:
kvn@331 713 if (t == TypeNarrowOop::NULL_PTR) {
kvn@331 714 array->append(new ConstantOopWriteValue(NULL));
kvn@331 715 } else {
jrose@989 716 array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding()));
kvn@331 717 }
kvn@331 718 break;
duke@0 719 case Type::Int:
duke@0 720 array->append(new ConstantIntValue(t->is_int()->get_con()));
duke@0 721 break;
duke@0 722 case Type::RawPtr:
duke@0 723 // A return address (T_ADDRESS).
duke@0 724 assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
duke@0 725 #ifdef _LP64
duke@0 726 // Must be restored to the full-width 64-bit stack slot.
duke@0 727 array->append(new ConstantLongValue(t->is_ptr()->get_con()));
duke@0 728 #else
duke@0 729 array->append(new ConstantIntValue(t->is_ptr()->get_con()));
duke@0 730 #endif
duke@0 731 break;
duke@0 732 case Type::FloatCon: {
duke@0 733 float f = t->is_float_constant()->getf();
duke@0 734 array->append(new ConstantIntValue(jint_cast(f)));
duke@0 735 break;
duke@0 736 }
duke@0 737 case Type::DoubleCon: {
duke@0 738 jdouble d = t->is_double_constant()->getd();
duke@0 739 #ifdef _LP64
duke@0 740 array->append(new ConstantIntValue(0));
duke@0 741 array->append(new ConstantDoubleValue(d));
duke@0 742 #else
duke@0 743 // Repack the double as two jints.
duke@0 744 // The convention the interpreter uses is that the second local
duke@0 745 // holds the first raw word of the native double representation.
duke@0 746 // This is actually reasonable, since locals and stack arrays
duke@0 747 // grow downwards in all implementations.
duke@0 748 // (If, on some machine, the interpreter's Java locals or stack
duke@0 749 // were to grow upwards, the embedded doubles would be word-swapped.)
duke@0 750 jint *dp = (jint*)&d;
duke@0 751 array->append(new ConstantIntValue(dp[1]));
duke@0 752 array->append(new ConstantIntValue(dp[0]));
duke@0 753 #endif
duke@0 754 break;
duke@0 755 }
duke@0 756 case Type::Long: {
duke@0 757 jlong d = t->is_long()->get_con();
duke@0 758 #ifdef _LP64
duke@0 759 array->append(new ConstantIntValue(0));
duke@0 760 array->append(new ConstantLongValue(d));
duke@0 761 #else
duke@0 762 // Repack the long as two jints.
duke@0 763 // The convention the interpreter uses is that the second local
duke@0 764 // holds the first raw word of the native double representation.
duke@0 765 // This is actually reasonable, since locals and stack arrays
duke@0 766 // grow downwards in all implementations.
duke@0 767 // (If, on some machine, the interpreter's Java locals or stack
duke@0 768 // were to grow upwards, the embedded doubles would be word-swapped.)
duke@0 769 jint *dp = (jint*)&d;
duke@0 770 array->append(new ConstantIntValue(dp[1]));
duke@0 771 array->append(new ConstantIntValue(dp[0]));
duke@0 772 #endif
duke@0 773 break;
duke@0 774 }
duke@0 775 case Type::Top: // Add an illegal value here
duke@0 776 array->append(new LocationValue(Location()));
duke@0 777 break;
duke@0 778 default:
duke@0 779 ShouldNotReachHere();
duke@0 780 break;
duke@0 781 }
duke@0 782 }
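// A standalone sketch of the 32-bit repacking used above: the raw 64-bit
// image is split into two 32-bit words and recorded with the second word
// first, matching the dp[1]/dp[0] order in the code. memcpy is used here to
// sidestep aliasing questions; the helper name is illustrative only.
static void split_long_sketch(jlong value, jint* appended_first, jint* appended_second) {
  jint words[2];
  memcpy(words, &value, sizeof(words));
  *appended_first  = words[1];   // corresponds to dp[1] above
  *appended_second = words[0];   // corresponds to dp[0] above
}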
duke@0 783
duke@0 784 // Determine if this node starts a bundle
duke@0 785 bool Compile::starts_bundle(const Node *n) const {
duke@0 786 return (_node_bundling_limit > n->_idx &&
duke@0 787 _node_bundling_base[n->_idx].starts_bundle());
duke@0 788 }
duke@0 789
duke@0 790 //--------------------------Process_OopMap_Node--------------------------------
duke@0 791 void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) {
duke@0 792
duke@0 793 // Handle special safepoint nodes for synchronization
duke@0 794 MachSafePointNode *sfn = mach->as_MachSafePoint();
duke@0 795 MachCallNode *mcall;
duke@0 796
duke@0 797 #ifdef ENABLE_ZAP_DEAD_LOCALS
duke@0 798 assert( is_node_getting_a_safepoint(mach), "logic does not match; false negative");
duke@0 799 #endif
duke@0 800
duke@0 801 int safepoint_pc_offset = current_offset;
twisti@1137 802 bool is_method_handle_invoke = false;
kvn@1253 803 bool return_oop = false;
duke@0 804
duke@0 805 // Add the safepoint in the DebugInfoRecorder
duke@0 806 if( !mach->is_MachCall() ) {
duke@0 807 mcall = NULL;
duke@0 808 debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map);
duke@0 809 } else {
duke@0 810 mcall = mach->as_MachCall();
twisti@1137 811
twisti@1137 812 // Is the call a MethodHandle call?
twisti@1265 813 if (mcall->is_MachCallJava()) {
twisti@1265 814 if (mcall->as_MachCallJava()->_method_handle_invoke) {
twisti@1265 815 assert(has_method_handle_invokes(), "must have been set during call generation");
twisti@1265 816 is_method_handle_invoke = true;
twisti@1265 817 }
twisti@1265 818 }
twisti@1137 819
kvn@1253 820 // Check if a call returns an object.
kvn@1253 821 if (mcall->return_value_is_used() &&
kvn@1253 822 mcall->tf()->range()->field_at(TypeFunc::Parms)->isa_ptr()) {
kvn@1253 823 return_oop = true;
kvn@1253 824 }
duke@0 825 safepoint_pc_offset += mcall->ret_addr_offset();
duke@0 826 debug_info()->add_safepoint(safepoint_pc_offset, mcall->_oop_map);
duke@0 827 }
duke@0 828
duke@0 829 // Loop over the JVMState list to add scope information
duke@0 830 // Do not skip safepoints with a NULL method, they need monitor info
duke@0 831 JVMState* youngest_jvms = sfn->jvms();
duke@0 832 int max_depth = youngest_jvms->depth();
duke@0 833
kvn@63 834 // Allocate the object pool for scalar-replaced objects -- the map from
kvn@63 835 // small-integer keys (which can be recorded in the local and ostack
kvn@63 836 // arrays) to descriptions of the object state.
kvn@63 837 GrowableArray<ScopeValue*> *objs = new GrowableArray<ScopeValue*>();
kvn@63 838
duke@0 839 // Visit scopes from oldest to youngest.
duke@0 840 for (int depth = 1; depth <= max_depth; depth++) {
duke@0 841 JVMState* jvms = youngest_jvms->of_depth(depth);
duke@0 842 int idx;
duke@0 843 ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
duke@0 844 // Safepoints that do not have method() set only provide oop-map and monitor info
duke@0 845 // to support GC; these do not support deoptimization.
duke@0 846 int num_locs = (method == NULL) ? 0 : jvms->loc_size();
duke@0 847 int num_exps = (method == NULL) ? 0 : jvms->stk_size();
duke@0 848 int num_mon = jvms->nof_monitors();
duke@0 849 assert(method == NULL || jvms->bci() < 0 || num_locs == method->max_locals(),
duke@0 850 "JVMS local count must match that of the method");
duke@0 851
duke@0 852 // Add Local and Expression Stack Information
duke@0 853
duke@0 854 // Insert locals into the locarray
duke@0 855 GrowableArray<ScopeValue*> *locarray = new GrowableArray<ScopeValue*>(num_locs);
duke@0 856 for( idx = 0; idx < num_locs; idx++ ) {
kvn@63 857 FillLocArray( idx, sfn, sfn->local(jvms, idx), locarray, objs );
duke@0 858 }
duke@0 859
duke@0 860 // Insert expression stack entries into the exparray
duke@0 861 GrowableArray<ScopeValue*> *exparray = new GrowableArray<ScopeValue*>(num_exps);
duke@0 862 for( idx = 0; idx < num_exps; idx++ ) {
kvn@63 863 FillLocArray( idx, sfn, sfn->stack(jvms, idx), exparray, objs );
duke@0 864 }
duke@0 865
duke@0 866 // Add in mappings of the monitors
duke@0 867 assert( !method ||
duke@0 868 !method->is_synchronized() ||
duke@0 869 method->is_native() ||
duke@0 870 num_mon > 0 ||
duke@0 871 !GenerateSynchronizationCode,
duke@0 872 "monitors must always exist for synchronized methods");
duke@0 873
duke@0 874 // Build the growable array of MonitorValues for the monitors
duke@0 875 GrowableArray<MonitorValue*> *monarray = new GrowableArray<MonitorValue*>(num_mon);
duke@0 876
duke@0 877 // Loop over monitors and insert into array
duke@0 878 for(idx = 0; idx < num_mon; idx++) {
duke@0 879 // Grab the node that defines this monitor
kvn@460 880 Node* box_node = sfn->monitor_box(jvms, idx);
kvn@460 881 Node* obj_node = sfn->monitor_obj(jvms, idx);
duke@0 882
duke@0 883 // Create ScopeValue for object
duke@0 884 ScopeValue *scval = NULL;
kvn@63 885
kvn@63 886 if( obj_node->is_SafePointScalarObject() ) {
kvn@63 887 SafePointScalarObjectNode* spobj = obj_node->as_SafePointScalarObject();
kvn@63 888 scval = Compile::sv_for_node_id(objs, spobj->_idx);
kvn@63 889 if (scval == NULL) {
kvn@63 890 const Type *t = obj_node->bottom_type();
kvn@63 891 ciKlass* cik = t->is_oopptr()->klass();
kvn@63 892 assert(cik->is_instance_klass() ||
kvn@63 893 cik->is_array_klass(), "Not supported allocation.");
kvn@63 894 ObjectValue* sv = new ObjectValue(spobj->_idx,
jrose@989 895 new ConstantOopWriteValue(cik->constant_encoding()));
kvn@63 896 Compile::set_sv_for_object_node(objs, sv);
kvn@63 897
kvn@63 898 uint first_ind = spobj->first_index();
kvn@63 899 for (uint i = 0; i < spobj->n_fields(); i++) {
kvn@63 900 Node* fld_node = sfn->in(first_ind+i);
kvn@63 901 (void)FillLocArray(sv->field_values()->length(), sfn, fld_node, sv->field_values(), objs);
kvn@63 902 }
kvn@63 903 scval = sv;
kvn@63 904 }
kvn@63 905 } else if( !obj_node->is_Con() ) {
duke@0 906 OptoReg::Name obj_reg = _regalloc->get_reg_first(obj_node);
kvn@331 907 if( obj_node->bottom_type()->base() == Type::NarrowOop ) {
kvn@331 908 scval = new_loc_value( _regalloc, obj_reg, Location::narrowoop );
kvn@331 909 } else {
kvn@331 910 scval = new_loc_value( _regalloc, obj_reg, Location::oop );
kvn@331 911 }
duke@0 912 } else {
kvn@331 913 const TypePtr *tp = obj_node->bottom_type()->make_ptr();
jrose@989 914 scval = new ConstantOopWriteValue(tp->is_instptr()->const_oop()->constant_encoding());
duke@0 915 }
duke@0 916
duke@0 917 OptoReg::Name box_reg = BoxLockNode::stack_slot(box_node);
kvn@66 918 Location basic_lock = Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg));
kvn@460 919 while( !box_node->is_BoxLock() ) box_node = box_node->in(1);
kvn@66 920 monarray->append(new MonitorValue(scval, basic_lock, box_node->as_BoxLock()->is_eliminated()));
duke@0 921 }
duke@0 922
kvn@63 923 // We dump the object pool first, since deoptimization reads it in first.
kvn@63 924 debug_info()->dump_object_pool(objs);
kvn@63 925
duke@0 926 // Build first class objects to pass to scope
duke@0 927 DebugToken *locvals = debug_info()->create_scope_values(locarray);
duke@0 928 DebugToken *expvals = debug_info()->create_scope_values(exparray);
duke@0 929 DebugToken *monvals = debug_info()->create_monitor_values(monarray);
duke@0 930
duke@0 931 // Make method available for all Safepoints
duke@0 932 ciMethod* scope_method = method ? method : _method;
duke@0 933 // Describe the scope here
duke@0 934 assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI");
twisti@1135 935 assert(!jvms->should_reexecute() || depth == max_depth, "reexecute allowed only for the youngest");
kvn@63 936 // Now we can describe the scope.
kvn@1253 937 debug_info()->describe_scope(safepoint_pc_offset, scope_method, jvms->bci(), jvms->should_reexecute(), is_method_handle_invoke, return_oop, locvals, expvals, monvals);
duke@0 938 } // End jvms loop
duke@0 939
duke@0 940 // Mark the end of the scope set.
duke@0 941 debug_info()->end_safepoint(safepoint_pc_offset);
duke@0 942 }
duke@0 943
duke@0 944
duke@0 945
duke@0 946 // A simplified version of Process_OopMap_Node, to handle non-safepoints.
duke@0 947 class NonSafepointEmitter {
duke@0 948 Compile* C;
duke@0 949 JVMState* _pending_jvms;
duke@0 950 int _pending_offset;
duke@0 951
duke@0 952 void emit_non_safepoint();
duke@0 953
duke@0 954 public:
duke@0 955 NonSafepointEmitter(Compile* compile) {
duke@0 956 this->C = compile;
duke@0 957 _pending_jvms = NULL;
duke@0 958 _pending_offset = 0;
duke@0 959 }
duke@0 960
duke@0 961 void observe_instruction(Node* n, int pc_offset) {
duke@0 962 if (!C->debug_info()->recording_non_safepoints()) return;
duke@0 963
duke@0 964 Node_Notes* nn = C->node_notes_at(n->_idx);
duke@0 965 if (nn == NULL || nn->jvms() == NULL) return;
duke@0 966 if (_pending_jvms != NULL &&
duke@0 967 _pending_jvms->same_calls_as(nn->jvms())) {
duke@0 968 // Repeated JVMS? Stretch it up here.
duke@0 969 _pending_offset = pc_offset;
duke@0 970 } else {
duke@0 971 if (_pending_jvms != NULL &&
duke@0 972 _pending_offset < pc_offset) {
duke@0 973 emit_non_safepoint();
duke@0 974 }
duke@0 975 _pending_jvms = NULL;
duke@0 976 if (pc_offset > C->debug_info()->last_pc_offset()) {
duke@0 977 // This is the only way _pending_jvms can become non-NULL:
duke@0 978 _pending_jvms = nn->jvms();
duke@0 979 _pending_offset = pc_offset;
duke@0 980 }
duke@0 981 }
duke@0 982 }
duke@0 983
duke@0 984 // Stay out of the way of real safepoints:
duke@0 985 void observe_safepoint(JVMState* jvms, int pc_offset) {
duke@0 986 if (_pending_jvms != NULL &&
duke@0 987 !_pending_jvms->same_calls_as(jvms) &&
duke@0 988 _pending_offset < pc_offset) {
duke@0 989 emit_non_safepoint();
duke@0 990 }
duke@0 991 _pending_jvms = NULL;
duke@0 992 }
duke@0 993
duke@0 994 void flush_at_end() {
duke@0 995 if (_pending_jvms != NULL) {
duke@0 996 emit_non_safepoint();
duke@0 997 }
duke@0 998 _pending_jvms = NULL;
duke@0 999 }
duke@0 1000 };
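// A minimal sketch of the calling protocol for the emitter above, using a
// hypothetical flat list of (node, pc offset) pairs in place of the real
// per-block emission loop in Fill_buffer():
static void drive_non_safepoints_sketch(Compile* C,
                                        GrowableArray<Node*>* nodes,
                                        GrowableArray<int>* pc_offsets) {
  NonSafepointEmitter emitter(C);
  for (int k = 0; k < nodes->length(); k++) {
    emitter.observe_instruction(nodes->at(k), pc_offsets->at(k));
  }
  emitter.flush_at_end();   // emit any JVMState still pending at the end of the code
}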
duke@0 1001
duke@0 1002 void NonSafepointEmitter::emit_non_safepoint() {
duke@0 1003 JVMState* youngest_jvms = _pending_jvms;
duke@0 1004 int pc_offset = _pending_offset;
duke@0 1005
duke@0 1006 // Clear it now:
duke@0 1007 _pending_jvms = NULL;
duke@0 1008
duke@0 1009 DebugInformationRecorder* debug_info = C->debug_info();
duke@0 1010 assert(debug_info->recording_non_safepoints(), "sanity");
duke@0 1011
duke@0 1012 debug_info->add_non_safepoint(pc_offset);
duke@0 1013 int max_depth = youngest_jvms->depth();
duke@0 1014
duke@0 1015 // Visit scopes from oldest to youngest.
duke@0 1016 for (int depth = 1; depth <= max_depth; depth++) {
duke@0 1017 JVMState* jvms = youngest_jvms->of_depth(depth);
duke@0 1018 ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
cfang@900 1019 assert(!jvms->should_reexecute() || depth==max_depth, "reexecute allowed only for the youngest");
cfang@900 1020 debug_info->describe_scope(pc_offset, method, jvms->bci(), jvms->should_reexecute());
duke@0 1021 }
duke@0 1022
duke@0 1023 // Mark the end of the scope set.
duke@0 1024 debug_info->end_non_safepoint(pc_offset);
duke@0 1025 }
duke@0 1026
duke@0 1027
duke@0 1028
duke@0 1029 // helper for Fill_buffer bailout logic
duke@0 1030 static void turn_off_compiler(Compile* C) {
kvn@2200 1031 if (CodeCache::largest_free_block() >= CodeCacheMinimumFreeSpace*10) {
duke@0 1032 // Do not turn off compilation if a single giant method has
duke@0 1033 // blown the code cache size.
duke@0 1034 C->record_failure("excessive request to CodeCache");
duke@0 1035 } else {
kvn@28 1036 // Let CompileBroker disable further compilations.
duke@0 1037 C->record_failure("CodeCache is full");
duke@0 1038 }
duke@0 1039 }
duke@0 1040
duke@0 1041
duke@0 1042 //------------------------------Fill_buffer------------------------------------
duke@0 1043 void Compile::Fill_buffer() {
duke@0 1044
duke@0 1045 // Set the initially allocated size
duke@0 1046 int code_req = initial_code_capacity;
duke@0 1047 int locs_req = initial_locs_capacity;
duke@0 1048 int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity;
duke@0 1049 int const_req = initial_const_capacity;
duke@0 1050 bool labels_not_set = true;
duke@0 1051
duke@0 1052 int pad_req = NativeCall::instruction_size;
duke@0 1053 // The extra spacing after the code is necessary on some platforms.
duke@0 1054 // Sometimes we need to patch in a jump after the last instruction,
duke@0 1055 // if the nmethod has been deoptimized. (See 4932387, 4894843.)
duke@0 1056
duke@0 1057 uint i;
duke@0 1058 // Compute the byte offset where we can store the deopt pc.
duke@0 1059 if (fixed_slots() != 0) {
duke@0 1060 _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot));
duke@0 1061 }
duke@0 1062
duke@0 1063 // Compute prolog code size
duke@0 1064 _method_size = 0;
duke@0 1065 _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize;
duke@0 1066 #ifdef IA64
duke@0 1067 if (save_argument_registers()) {
duke@0 1068 // 4815101: this is a stub with implicit and unknown precision fp args.
duke@0 1069 // The usual spill mechanism can only generate stfd's in this case, which
duke@0 1070 // doesn't work if the fp reg to spill contains a single-precision denorm.
duke@0 1071 // Instead, we hack around the normal spill mechanism using stfspill's and
duke@0 1072 // ldffill's in the MachProlog and MachEpilog emit methods. We allocate
duke@0 1073 // space here for the fp arg regs (f8-f15) we're going to thusly spill.
duke@0 1074 //
duke@0 1075 // If we ever implement 16-byte 'registers' == stack slots, we can
duke@0 1076 // get rid of this hack and have SpillCopy generate stfspill/ldffill
duke@0 1077 // instead of stfd/stfs/ldfd/ldfs.
duke@0 1078 _frame_slots += 8*(16/BytesPerInt);
duke@0 1079 }
duke@0 1080 #endif
duke@0 1081 assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" );
duke@0 1082
duke@0 1083 // Create an array of unused labels, one for each basic block
duke@0 1084 Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1);
duke@0 1085
duke@0 1086 for( i=0; i <= _cfg->_num_blocks; i++ ) {
duke@0 1087 blk_labels[i].init();
duke@0 1088 }
duke@0 1089
twisti@1915 1090 if (has_mach_constant_base_node()) {
twisti@1915 1091 // Fill the constant table.
twisti@1915 1092 // Note: This must happen before Shorten_branches.
twisti@1915 1093 for (i = 0; i < _cfg->_num_blocks; i++) {
twisti@1915 1094 Block* b = _cfg->_blocks[i];
twisti@1915 1095
twisti@1915 1096 for (uint j = 0; j < b->_nodes.size(); j++) {
twisti@1915 1097 Node* n = b->_nodes[j];
twisti@1915 1098
twisti@1915 1099 // If the node is a MachConstantNode evaluate the constant
twisti@1915 1100 // value section.
twisti@1915 1101 if (n->is_MachConstant()) {
twisti@1915 1102 MachConstantNode* machcon = n->as_MachConstant();
twisti@1915 1103 machcon->eval_constant(C);
twisti@1915 1104 }
twisti@1915 1105 }
twisti@1915 1106 }
twisti@1915 1107
twisti@1915 1108 // Calculate the offsets of the constants and the size of the
twisti@1915 1109 // constant table (including the padding to the next section).
twisti@1915 1110 constant_table().calculate_offsets_and_size();
twisti@1915 1111 const_req = constant_table().size();
twisti@1915 1112 }
twisti@1915 1113
twisti@1915 1114 // Initialize the space for the BufferBlob used to find and verify
twisti@1915 1115 // instruction size in MachNode::emit_size()
twisti@1915 1116 init_scratch_buffer_blob(const_req);
twisti@1915 1117 if (failing()) return; // Out of memory
twisti@1915 1118
duke@0 1119 // If this machine supports different size branch offsets, then pre-compute
duke@0 1120 // the length of the blocks
never@415 1121 if( _matcher->is_short_branch_offset(-1, 0) ) {
twisti@1915 1122 Shorten_branches(blk_labels, code_req, locs_req, stub_req);
duke@0 1123 labels_not_set = false;
duke@0 1124 }
duke@0 1125
duke@0 1126 // nmethod and CodeBuffer count stubs & constants as part of method's code.
duke@0 1127 int exception_handler_req = size_exception_handler();
duke@0 1128 int deopt_handler_req = size_deopt_handler();
duke@0 1129 exception_handler_req += MAX_stubs_size; // add marginal slop for handler
duke@0 1130 deopt_handler_req += MAX_stubs_size; // add marginal slop for handler
duke@0 1131 stub_req += MAX_stubs_size; // ensure per-stub margin
duke@0 1132 code_req += MAX_inst_size; // ensure per-instruction margin
twisti@1265 1133
duke@0 1134 if (StressCodeBuffers)
duke@0 1135 code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion
twisti@1265 1136
twisti@1265 1137 int total_req =
twisti@1915 1138 const_req +
twisti@1265 1139 code_req +
twisti@1265 1140 pad_req +
twisti@1265 1141 stub_req +
twisti@1265 1142 exception_handler_req +
twisti@1915 1143 deopt_handler_req; // deopt handler
twisti@1265 1144
twisti@1265 1145 if (has_method_handle_invokes())
twisti@1265 1146 total_req += deopt_handler_req; // deopt MH handler
twisti@1265 1147
duke@0 1148 CodeBuffer* cb = code_buffer();
duke@0 1149 cb->initialize(total_req, locs_req);
duke@0 1150
duke@0 1151 // Have we run out of code space?
kvn@1202 1152 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
duke@0 1153 turn_off_compiler(this);
duke@0 1154 return;
duke@0 1155 }
duke@0 1156 // Configure the code buffer.
duke@0 1157 cb->initialize_consts_size(const_req);
duke@0 1158 cb->initialize_stubs_size(stub_req);
duke@0 1159 cb->initialize_oop_recorder(env()->oop_recorder());
duke@0 1160
duke@0 1161 // fill in the nop array for bundling computations
duke@0 1162 MachNode *_nop_list[Bundle::_nop_count];
duke@0 1163 Bundle::initialize_nops(_nop_list, this);
duke@0 1164
duke@0 1165 // Create oopmap set.
duke@0 1166 _oop_map_set = new OopMapSet();
duke@0 1167
duke@0 1168 // !!!!! This preserves old handling of oopmaps for now
duke@0 1169 debug_info()->set_oopmaps(_oop_map_set);
duke@0 1170
duke@0 1171 // Count and start of implicit null check instructions
duke@0 1172 uint inct_cnt = 0;
duke@0 1173 uint *inct_starts = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1);
duke@0 1174
duke@0 1175 // Count and start of calls
duke@0 1176 uint *call_returns = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1);
duke@0 1177
duke@0 1178 uint return_offset = 0;
kvn@859 1179 int nop_size = (new (this) MachNopNode())->size(_regalloc);
duke@0 1180
duke@0 1181 int previous_offset = 0;
duke@0 1182 int current_offset = 0;
duke@0 1183 int last_call_offset = -1;
duke@0 1184
duke@0 1185 // Create an array of node offsets, one entry per node, if printing is enabled
duke@0 1186 #ifndef PRODUCT
duke@0 1187 int *node_offsets = NULL;
duke@0 1188 uint node_offset_limit = unique();
duke@0 1189
duke@0 1190
duke@0 1191 if ( print_assembly() )
duke@0 1192 node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit);
duke@0 1193 #endif
duke@0 1194
duke@0 1195 NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily
duke@0 1196
twisti@1915 1197 // Emit the constant table.
twisti@1915 1198 if (has_mach_constant_base_node()) {
twisti@1915 1199 constant_table().emit(*cb);
twisti@1915 1200 }
twisti@1915 1201
duke@0 1202 // ------------------
duke@0 1203 // Now fill in the code buffer
duke@0 1204 Node *delay_slot = NULL;
duke@0 1205
duke@0 1206 for( i=0; i < _cfg->_num_blocks; i++ ) {
duke@0 1207 Block *b = _cfg->_blocks[i];
duke@0 1208
duke@0 1209 Node *head = b->head();
duke@0 1210
duke@0 1211 // If this block needs to start aligned (i.e., can be reached other
duke@0 1212 // than by falling-thru from the previous block), then force the
duke@0 1213 // start of a new bundle.
duke@0 1214 if( Pipeline::requires_bundling() && starts_bundle(head) )
duke@0 1215 cb->flush_bundle(true);
duke@0 1216
duke@0 1217 // Define the label at the beginning of the basic block
twisti@1915 1218 if (labels_not_set) {
twisti@1915 1219 MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
twisti@1915 1220 } else {
twisti@1915 1221 assert(blk_labels[b->_pre_order].loc_pos() == cb->insts_size(),
twisti@1915 1222 err_msg("label position does not match code offset: %d != %d",
twisti@1915 1223 blk_labels[b->_pre_order].loc_pos(), cb->insts_size()));
twisti@1915 1224 }
duke@0 1225
duke@0 1226 uint last_inst = b->_nodes.size();
duke@0 1227
duke@0 1228 // Emit block normally, except for last instruction.
duke@0 1229 // Emit means "dump code bits into code buffer".
duke@0 1230 for( uint j = 0; j<last_inst; j++ ) {
duke@0 1231
duke@0 1232 // Get the node
duke@0 1233 Node* n = b->_nodes[j];
duke@0 1234
duke@0 1235 // See if delay slots are supported
duke@0 1236 if (valid_bundle_info(n) &&
duke@0 1237 node_bundling(n)->used_in_unconditional_delay()) {
duke@0 1238 assert(delay_slot == NULL, "no use of delay slot node");
duke@0 1239 assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size");
duke@0 1240
duke@0 1241 delay_slot = n;
duke@0 1242 continue;
duke@0 1243 }
duke@0 1244
duke@0 1245 // If this starts a new instruction group, then flush the current one
duke@0 1246 // (but allow split bundles)
duke@0 1247 if( Pipeline::requires_bundling() && starts_bundle(n) )
duke@0 1248 cb->flush_bundle(false);
duke@0 1249
duke@0 1250 // The following logic is duplicated in the code ifdeffed for
twisti@605 1251 // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It
duke@0 1252 // should be factored out. Or maybe dispersed to the nodes?
duke@0 1253
duke@0 1254 // Special handling for SafePoint/Call Nodes
duke@0 1255 bool is_mcall = false;
duke@0 1256 if( n->is_Mach() ) {
duke@0 1257 MachNode *mach = n->as_Mach();
duke@0 1258 is_mcall = n->is_MachCall();
duke@0 1259 bool is_sfn = n->is_MachSafePoint();
duke@0 1260
duke@0 1261 // If this requires all previous instructions be flushed, then do so
duke@0 1262 if( is_sfn || is_mcall || mach->alignment_required() != 1) {
duke@0 1263 cb->flush_bundle(true);
twisti@1668 1264 current_offset = cb->insts_size();
duke@0 1265 }
duke@0 1266
duke@0 1267 // align the instruction if necessary
duke@0 1268 int padding = mach->compute_padding(current_offset);
duke@0 1269 // Make sure safepoint node for polling is distinct from a call's
duke@0 1270 // return by adding a nop if needed.
duke@0 1271 if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset ) {
duke@0 1272 padding = nop_size;
duke@0 1273 }
jcoomes@1409 1274 assert( labels_not_set || padding == 0, "instruction should already be aligned");
duke@0 1275
duke@0 1276 if(padding > 0) {
duke@0 1277 assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
duke@0 1278 int nops_cnt = padding / nop_size;
duke@0 1279 MachNode *nop = new (this) MachNopNode(nops_cnt);
duke@0 1280 b->_nodes.insert(j++, nop);
duke@0 1281 last_inst++;
duke@0 1282 _cfg->_bbs.map( nop->_idx, b );
duke@0 1283 nop->emit(*cb, _regalloc);
duke@0 1284 cb->flush_bundle(true);
twisti@1668 1285 current_offset = cb->insts_size();
duke@0 1286 }
duke@0 1287
duke@0 1288 // Remember the start of the last call in a basic block
duke@0 1289 if (is_mcall) {
duke@0 1290 MachCallNode *mcall = mach->as_MachCall();
duke@0 1291
duke@0 1292 // This destination address is NOT PC-relative
duke@0 1293 mcall->method_set((intptr_t)mcall->entry_point());
duke@0 1294
duke@0 1295 // Save the return address
duke@0 1296 call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset();
duke@0 1297
duke@0 1298 if (!mcall->is_safepoint_node()) {
duke@0 1299 is_mcall = false;
duke@0 1300 is_sfn = false;
duke@0 1301 }
duke@0 1302 }
duke@0 1303
duke@0 1304 // sfn will be valid whenever mcall is valid now because of inheritance
duke@0 1305 if( is_sfn || is_mcall ) {
duke@0 1306
duke@0 1307 // Handle special safepoint nodes for synchronization
duke@0 1308 if( !is_mcall ) {
duke@0 1309 MachSafePointNode *sfn = mach->as_MachSafePoint();
duke@0 1310 // !!!!! Stubs only need an oopmap right now, so bail out
duke@0 1311 if( sfn->jvms()->method() == NULL) {
duke@0 1312 // Write the oopmap directly to the code blob??!!
duke@0 1313 # ifdef ENABLE_ZAP_DEAD_LOCALS
duke@0 1314 assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive");
duke@0 1315 # endif
duke@0 1316 continue;
duke@0 1317 }
duke@0 1318 } // End synchronization
duke@0 1319
duke@0 1320 non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
duke@0 1321 current_offset);
duke@0 1322 Process_OopMap_Node(mach, current_offset);
duke@0 1323 } // End if safepoint
duke@0 1324
duke@0 1325 // If this is a null check, then add the start of the previous instruction to the list
duke@0 1326 else if( mach->is_MachNullCheck() ) {
duke@0 1327 inct_starts[inct_cnt++] = previous_offset;
duke@0 1328 }
duke@0 1329
duke@0 1330 // If this is a branch, then fill in the label with the target BB's label
duke@0 1331 else if ( mach->is_Branch() ) {
duke@0 1332
duke@0 1333 if ( mach->ideal_Opcode() == Op_Jump ) {
duke@0 1334 for (uint h = 0; h < b->_num_succs; h++ ) {
duke@0 1335 Block* succs_block = b->_succs[h];
duke@0 1336 for (uint j = 1; j < succs_block->num_preds(); j++) {
duke@0 1337 Node* jpn = succs_block->pred(j);
duke@0 1338 if ( jpn->is_JumpProj() && jpn->in(0) == mach ) {
duke@0 1339 uint block_num = succs_block->non_connector()->_pre_order;
duke@0 1340 Label *blkLabel = &blk_labels[block_num];
duke@0 1341 mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel);
duke@0 1342 }
duke@0 1343 }
duke@0 1344 }
duke@0 1345 } else {
duke@0 1346 // For Branches
duke@0 1347 // This requires the TRUE branch target be in succs[0]
duke@0 1348 uint block_num = b->non_connector_successor(0)->_pre_order;
duke@0 1349 mach->label_set( blk_labels[block_num], block_num );
duke@0 1350 }
duke@0 1351 }
duke@0 1352
duke@0 1353 #ifdef ASSERT
twisti@605 1354 // Check that oop-store precedes the card-mark
duke@0 1355 else if( mach->ideal_Opcode() == Op_StoreCM ) {
duke@0 1356 uint storeCM_idx = j;
never@2345 1357 int count = 0;
never@2345 1358 for (uint prec = mach->req(); prec < mach->len(); prec++) {
never@2345 1359 Node *oop_store = mach->in(prec); // Precedence edge
never@2345 1360 if (oop_store == NULL) continue;
never@2345 1361 count++;
never@2345 1362 uint i4;
never@2345 1363 for( i4 = 0; i4 < last_inst; ++i4 ) {
never@2345 1364 if( b->_nodes[i4] == oop_store ) break;
never@2345 1365 }
never@2345 1366 // Note: This test can provide a false failure if other precedence
never@2345 1367 // edges have been added to the storeCMNode.
never@2345 1368 assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
duke@0 1369 }
never@2345 1370 assert(count > 0, "storeCM expects at least one precedence edge");
duke@0 1371 }
duke@0 1372 #endif
duke@0 1373
duke@0 1374 else if( !n->is_Proj() ) {
twisti@605 1375 // Remember the beginning of the previous instruction, in case
duke@0 1376 // it's followed by a flag-kill and a null-check. Happens on
duke@0 1377 // Intel all the time, with add-to-memory kind of opcodes.
duke@0 1378 previous_offset = current_offset;
duke@0 1379 }
duke@0 1380 }
duke@0 1381
duke@0 1382 // Verify that there is sufficient space remaining
duke@0 1383 cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size);
kvn@1202 1384 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
duke@0 1385 turn_off_compiler(this);
duke@0 1386 return;
duke@0 1387 }
duke@0 1388
duke@0 1389 // Save the offset for the listing
duke@0 1390 #ifndef PRODUCT
duke@0 1391 if( node_offsets && n->_idx < node_offset_limit )
twisti@1668 1392 node_offsets[n->_idx] = cb->insts_size();
duke@0 1393 #endif
duke@0 1394
duke@0 1395 // "Normal" instruction case
duke@0 1396 n->emit(*cb, _regalloc);
twisti@1668 1397 current_offset = cb->insts_size();
duke@0 1398 non_safepoints.observe_instruction(n, current_offset);
duke@0 1399
duke@0 1400 // mcall is last "call" that can be a safepoint
duke@0 1401 // record it so we can see if a poll will directly follow it
duke@0 1402 // in which case we'll need a pad to make the PcDesc sites unique
duke@0 1403 // see 5010568. This can be slightly inaccurate but conservative
duke@0 1404 // in the case that the return address is not actually at current_offset.
duke@0 1405 // This is a small price to pay.
duke@0 1406
duke@0 1407 if (is_mcall) {
duke@0 1408 last_call_offset = current_offset;
duke@0 1409 }
duke@0 1410
duke@0 1411 // See if this instruction has a delay slot
duke@0 1412 if ( valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
duke@0 1413 assert(delay_slot != NULL, "expecting delay slot node");
duke@0 1414
duke@0 1415 // Back up 1 instruction
twisti@1668 1416 cb->set_insts_end(cb->insts_end() - Pipeline::instr_unit_size());
duke@0 1417
duke@0 1418 // Save the offset for the listing
duke@0 1419 #ifndef PRODUCT
duke@0 1420 if( node_offsets && delay_slot->_idx < node_offset_limit )
twisti@1668 1421 node_offsets[delay_slot->_idx] = cb->insts_size();
duke@0 1422 #endif
duke@0 1423
duke@0 1424 // Support a SafePoint in the delay slot
duke@0 1425 if( delay_slot->is_MachSafePoint() ) {
duke@0 1426 MachNode *mach = delay_slot->as_Mach();
duke@0 1427 // !!!!! Stubs only need an oopmap right now, so bail out
duke@0 1428 if( !mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL ) {
duke@0 1429 // Write the oopmap directly to the code blob??!!
duke@0 1430 # ifdef ENABLE_ZAP_DEAD_LOCALS
duke@0 1431 assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive");
duke@0 1432 # endif
duke@0 1433 delay_slot = NULL;
duke@0 1434 continue;
duke@0 1435 }
duke@0 1436
duke@0 1437 int adjusted_offset = current_offset - Pipeline::instr_unit_size();
duke@0 1438 non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
duke@0 1439 adjusted_offset);
duke@0 1440 // Generate an OopMap entry
duke@0 1441 Process_OopMap_Node(mach, adjusted_offset);
duke@0 1442 }
duke@0 1443
duke@0 1444 // Insert the delay slot instruction
duke@0 1445 delay_slot->emit(*cb, _regalloc);
duke@0 1446
duke@0 1447 // Don't reuse it
duke@0 1448 delay_slot = NULL;
duke@0 1449 }
duke@0 1450
duke@0 1451 } // End for all instructions in block
duke@0 1452
rasbold@418 1453 // If the next block is the top of a loop, pad this block out to align
rasbold@418 1454 // the loop top a little. Helps prevent pipe stalls at loop back branches.
duke@0 1455 if( i<_cfg->_num_blocks-1 ) {
duke@0 1456 Block *nb = _cfg->_blocks[i+1];
duke@0 1457 uint padding = nb->alignment_padding(current_offset);
duke@0 1458 if( padding > 0 ) {
duke@0 1459 MachNode *nop = new (this) MachNopNode(padding / nop_size);
duke@0 1460 b->_nodes.insert( b->_nodes.size(), nop );
duke@0 1461 _cfg->_bbs.map( nop->_idx, b );
duke@0 1462 nop->emit(*cb, _regalloc);
twisti@1668 1463 current_offset = cb->insts_size();
duke@0 1464 }
duke@0 1465 }
duke@0 1466
duke@0 1467 } // End of for all blocks
duke@0 1468
duke@0 1469 non_safepoints.flush_at_end();
duke@0 1470
duke@0 1471 // Offset too large?
duke@0 1472 if (failing()) return;
duke@0 1473
duke@0 1474 // Define a pseudo-label at the end of the code
duke@0 1475 MacroAssembler(cb).bind( blk_labels[_cfg->_num_blocks] );
duke@0 1476
duke@0 1477 // Compute the size of the first block
duke@0 1478 _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos();
duke@0 1479
twisti@1668 1480 assert(cb->insts_size() < 500000, "method is unreasonably large");
duke@0 1481
duke@0 1482 // ------------------
duke@0 1483
duke@0 1484 #ifndef PRODUCT
duke@0 1485 // Information on the size of the method, without the extraneous code
twisti@1668 1486 Scheduling::increment_method_size(cb->insts_size());
duke@0 1487 #endif
duke@0 1488
duke@0 1489 // ------------------
duke@0 1490 // Fill in exception table entries.
duke@0 1491 FillExceptionTables(inct_cnt, call_returns, inct_starts, blk_labels);
duke@0 1492
duke@0 1493 // Only Java methods have exception handlers and deopt handlers
duke@0 1494 if (_method) {
duke@0 1495 // Emit the exception handler code.
duke@0 1496 _code_offsets.set_value(CodeOffsets::Exceptions, emit_exception_handler(*cb));
duke@0 1497 // Emit the deopt handler code.
duke@0 1498 _code_offsets.set_value(CodeOffsets::Deopt, emit_deopt_handler(*cb));
twisti@1265 1499
twisti@1265 1500 // Emit the MethodHandle deopt handler code (if required).
twisti@1265 1501 if (has_method_handle_invokes()) {
twisti@1265 1502 // We can use the same code as for the normal deopt handler, we
twisti@1265 1503 // just need a different entry point address.
twisti@1265 1504 _code_offsets.set_value(CodeOffsets::DeoptMH, emit_deopt_handler(*cb));
twisti@1265 1505 }
duke@0 1506 }
duke@0 1507
duke@0 1508 // One last check for failed CodeBuffer::expand:
kvn@1202 1509 if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
duke@0 1510 turn_off_compiler(this);
duke@0 1511 return;
duke@0 1512 }
duke@0 1513
duke@0 1514 #ifndef PRODUCT
duke@0 1515 // Dump the assembly code, including basic-block numbers
duke@0 1516 if (print_assembly()) {
duke@0 1517 ttyLocker ttyl; // keep the following output all in one block
duke@0 1518 if (!VMThread::should_terminate()) { // test this under the tty lock
duke@0 1519 // This output goes directly to the tty, not the compiler log.
duke@0 1520 // To enable tools to match it up with the compilation activity,
duke@0 1521 // be sure to tag this tty output with the compile ID.
duke@0 1522 if (xtty != NULL) {
duke@0 1523 xtty->head("opto_assembly compile_id='%d'%s", compile_id(),
duke@0 1524 is_osr_compilation() ? " compile_kind='osr'" :
duke@0 1525 "");
duke@0 1526 }
duke@0 1527 if (method() != NULL) {
duke@0 1528 method()->print_oop();
duke@0 1529 print_codes();
duke@0 1530 }
duke@0 1531 dump_asm(node_offsets, node_offset_limit);
duke@0 1532 if (xtty != NULL) {
duke@0 1533 xtty->tail("opto_assembly");
duke@0 1534 }
duke@0 1535 }
duke@0 1536 }
duke@0 1537 #endif
duke@0 1538
duke@0 1539 }
duke@0 1540
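// A minimal standalone sketch of the nop-padding arithmetic used in the
// emission loop above: when an instruction must start on an alignment
// boundary, the gap is filled with whole nops, so the padding has to be a
// multiple of the nop size.  This is a simplified model (fixed 4-byte nops,
// made-up offsets), not the MachNode/CodeBuffer machinery of this file.
#include <cassert>
#include <cstdio>

static int compute_padding(int current_offset, int alignment) {
  int rem = current_offset % alignment;
  return (rem == 0) ? 0 : alignment - rem;   // bytes needed to reach the boundary
}

int main() {
  const int nop_size = 4;                    // assume a fixed-width 4-byte nop
  int current_offset = 20;                   // hypothetical offset, a multiple of nop_size
  int padding = compute_padding(current_offset, 8);
  assert(padding % nop_size == 0);           // whole nops only, as asserted above
  int nops_cnt = padding / nop_size;         // number of nops to emit
  printf("offset %d needs %d byte(s) of padding = %d nop(s)\n",
         current_offset, padding, nops_cnt);
  return 0;
}
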
duke@0 1541 void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) {
duke@0 1542 _inc_table.set_size(cnt);
duke@0 1543
duke@0 1544 uint inct_cnt = 0;
duke@0 1545 for( uint i=0; i<_cfg->_num_blocks; i++ ) {
duke@0 1546 Block *b = _cfg->_blocks[i];
duke@0 1547 Node *n = NULL;
duke@0 1548 int j;
duke@0 1549
duke@0 1550 // Find the branch; ignore trailing NOPs.
duke@0 1551 for( j = b->_nodes.size()-1; j>=0; j-- ) {
duke@0 1552 n = b->_nodes[j];
duke@0 1553 if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con )
duke@0 1554 break;
duke@0 1555 }
duke@0 1556
duke@0 1557 // If we didn't find anything, continue
duke@0 1558 if( j < 0 ) continue;
duke@0 1559
duke@0 1560 // Compute ExceptionHandlerTable subtable entry and add it
duke@0 1561 // (skip empty blocks)
duke@0 1562 if( n->is_Catch() ) {
duke@0 1563
duke@0 1564 // Get the offset of the return from the call
duke@0 1565 uint call_return = call_returns[b->_pre_order];
duke@0 1566 #ifdef ASSERT
duke@0 1567 assert( call_return > 0, "no call seen for this basic block" );
duke@0 1568 while( b->_nodes[--j]->Opcode() == Op_MachProj ) ;
duke@0 1569 assert( b->_nodes[j]->is_Call(), "CatchProj must follow call" );
duke@0 1570 #endif
duke@0 1571 // The last instruction is a CatchNode; find its CatchProjNodes
duke@0 1572 int nof_succs = b->_num_succs;
duke@0 1573 // allocate space
duke@0 1574 GrowableArray<intptr_t> handler_bcis(nof_succs);
duke@0 1575 GrowableArray<intptr_t> handler_pcos(nof_succs);
duke@0 1576 // iterate through all successors
duke@0 1577 for (int j = 0; j < nof_succs; j++) {
duke@0 1578 Block* s = b->_succs[j];
duke@0 1579 bool found_p = false;
duke@0 1580 for( uint k = 1; k < s->num_preds(); k++ ) {
duke@0 1581 Node *pk = s->pred(k);
duke@0 1582 if( pk->is_CatchProj() && pk->in(0) == n ) {
duke@0 1583 const CatchProjNode* p = pk->as_CatchProj();
duke@0 1584 found_p = true;
duke@0 1585 // add the corresponding handler bci & pco information
duke@0 1586 if( p->_con != CatchProjNode::fall_through_index ) {
duke@0 1587 // p leads to an exception handler (and is not fall through)
duke@0 1588 assert(s == _cfg->_blocks[s->_pre_order],"bad numbering");
duke@0 1589 // no duplicates, please
duke@0 1590 if( !handler_bcis.contains(p->handler_bci()) ) {
duke@0 1591 uint block_num = s->non_connector()->_pre_order;
duke@0 1592 handler_bcis.append(p->handler_bci());
duke@0 1593 handler_pcos.append(blk_labels[block_num].loc_pos());
duke@0 1594 }
duke@0 1595 }
duke@0 1596 }
duke@0 1597 }
duke@0 1598 assert(found_p, "no matching predecessor found");
duke@0 1599 // Note: Due to empty block removal, one block may have
duke@0 1600 // several CatchProj inputs, from the same Catch.
duke@0 1601 }
duke@0 1602
duke@0 1603 // Set the offset of the return from the call
duke@0 1604 _handler_table.add_subtable(call_return, &handler_bcis, NULL, &handler_pcos);
duke@0 1605 continue;
duke@0 1606 }
duke@0 1607
duke@0 1608 // Handle implicit null exception table updates
duke@0 1609 if( n->is_MachNullCheck() ) {
duke@0 1610 uint block_num = b->non_connector_successor(0)->_pre_order;
duke@0 1611 _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() );
duke@0 1612 continue;
duke@0 1613 }
duke@0 1614 } // End of for all blocks fill in exception table entries
duke@0 1615 }
duke@0 1616
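// A simplified sketch of how FillExceptionTables gathers handler entries per
// call site: each catch projection that is not the fall-through path
// contributes its handler bci and the code offset of the target block, with
// duplicates suppressed.  Plain std:: containers stand in for GrowableArray,
// and the bci/pco values below are invented for illustration.
#include <cstdio>
#include <vector>

struct HandlerEntry { int bci; int pco; };       // bytecode index, code offset

static void add_handler(std::vector<HandlerEntry>& table, int bci, int pco) {
  for (const HandlerEntry& e : table)
    if (e.bci == bci) return;                    // no duplicates, please
  table.push_back({bci, pco});
}

int main() {
  std::vector<HandlerEntry> table;
  add_handler(table, 17, 0x40);                  // handler at bci 17, code offset 0x40
  add_handler(table, 32, 0x88);
  add_handler(table, 17, 0x40);                  // duplicate CatchProj after empty-block removal
  for (const HandlerEntry& e : table)
    printf("bci %d -> pco 0x%x\n", e.bci, (unsigned)e.pco);
  return 0;                                      // prints two entries, not three
}
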
duke@0 1617 // Static Variables
duke@0 1618 #ifndef PRODUCT
duke@0 1619 uint Scheduling::_total_nop_size = 0;
duke@0 1620 uint Scheduling::_total_method_size = 0;
duke@0 1621 uint Scheduling::_total_branches = 0;
duke@0 1622 uint Scheduling::_total_unconditional_delays = 0;
duke@0 1623 uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1];
duke@0 1624 #endif
duke@0 1625
duke@0 1626 // Initializer for class Scheduling
duke@0 1627
duke@0 1628 Scheduling::Scheduling(Arena *arena, Compile &compile)
duke@0 1629 : _arena(arena),
duke@0 1630 _cfg(compile.cfg()),
duke@0 1631 _bbs(compile.cfg()->_bbs),
duke@0 1632 _regalloc(compile.regalloc()),
duke@0 1633 _reg_node(arena),
duke@0 1634 _bundle_instr_count(0),
duke@0 1635 _bundle_cycle_number(0),
duke@0 1636 _scheduled(arena),
duke@0 1637 _available(arena),
duke@0 1638 _next_node(NULL),
duke@0 1639 _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]),
duke@0 1640 _pinch_free_list(arena)
duke@0 1641 #ifndef PRODUCT
duke@0 1642 , _branches(0)
duke@0 1643 , _unconditional_delays(0)
duke@0 1644 #endif
duke@0 1645 {
duke@0 1646 // Create a MachNopNode
duke@0 1647 _nop = new (&compile) MachNopNode();
duke@0 1648
duke@0 1649 // Now that the nops are in the array, save the count
duke@0 1650 // (but allow entries for the nops)
duke@0 1651 _node_bundling_limit = compile.unique();
duke@0 1652 uint node_max = _regalloc->node_regs_max_index();
duke@0 1653
duke@0 1654 compile.set_node_bundling_limit(_node_bundling_limit);
duke@0 1655
twisti@605 1656 // This one is persistent within the Compile class
duke@0 1657 _node_bundling_base = NEW_ARENA_ARRAY(compile.comp_arena(), Bundle, node_max);
duke@0 1658
duke@0 1659 // Allocate space for fixed-size arrays
duke@0 1660 _node_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max);
duke@0 1661 _uses = NEW_ARENA_ARRAY(arena, short, node_max);
duke@0 1662 _current_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max);
duke@0 1663
duke@0 1664 // Clear the arrays
duke@0 1665 memset(_node_bundling_base, 0, node_max * sizeof(Bundle));
duke@0 1666 memset(_node_latency, 0, node_max * sizeof(unsigned short));
duke@0 1667 memset(_uses, 0, node_max * sizeof(short));
duke@0 1668 memset(_current_latency, 0, node_max * sizeof(unsigned short));
duke@0 1669
duke@0 1670 // Clear the bundling information
duke@0 1671 memcpy(_bundle_use_elements,
duke@0 1672 Pipeline_Use::elaborated_elements,
duke@0 1673 sizeof(Pipeline_Use::elaborated_elements));
duke@0 1674
duke@0 1675 // Get the last node
duke@0 1676 Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1];
duke@0 1677
duke@0 1678 _next_node = bb->_nodes[bb->_nodes.size()-1];
duke@0 1679 }
duke@0 1680
duke@0 1681 #ifndef PRODUCT
duke@0 1682 // Scheduling destructor
duke@0 1683 Scheduling::~Scheduling() {
duke@0 1684 _total_branches += _branches;
duke@0 1685 _total_unconditional_delays += _unconditional_delays;
duke@0 1686 }
duke@0 1687 #endif
duke@0 1688
duke@0 1689 // Step ahead "i" cycles
duke@0 1690 void Scheduling::step(uint i) {
duke@0 1691
duke@0 1692 Bundle *bundle = node_bundling(_next_node);
duke@0 1693 bundle->set_starts_bundle();
duke@0 1694
duke@0 1695 // Update the bundle record, but leave the flags information alone
duke@0 1696 if (_bundle_instr_count > 0) {
duke@0 1697 bundle->set_instr_count(_bundle_instr_count);
duke@0 1698 bundle->set_resources_used(_bundle_use.resourcesUsed());
duke@0 1699 }
duke@0 1700
duke@0 1701 // Update the state information
duke@0 1702 _bundle_instr_count = 0;
duke@0 1703 _bundle_cycle_number += i;
duke@0 1704 _bundle_use.step(i);
duke@0 1705 }
duke@0 1706
duke@0 1707 void Scheduling::step_and_clear() {
duke@0 1708 Bundle *bundle = node_bundling(_next_node);
duke@0 1709 bundle->set_starts_bundle();
duke@0 1710
duke@0 1711 // Update the bundle record
duke@0 1712 if (_bundle_instr_count > 0) {
duke@0 1713 bundle->set_instr_count(_bundle_instr_count);
duke@0 1714 bundle->set_resources_used(_bundle_use.resourcesUsed());
duke@0 1715
duke@0 1716 _bundle_cycle_number += 1;
duke@0 1717 }
duke@0 1718
duke@0 1719 // Clear the bundling information
duke@0 1720 _bundle_instr_count = 0;
duke@0 1721 _bundle_use.reset();
duke@0 1722
duke@0 1723 memcpy(_bundle_use_elements,
duke@0 1724 Pipeline_Use::elaborated_elements,
duke@0 1725 sizeof(Pipeline_Use::elaborated_elements));
duke@0 1726 }
duke@0 1727
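// A tiny model of the bundle bookkeeping in step() and step_and_clear():
// closing a bundle records how many instructions it held, then the cycle
// counter advances and the per-bundle instruction count starts over.  The
// Pipeline_Use resource masks are omitted; names here are illustrative only.
#include <cstdio>

struct BundleState {
  unsigned cycle = 0;          // plays the role of _bundle_cycle_number
  unsigned instr_count = 0;    // plays the role of _bundle_instr_count

  void step(unsigned i) {      // advance i cycles, closing the current bundle
    if (instr_count > 0)
      printf("bundle at cycle %u closed with %u instruction(s)\n", cycle, instr_count);
    instr_count = 0;
    cycle += i;
  }
};

int main() {
  BundleState b;
  b.instr_count = 2;           // pretend two instructions were issued this cycle
  b.step(1);                   // close the bundle, move to the next cycle
  b.instr_count = 1;
  b.step(3);                   // a three-cycle gap before the next bundle
  printf("now at cycle %u\n", b.cycle);   // prints 4
  return 0;
}
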
duke@0 1728 //------------------------------ScheduleAndBundle------------------------------
duke@0 1729 // Perform instruction scheduling and bundling over the sequence of
duke@0 1730 // instructions in backwards order.
duke@0 1731 void Compile::ScheduleAndBundle() {
duke@0 1732
duke@0 1733 // Don't optimize this if it isn't a method
duke@0 1734 if (!_method)
duke@0 1735 return;
duke@0 1736
duke@0 1737 // Don't optimize this if scheduling is disabled
duke@0 1738 if (!do_scheduling())
duke@0 1739 return;
duke@0 1740
duke@0 1741 NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
duke@0 1742
duke@0 1743 // Create a data structure for all the scheduling information
duke@0 1744 Scheduling scheduling(Thread::current()->resource_area(), *this);
duke@0 1745
twisti@1915 1746 // Initialize the space for the BufferBlob used to find and verify
twisti@1915 1747 // instruction size in MachNode::emit_size()
twisti@1915 1748 init_scratch_buffer_blob(MAX_const_size);
twisti@1915 1749 if (failing()) return; // Out of memory
twisti@1915 1750
duke@0 1751 // Walk backwards over each basic block, computing the needed alignment
duke@0 1752 // Walk over all the basic blocks
duke@0 1753 scheduling.DoScheduling();
duke@0 1754 }
duke@0 1755
duke@0 1756 //------------------------------ComputeLocalLatenciesForward-------------------
duke@0 1757 // Compute the latency of all the instructions. This is fairly simple,
duke@0 1758 // because we already have a legal ordering. Walk over the instructions
duke@0 1759 // from first to last, and compute the latency of the instruction based
twisti@605 1760 // on the latency of the preceding instruction(s).
duke@0 1761 void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
duke@0 1762 #ifndef PRODUCT
duke@0 1763 if (_cfg->C->trace_opto_output())
duke@0 1764 tty->print("# -> ComputeLocalLatenciesForward\n");
duke@0 1765 #endif
duke@0 1766
duke@0 1767 // Walk over all the schedulable instructions
duke@0 1768 for( uint j=_bb_start; j < _bb_end; j++ ) {
duke@0 1769
duke@0 1770 // This is a kludge, forcing all latency calculations to start at 1.
duke@0 1771 // Used to allow latency 0 to force an instruction to the beginning
duke@0 1772 // of the bb
duke@0 1773 uint latency = 1;
duke@0 1774 Node *use = bb->_nodes[j];
duke@0 1775 uint nlen = use->len();
duke@0 1776
duke@0 1777 // Walk over all the inputs
duke@0 1778 for ( uint k=0; k < nlen; k++ ) {
duke@0 1779 Node *def = use->in(k);
duke@0 1780 if (!def)
duke@0 1781 continue;
duke@0 1782
duke@0 1783 uint l = _node_latency[def->_idx] + use->latency(k);
duke@0 1784 if (latency < l)
duke@0 1785 latency = l;
duke@0 1786 }
duke@0 1787
duke@0 1788 _node_latency[use->_idx] = latency;
duke@0 1789
duke@0 1790 #ifndef PRODUCT
duke@0 1791 if (_cfg->C->trace_opto_output()) {
duke@0 1792 tty->print("# latency %4d: ", latency);
duke@0 1793 use->dump();
duke@0 1794 }
duke@0 1795 #endif
duke@0 1796 }
duke@0 1797
duke@0 1798 #ifndef PRODUCT
duke@0 1799 if (_cfg->C->trace_opto_output())
duke@0 1800 tty->print("# <- ComputeLocalLatenciesForward\n");
duke@0 1801 #endif
duke@0 1802
duke@0 1803 } // end ComputeLocalLatenciesForward
duke@0 1804
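// A standalone sketch of the forward latency computation above: each
// instruction's latency is the maximum over its inputs of (input latency +
// edge latency), clamped to at least 1.  The tiny three-node block and the
// uniform edge latency are invented; the real code walks bb->_nodes and asks
// use->latency(k) per input edge.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Inst {
  std::vector<int> inputs;   // indices of earlier instructions this one uses
  int edge_latency;          // latency charged per input edge (uniform, for simplicity)
};

int main() {
  // Three instructions in schedule order: 0 and 1 are independent, 2 uses both.
  std::vector<Inst> block = { { {}, 0 }, { {}, 0 }, { {0, 1}, 2 } };
  std::vector<int> latency(block.size(), 0);

  for (size_t j = 0; j < block.size(); j++) {
    int l = 1;                                     // forced minimum of 1, as above
    for (int def : block[j].inputs)
      l = std::max(l, latency[def] + block[j].edge_latency);
    latency[j] = l;
    printf("node %zu: latency %d\n", j, l);        // prints 1, 1, 3
  }
  return 0;
}
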
duke@0 1805 // See if this node fits into the present instruction bundle
duke@0 1806 bool Scheduling::NodeFitsInBundle(Node *n) {
duke@0 1807 uint n_idx = n->_idx;
duke@0 1808
duke@0 1809 // If this is the unconditional delay instruction, then it fits
duke@0 1810 if (n == _unconditional_delay_slot) {
duke@0 1811 #ifndef PRODUCT
duke@0 1812 if (_cfg->C->trace_opto_output())
duke@0 1813 tty->print("# NodeFitsInBundle [%4d]: TRUE; is in unconditional delay slot\n", n->_idx);
duke@0 1814 #endif
duke@0 1815 return (true);
duke@0 1816 }
duke@0 1817
duke@0 1818 // If the node cannot be scheduled this cycle, skip it
duke@0 1819 if (_current_latency[n_idx] > _bundle_cycle_number) {
duke@0 1820 #ifndef PRODUCT
duke@0 1821 if (_cfg->C->trace_opto_output())
duke@0 1822 tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n",
duke@0 1823 n->_idx, _current_latency[n_idx], _bundle_cycle_number);
duke@0 1824 #endif
duke@0 1825 return (false);
duke@0 1826 }
duke@0 1827
duke@0 1828 const Pipeline *node_pipeline = n->pipeline();
duke@0 1829
duke@0 1830 uint instruction_count = node_pipeline->instructionCount();
duke@0 1831 if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
duke@0 1832 instruction_count = 0;
duke@0 1833 else if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
duke@0 1834 instruction_count++;
duke@0 1835
duke@0 1836 if (_bundle_instr_count + instruction_count > Pipeline::_max_instrs_per_cycle) {
duke@0 1837 #ifndef PRODUCT
duke@0 1838 if (_cfg->C->trace_opto_output())
duke@0 1839 tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n",
duke@0 1840 n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
duke@0 1841 #endif
duke@0 1842 return (false);
duke@0 1843 }
duke@0 1844
duke@0 1845 // Don't allow non-machine nodes to be handled this way
duke@0 1846 if (!n->is_Mach() && instruction_count == 0)
duke@0 1847 return (false);
duke@0 1848
duke@0 1849 // See if there is any overlap
duke@0 1850 uint delay = _bundle_use.full_latency(0, node_pipeline->resourceUse());
duke@0 1851
duke@0 1852 if (delay > 0) {
duke@0 1853 #ifndef PRODUCT
duke@0 1854 if (_cfg->C->trace_opto_output())
duke@0 1855 tty->print("# NodeFitsInBundle [%4d]: FALSE; functional units overlap\n", n_idx);
duke@0 1856 #endif
duke@0 1857 return false;
duke@0 1858 }
duke@0 1859
duke@0 1860 #ifndef PRODUCT
duke@0 1861 if (_cfg->C->trace_opto_output())
duke@0 1862 tty->print("# NodeFitsInBundle [%4d]: TRUE\n", n_idx);
duke@0 1863 #endif
duke@0 1864
duke@0 1865 return true;
duke@0 1866 }
duke@0 1867
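// A simplified version of the "does this node fit in the current bundle"
// test above: the node must be latency-ready for the current cycle, must not
// push the bundle past the per-cycle issue width, and must not conflict on
// functional units.  The resource check is reduced to a plain bitmask here;
// every name below is an illustrative stand-in, not the Pipeline classes.
#include <cstdio>

struct BundleModel {
  unsigned cycle;            // current bundle cycle number
  unsigned instr_count;      // instructions already placed in this bundle
  unsigned max_instrs;       // per-cycle issue width
  unsigned units_in_use;     // bitmask of functional units already claimed
};

static bool node_fits(const BundleModel& b, unsigned ready_cycle,
                      unsigned instrs, unsigned units_needed) {
  if (ready_cycle > b.cycle)                 return false;  // not ready this cycle
  if (b.instr_count + instrs > b.max_instrs) return false;  // bundle already full
  if ((b.units_in_use & units_needed) != 0)  return false;  // functional unit overlap
  return true;
}

int main() {
  BundleModel b = { /*cycle*/4, /*instr_count*/1, /*max_instrs*/2, /*units_in_use*/0x1 };
  printf("%d\n", node_fits(b, 4, 1, 0x2));   // 1: ready, room left, distinct unit
  printf("%d\n", node_fits(b, 5, 1, 0x2));   // 0: latency not yet satisfied
  printf("%d\n", node_fits(b, 4, 2, 0x2));   // 0: would exceed the issue width
  printf("%d\n", node_fits(b, 4, 1, 0x1));   // 0: functional unit already busy
  return 0;
}
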
duke@0 1868 Node * Scheduling::ChooseNodeToBundle() {
duke@0 1869 uint siz = _available.size();
duke@0 1870
duke@0 1871 if (siz == 0) {
duke@0 1872
duke@0 1873 #ifndef PRODUCT
duke@0 1874 if (_cfg->C->trace_opto_output())
duke@0 1875 tty->print("# ChooseNodeToBundle: NULL\n");
duke@0 1876 #endif
duke@0 1877 return (NULL);
duke@0 1878 }
duke@0 1879
duke@0 1880 // Fast path, if only 1 instruction in the bundle
duke@0 1881 if (siz == 1) {
duke@0 1882 #ifndef PRODUCT
duke@0 1883 if (_cfg->C->trace_opto_output()) {
duke@0 1884 tty->print("# ChooseNodeToBundle (only 1): ");
duke@0 1885 _available[0]->dump();
duke@0 1886 }
duke@0 1887 #endif
duke@0 1888 return (_available[0]);
duke@0 1889 }
duke@0 1890
duke@0 1891 // Don't bother searching if the bundle is already full
duke@0 1892 if (_bundle_instr_count < Pipeline::_max_instrs_per_cycle) {
duke@0 1893 for ( uint i = 0; i < siz; i++ ) {
duke@0 1894 Node *n = _available[i];
duke@0 1895
duke@0 1896 // Skip projections, we'll handle them another way
duke@0 1897 if (n->is_Proj())
duke@0 1898 continue;
duke@0 1899
duke@0 1900 // This presupposed that instructions are inserted into the
duke@0 1901 // available list in a legality order; i.e. instructions that
duke@0 1902 // must be inserted first are at the head of the list
duke@0 1903 if (NodeFitsInBundle(n)) {
duke@0 1904 #ifndef PRODUCT
duke@0 1905 if (_cfg->C->trace_opto_output()) {
duke@0 1906 tty->print("# ChooseNodeToBundle: ");
duke@0 1907 n->dump();
duke@0 1908 }
duke@0 1909 #endif
duke@0 1910 return (n);
duke@0 1911 }
duke@0 1912 }
duke@0 1913 }
duke@0 1914
duke@0 1915 // Nothing fits in this bundle, choose the highest priority
duke@0 1916 #ifndef PRODUCT
duke@0 1917 if (_cfg->C->trace_opto_output()) {
duke@0 1918 tty->print("# ChooseNodeToBundle: ");
duke@0 1919 _available[0]->dump();
duke@0 1920 }
duke@0 1921 #endif
duke@0 1922
duke@0 1923 return _available[0];
duke@0 1924 }
duke@0 1925
duke@0 1926 //------------------------------AddNodeToAvailableList-------------------------
duke@0 1927 void Scheduling::AddNodeToAvailableList(Node *n) {
duke@0 1928 assert( !n->is_Proj(), "projections never directly made available" );
duke@0 1929 #ifndef PRODUCT
duke@0 1930 if (_cfg->C->trace_opto_output()) {
duke@0 1931 tty->print("# AddNodeToAvailableList: ");
duke@0 1932 n->dump();
duke@0 1933 }
duke@0 1934 #endif
duke@0 1935
duke@0 1936 int latency = _current_latency[n->_idx];
duke@0 1937
duke@0 1938 // Insert in latency order (insertion sort)
duke@0 1939 uint i;
duke@0 1940 for ( i=0; i < _available.size(); i++ )
duke@0 1941 if (_current_latency[_available[i]->_idx] > latency)
duke@0 1942 break;
duke@0 1943
duke@0 1944 // Special Check for compares following branches
duke@0 1945 if( n->is_Mach() && _scheduled.size() > 0 ) {
duke@0 1946 int op = n->as_Mach()->ideal_Opcode();
duke@0 1947 Node *last = _scheduled[0];
duke@0 1948 if( last->is_MachIf() && last->in(1) == n &&
duke@0 1949 ( op == Op_CmpI ||
duke@0 1950 op == Op_CmpU ||
duke@0 1951 op == Op_CmpP ||
duke@0 1952 op == Op_CmpF ||
duke@0 1953 op == Op_CmpD ||
duke@0 1954 op == Op_CmpL ) ) {
duke@0 1955
duke@0 1956 // Recalculate position, moving to front of same latency
duke@0 1957 for ( i=0 ; i < _available.size(); i++ )
duke@0 1958 if (_current_latency[_available[i]->_idx] >= latency)
duke@0 1959 break;
duke@0 1960 }
duke@0 1961 }
duke@0 1962
duke@0 1963 // Insert the node in the available list
duke@0 1964 _available.insert(i, n);
duke@0 1965
duke@0 1966 #ifndef PRODUCT
duke@0 1967 if (_cfg->C->trace_opto_output())
duke@0 1968 dump_available();
duke@0 1969 #endif
duke@0 1970 }
duke@0 1971
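// A simplified sketch of the insertion step above: a newly ready node is
// placed in front of the first entry with a strictly larger latency, so the
// available list stays sorted by latency (the special case that moves a
// compare ahead of equal-latency entries is left out).  The node type and
// latency values are hypothetical.
#include <cstdio>
#include <vector>

struct AvailNode { int idx; int latency; };

static void add_to_available(std::vector<AvailNode>& avail, AvailNode n) {
  size_t i = 0;
  while (i < avail.size() && avail[i].latency <= n.latency)   // stop at first larger latency
    i++;
  avail.insert(avail.begin() + i, n);
}

int main() {
  std::vector<AvailNode> avail;
  add_to_available(avail, {10, 3});
  add_to_available(avail, {11, 1});          // lower latency, goes to the front
  add_to_available(avail, {12, 3});          // equal latency, goes after node 10
  for (const AvailNode& n : avail)
    printf("node %d latency %d\n", n.idx, n.latency);   // order: 11, 10, 12
  return 0;
}
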
duke@0 1972 //------------------------------DecrementUseCounts-----------------------------
duke@0 1973 void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
duke@0 1974 for ( uint i=0; i < n->len(); i++ ) {
duke@0 1975 Node *def = n->in(i);
duke@0 1976 if (!def) continue;
duke@0 1977 if( def->is_Proj() ) // If this is a machine projection, then
duke@0 1978 def = def->in(0); // propagate usage thru to the base instruction
duke@0 1979
duke@0 1980 if( _bbs[def->_idx] != bb ) // Ignore if not block-local
duke@0 1981 continue;
duke@0 1982
duke@0 1983 // Compute the latency
duke@0 1984 uint l = _bundle_cycle_number + n->latency(i);
duke@0 1985 if (_current_latency[def->_idx] < l)
duke@0 1986 _current_latency[def->_idx] = l;
duke@0 1987
duke@0 1988 // If this does not have uses then schedule it
duke@0 1989 if ((--_uses[def->_idx]) == 0)
duke@0 1990 AddNodeToAvailableList(def);
duke@0 1991 }
duke@0 1992 }
duke@0 1993
duke@0 1994 //------------------------------AddNodeToBundle--------------------------------
duke@0 1995 void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
duke@0 1996 #ifndef PRODUCT
duke@0 1997 if (_cfg->C->trace_opto_output()) {
duke@0 1998 tty->print("# AddNodeToBundle: ");
duke@0 1999 n->dump();
duke@0 2000 }
duke@0 2001 #endif
duke@0 2002
duke@0 2003 // Remove this from the available list
duke@0 2004 uint i;
duke@0 2005 for (i = 0; i < _available.size(); i++)
duke@0 2006 if (_available[i] == n)
duke@0 2007 break;
duke@0 2008 assert(i < _available.size(), "entry in _available list not found");
duke@0 2009 _available.remove(i);
duke@0 2010
duke@0 2011 // See if this fits in the current bundle
duke@0 2012 const Pipeline *node_pipeline = n->pipeline();
duke@0 2013 const Pipeline_Use& node_usage = node_pipeline->resourceUse();
duke@0 2014
duke@0 2015 // Check for instructions to be placed in the delay slot. We
duke@0 2016 // do this before we actually schedule the current instruction,
duke@0 2017 // because the delay slot follows the current instruction.
duke@0 2018 if (Pipeline::_branch_has_delay_slot &&
duke@0 2019 node_pipeline->hasBranchDelay() &&
duke@0 2020 !_unconditional_delay_slot) {
duke@0 2021
duke@0 2022 uint siz = _available.size();
duke@0 2023
duke@0 2024 // Conditional branches can support an instruction that
twisti@605 2025 // is unconditionally executed and not dependent on the
duke@0 2026 // branch, OR a conditionally executed instruction if
duke@0 2027 // the branch is taken. In practice, this means that
duke@0 2028 // the first instruction at the branch target is
duke@0 2029 // copied to the delay slot, and the branch goes to
duke@0 2030 // the instruction after that at the branch target
duke@0 2031 if ( n->is_Mach() && n->is_Branch() ) {
duke@0 2032
duke@0 2033 assert( !n->is_MachNullCheck(), "should not look for delay slot for Null Check" );
duke@0 2034 assert( !n->is_Catch(), "should not look for delay slot for Catch" );
duke@0 2035
duke@0 2036 #ifndef PRODUCT
duke@0 2037 _branches++;
duke@0 2038 #endif
duke@0 2039
duke@0 2040 // At least 1 instruction is on the available list
twisti@605 2041 // that is not dependent on the branch
duke@0 2042 for (uint i = 0; i < siz; i++) {
duke@0 2043 Node *d = _available[i];
duke@0 2044 const Pipeline *avail_pipeline = d->pipeline();
duke@0 2045
duke@0 2046 // Don't allow safepoints in the branch shadow, that will
duke@0 2047 // cause a number of difficulties
duke@0 2048 if ( avail_pipeline->instructionCount() == 1 &&
duke@0 2049 !avail_pipeline->hasMultipleBundles() &&
duke@0 2050 !avail_pipeline->hasBranchDelay() &&
duke@0 2051 Pipeline::instr_has_unit_size() &&
duke@0 2052 d->size(_regalloc) == Pipeline::instr_unit_size() &&
duke@0 2053 NodeFitsInBundle(d) &&
duke@0 2054 !node_bundling(d)->used_in_delay()) {
duke@0 2055
duke@0 2056 if (d->is_Mach() && !d->is_MachSafePoint()) {
duke@0 2057 // A node that fits in the delay slot was found, so we need to
duke@0 2058 // set the appropriate bits in the bundle pipeline information so
duke@0 2059 // that it correctly indicates resource usage. Later, when we
duke@0 2060 // attempt to add this instruction to the bundle, we will skip
duke@0 2061 // setting the resource usage.
duke@0 2062 _unconditional_delay_slot = d;
duke@0 2063 node_bundling(n)->set_use_unconditional_delay();
duke@0 2064 node_bundling(d)->set_used_in_unconditional_delay();
duke@0 2065 _bundle_use.add_usage(avail_pipeline->resourceUse());
duke@0 2066 _current_latency[d->_idx] = _bundle_cycle_number;
duke@0 2067 _next_node = d;
duke@0 2068 ++_bundle_instr_count;
duke@0 2069 #ifndef PRODUCT
duke@0 2070 _unconditional_delays++;
duke@0 2071 #endif
duke@0 2072 break;
duke@0 2073 }
duke@0 2074 }
duke@0 2075 }
duke@0 2076 }
duke@0 2077
duke@0 2078 // No delay slot, add a nop to the usage
duke@0 2079 if (!_unconditional_delay_slot) {
duke@0 2080 // See if adding an instruction in the delay slot will overflow
duke@0 2081 // the bundle.
duke@0 2082 if (!NodeFitsInBundle(_nop)) {
duke@0 2083 #ifndef PRODUCT
duke@0 2084 if (_cfg->C->trace_opto_output())
duke@0 2085 tty->print("# *** STEP(1 instruction for delay slot) ***\n");
duke@0 2086 #endif
duke@0 2087 step(1);
duke@0 2088 }
duke@0 2089
duke@0 2090 _bundle_use.add_usage(_nop->pipeline()->resourceUse());
duke@0 2091 _next_node = _nop;
duke@0 2092 ++_bundle_instr_count;
duke@0 2093 }
duke@0 2094
duke@0 2095 // See if the branch instruction itself requires a
duke@0 2096 // step of the bundles
duke@0 2097 if (!NodeFitsInBundle(n)) {
duke@0 2098 #ifndef PRODUCT
duke@0 2099 if (_cfg->C->trace_opto_output())
duke@0 2100 tty->print("# *** STEP(branch won't fit) ***\n");
duke@0 2101 #endif
duke@0 2102 // Update the state information
duke@0 2103 _bundle_instr_count = 0;
duke@0 2104 _bundle_cycle_number += 1;
duke@0 2105 _bundle_use.step(1);
duke@0 2106 }
duke@0 2107 }
duke@0 2108
duke@0 2109 // Get the number of instructions
duke@0 2110 uint instruction_count = node_pipeline->instructionCount();
duke@0 2111 if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
duke@0 2112 instruction_count = 0;
duke@0 2113
duke@0 2114 // Compute the latency information
duke@0 2115 uint delay = 0;
duke@0 2116
duke@0 2117 if (instruction_count > 0 || !node_pipeline->mayHaveNoCode()) {
duke@0 2118 int relative_latency = _current_latency[n->_idx] - _bundle_cycle_number;
duke@0 2119 if (relative_latency < 0)
duke@0 2120 relative_latency = 0;
duke@0 2121
duke@0 2122 delay = _bundle_use.full_latency(relative_latency, node_usage);
duke@0 2123
duke@0 2124 // Does not fit in this bundle, start a new one
duke@0 2125 if (delay > 0) {
duke@0 2126 step(delay);
duke@0 2127
duke@0 2128 #ifndef PRODUCT
duke@0 2129 if (_cfg->C->trace_opto_output())
duke@0 2130 tty->print("# *** STEP(%d) ***\n", delay);
duke@0 2131 #endif
duke@0 2132 }
duke@0 2133 }
duke@0 2134
duke@0 2135 // If this was placed in the delay slot, ignore it
duke@0 2136 if (n != _unconditional_delay_slot) {
duke@0 2137
duke@0 2138 if (delay == 0) {
duke@0 2139 if (node_pipeline->hasMultipleBundles()) {
duke@0 2140 #ifndef PRODUCT
duke@0 2141 if (_cfg->C->trace_opto_output())
duke@0 2142 tty->print("# *** STEP(multiple instructions) ***\n");
duke@0 2143 #endif
duke@0 2144 step(1);
duke@0 2145 }
duke@0 2146
duke@0 2147 else if (instruction_count + _bundle_instr_count > Pipeline::_max_instrs_per_cycle) {
duke@0 2148 #ifndef PRODUCT
duke@0 2149 if (_cfg->C->trace_opto_output())
duke@0 2150 tty->print("# *** STEP(%d >= %d instructions) ***\n",
duke@0 2151 instruction_count + _bundle_instr_count,
duke@0 2152 Pipeline::_max_instrs_per_cycle);
duke@0 2153 #endif
duke@0 2154 step(1);
duke@0 2155 }
duke@0 2156 }
duke@0 2157
duke@0 2158 if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
duke@0 2159 _bundle_instr_count++;
duke@0 2160
duke@0 2161 // Set the node's latency
duke@0 2162 _current_latency[n->_idx] = _bundle_cycle_number;
duke@0 2163
duke@0 2164 // Now merge the functional unit information
duke@0 2165 if (instruction_count > 0 || !node_pipeline->mayHaveNoCode())
duke@0 2166 _bundle_use.add_usage(node_usage);
duke@0 2167
duke@0 2168 // Increment the number of instructions in this bundle
duke@0 2169 _bundle_instr_count += instruction_count;
duke@0 2170
duke@0 2171 // Remember this node for later
duke@0 2172 if (n->is_Mach())
duke@0 2173 _next_node = n;
duke@0 2174 }
duke@0 2175
duke@0 2176 // It's possible to have a BoxLock in the graph and in the _bbs mapping but
duke@0 2177 // not in the bb->_nodes array. This happens for debug-info-only BoxLocks.
duke@0 2178 // 'Schedule' them (basically ignore in the schedule) but do not insert them
duke@0 2179 // into the block. All other scheduled nodes get put in the schedule here.
duke@0 2180 int op = n->Opcode();
duke@0 2181 if( (op == Op_Node && n->req() == 0) || // anti-dependence node OR
duke@0 2182 (op != Op_Node && // Not an unused antidependence node and
duke@0 2183 // not an unallocated boxlock
duke@0 2184 (OptoReg::is_valid(_regalloc->get_reg_first(n)) || op != Op_BoxLock)) ) {
duke@0 2185
duke@0 2186 // Push any trailing projections
duke@0 2187 if( bb->_nodes[bb->_nodes.size()-1] != n ) {
duke@0 2188 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
duke@0 2189 Node *foi = n->fast_out(i);
duke@0 2190 if( foi->is_Proj() )
duke@0 2191 _scheduled.push(foi);
duke@0 2192 }
duke@0 2193 }
duke@0 2194
duke@0 2195 // Put the instruction in the schedule list
duke@0 2196 _scheduled.push(n);
duke@0 2197 }
duke@0 2198
duke@0 2199 #ifndef PRODUCT
duke@0 2200 if (_cfg->C->trace_opto_output())
duke@0 2201 dump_available();
duke@0 2202 #endif
duke@0 2203
duke@0 2204 // Walk all the definitions, decrementing use counts, and
duke@0 2205 // if a definition has a 0 use count, place it in the available list.
duke@0 2206 DecrementUseCounts(n,bb);
duke@0 2207 }
duke@0 2208
duke@0 2209 //------------------------------ComputeUseCount--------------------------------
duke@0 2210 // This method sets the use count within a basic block. We will ignore all
duke@0 2211 // uses outside the current basic block. As we are doing a backwards walk,
duke@0 2212 // any node we reach that has a use count of 0 may be scheduled. This also
duke@0 2213 // avoids the problem of cyclic references from phi nodes, as long as phi
duke@0 2214 // nodes are at the front of the basic block. This method also initializes
duke@0 2215 // the available list to the set of instructions that have no uses within this
duke@0 2216 // basic block.
duke@0 2217 void Scheduling::ComputeUseCount(const Block *bb) {
duke@0 2218 #ifndef PRODUCT
duke@0 2219 if (_cfg->C->trace_opto_output())
duke@0 2220 tty->print("# -> ComputeUseCount\n");
duke@0 2221 #endif
duke@0 2222
duke@0 2223 // Clear the list of available and scheduled instructions, just in case
duke@0 2224 _available.clear();
duke@0 2225 _scheduled.clear();
duke@0 2226
duke@0 2227 // No delay slot specified
duke@0 2228 _unconditional_delay_slot = NULL;
duke@0 2229
duke@0 2230 #ifdef ASSERT
duke@0 2231 for( uint i=0; i < bb->_nodes.size(); i++ )
duke@0 2232 assert( _uses[bb->_nodes[i]->_idx] == 0, "_use array not clean" );
duke@0 2233 #endif
duke@0 2234
duke@0 2235 // Force the _uses count to never go to zero for unschedulable pieces
duke@0 2236 // of the block
duke@0 2237 for( uint k = 0; k < _bb_start; k++ )
duke@0 2238 _uses[bb->_nodes[k]->_idx] = 1;
duke@0 2239 for( uint l = _bb_end; l < bb->_nodes.size(); l++ )
duke@0 2240 _uses[bb->_nodes[l]->_idx] = 1;
duke@0 2241
duke@0 2242 // Iterate backwards over the instructions in the block. Don't count the
duke@0 2243 // branch projections at end or the block header instructions.
duke@0 2244 for( uint j = _bb_end-1; j >= _bb_start; j-- ) {
duke@0 2245 Node *n = bb->_nodes[j];
duke@0 2246 if( n->is_Proj() ) continue; // Projections handled another way
duke@0 2247
duke@0 2248 // Account for all uses
duke@0 2249 for ( uint k = 0; k < n->len(); k++ ) {
duke@0 2250 Node *inp = n->in(k);
duke@0 2251 if (!inp) continue;
duke@0 2252 assert(inp != n, "no cycles allowed" );
duke@0 2253 if( _bbs[inp->_idx] == bb ) { // Block-local use?
duke@0 2254 if( inp->is_Proj() ) // Skip through Proj's
duke@0 2255 inp = inp->in(0);
duke@0 2256 ++_uses[inp->_idx]; // Count 1 block-local use
duke@0 2257 }
duke@0 2258 }
duke@0 2259
duke@0 2260 // If this instruction has a 0 use count, then it is available
duke@0 2261 if (!_uses[n->_idx]) {
duke@0 2262 _current_latency[n->_idx] = _bundle_cycle_number;
duke@0 2263 AddNodeToAvailableList(n);
duke@0 2264 }
duke@0 2265
duke@0 2266 #ifndef PRODUCT
duke@0 2267 if (_cfg->C->trace_opto_output()) {
duke@0 2268 tty->print("# uses: %3d: ", _uses[n->_idx]);
duke@0 2269 n->dump();
duke@0 2270 }
duke@0 2271 #endif
duke@0 2272 }
duke@0 2273
duke@0 2274 #ifndef PRODUCT
duke@0 2275 if (_cfg->C->trace_opto_output())
duke@0 2276 tty->print("# <- ComputeUseCount\n");
duke@0 2277 #endif
duke@0 2278 }
duke@0 2279
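// A standalone sketch of the use-count seeding described above: walking the
// block backwards, every block-local input edge bumps the definition's use
// count, and any node whose own count is still zero when visited has no uses
// later in the block, so it seeds the available list for the backwards
// scheduler.  The four-node block is invented, and projections plus the
// unschedulable head/tail of the block are ignored in this toy version.
#include <cstdio>
#include <vector>

int main() {
  // Instructions in program order; inputs[] holds block-local input indices.
  std::vector<std::vector<int>> inputs = {
    {},        // 0: load
    {},        // 1: constant
    {0, 1},    // 2: add, uses 0 and 1
    {2}        // 3: store, uses 2
  };
  std::vector<int> uses(inputs.size(), 0);
  std::vector<int> available;                      // seeds for backwards scheduling

  for (int j = (int)inputs.size() - 1; j >= 0; j--) {
    for (int in : inputs[j])
      ++uses[in];                                  // count one block-local use
    if (uses[j] == 0)                              // nothing later in the block uses j
      available.push_back(j);
  }

  for (int n : available)
    printf("node %d is initially available\n", n); // only node 3 (the store)
  return 0;
}
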
duke@0 2280 // This routine performs scheduling on each basic block in reverse order,
duke@0 2281 // using instruction latencies and taking into account functional unit
duke@0 2282 // availability.
duke@0 2283 void Scheduling::DoScheduling() {
duke@0 2284 #ifndef PRODUCT
duke@0 2285 if (_cfg->C->trace_opto_output())
duke@0 2286 tty->print("# -> DoScheduling\n");
duke@0 2287 #endif
duke@0 2288
duke@0 2289 Block *succ_bb = NULL;
duke@0 2290 Block *bb;
duke@0 2291
duke@0 2292 // Walk over all the basic blocks in reverse order
duke@0 2293 for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) {
duke@0 2294 bb = _cfg->_blocks[i];
duke@0 2295
duke@0 2296 #ifndef PRODUCT
duke@0 2297 if (_cfg->C->trace_opto_output()) {
duke@0 2298 tty->print("# Schedule BB#%03d (initial)\n", i);
duke@0 2299 for (uint j = 0; j < bb->_nodes.size(); j++)
duke@0 2300 bb->_nodes[j]->dump();
duke@0 2301 }
duke@0 2302 #endif
duke@0 2303
duke@0 2304 // On the head node, skip processing
duke@0 2305 if( bb == _cfg->_broot )
duke@0 2306 continue;
duke@0 2307
duke@0 2308 // Skip empty, connector blocks
duke@0 2309 if (bb->is_connector())
duke@0 2310 continue;
duke@0 2311
duke@0 2312 // If the following block is not the sole successor of
duke@0 2313 // this one, then reset the pipeline information
duke@0 2314 if (bb->_num_succs != 1 || bb->non_connector_successor(0) != succ_bb) {
duke@0 2315 #ifndef PRODUCT
duke@0 2316 if (_cfg->C->trace_opto_output()) {
duke@0 2317 tty->print("*** bundle start of next BB, node %d, for %d instructions\n",
duke@0 2318 _next_node->_idx, _bundle_instr_count);
duke@0 2319 }
duke@0 2320 #endif
duke@0 2321 step_and_clear();
duke@0 2322 }
duke@0 2323
duke@0 2324 // Leave untouched the starting instruction, any Phis, a CreateEx node
duke@0 2325 // or Top. bb->_nodes[_bb_start] is the first schedulable instruction.
duke@0 2326 _bb_end = bb->_nodes.size()-1;
duke@0 2327 for( _bb_start=1; _bb_start <= _bb_end; _bb_start++ ) {
duke@0 2328 Node *n = bb->_nodes[_bb_start];
duke@0 2329 // Things not matched, like PhiNodes and ProjNodes, don't get scheduled.
duke@0 2330 // Also, MachIdealNodes do not get scheduled
duke@0 2331 if( !n->is_Mach() ) continue; // Skip non-machine nodes
duke@0 2332 MachNode *mach = n->as_Mach();
duke@0 2333 int iop = mach->ideal_Opcode();
duke@0 2334 if( iop == Op_CreateEx ) continue; // CreateEx is pinned
duke@0 2335 if( iop == Op_Con ) continue; // Do not schedule Top
duke@0 2336 if( iop == Op_Node && // Do not schedule PhiNodes, ProjNodes
duke@0 2337 mach->pipeline() == MachNode::pipeline_class() &&
duke@0 2338 !n->is_SpillCopy() ) // Breakpoints, Prolog, etc
duke@0 2339 continue;
duke@0 2340 break; // Funny loop structure to be sure...
duke@0 2341 }
duke@0 2342 // Compute last "interesting" instruction in block - last instruction we
duke@0 2343 // might schedule. _bb_end points just after last schedulable inst. We
duke@0 2344 // normally schedule conditional branches (despite them being forced last
duke@0 2345 // in the block), because they have delay slots we can fill. Calls all
duke@0 2346 // have their delay slots filled in the template expansions, so we don't
duke@0 2347 // bother scheduling them.
duke@0 2348 Node *last = bb->_nodes[_bb_end];
duke@0 2349 if( last->is_Catch() ||
kvn@707 2350 // Exclude unreachable path case when Halt node is in a separate block.
kvn@707 2351 (_bb_end > 1 && last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
duke@0 2352 // There must be a prior call. Skip it.
duke@0 2353 while( !bb->_nodes[--_bb_end]->is_Call() ) {
duke@0 2354 assert( bb->_nodes[_bb_end]->is_Proj(), "skipping projections after expected call" );
duke@0 2355 }
duke@0 2356 } else if( last->is_MachNullCheck() ) {
duke@0 2357 // Backup so the last null-checked memory instruction is
duke@0 2358 // outside the schedulable range. Skip over the nullcheck,
duke@0 2359 // projection, and the memory nodes.
duke@0 2360 Node *mem = last->in(1);
duke@0 2361 do {
duke@0 2362 _bb_end--;
duke@0 2363 } while (mem != bb->_nodes[_bb_end]);
duke@0 2364 } else {
duke@0 2365 // Set _bb_end to point after last schedulable inst.
duke@0 2366 _bb_end++;
duke@0 2367 }
duke@0 2368
duke@0 2369 assert( _bb_start <= _bb_end, "inverted block ends" );
duke@0 2370
duke@0 2371 // Compute the register antidependencies for the basic block
duke@0 2372 ComputeRegisterAntidependencies(bb);
duke@0 2373 if (_cfg->C->failing()) return; // too many D-U pinch points
duke@0 2374
duke@0 2375 // Compute intra-bb latencies for the nodes
duke@0 2376 ComputeLocalLatenciesForward(bb);
duke@0 2377
duke@0 2378 // Compute the usage within the block, and set the list of all nodes
duke@0 2379 // in the block that have no uses within the block.
duke@0 2380 ComputeUseCount(bb);
duke@0 2381
duke@0 2382 // Schedule the remaining instructions in the block
duke@0 2383 while ( _available.size() > 0 ) {
duke@0 2384 Node *n = ChooseNodeToBundle();
duke@0 2385 AddNodeToBundle(n,bb);
duke@0 2386 }
duke@0 2387
duke@0 2388 assert( _scheduled.size() == _bb_end - _bb_start, "wrong number of instructions" );
duke@0 2389 #ifdef ASSERT
duke@0 2390 for( uint l = _bb_start; l < _bb_end; l++ ) {
duke@0 2391 Node *n = bb->_nodes[l];
duke@0 2392 uint m;
duke@0 2393 for( m = 0; m < _bb_end-_bb_start; m++ )
duke@0 2394 if( _scheduled[m] == n )
duke@0 2395 break;
duke@0 2396 assert( m < _bb_end-_bb_start, "instruction missing in schedule" );
duke@0 2397 }
duke@0 2398 #endif
duke@0 2399
duke@0 2400 // Now copy the instructions (in reverse order) back to the block
duke@0 2401 for ( uint k = _bb_start; k < _bb_end; k++ )
duke@0 2402 bb->_nodes.map(k, _scheduled[_bb_end-k-1]);
duke@0 2403
duke@0 2404 #ifndef PRODUCT
duke@0 2405 if (_cfg->C->trace_opto_output()) {
duke@0 2406 tty->print("# Schedule BB#%03d (final)\n", i);
duke@0 2407 uint current = 0;
duke@0 2408 for (uint j = 0; j < bb->_nodes.size(); j++) {
duke@0 2409 Node *n = bb->_nodes[j];
duke@0 2410 if( valid_bundle_info(n) ) {
duke@0 2411 Bundle *bundle = node_bundling(n);
duke@0 2412 if (bundle->instr_count() > 0 || bundle->flags() > 0) {
duke@0 2413 tty->print("*** Bundle: ");
duke@0 2414 bundle->dump();
duke@0 2415 }
duke@0 2416 n->dump();
duke@0 2417 }
duke@0 2418 }
duke@0 2419 }
duke@0 2420 #endif
duke@0 2421 #ifdef ASSERT
duke@0 2422 verify_good_schedule(bb,"after block local scheduling");
duke@0 2423 #endif
duke@0 2424 }
duke@0 2425
duke@0 2426 #ifndef PRODUCT
duke@0 2427 if (_cfg->C->trace_opto_output())
duke@0 2428 tty->print("# <- DoScheduling\n");
duke@0 2429 #endif
duke@0 2430
duke@0 2431 // Record final node-bundling array location
duke@0 2432 _regalloc->C->set_node_bundling_base(_node_bundling_base);
duke@0 2433
duke@0 2434 } // end DoScheduling
duke@0 2435
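// A bare-bones skeleton of the per-block loop that DoScheduling drives above:
// nodes with no remaining in-block uses are available, one is chosen, placed
// in the (reverse-order) schedule, and its inputs' use counts are
// decremented, which may make them available in turn.  Latency, bundling and
// antidependence handling are all omitted; this is only the list-scheduling
// shape, not the HotSpot implementation.
#include <cstdio>
#include <vector>

int main() {
  std::vector<std::vector<int>> inputs = { {}, {}, {0, 1}, {2} };  // same toy block shape
  std::vector<int> uses(inputs.size(), 0);
  for (const std::vector<int>& ins : inputs)
    for (int in : ins) ++uses[in];                // count block-local uses

  std::vector<int> available, scheduled;
  for (int j = (int)inputs.size() - 1; j >= 0; j--)
    if (uses[j] == 0) available.push_back(j);     // ComputeUseCount's seed

  while (!available.empty()) {
    int n = available.back();                     // stand-in for ChooseNodeToBundle
    available.pop_back();
    scheduled.push_back(n);                       // schedule is built last-to-first
    for (int in : inputs[n])                      // stand-in for DecrementUseCounts
      if (--uses[in] == 0) available.push_back(in);
  }

  printf("reverse schedule:");
  for (int n : scheduled) printf(" %d", n);       // prints: 3 2 1 0
  printf("\n");
  return 0;
}
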
duke@0 2436 //------------------------------verify_good_schedule---------------------------
duke@0 2437 // Verify that no live-range used in the block is killed in the block by a
duke@0 2438 // wrong DEF. This doesn't verify live-ranges that span blocks.
duke@0 2439
duke@0 2440 // Check for edge existence. Used to avoid adding redundant precedence edges.
duke@0 2441 static bool edge_from_to( Node *from, Node *to ) {
duke@0 2442 for( uint i=0; i<from->len(); i++ )
duke@0 2443 if( from->in(i) == to )
duke@0 2444 return true;
duke@0 2445 return false;
duke@0 2446 }
duke@0 2447
duke@0 2448 #ifdef ASSERT
duke@0 2449 //------------------------------verify_do_def----------------------------------
duke@0 2450 void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) {
duke@0 2451 // Check for bad kills
duke@0 2452 if( OptoReg::is_valid(def) ) { // Ignore stores & control flow
duke@0 2453 Node *prior_use = _reg_node[def];
duke@0 2454 if( prior_use && !edge_from_to(prior_use,n) ) {
duke@0 2455 tty->print("%s = ",OptoReg::as_VMReg(def)->name());
duke@0 2456 n->dump();
duke@0 2457 tty->print_cr("...");
duke@0 2458 prior_use->dump();
jcoomes@1410 2459 assert(edge_from_to(prior_use,n),msg);
duke@0 2460 }
duke@0 2461 _reg_node.map(def,NULL); // Kill live USEs
duke@0 2462 }
duke@0 2463 }
duke@0 2464
duke@0 2465 //------------------------------verify_good_schedule---------------------------
duke@0 2466 void Scheduling::verify_good_schedule( Block *b, const char *msg ) {
duke@0 2467
duke@0 2468 // Zap to something reasonable for the verify code
duke@0 2469 _reg_node.clear();
duke@0 2470
duke@0 2471 // Walk over the block backwards. Check to make sure each DEF doesn't
duke@0 2472 // kill a live value (other than the one it's supposed to). Add each
duke@0 2473 // USE to the live set.
duke@0 2474 for( uint i = b->_nodes.size()-1; i >= _bb_start; i-- ) {
duke@0 2475 Node *n = b->_nodes[i];
duke@0 2476 int n_op = n->Opcode();
duke@0 2477 if( n_op == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) {
duke@0 2478 // Fat-proj kills a slew of registers
duke@0 2479 RegMask rm = n->out_RegMask();// Make local copy
duke@0 2480 while( rm.is_NotEmpty() ) {
duke@0 2481 OptoReg::Name kill = rm.find_first_elem();
duke@0 2482 rm.Remove(kill);
duke@0 2483 verify_do_def( n, kill, msg );
duke@0 2484 }
duke@0 2485 } else if( n_op != Op_Node ) { // Avoid brand new antidependence nodes
duke@0 2486 // Get DEF'd registers the normal way
duke@0 2487 verify_do_def( n, _regalloc->get_reg_first(n), msg );
duke@0 2488 verify_do_def( n, _regalloc->get_reg_second(n), msg );
duke@0 2489 }
duke@0 2490
duke@0 2491 // Now make all USEs live
duke@0 2492 for( uint i=1; i<n->req(); i++ ) {
duke@0 2493 Node *def = n->in(i);
duke@0 2494 assert(def != 0, "input edge required");
duke@0 2495 OptoReg::Name reg_lo = _regalloc->get_reg_first(def);
duke@0 2496 OptoReg::Name reg_hi = _regalloc->get_reg_second(def);
duke@0 2497 if( OptoReg::is_valid(reg_lo) ) {
jcoomes@1410 2498 assert(!_reg_node[reg_lo] || edge_from_to(_reg_node[reg_lo],def), msg);
duke@0 2499 _reg_node.map(reg_lo,n);
duke@0 2500 }
duke@0 2501 if( OptoReg::is_valid(reg_hi) ) {
jcoomes@1410 2502 assert(!_reg_node[reg_hi] || edge_from_to(_reg_node[reg_hi],def), msg);
duke@0 2503 _reg_node.map(reg_hi,n);
duke@0 2504 }
duke@0 2505 }
duke@0 2506
duke@0 2507 }
duke@0 2508
duke@0 2509 // Zap to something reasonable for the Antidependence code
duke@0 2510 _reg_node.clear();
duke@0 2511 }
duke@0 2512 #endif
duke@0 2513
duke@0 2514 // Conditionally add precedence edges. Avoid putting edges on Projs.
duke@0 2515 static void add_prec_edge_from_to( Node *from, Node *to ) {
duke@0 2516 if( from->is_Proj() ) { // Put precedence edge on Proj's input
duke@0 2517 assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" );
duke@0 2518 from = from->in(0);
duke@0 2519 }
duke@0 2520 if( from != to && // No cycles (for things like LD L0,[L0+4] )
duke@0 2521 !edge_from_to( from, to ) ) // Avoid duplicate edge
duke@0 2522 from->add_prec(to);
duke@0 2523 }
duke@0 2524
duke@0 2525 //------------------------------anti_do_def------------------------------------
duke@0 2526 void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) {
duke@0 2527 if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow
duke@0 2528 return;
duke@0 2529
duke@0 2530 Node *pinch = _reg_node[def_reg]; // Get pinch point
duke@0 2531 if( !pinch || _bbs[pinch->_idx] != b || // No pinch-point yet?
duke@0 2532 is_def ) { // Check for a true def (not a kill)
duke@0 2533 _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point
duke@0 2534 return;
duke@0 2535 }
duke@0 2536
duke@0 2537 Node *kill = def; // Rename 'def' to more descriptive 'kill'
duke@0 2538 debug_only( def = (Node*)0xdeadbeef; )
duke@0 2539
duke@0 2540 // After some number of kills there _may_ be a later def
duke@0 2541 Node *later_def = NULL;
duke@0 2542
duke@0 2543 // Finding a kill requires a real pinch-point.
duke@0 2544 // Check for not already having a pinch-point.
duke@0 2545 // Pinch points are Op_Node's.
duke@0 2546 if( pinch->Opcode() != Op_Node ) { // Or later-def/kill as pinch-point?
duke@0 2547 later_def = pinch; // Must be def/kill as optimistic pinch-point
duke@0 2548 if ( _pinch_free_list.size() > 0) {
duke@0 2549 pinch = _pinch_free_list.pop();
duke@0 2550 } else {
duke@0 2551 pinch = new (_cfg->C, 1) Node(1); // Pinch point to-be
duke@0 2552 }
duke@0 2553 if (pinch->_idx >= _regalloc->node_regs_max_index()) {
duke@0 2554 _cfg->C->record_method_not_compilable("too many D-U pinch points");
duke@0 2555 return;
duke@0 2556 }
duke@0 2557 _bbs.map(pinch->_idx,b); // Pretend it's valid in this block (lazy init)
duke@0 2558 _reg_node.map(def_reg,pinch); // Record pinch-point
duke@0 2559 //_regalloc->set_bad(pinch->_idx); // Already initialized this way.
duke@0 2560 if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill
duke@0 2561 pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call
duke@0 2562 add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch
duke@0 2563 later_def = NULL; // and no later def
duke@0 2564 }
duke@0 2565 pinch->set_req(0,later_def); // Hook later def so we can find it
duke@0 2566 } else { // Else have valid pinch point
duke@0 2567 if( pinch->in(0) ) // If there is a later-def
duke@0 2568 later_def = pinch->in(0); // Get it
duke@0 2569 }
duke@0 2570
duke@0 2571 // Add output-dependence edge from later def to kill
duke@0 2572 if( later_def ) // If there is some original def
duke@0 2573 add_prec_edge_from_to(later_def,kill); // Add edge from def to kill
duke@0 2574
duke@0 2575 // See if current kill is also a use, and so is forced to be the pinch-point.
duke@0 2576 if( pinch->Opcode() == Op_Node ) {
duke@0 2577 Node *uses = kill->is_Proj() ? kill->in(0) : kill;
duke@0 2578 for( uint i=1; i<uses->req(); i++ ) {
duke@0 2579 if( _regalloc->get_reg_first(uses->in(i)) == def_reg ||
duke@0 2580 _regalloc->get_reg_second(uses->in(i)) == def_reg ) {
duke@0 2581 // Yes, found a use/kill pinch-point
duke@0 2582 pinch->set_req(0,NULL); //
duke@0 2583 pinch->replace_by(kill); // Move anti-dep edges up
duke@0 2584 pinch = kill;
duke@0 2585 _reg_node.map(def_reg,pinch);
duke@0 2586 return;
duke@0 2587 }
duke@0 2588 }
duke@0 2589 }
duke@0 2590
duke@0 2591 // Add edge from kill to pinch-point
duke@0 2592 add_prec_edge_from_to(kill,pinch);
duke@0 2593 }
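// A stripped-down sketch (guarded out) of the three states anti_do_def() can
// find in _reg_node[def_reg]; the enum, the helper and the name 'classify'
// are invented here purely to illustrate the checks above.
#if 0
enum RegEntryKind {
  STALE_OR_EMPTY,   // NULL, or a node left over from an earlier block
  OPTIMISTIC_DEF,   // the single def/kill seen so far stands in for a pinch
  REAL_PINCH        // a plain Op_Node pinch point collecting uses and kills
};

static RegEntryKind classify(Node* entry, Block* b, Block_Array& bbs) {
  if (entry == NULL || bbs[entry->_idx] != b)   // same test as anti_do_def()
    return STALE_OR_EMPTY;                      // treated as "no prior def"
  return (entry->Opcode() == Op_Node) ? REAL_PINCH : OPTIMISTIC_DEF;
}
#endif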
duke@0 2594
duke@0 2595 //------------------------------anti_do_use------------------------------------
duke@0 2596 void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
duke@0 2597 if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow
duke@0 2598 return;
duke@0 2599 Node *pinch = _reg_node[use_reg]; // Get pinch point
duke@0 2600 // Check for no later def_reg/kill in block
duke@0 2601 if( pinch && _bbs[pinch->_idx] == b &&
duke@0 2602 // Use has to be block-local as well
duke@0 2603 _bbs[use->_idx] == b ) {
duke@0 2604 if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?)
duke@0 2605 pinch->req() == 1 ) { // pinch not yet in block?
duke@0 2606 pinch->del_req(0); // yank pointer to later-def, also set flag
duke@0 2607 // Insert the pinch-point in the block just after the last use
duke@0 2608 b->_nodes.insert(b->find_node(use)+1,pinch);
duke@0 2609 _bb_end++; // Increase size scheduled region in block
duke@0 2610 }
duke@0 2611
duke@0 2612 add_prec_edge_from_to(pinch,use);
duke@0 2613 }
duke@0 2614 }
duke@0 2615
duke@0 2616 //------------------------------ComputeRegisterAntidependences-----------------
duke@0 2617 // We insert antidependences between the reads and the following write of
duke@0 2618 // allocated registers to prevent illegal code motion. The number of added
duke@0 2619 // edges should be fairly small, especially as we are only adding edges
duke@0 2620 // within the current basic block.
duke@0 2621 void Scheduling::ComputeRegisterAntidependencies(Block *b) {
duke@0 2622
duke@0 2623 #ifdef ASSERT
duke@0 2624 verify_good_schedule(b,"before block local scheduling");
duke@0 2625 #endif
duke@0 2626
duke@0 2627 // A valid schedule, for each register independently, is an endless cycle
duke@0 2628 // of: a def, then some uses (connected to the def by true dependencies),
duke@0 2629 // then some kills (defs with no uses), finally the cycle repeats with a new
duke@0 2630 // def. The uses are allowed to float relative to each other, as are the
duke@0 2631 // kills. No use is allowed to slide past a kill (or def). This requires
duke@0 2632 // antidependencies between all uses of a single def and all kills that
duke@0 2633 // follow, up to the next def. More edges are redundant, because later defs
duke@0 2634 // & kills are already serialized with true or antidependencies. To keep
duke@0 2635 // the edge count down, we add a 'pinch point' node if there's more than
duke@0 2636 // one use or more than one kill/def.
duke@0 2637
duke@0 2638 // We add dependencies in one bottom-up pass.
duke@0 2639
duke@0 2640 // For each instruction we handle its DEFs/KILLs, then its USEs.
duke@0 2641
duke@0 2642 // For each DEF/KILL, we check to see if there's a prior DEF/KILL for this
duke@0 2643 // register. If not, we record the DEF/KILL in _reg_node, the
duke@0 2644 // register-to-def mapping. If there is a prior DEF/KILL, we insert a
duke@0 2645 // "pinch point", a new Node that's in the graph but not in the block.
duke@0 2646 // We put edges from the prior and current DEF/KILLs to the pinch point.
duke@0 2647 // We put the pinch point in _reg_node. If there's already a pinch point
duke@0 2648 // we merely add an edge from the current DEF/KILL to the pinch point.
duke@0 2649
duke@0 2650 // After doing the DEF/KILLs, we handle USEs. For each used register, we
duke@0 2651 // put an edge from the pinch point to the USE.
duke@0 2652
duke@0 2653 // To be expedient, the _reg_node array is pre-allocated for the whole
duke@0 2654 // compilation. _reg_node is lazily initialized; it either contains a NULL,
duke@0 2655 // or a valid def/kill/pinch-point, or a leftover node from some prior
duke@0 2656 // block. A leftover node from a prior block is treated like a NULL (no
duke@0 2657 // prior def, so no anti-dependence is needed). A valid def is distinguished
duke@0 2658 // by being in the current block.
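  // Worked numbers for the pinch-point trade-off above (guarded out, purely
  // illustrative): with U block-local uses and K later kills of one register,
  // pairwise anti-dependence edges cost U*K while one pinch point costs U+K.
#if 0
  uint U = 3, K = 3;            // the 3-use/3-kill picture drawn further below
  uint pairwise = U * K;        // 9 edges if every use is wired to every kill
  uint pinched  = U + K;        // 6 edges when both sides meet at one pinch
#endif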
duke@0 2659 bool fat_proj_seen = false;
duke@0 2660 uint last_safept = _bb_end-1;
duke@0 2661 Node* end_node = (_bb_end-1 >= _bb_start) ? b->_nodes[last_safept] : NULL;
duke@0 2662 Node* last_safept_node = end_node;
duke@0 2663 for( uint i = _bb_end-1; i >= _bb_start; i-- ) {
duke@0 2664 Node *n = b->_nodes[i];
duke@0 2665 int is_def = n->outcnt(); // def if some uses prior to adding precedence edges
duke@0 2666 if( n->Opcode() == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) {
duke@0 2667 // Fat-proj kills a slew of registers
duke@0 2668 // This can add edges to 'n' and obscure whether or not it was a def,
duke@0 2669 // hence the is_def flag.
duke@0 2670 fat_proj_seen = true;
duke@0 2671 RegMask rm = n->out_RegMask();// Make local copy
duke@0 2672 while( rm.is_NotEmpty() ) {
duke@0 2673 OptoReg::Name kill = rm.find_first_elem();
duke@0 2674 rm.Remove(kill);
duke@0 2675 anti_do_def( b, n, kill, is_def );
duke@0 2676 }
duke@0 2677 } else {
duke@0 2678 // Get DEF'd registers the normal way
duke@0 2679 anti_do_def( b, n, _regalloc->get_reg_first(n), is_def );
duke@0 2680 anti_do_def( b, n, _regalloc->get_reg_second(n), is_def );
duke@0 2681 }
duke@0 2682
duke@0 2683 // Check each register used by this instruction for a following DEF/KILL
duke@0 2684 // that must occur afterward and requires an anti-dependence edge.
duke@0 2685 for( uint j=0; j<n->req(); j++ ) {
duke@0 2686 Node *def = n->in(j);
duke@0 2687 if( def ) {
duke@0 2688 assert( def->Opcode() != Op_MachProj || def->ideal_reg() != MachProjNode::fat_proj, "" );
duke@0 2689 anti_do_use( b, n, _regalloc->get_reg_first(def) );
duke@0 2690 anti_do_use( b, n, _regalloc->get_reg_second(def) );
duke@0 2691 }
duke@0 2692 }
duke@0 2693 // Do not allow defs of new derived values to float above GC
duke@0 2694 // points unless the base is definitely available at the GC point.
duke@0 2695
duke@0 2696 Node *m = b->_nodes[i];
duke@0 2697
duke@0 2698 // Add precedence edge from following safepoint to use of derived pointer
duke@0 2699 if( last_safept_node != end_node &&
duke@0 2700 m != last_safept_node) {
duke@0 2701 for (uint k = 1; k < m->req(); k++) {
duke@0 2702 const Type *t = m->in(k)->bottom_type();
duke@0 2703 if( t->isa_oop_ptr() &&
duke@0 2704 t->is_ptr()->offset() != 0 ) {
duke@0 2705 last_safept_node->add_prec( m );
duke@0 2706 break;
duke@0 2707 }
duke@0 2708 }
duke@0 2709 }
duke@0 2710
duke@0 2711 if( n->jvms() ) { // Precedence edge from derived to safept
duke@0 2712 // Check if last_safept_node was moved by pinch-point insertion in anti_do_use()
duke@0 2713 if( b->_nodes[last_safept] != last_safept_node ) {
duke@0 2714 last_safept = b->find_node(last_safept_node);
duke@0 2715 }
duke@0 2716 for( uint j=last_safept; j > i; j-- ) {
duke@0 2717 Node *mach = b->_nodes[j];
duke@0 2718 if( mach->is_Mach() && mach->as_Mach()->ideal_Opcode() == Op_AddP )
duke@0 2719 mach->add_prec( n );
duke@0 2720 }
duke@0 2721 last_safept = i;
duke@0 2722 last_safept_node = m;
duke@0 2723 }
duke@0 2724 }
duke@0 2725
duke@0 2726 if (fat_proj_seen) {
duke@0 2727 // Garbage collect pinch nodes that were not consumed.
duke@0 2728 // They are usually created by a fat kill MachProj for a call.
duke@0 2729 garbage_collect_pinch_nodes();
duke@0 2730 }
duke@0 2731 }
duke@0 2732
duke@0 2733 //------------------------------garbage_collect_pinch_nodes-------------------------------
duke@0 2734
duke@0 2735 // Garbage collect pinch nodes for reuse by other blocks.
duke@0 2736 //
duke@0 2737 // The block scheduler's insertion of anti-dependence
duke@0 2738 // edges creates many pinch nodes when the block contains
duke@0 2739 // 2 or more Calls. A pinch node is used to prevent a
duke@0 2740 // combinatorial explosion of edges. If a set of kills for a
duke@0 2741 // register is anti-dependent on a set of uses (or defs), rather
duke@0 2742 // than adding an edge in the graph between each pair of kill
duke@0 2743 // and use (or def), a pinch is inserted between them:
duke@0 2744 //
duke@0 2745 // use1 use2 use3
duke@0 2746 // \ | /
duke@0 2747 // \ | /
duke@0 2748 // pinch
duke@0 2749 // / | \
duke@0 2750 // / | \
duke@0 2751 // kill1 kill2 kill3
duke@0 2752 //
duke@0 2753 // One pinch node is created per register killed when
duke@0 2754 // the second call is encountered during a backwards pass
duke@0 2755 // over the block. Most of these pinch nodes are never
duke@0 2756 // wired into the graph because the register is never
duke@0 2757 // used or def'ed in the block.
duke@0 2758 //
duke@0 2759 void Scheduling::garbage_collect_pinch_nodes() {
duke@0 2760 #ifndef PRODUCT
duke@0 2761 if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
duke@0 2762 #endif
duke@0 2763 int trace_cnt = 0;
duke@0 2764 for (uint k = 0; k < _reg_node.Size(); k++) {
duke@0 2765 Node* pinch = _reg_node[k];
duke@0 2766 if (pinch != NULL && pinch->Opcode() == Op_Node &&
duke@0 2767 // no precedence input edges
duke@0 2768 (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
duke@0 2769 cleanup_pinch(pinch);
duke@0 2770 _pinch_free_list.push(pinch);
duke@0 2771 _reg_node.map(k, NULL);
duke@0 2772 #ifndef PRODUCT
duke@0 2773 if (_cfg->C->trace_opto_output()) {
duke@0 2774 trace_cnt++;
duke@0 2775 if (trace_cnt > 40) {
duke@0 2776 tty->print("\n");
duke@0 2777 trace_cnt = 0;
duke@0 2778 }
duke@0 2779 tty->print(" %d", pinch->_idx);
duke@0 2780 }
duke@0 2781 #endif
duke@0 2782 }
duke@0 2783 }
duke@0 2784 #ifndef PRODUCT
duke@0 2785 if (_cfg->C->trace_opto_output()) tty->print("\n");
duke@0 2786 #endif
duke@0 2787 }
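// A simplified model (guarded out) of the pinch free list: std::vector plays
// the role of HotSpot's Node_List stack, and 'Pinch', 'allocate_pinch' and
// 'reclaim_pinch' are invented names for illustration only.
#if 0
#include <vector>

struct Pinch { int id; };

static std::vector<Pinch*> pinch_free_list;

// anti_do_def() side: reuse a reclaimed pinch node when one is available,
// otherwise make a fresh one.
static Pinch* allocate_pinch(int next_id) {
  if (!pinch_free_list.empty()) {
    Pinch* p = pinch_free_list.back();
    pinch_free_list.pop_back();
    return p;
  }
  Pinch* p = new Pinch();
  p->id = next_id;
  return p;
}

// garbage_collect_pinch_nodes() side: park an unused pinch for a later block.
static void reclaim_pinch(Pinch* p) {
  pinch_free_list.push_back(p);
}
#endif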
duke@0 2788
duke@0 2789 // Clean up a pinch node for reuse.
duke@0 2790 void Scheduling::cleanup_pinch( Node *pinch ) {
duke@0 2791 assert (pinch && pinch->Opcode() == Op_Node && pinch->req() == 1, "just checking");
duke@0 2792
duke@0 2793 for (DUIterator_Last imin, i = pinch->last_outs(imin); i >= imin; ) {
duke@0 2794 Node* use = pinch->last_out(i);
duke@0 2795 uint uses_found = 0;
duke@0 2796 for (uint j = use->req(); j < use->len(); j++) {
duke@0 2797 if (use->in(j) == pinch) {
duke@0 2798 use->rm_prec(j);
duke@0 2799 uses_found++;
duke@0 2800 }
duke@0 2801 }
duke@0 2802 assert(uses_found > 0, "must be a precedence edge");
duke@0 2803 i -= uses_found; // we deleted 1 or more copies of this edge
duke@0 2804 }
duke@0 2805 // May have a later_def entry
duke@0 2806 pinch->set_req(0, NULL);
duke@0 2807 }
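// A guarded-out sketch of the input layout cleanup_pinch() relies on: required
// (data/control) inputs occupy [0, req) and precedence inputs occupy
// [req, len). 'EdgeNode' and its method are invented names for illustration.
#if 0
#include <cstddef>
#include <vector>

struct EdgeNode {
  std::vector<EdgeNode*> in;   // all inputs: required first, precedence after
  size_t req;                  // count of required inputs

  // Drop every precedence edge to 'target', the analogue of the
  // use->rm_prec(j) loop above; required inputs are never touched.
  void remove_prec_edges_to(EdgeNode* target) {
    for (size_t j = req; j < in.size(); ) {
      if (in[j] == target) in.erase(in.begin() + j);
      else                 j++;
    }
  }
};
#endif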
duke@0 2808
duke@0 2809 //------------------------------print_statistics-------------------------------
duke@0 2810 #ifndef PRODUCT
duke@0 2811
duke@0 2812 void Scheduling::dump_available() const {
duke@0 2813 tty->print("#Availist ");
duke@0 2814 for (uint i = 0; i < _available.size(); i++)
duke@0 2815 tty->print(" N%d/l%d", _available[i]->_idx,_current_latency[_available[i]->_idx]);
duke@0 2816 tty->cr();
duke@0 2817 }
duke@0 2818
duke@0 2819 // Print Scheduling Statistics
duke@0 2820 void Scheduling::print_statistics() {
duke@0 2821 // Print the size added by nops for bundling
duke@0 2822 tty->print("Nops added %d bytes to total of %d bytes",
duke@0 2823 _total_nop_size, _total_method_size);
duke@0 2824 if (_total_method_size > 0)
duke@0 2825 tty->print(", for %.2f%%",
duke@0 2826 ((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
duke@0 2827 tty->print("\n");
duke@0 2828
duke@0 2829 // Print the number of branch shadows filled
duke@0 2830 if (Pipeline::_branch_has_delay_slot) {
duke@0 2831 tty->print("Of %d branches, %d had unconditional delay slots filled",
duke@0 2832 _total_branches, _total_unconditional_delays);
duke@0 2833 if (_total_branches > 0)
duke@0 2834 tty->print(", for %.2f%%",
duke@0 2835 ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
duke@0 2836 tty->print("\n");
duke@0 2837 }
duke@0 2838
duke@0 2839 uint total_instructions = 0, total_bundles = 0;
duke@0 2840
duke@0 2841 for (uint i = 1; i <= Pipeline::_max_instrs_per_cycle; i++) {
duke@0 2842 uint bundle_count = _total_instructions_per_bundle[i];
duke@0 2843 total_instructions += bundle_count * i;
duke@0 2844 total_bundles += bundle_count;
duke@0 2845 }
duke@0 2846
duke@0 2847 if (total_bundles > 0)
duke@0 2848 tty->print("Average ILP (excluding nops) is %.2f\n",
duke@0 2849 ((double)total_instructions) / ((double)total_bundles));
duke@0 2850 }
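// A worked instance (guarded out) of the ILP average printed above, using an
// invented bundle histogram; counts[i] holds the number of bundles that issued
// exactly i instructions, as in _total_instructions_per_bundle.
#if 0
static double average_ilp(const uint* counts, uint max_instrs_per_cycle) {
  uint total_instructions = 0, total_bundles = 0;
  for (uint i = 1; i <= max_instrs_per_cycle; i++) {
    total_instructions += counts[i] * i;
    total_bundles      += counts[i];
  }
  return (total_bundles > 0)
       ? (double)total_instructions / (double)total_bundles
       : 0.0;
}
// e.g. counts = {0, 10, 6, 2}, max_instrs_per_cycle = 3:
//      (10*1 + 6*2 + 2*3) / (10 + 6 + 2) = 28 / 18 ~= 1.56 instructions/bundle
#endif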
duke@0 2851 #endif