annotate src/cpu/sparc/vm/stubGenerator_sparc.cpp @ 1454:6476042f815c

6940701: Don't align loops in stubs for Niagara sparc
Summary: Don't align loops in stubs for Niagara sparc since NOPs are expensive.
Reviewed-by: twisti, never
author kvn
date Wed, 07 Apr 2010 09:37:47 -0700
parents 0dc88ad3244e
children c640000b7cc1
rev   line source
duke@0 1 /*
never@1252 2 * Copyright 1997-2010 Sun Microsystems, Inc. All Rights Reserved.
duke@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@0 4 *
duke@0 5 * This code is free software; you can redistribute it and/or modify it
duke@0 6 * under the terms of the GNU General Public License version 2 only, as
duke@0 7 * published by the Free Software Foundation.
duke@0 8 *
duke@0 9 * This code is distributed in the hope that it will be useful, but WITHOUT
duke@0 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@0 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@0 12 * version 2 for more details (a copy is included in the LICENSE file that
duke@0 13 * accompanied this code).
duke@0 14 *
duke@0 15 * You should have received a copy of the GNU General Public License version
duke@0 16 * 2 along with this work; if not, write to the Free Software Foundation,
duke@0 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@0 18 *
duke@0 19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
duke@0 20 * CA 95054 USA or visit www.sun.com if you need additional information or
duke@0 21 * have any questions.
duke@0 22 *
duke@0 23 */
duke@0 24
duke@0 25 #include "incls/_precompiled.incl"
duke@0 26 #include "incls/_stubGenerator_sparc.cpp.incl"
duke@0 27
duke@0 28 // Declaration and definition of StubGenerator (no .hpp file).
duke@0 29 // For a more detailed description of the stub routine structure
duke@0 30 // see the comment in stubRoutines.hpp.
duke@0 31
duke@0 32 #define __ _masm->
duke@0 33
duke@0 34 #ifdef PRODUCT
duke@0 35 #define BLOCK_COMMENT(str) /* nothing */
duke@0 36 #else
duke@0 37 #define BLOCK_COMMENT(str) __ block_comment(str)
duke@0 38 #endif
duke@0 39
duke@0 40 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
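For illustration, a typical use

    Label L_loop;
    __ BIND(L_loop);

expands in non-PRODUCT builds to _masm->bind(L_loop); _masm->block_comment("L_loop" ":"); so every bound label also shows up as a named comment in disassembly output.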
duke@0 41
duke@0 42 // Note: The register L7 is used as L7_thread_cache, and may not be used
duke@0 43 // any other way within this module.
duke@0 44
duke@0 45
duke@0 46 static const Register& Lstub_temp = L2;
duke@0 47
duke@0 48 // -------------------------------------------------------------------------------------------------------------------------
duke@0 49 // Stub Code definitions
duke@0 50
duke@0 51 static address handle_unsafe_access() {
duke@0 52 JavaThread* thread = JavaThread::current();
duke@0 53 address pc = thread->saved_exception_pc();
duke@0 54 address npc = thread->saved_exception_npc();
duke@0 55 // pc is the instruction which we must emulate;
duke@0 56 // doing a no-op is fine: the faulting load just returns garbage
duke@0 57
duke@0 58 // request an async exception
duke@0 59 thread->set_pending_unsafe_access_error();
duke@0 60
duke@0 61 // return address of next instruction to execute
duke@0 62 return npc;
duke@0 63 }
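A sketch of the contract assumed here: on a fault inside an Unsafe access, the platform signal handler is expected to have stashed the faulting pc and its successor npc in the thread before redirecting to the unsafe-access handler stub. The setter and ucontext register names below are illustrative, not taken from this file:

    // inside the platform SIGBUS/SIGSEGV handler (illustrative sketch):
    thread->set_saved_exception_pc((address)uc->uc_mcontext.gregs[REG_PC]);
    thread->set_saved_exception_npc((address)uc->uc_mcontext.gregs[REG_nPC]);
    // execution is then redirected to the handler_for_unsafe_access stub,
    // which calls handle_unsafe_access() above and resumes at the returned npc.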
duke@0 64
duke@0 65 class StubGenerator: public StubCodeGenerator {
duke@0 66 private:
duke@0 67
duke@0 68 #ifdef PRODUCT
duke@0 69 #define inc_counter_np(a,b,c) (0)
duke@0 70 #else
duke@0 71 #define inc_counter_np(counter, t1, t2) \
duke@0 72 BLOCK_COMMENT("inc_counter " #counter); \
twisti@765 73 __ inc_counter(&counter, t1, t2);
duke@0 74 #endif
duke@0 75
duke@0 76 //----------------------------------------------------------------------------------------------------
duke@0 77 // Call stubs are used to call Java from C
duke@0 78
duke@0 79 address generate_call_stub(address& return_pc) {
duke@0 80 StubCodeMark mark(this, "StubRoutines", "call_stub");
duke@0 81 address start = __ pc();
duke@0 82
duke@0 83 // Incoming arguments:
duke@0 84 //
duke@0 85 // o0 : call wrapper address
duke@0 86 // o1 : result (address)
duke@0 87 // o2 : result type
duke@0 88 // o3 : method
duke@0 89 // o4 : (interpreter) entry point
duke@0 90 // o5 : parameters (address)
duke@0 91 // [sp + 0x5c]: parameter size (in words)
duke@0 92 // [sp + 0x60]: thread
duke@0 93 //
duke@0 94 // +---------------+ <--- sp + 0
duke@0 95 // | |
duke@0 96 // . reg save area .
duke@0 97 // | |
duke@0 98 // +---------------+ <--- sp + 0x40
duke@0 99 // | |
duke@0 100 // . extra 7 slots .
duke@0 101 // | |
duke@0 102 // +---------------+ <--- sp + 0x5c
duke@0 103 // | param. size |
duke@0 104 // +---------------+ <--- sp + 0x60
duke@0 105 // | thread |
duke@0 106 // +---------------+
duke@0 107 // | |
duke@0 108
duke@0 109 // note: if the link argument position changes, adjust
duke@0 110 // the code in frame::entry_frame_call_wrapper()
duke@0 111
duke@0 112 const Argument link = Argument(0, false); // used only for GC
duke@0 113 const Argument result = Argument(1, false);
duke@0 114 const Argument result_type = Argument(2, false);
duke@0 115 const Argument method = Argument(3, false);
duke@0 116 const Argument entry_point = Argument(4, false);
duke@0 117 const Argument parameters = Argument(5, false);
duke@0 118 const Argument parameter_size = Argument(6, false);
duke@0 119 const Argument thread = Argument(7, false);
duke@0 120
duke@0 121 // setup thread register
duke@0 122 __ ld_ptr(thread.as_address(), G2_thread);
coleenp@113 123 __ reinit_heapbase();
duke@0 124
duke@0 125 #ifdef ASSERT
duke@0 126 // make sure we have no pending exceptions
duke@0 127 { const Register t = G3_scratch;
duke@0 128 Label L;
duke@0 129 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
duke@0 130 __ br_null(t, false, Assembler::pt, L);
duke@0 131 __ delayed()->nop();
duke@0 132 __ stop("StubRoutines::call_stub: entered with pending exception");
duke@0 133 __ bind(L);
duke@0 134 }
duke@0 135 #endif
duke@0 136
duke@0 137 // create activation frame & allocate space for parameters
duke@0 138 { const Register t = G3_scratch;
duke@0 139 __ ld_ptr(parameter_size.as_address(), t); // get parameter size (in words)
duke@0 140 __ add(t, frame::memory_parameter_word_sp_offset, t); // add space for save area (in words)
duke@0 141 __ round_to(t, WordsPerLong); // make sure it is multiple of 2 (in words)
duke@0 142 __ sll(t, Interpreter::logStackElementSize(), t); // compute number of bytes
duke@0 143 __ neg(t); // negate so it can be used with save
duke@0 144 __ save(SP, t, SP); // setup new frame
duke@0 145 }
duke@0 146
duke@0 147 // +---------------+ <--- sp + 0
duke@0 148 // | |
duke@0 149 // . reg save area .
duke@0 150 // | |
duke@0 151 // +---------------+ <--- sp + 0x40
duke@0 152 // | |
duke@0 153 // . extra 7 slots .
duke@0 154 // | |
duke@0 155 // +---------------+ <--- sp + 0x5c
duke@0 156 // | empty slot | (only if parameter size is even)
duke@0 157 // +---------------+
duke@0 158 // | |
duke@0 159 // . parameters .
duke@0 160 // | |
duke@0 161 // +---------------+ <--- fp + 0
duke@0 162 // | |
duke@0 163 // . reg save area .
duke@0 164 // | |
duke@0 165 // +---------------+ <--- fp + 0x40
duke@0 166 // | |
duke@0 167 // . extra 7 slots .
duke@0 168 // | |
duke@0 169 // +---------------+ <--- fp + 0x5c
duke@0 170 // | param. size |
duke@0 171 // +---------------+ <--- fp + 0x60
duke@0 172 // | thread |
duke@0 173 // +---------------+
duke@0 174 // | |
duke@0 175
duke@0 176 // pass parameters if any
duke@0 177 BLOCK_COMMENT("pass parameters if any");
duke@0 178 { const Register src = parameters.as_in().as_register();
duke@0 179 const Register dst = Lentry_args;
duke@0 180 const Register tmp = G3_scratch;
duke@0 181 const Register cnt = G4_scratch;
duke@0 182
duke@0 183 // test if any parameters & setup of Lentry_args
duke@0 184 Label exit;
duke@0 185 __ ld_ptr(parameter_size.as_in().as_address(), cnt); // parameter counter
duke@0 186 __ add( FP, STACK_BIAS, dst );
duke@0 187 __ tst(cnt);
duke@0 188 __ br(Assembler::zero, false, Assembler::pn, exit);
duke@0 189 __ delayed()->sub(dst, BytesPerWord, dst); // setup Lentry_args
duke@0 190
duke@0 191 // copy parameters if any
duke@0 192 Label loop;
duke@0 193 __ BIND(loop);
duke@0 194 // Store tag first.
duke@0 195 if (TaggedStackInterpreter) {
duke@0 196 __ ld_ptr(src, 0, tmp);
duke@0 197 __ add(src, BytesPerWord, src); // get next
duke@0 198 __ st_ptr(tmp, dst, Interpreter::tag_offset_in_bytes());
duke@0 199 }
duke@0 200 // Store parameter value
duke@0 201 __ ld_ptr(src, 0, tmp);
duke@0 202 __ add(src, BytesPerWord, src);
duke@0 203 __ st_ptr(tmp, dst, Interpreter::value_offset_in_bytes());
duke@0 204 __ deccc(cnt);
duke@0 205 __ br(Assembler::greater, false, Assembler::pt, loop);
duke@0 206 __ delayed()->sub(dst, Interpreter::stackElementSize(), dst);
duke@0 207
duke@0 208 // done
duke@0 209 __ BIND(exit);
duke@0 210 }
duke@0 211
duke@0 212 // setup parameters, method & call Java function
duke@0 213 #ifdef ASSERT
duke@0 214 // layout_activation_impl checks its notion of the saved SP against
duke@0 215 // this register, so if this changes, update it as well.
duke@0 216 const Register saved_SP = Lscratch;
duke@0 217 __ mov(SP, saved_SP); // keep track of SP before call
duke@0 218 #endif
duke@0 219
duke@0 220 // setup parameters
duke@0 221 const Register t = G3_scratch;
duke@0 222 __ ld_ptr(parameter_size.as_in().as_address(), t); // get parameter size (in words)
duke@0 223 __ sll(t, Interpreter::logStackElementSize(), t); // compute number of bytes
duke@0 224 __ sub(FP, t, Gargs); // setup parameter pointer
duke@0 225 #ifdef _LP64
duke@0 226 __ add( Gargs, STACK_BIAS, Gargs ); // Account for LP64 stack bias
duke@0 227 #endif
duke@0 228 __ mov(SP, O5_savedSP);
duke@0 229
duke@0 230
duke@0 231 // do the call
duke@0 232 //
duke@0 233 // the following registers must be set up:
duke@0 234 //
duke@0 235 // G2_thread
duke@0 236 // G5_method
duke@0 237 // Gargs
duke@0 238 BLOCK_COMMENT("call Java function");
duke@0 239 __ jmpl(entry_point.as_in().as_register(), G0, O7);
duke@0 240 __ delayed()->mov(method.as_in().as_register(), G5_method); // setup method
duke@0 241
duke@0 242 BLOCK_COMMENT("call_stub_return_address:");
duke@0 243 return_pc = __ pc();
duke@0 244
duke@0 245 // The callee, if it wasn't interpreted, can return with SP changed so
duke@0 246 // we can no longer assert that SP is unchanged here.
duke@0 247
duke@0 248 // store result depending on type
duke@0 249 // (everything that is not T_OBJECT, T_LONG, T_FLOAT, or T_DOUBLE
duke@0 250 // is treated as T_INT)
duke@0 251 { const Register addr = result .as_in().as_register();
duke@0 252 const Register type = result_type.as_in().as_register();
duke@0 253 Label is_long, is_float, is_double, is_object, exit;
duke@0 254 __ cmp(type, T_OBJECT); __ br(Assembler::equal, false, Assembler::pn, is_object);
duke@0 255 __ delayed()->cmp(type, T_FLOAT); __ br(Assembler::equal, false, Assembler::pn, is_float);
duke@0 256 __ delayed()->cmp(type, T_DOUBLE); __ br(Assembler::equal, false, Assembler::pn, is_double);
duke@0 257 __ delayed()->cmp(type, T_LONG); __ br(Assembler::equal, false, Assembler::pn, is_long);
duke@0 258 __ delayed()->nop();
duke@0 259
duke@0 260 // store int result
duke@0 261 __ st(O0, addr, G0);
duke@0 262
duke@0 263 __ BIND(exit);
duke@0 264 __ ret();
duke@0 265 __ delayed()->restore();
duke@0 266
duke@0 267 __ BIND(is_object);
duke@0 268 __ ba(false, exit);
duke@0 269 __ delayed()->st_ptr(O0, addr, G0);
duke@0 270
duke@0 271 __ BIND(is_float);
duke@0 272 __ ba(false, exit);
duke@0 273 __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
duke@0 274
duke@0 275 __ BIND(is_double);
duke@0 276 __ ba(false, exit);
duke@0 277 __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
duke@0 278
duke@0 279 __ BIND(is_long);
duke@0 280 #ifdef _LP64
duke@0 281 __ ba(false, exit);
duke@0 282 __ delayed()->st_long(O0, addr, G0); // store entire long
duke@0 283 #else
duke@0 284 #if defined(COMPILER2)
duke@0 285 // All return values are where we want them, except for Longs. C2 returns
duke@0 286 // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1.
duke@0 287 // Since the interpreter will return longs in G1 and O0/O1 in the 32-bit
duke@0 288 // build we simply always use G1.
duke@0 289 // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to
duke@0 290 // do this here. Unfortunately if we did a rethrow we'd see a MachEpilog node
duke@0 291 // first which would move G1 -> O0/O1 and destroy the exception we were throwing.
duke@0 292
duke@0 293 __ ba(false, exit);
duke@0 294 __ delayed()->stx(G1, addr, G0); // store entire long
duke@0 295 #else
duke@0 296 __ st(O1, addr, BytesPerInt);
duke@0 297 __ ba(false, exit);
duke@0 298 __ delayed()->st(O0, addr, G0);
duke@0 299 #endif /* COMPILER2 */
duke@0 300 #endif /* _LP64 */
duke@0 301 }
duke@0 302 return start;
duke@0 303 }
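Seen from C++, the eight Argument slots above correspond to the CallStub function-pointer type declared in stubRoutines.hpp; JavaCalls::call_helper invokes the generated entry roughly like this (a sketch, not the verbatim call):

    StubRoutines::call_stub()(
        (address)&link,             // O0: call wrapper
        result_val_address,         // O1: where the callee's result is stored
        result_type,                // O2: T_INT, T_OBJECT, T_LONG, ...
        method(),                   // O3: the methodOop to invoke
        entry_point,                // O4: (interpreter) entry point
        args->parameters(),         // O5: first parameter address
        args->size_of_parameters(), // [sp + 0x5c]: size in words
        thread);                    // [sp + 0x60]: current JavaThread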
duke@0 304
duke@0 305
duke@0 306 //----------------------------------------------------------------------------------------------------
duke@0 307 // Return point for a Java call if there's an exception thrown in Java code.
duke@0 308 // The exception is caught and transformed into a pending exception stored in
duke@0 309 // JavaThread that can be tested from within the VM.
duke@0 310 //
duke@0 311 // Oexception: exception oop
duke@0 312
duke@0 313 address generate_catch_exception() {
duke@0 314 StubCodeMark mark(this, "StubRoutines", "catch_exception");
duke@0 315
duke@0 316 address start = __ pc();
duke@0 317 // verify that thread corresponds
duke@0 318 __ verify_thread();
duke@0 319
duke@0 320 const Register& temp_reg = Gtemp;
twisti@765 321 Address pending_exception_addr (G2_thread, Thread::pending_exception_offset());
twisti@765 322 Address exception_file_offset_addr(G2_thread, Thread::exception_file_offset ());
twisti@765 323 Address exception_line_offset_addr(G2_thread, Thread::exception_line_offset ());
duke@0 324
duke@0 325 // set pending exception
duke@0 326 __ verify_oop(Oexception);
duke@0 327 __ st_ptr(Oexception, pending_exception_addr);
duke@0 328 __ set((intptr_t)__FILE__, temp_reg);
duke@0 329 __ st_ptr(temp_reg, exception_file_offset_addr);
duke@0 330 __ set((intptr_t)__LINE__, temp_reg);
duke@0 331 __ st(temp_reg, exception_line_offset_addr);
duke@0 332
duke@0 333 // complete return to VM
duke@0 334 assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");
duke@0 335
twisti@765 336 AddressLiteral stub_ret(StubRoutines::_call_stub_return_address);
twisti@765 337 __ jump_to(stub_ret, temp_reg);
duke@0 338 __ delayed()->nop();
duke@0 339
duke@0 340 return start;
duke@0 341 }
duke@0 342
duke@0 343
duke@0 344 //----------------------------------------------------------------------------------------------------
duke@0 345 // Continuation point for runtime calls returning with a pending exception
duke@0 346 // The pending exception check happened in the runtime or native call stub
duke@0 347 // The pending exception in Thread is converted into a Java-level exception
duke@0 348 //
duke@0 349 // Contract with Java-level exception handler: O0 = exception
duke@0 350 // O1 = throwing pc
duke@0 351
duke@0 352 address generate_forward_exception() {
duke@0 353 StubCodeMark mark(this, "StubRoutines", "forward_exception");
duke@0 354 address start = __ pc();
duke@0 355
duke@0 356 // Upon entry, O7 has the return address returning into Java
duke@0 357 // (interpreted or compiled) code; i.e. the return address
duke@0 358 // becomes the throwing pc.
duke@0 359
duke@0 360 const Register& handler_reg = Gtemp;
duke@0 361
twisti@765 362 Address exception_addr(G2_thread, Thread::pending_exception_offset());
duke@0 363
duke@0 364 #ifdef ASSERT
duke@0 365 // make sure that this code is only executed if there is a pending exception
duke@0 366 { Label L;
duke@0 367 __ ld_ptr(exception_addr, Gtemp);
duke@0 368 __ br_notnull(Gtemp, false, Assembler::pt, L);
duke@0 369 __ delayed()->nop();
duke@0 370 __ stop("StubRoutines::forward exception: no pending exception (1)");
duke@0 371 __ bind(L);
duke@0 372 }
duke@0 373 #endif
duke@0 374
duke@0 375 // compute exception handler into handler_reg
duke@0 376 __ get_thread();
duke@0 377 __ ld_ptr(exception_addr, Oexception);
duke@0 378 __ verify_oop(Oexception);
duke@0 379 __ save_frame(0); // compensates for compiler weakness
duke@0 380 __ add(O7->after_save(), frame::pc_return_offset, Lscratch); // save the issuing PC
duke@0 381 BLOCK_COMMENT("call exception_handler_for_return_address");
twisti@1382 382 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), G2_thread, Lscratch);
duke@0 383 __ mov(O0, handler_reg);
duke@0 384 __ restore(); // compensates for compiler weakness
duke@0 385
duke@0 386 __ ld_ptr(exception_addr, Oexception);
duke@0 387 __ add(O7, frame::pc_return_offset, Oissuing_pc); // save the issuing PC
duke@0 388
duke@0 389 #ifdef ASSERT
duke@0 390 // make sure exception is set
duke@0 391 { Label L;
duke@0 392 __ br_notnull(Oexception, false, Assembler::pt, L);
duke@0 393 __ delayed()->nop();
duke@0 394 __ stop("StubRoutines::forward exception: no pending exception (2)");
duke@0 395 __ bind(L);
duke@0 396 }
duke@0 397 #endif
duke@0 398 // jump to exception handler
duke@0 399 __ jmp(handler_reg, 0);
duke@0 400 // clear pending exception
duke@0 401 __ delayed()->st_ptr(G0, exception_addr);
duke@0 402
duke@0 403 return start;
duke@0 404 }
duke@0 405
duke@0 406
duke@0 407 //------------------------------------------------------------------------------------------------------------------------
duke@0 408 // Continuation point for throwing of implicit exceptions that are not handled in
duke@0 409 // the current activation. Fabricates an exception oop and initiates normal
duke@0 410 // exception dispatching in this frame. Only callee-saved registers are preserved
duke@0 411 // (through the normal register window / RegisterMap handling).
duke@0 412 // If the compiler needs all registers to be preserved between the fault
duke@0 413 // point and the exception handler then it must assume responsibility for that in
duke@0 414 // AbstractCompiler::continuation_for_implicit_null_exception or
duke@0 415 // continuation_for_implicit_division_by_zero_exception. All other implicit
duke@0 416 // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are
duke@0 417 // either at call sites or otherwise assume that stack unwinding will be initiated,
duke@0 418 // so caller saved registers were assumed volatile in the compiler.
duke@0 419
duke@0 420 // Note that we generate only this stub into a RuntimeStub, because it needs to be
duke@0 421 // properly traversed and ignored during GC, so we change the meaning of the "__"
duke@0 422 // macro within this method.
duke@0 423 #undef __
duke@0 424 #define __ masm->
duke@0 425
duke@0 426 address generate_throw_exception(const char* name, address runtime_entry, bool restore_saved_exception_pc) {
duke@0 427 #ifdef ASSERT
duke@0 428 int insts_size = VerifyThread ? 1 * K : 600;
duke@0 429 #else
duke@0 430 int insts_size = VerifyThread ? 1 * K : 256;
duke@0 431 #endif /* ASSERT */
duke@0 432 int locs_size = 32;
duke@0 433
duke@0 434 CodeBuffer code(name, insts_size, locs_size);
duke@0 435 MacroAssembler* masm = new MacroAssembler(&code);
duke@0 436
duke@0 437 __ verify_thread();
duke@0 438
duke@0 439 // This is an inlined and slightly modified version of call_VM
duke@0 440 // which has the ability to fetch the return PC out of thread-local storage
duke@0 441 __ assert_not_delayed();
duke@0 442
duke@0 443 // Note that we always push a frame because on the SPARC
duke@0 444 // architecture, for all of our implicit exception kinds at call
duke@0 445 // sites, the implicit exception is taken before the callee frame
duke@0 446 // is pushed.
duke@0 447 __ save_frame(0);
duke@0 448
duke@0 449 int frame_complete = __ offset();
duke@0 450
duke@0 451 if (restore_saved_exception_pc) {
twisti@765 452 __ ld_ptr(G2_thread, JavaThread::saved_exception_pc_offset(), I7);
duke@0 453 __ sub(I7, frame::pc_return_offset, I7);
duke@0 454 }
duke@0 455
duke@0 456 // Note that we always have a runtime stub frame on the top of stack by this point
duke@0 457 Register last_java_sp = SP;
duke@0 458 // 64-bit last_java_sp is biased!
duke@0 459 __ set_last_Java_frame(last_java_sp, G0);
duke@0 460 if (VerifyThread) __ mov(G2_thread, O0); // about to be smashed; pass early
duke@0 461 __ save_thread(noreg);
duke@0 462 // do the call
duke@0 463 BLOCK_COMMENT("call runtime_entry");
duke@0 464 __ call(runtime_entry, relocInfo::runtime_call_type);
duke@0 465 if (!VerifyThread)
duke@0 466 __ delayed()->mov(G2_thread, O0); // pass thread as first argument
duke@0 467 else
duke@0 468 __ delayed()->nop(); // (thread already passed)
duke@0 469 __ restore_thread(noreg);
duke@0 470 __ reset_last_Java_frame();
duke@0 471
duke@0 472 // check for pending exceptions. use Gtemp as scratch register.
duke@0 473 #ifdef ASSERT
duke@0 474 Label L;
duke@0 475
twisti@765 476 Address exception_addr(G2_thread, Thread::pending_exception_offset());
duke@0 477 Register scratch_reg = Gtemp;
duke@0 478 __ ld_ptr(exception_addr, scratch_reg);
duke@0 479 __ br_notnull(scratch_reg, false, Assembler::pt, L);
duke@0 480 __ delayed()->nop();
duke@0 481 __ should_not_reach_here();
duke@0 482 __ bind(L);
duke@0 483 #endif // ASSERT
duke@0 484 BLOCK_COMMENT("call forward_exception_entry");
duke@0 485 __ call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
duke@0 486 // we use O7 linkage so that forward_exception_entry has the issuing PC
duke@0 487 __ delayed()->restore();
duke@0 488
duke@0 489 RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, masm->total_frame_size_in_bytes(0), NULL, false);
duke@0 490 return stub->entry_point();
duke@0 491 }
duke@0 492
duke@0 493 #undef __
duke@0 494 #define __ _masm->
duke@0 495
duke@0 496
duke@0 497 // Generate a routine that sets all the registers so we
duke@0 498 // can tell if the stop routine prints them correctly.
duke@0 499 address generate_test_stop() {
duke@0 500 StubCodeMark mark(this, "StubRoutines", "test_stop");
duke@0 501 address start = __ pc();
duke@0 502
duke@0 503 int i;
duke@0 504
duke@0 505 __ save_frame(0);
duke@0 506
duke@0 507 static jfloat zero = 0.0, one = 1.0;
duke@0 508
duke@0 509 // put addr in L0, then load through L0 to F0
duke@0 510 __ set((intptr_t)&zero, L0); __ ldf( FloatRegisterImpl::S, L0, 0, F0);
duke@0 511 __ set((intptr_t)&one, L0); __ ldf( FloatRegisterImpl::S, L0, 0, F1); // 1.0 to F1
duke@0 512
duke@0 513 // use add to put 2..18 in F2..F18
duke@0 514 for ( i = 2; i <= 18; ++i ) {
duke@0 515 __ fadd( FloatRegisterImpl::S, F1, as_FloatRegister(i-1), as_FloatRegister(i));
duke@0 516 }
duke@0 517
duke@0 518 // Now put double 2 in F16, double 18 in F18
duke@0 519 __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F2, F16 );
duke@0 520 __ ftof( FloatRegisterImpl::S, FloatRegisterImpl::D, F18, F18 );
duke@0 521
duke@0 522 // use add to put 20..32 in F20..F32
duke@0 523 for (i = 20; i < 32; i += 2) {
duke@0 524 __ fadd( FloatRegisterImpl::D, F16, as_FloatRegister(i-2), as_FloatRegister(i));
duke@0 525 }
duke@0 526
duke@0 527 // put 0..7 in i's, 8..15 in l's, 16..23 in o's, 24..31 in g's
duke@0 528 for ( i = 0; i < 8; ++i ) {
duke@0 529 if (i < 6) {
duke@0 530 __ set( i, as_iRegister(i));
duke@0 531 __ set(16 + i, as_oRegister(i));
duke@0 532 __ set(24 + i, as_gRegister(i));
duke@0 533 }
duke@0 534 __ set( 8 + i, as_lRegister(i));
duke@0 535 }
duke@0 536
duke@0 537 __ stop("testing stop");
duke@0 538
duke@0 539
duke@0 540 __ ret();
duke@0 541 __ delayed()->restore();
duke@0 542
duke@0 543 return start;
duke@0 544 }
duke@0 545
duke@0 546
duke@0 547 address generate_stop_subroutine() {
duke@0 548 StubCodeMark mark(this, "StubRoutines", "stop_subroutine");
duke@0 549 address start = __ pc();
duke@0 550
duke@0 551 __ stop_subroutine();
duke@0 552
duke@0 553 return start;
duke@0 554 }
duke@0 555
duke@0 556 address generate_flush_callers_register_windows() {
duke@0 557 StubCodeMark mark(this, "StubRoutines", "flush_callers_register_windows");
duke@0 558 address start = __ pc();
duke@0 559
duke@0 560 __ flush_windows();
duke@0 561 __ retl(false);
duke@0 562 __ delayed()->add( FP, STACK_BIAS, O0 );
duke@0 563 // The returned value must be a stack pointer whose register save area
duke@0 564 // is flushed, and will stay flushed while the caller executes.
duke@0 565
duke@0 566 return start;
duke@0 567 }
duke@0 568
duke@0 569 // Helper functions for v8 atomic operations.
duke@0 570 //
duke@0 571 void get_v8_oop_lock_ptr(Register lock_ptr_reg, Register mark_oop_reg, Register scratch_reg) {
duke@0 572 if (mark_oop_reg == noreg) {
duke@0 573 address lock_ptr = (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr();
duke@0 574 __ set((intptr_t)lock_ptr, lock_ptr_reg);
duke@0 575 } else {
duke@0 576 assert(scratch_reg != noreg, "just checking");
duke@0 577 address lock_ptr = (address)StubRoutines::Sparc::_v8_oop_lock_cache;
duke@0 578 __ set((intptr_t)lock_ptr, lock_ptr_reg);
duke@0 579 __ and3(mark_oop_reg, StubRoutines::Sparc::v8_oop_lock_mask_in_place, scratch_reg);
duke@0 580 __ add(lock_ptr_reg, scratch_reg, lock_ptr_reg);
duke@0 581 }
duke@0 582 }
duke@0 583
duke@0 584 void generate_v8_lock_prologue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
duke@0 585
duke@0 586 get_v8_oop_lock_ptr(lock_ptr_reg, mark_oop_reg, scratch_reg);
duke@0 587 __ set(StubRoutines::Sparc::locked, lock_reg);
duke@0 588 // Initialize yield counter
duke@0 589 __ mov(G0,yield_reg);
duke@0 590
duke@0 591 __ BIND(retry);
duke@0 592 __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
duke@0 593 __ br(Assembler::less, false, Assembler::pt, dontyield);
duke@0 594 __ delayed()->nop();
duke@0 595
duke@0 596 // This code can only be called from inside the VM; this
duke@0 597 // stub is only invoked from Atomic::add(). We do not
duke@0 598 // want to use call_VM, because _last_java_sp and such
duke@0 599 // must already be set.
duke@0 600 //
duke@0 601 // Save the regs and make space for a C call
duke@0 602 __ save(SP, -96, SP);
duke@0 603 __ save_all_globals_into_locals();
duke@0 604 BLOCK_COMMENT("call os::naked_sleep");
duke@0 605 __ call(CAST_FROM_FN_PTR(address, os::naked_sleep));
duke@0 606 __ delayed()->nop();
duke@0 607 __ restore_globals_from_locals();
duke@0 608 __ restore();
duke@0 609 // reset the counter
duke@0 610 __ mov(G0,yield_reg);
duke@0 611
duke@0 612 __ BIND(dontyield);
duke@0 613
duke@0 614 // try to get lock
duke@0 615 __ swap(lock_ptr_reg, 0, lock_reg);
duke@0 616
duke@0 617 // did we get the lock?
duke@0 618 __ cmp(lock_reg, StubRoutines::Sparc::unlocked);
duke@0 619 __ br(Assembler::notEqual, true, Assembler::pn, retry);
duke@0 620 __ delayed()->add(yield_reg,1,yield_reg);
duke@0 621
duke@0 622 // yes, got lock. do the operation here.
duke@0 623 }
duke@0 624
duke@0 625 void generate_v8_lock_epilogue(Register lock_reg, Register lock_ptr_reg, Register yield_reg, Label& retry, Label& dontyield, Register mark_oop_reg = noreg, Register scratch_reg = noreg) {
duke@0 626 __ st(lock_reg, lock_ptr_reg, 0); // unlock
duke@0 627 }
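Taken together, the prologue and epilogue implement a swap-based spinlock with a sleep back-off; a minimal C sketch, with swap() standing in for the V8 atomic swap instruction:

    int yields = 0;
    while (swap(lock_ptr, StubRoutines::Sparc::locked)
           != StubRoutines::Sparc::unlocked) {      // try to take the lock
      if (++yields >= V8AtomicOperationUnderLockSpinCount) {
        os::naked_sleep();                          // brief sleep instead of spinning
        yields = 0;
      }
    }
    // ... the atomic operation itself runs here, under the lock ...
    *lock_ptr = StubRoutines::Sparc::unlocked;      // epilogue: plain store releases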
duke@0 628
duke@0 629 // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
duke@0 630 //
duke@0 631 // Arguments :
duke@0 632 //
duke@0 633 // exchange_value: O0
duke@0 634 // dest: O1
duke@0 635 //
duke@0 636 // Results:
duke@0 637 //
duke@0 638 // O0: the value previously stored in dest
duke@0 639 //
duke@0 640 address generate_atomic_xchg() {
duke@0 641 StubCodeMark mark(this, "StubRoutines", "atomic_xchg");
duke@0 642 address start = __ pc();
duke@0 643
duke@0 644 if (UseCASForSwap) {
duke@0 645 // Use CAS instead of swap, just in case the MP hardware
duke@0 646 // prefers to work with just one kind of synch. instruction.
duke@0 647 Label retry;
duke@0 648 __ BIND(retry);
duke@0 649 __ mov(O0, O3); // scratch copy of exchange value
duke@0 650 __ ld(O1, 0, O2); // observe the previous value
duke@0 651 // try to replace O2 with O3
duke@0 652 __ cas_under_lock(O1, O2, O3,
duke@0 653 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
duke@0 654 __ cmp(O2, O3);
duke@0 655 __ br(Assembler::notEqual, false, Assembler::pn, retry);
duke@0 656 __ delayed()->nop();
duke@0 657
duke@0 658 __ retl(false);
duke@0 659 __ delayed()->mov(O2, O0); // report previous value to caller
duke@0 660
duke@0 661 } else {
duke@0 662 if (VM_Version::v9_instructions_work()) {
duke@0 663 __ retl(false);
duke@0 664 __ delayed()->swap(O1, 0, O0);
duke@0 665 } else {
duke@0 666 const Register& lock_reg = O2;
duke@0 667 const Register& lock_ptr_reg = O3;
duke@0 668 const Register& yield_reg = O4;
duke@0 669
duke@0 670 Label retry;
duke@0 671 Label dontyield;
duke@0 672
duke@0 673 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
duke@0 674 // got the lock, do the swap
duke@0 675 __ swap(O1, 0, O0);
duke@0 676
duke@0 677 generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
duke@0 678 __ retl(false);
duke@0 679 __ delayed()->nop();
duke@0 680 }
duke@0 681 }
duke@0 682
duke@0 683 return start;
duke@0 684 }
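In C terms the UseCASForSwap path is a compare-and-swap loop that emulates a swap; a sketch, with cas() standing in for cas_under_lock (it returns the value found in memory):

    jint xchg_via_cas(jint exchange_value, volatile jint* dest) {
      jint observed;
      do {
        observed = *dest;                                 // ld (O2)
      } while (cas(dest, observed, exchange_value) != observed);
      return observed;                                    // previous value (O0)
    }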
duke@0 685
duke@0 686
duke@0 687 // Support for jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value)
duke@0 688 //
duke@0 689 // Arguments :
duke@0 690 //
duke@0 691 // exchange_value: O0
duke@0 692 // dest: O1
duke@0 693 // compare_value: O2
duke@0 694 //
duke@0 695 // Results:
duke@0 696 //
duke@0 697 // O0: the value previously stored in dest
duke@0 698 //
duke@0 699 // Overwrites (v8): O3,O4,O5
duke@0 700 //
duke@0 701 address generate_atomic_cmpxchg() {
duke@0 702 StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg");
duke@0 703 address start = __ pc();
duke@0 704
duke@0 705 // cmpxchg(dest, compare_value, exchange_value)
duke@0 706 __ cas_under_lock(O1, O2, O0,
duke@0 707 (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
duke@0 708 __ retl(false);
duke@0 709 __ delayed()->nop();
duke@0 710
duke@0 711 return start;
duke@0 712 }
duke@0 713
duke@0 714 // Support for jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong *dest, jlong compare_value)
duke@0 715 //
duke@0 716 // Arguments :
duke@0 717 //
duke@0 718 // exchange_value: O1:O0
duke@0 719 // dest: O2
duke@0 720 // compare_value: O4:O3
duke@0 721 //
duke@0 722 // Results:
duke@0 723 //
duke@0 724 // O1:O0: the value previously stored in dest
duke@0 725 //
duke@0 726 // This only works on V9; on V8 we don't generate any
duke@0 727 // code and just return NULL.
duke@0 728 //
duke@0 729 // Overwrites: G1,G2,G3
duke@0 730 //
duke@0 731 address generate_atomic_cmpxchg_long() {
duke@0 732 StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long");
duke@0 733 address start = __ pc();
duke@0 734
duke@0 735 if (!VM_Version::supports_cx8())
duke@0 736 return NULL;
duke@0 737 __ sllx(O0, 32, O0);
duke@0 738 __ srl(O1, 0, O1);
duke@0 739 __ or3(O0,O1,O0); // O0 holds 64-bit value from exchange_value
duke@0 740 __ sllx(O3, 32, O3);
duke@0 741 __ srl(O4, 0, O4);
duke@0 742 __ or3(O3,O4,O3); // O3 holds 64-bit value from compare_value
duke@0 743 __ casx(O2, O3, O0);
duke@0 744 __ srl(O0, 0, O1); // unpacked return value in O1:O0
duke@0 745 __ retl(false);
duke@0 746 __ delayed()->srlx(O0, 32, O0);
duke@0 747
duke@0 748 return start;
duke@0 749 }
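The shift/or triples above only repack the two 32-bit register halves that the 32-bit ABI delivers into single 64-bit values for casx, and the return path unpacks the same way; in C:

    uint64_t packed = ((uint64_t)hi << 32) | (uint32_t)lo;   // sllx; srl; or3
    // casx compares *dest with the packed compare_value and, on a match,
    // stores the packed exchange_value; the previous value comes back packed
    // and is split with srl (low half -> O1) and srlx (high half -> O0).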
duke@0 750
duke@0 751
duke@0 752 // Support for jint Atomic::add(jint add_value, volatile jint* dest).
duke@0 753 //
duke@0 754 // Arguments :
duke@0 755 //
duke@0 756 // add_value: O0 (e.g., +1 or -1)
duke@0 757 // dest: O1
duke@0 758 //
duke@0 759 // Results:
duke@0 760 //
duke@0 761 // O0: the new value stored in dest
duke@0 762 //
duke@0 763 // Overwrites (v9): O3
duke@0 764 // Overwrites (v8): O3,O4,O5
duke@0 765 //
duke@0 766 address generate_atomic_add() {
duke@0 767 StubCodeMark mark(this, "StubRoutines", "atomic_add");
duke@0 768 address start = __ pc();
duke@0 769 __ BIND(_atomic_add_stub);
duke@0 770
duke@0 771 if (VM_Version::v9_instructions_work()) {
duke@0 772 Label retry;
duke@0 773 __ BIND(retry);
duke@0 774
duke@0 775 __ lduw(O1, 0, O2);
duke@0 776 __ add(O0, O2, O3);
duke@0 777 __ cas(O1, O2, O3);
duke@0 778 __ cmp( O2, O3);
duke@0 779 __ br(Assembler::notEqual, false, Assembler::pn, retry);
duke@0 780 __ delayed()->nop();
duke@0 781 __ retl(false);
duke@0 782 __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
duke@0 783 } else {
duke@0 784 const Register& lock_reg = O2;
duke@0 785 const Register& lock_ptr_reg = O3;
duke@0 786 const Register& value_reg = O4;
duke@0 787 const Register& yield_reg = O5;
duke@0 788
duke@0 789 Label retry;
duke@0 790 Label dontyield;
duke@0 791
duke@0 792 generate_v8_lock_prologue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
duke@0 793 // got lock, do the increment
duke@0 794 __ ld(O1, 0, value_reg);
duke@0 795 __ add(O0, value_reg, value_reg);
duke@0 796 __ st(value_reg, O1, 0);
duke@0 797
duke@0 798 // %%% only for RMO and PSO
duke@0 799 __ membar(Assembler::StoreStore);
duke@0 800
duke@0 801 generate_v8_lock_epilogue(lock_reg, lock_ptr_reg, yield_reg, retry, dontyield);
duke@0 802
duke@0 803 __ retl(false);
duke@0 804 __ delayed()->mov(value_reg, O0);
duke@0 805 }
duke@0 806
duke@0 807 return start;
duke@0 808 }
duke@0 809 Label _atomic_add_stub; // called from other stubs
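The V9 path is the classic CAS-based fetch-and-add; a minimal C sketch, with cas() again standing in for the hardware compare-and-swap (returning the observed memory value):

    jint add_via_cas(jint add_value, volatile jint* dest) {
      jint old;
      do {
        old = *dest;                                      // lduw (O2)
      } while (cas(dest, old, old + add_value) != old);
      return old + add_value;                             // new value, returned in O0
    }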
duke@0 810
duke@0 811
duke@0 812 //------------------------------------------------------------------------------------------------------------------------
duke@0 813 // The following routine generates a subroutine to throw an asynchronous
duke@0 814 // UnknownError when an unsafe access gets a fault that could not be
duke@0 815 // reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.)
duke@0 816 //
duke@0 817 // Arguments :
duke@0 818 //
duke@0 819 // trapping PC: O7
duke@0 820 //
duke@0 821 // Results:
duke@0 822 // posts an asynchronous exception, skips the trapping instruction
duke@0 823 //
duke@0 824
duke@0 825 address generate_handler_for_unsafe_access() {
duke@0 826 StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access");
duke@0 827 address start = __ pc();
duke@0 828
duke@0 829 const int preserve_register_words = (64 * 2);
twisti@765 830 Address preserve_addr(FP, (-preserve_register_words * wordSize) + STACK_BIAS);
duke@0 831
duke@0 832 Register Lthread = L7_thread_cache;
duke@0 833 int i;
duke@0 834
duke@0 835 __ save_frame(0);
duke@0 836 __ mov(G1, L1);
duke@0 837 __ mov(G2, L2);
duke@0 838 __ mov(G3, L3);
duke@0 839 __ mov(G4, L4);
duke@0 840 __ mov(G5, L5);
duke@0 841 for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
duke@0 842 __ stf(FloatRegisterImpl::D, as_FloatRegister(i), preserve_addr, i * wordSize);
duke@0 843 }
duke@0 844
duke@0 845 address entry_point = CAST_FROM_FN_PTR(address, handle_unsafe_access);
duke@0 846 BLOCK_COMMENT("call handle_unsafe_access");
duke@0 847 __ call(entry_point, relocInfo::runtime_call_type);
duke@0 848 __ delayed()->nop();
duke@0 849
duke@0 850 __ mov(L1, G1);
duke@0 851 __ mov(L2, G2);
duke@0 852 __ mov(L3, G3);
duke@0 853 __ mov(L4, G4);
duke@0 854 __ mov(L5, G5);
duke@0 855 for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) {
duke@0 856 __ ldf(FloatRegisterImpl::D, preserve_addr, as_FloatRegister(i), i * wordSize);
duke@0 857 }
duke@0 858
duke@0 859 __ verify_thread();
duke@0 860
duke@0 861 __ jmp(O0, 0);
duke@0 862 __ delayed()->restore();
duke@0 863
duke@0 864 return start;
duke@0 865 }
duke@0 866
duke@0 867
duke@0 868 // Support for uint StubRoutine::Sparc::partial_subtype_check( Klass sub, Klass super );
duke@0 869 // Arguments :
duke@0 870 //
duke@0 871 // ret : O0, returned
duke@0 872 // icc/xcc: set as O0 (depending on wordSize)
duke@0 873 // sub : O1, argument, not changed
duke@0 874 // super: O2, argument, not changed
duke@0 875 // raddr: O7, blown by call
duke@0 876 address generate_partial_subtype_check() {
coleenp@113 877 __ align(CodeEntryAlignment);
duke@0 878 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
duke@0 879 address start = __ pc();
jrose@682 880 Label miss;
duke@0 881
duke@0 882 #if defined(COMPILER2) && !defined(_LP64)
duke@0 883 // Do not use a 'save' because it blows the 64-bit O registers.
coleenp@113 884 __ add(SP,-4*wordSize,SP); // Make space for 4 temps (stack must be 2 words aligned)
duke@0 885 __ st_ptr(L0,SP,(frame::register_save_words+0)*wordSize);
duke@0 886 __ st_ptr(L1,SP,(frame::register_save_words+1)*wordSize);
duke@0 887 __ st_ptr(L2,SP,(frame::register_save_words+2)*wordSize);
duke@0 888 __ st_ptr(L3,SP,(frame::register_save_words+3)*wordSize);
duke@0 889 Register Rret = O0;
duke@0 890 Register Rsub = O1;
duke@0 891 Register Rsuper = O2;
duke@0 892 #else
duke@0 893 __ save_frame(0);
duke@0 894 Register Rret = I0;
duke@0 895 Register Rsub = I1;
duke@0 896 Register Rsuper = I2;
duke@0 897 #endif
duke@0 898
duke@0 899 Register L0_ary_len = L0;
duke@0 900 Register L1_ary_ptr = L1;
duke@0 901 Register L2_super = L2;
duke@0 902 Register L3_index = L3;
duke@0 903
jrose@682 904 __ check_klass_subtype_slow_path(Rsub, Rsuper,
jrose@682 905 L0, L1, L2, L3,
jrose@682 906 NULL, &miss);
jrose@682 907
jrose@682 908 // Match falls through here.
jrose@682 909 __ addcc(G0,0,Rret); // set Z flags, Z result
duke@0 910
duke@0 911 #if defined(COMPILER2) && !defined(_LP64)
duke@0 912 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
duke@0 913 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
duke@0 914 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
duke@0 915 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
duke@0 916 __ retl(); // Result in Rret is zero; flags set to Z
duke@0 917 __ delayed()->add(SP,4*wordSize,SP);
duke@0 918 #else
duke@0 919 __ ret(); // Result in Rret is zero; flags set to Z
duke@0 920 __ delayed()->restore();
duke@0 921 #endif
duke@0 922
duke@0 923 __ BIND(miss);
duke@0 924 __ addcc(G0,1,Rret); // set NZ flags, NZ result
duke@0 925
duke@0 926 #if defined(COMPILER2) && !defined(_LP64)
duke@0 927 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
duke@0 928 __ ld_ptr(SP,(frame::register_save_words+1)*wordSize,L1);
duke@0 929 __ ld_ptr(SP,(frame::register_save_words+2)*wordSize,L2);
duke@0 930 __ ld_ptr(SP,(frame::register_save_words+3)*wordSize,L3);
duke@0 931 __ retl(); // Result in Rret is != 0; flags set to NZ
duke@0 932 __ delayed()->add(SP,4*wordSize,SP);
duke@0 933 #else
duke@0 934 __ ret(); // Result in Rret is != 0; flags set to NZ
duke@0 935 __ delayed()->restore();
duke@0 936 #endif
duke@0 937
duke@0 938 return start;
duke@0 939 }
duke@0 940
duke@0 941
duke@0 942 // Called from MacroAssembler::verify_oop
duke@0 943 //
duke@0 944 address generate_verify_oop_subroutine() {
duke@0 945 StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");
duke@0 946
duke@0 947 address start = __ pc();
duke@0 948
duke@0 949 __ verify_oop_subroutine();
duke@0 950
duke@0 951 return start;
duke@0 952 }
duke@0 953
duke@0 954 static address disjoint_byte_copy_entry;
duke@0 955 static address disjoint_short_copy_entry;
duke@0 956 static address disjoint_int_copy_entry;
duke@0 957 static address disjoint_long_copy_entry;
duke@0 958 static address disjoint_oop_copy_entry;
duke@0 959
duke@0 960 static address byte_copy_entry;
duke@0 961 static address short_copy_entry;
duke@0 962 static address int_copy_entry;
duke@0 963 static address long_copy_entry;
duke@0 964 static address oop_copy_entry;
duke@0 965
duke@0 966 static address checkcast_copy_entry;
duke@0 967
duke@0 968 //
duke@0 969 // Verify that a register contains a clean 32-bit positive value
duke@0 970 // (high 32 bits are 0) so it can be used in 64-bit shifts (sllx, srax).
duke@0 971 //
duke@0 972 // Input:
duke@0 973 // Rint - 32-bits value
duke@0 974 // Rtmp - scratch
duke@0 975 //
duke@0 976 void assert_clean_int(Register Rint, Register Rtmp) {
duke@0 977 #if defined(ASSERT) && defined(_LP64)
duke@0 978 __ signx(Rint, Rtmp);
duke@0 979 __ cmp(Rint, Rtmp);
duke@0 980 __ breakpoint_trap(Assembler::notEqual, Assembler::xcc);
duke@0 981 #endif
duke@0 982 }
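In C terms the check is roughly

    assert((intptr_t)(jint)v == (intptr_t)v);   // what signx + cmp + trap verify

i.e. the register must equal the sign-extension of its own low word, so 64-bit shifts on it behave like 32-bit ones.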
duke@0 983
duke@0 984 //
duke@0 985 // Generate overlap test for array copy stubs
duke@0 986 //
duke@0 987 // Input:
duke@0 988 // O0 - array1
duke@0 989 // O1 - array2
duke@0 990 // O2 - element count
duke@0 991 //
duke@0 992 // Kills temps: O3, O4
duke@0 993 //
duke@0 994 void array_overlap_test(address no_overlap_target, int log2_elem_size) {
duke@0 995 assert(no_overlap_target != NULL, "must be generated");
duke@0 996 array_overlap_test(no_overlap_target, NULL, log2_elem_size);
duke@0 997 }
duke@0 998 void array_overlap_test(Label& L_no_overlap, int log2_elem_size) {
duke@0 999 array_overlap_test(NULL, &L_no_overlap, log2_elem_size);
duke@0 1000 }
duke@0 1001 void array_overlap_test(address no_overlap_target, Label* NOLp, int log2_elem_size) {
duke@0 1002 const Register from = O0;
duke@0 1003 const Register to = O1;
duke@0 1004 const Register count = O2;
duke@0 1005 const Register to_from = O3; // to - from
duke@0 1006 const Register byte_count = O4; // count << log2_elem_size
duke@0 1007
duke@0 1008 __ subcc(to, from, to_from);
duke@0 1009 __ sll_ptr(count, log2_elem_size, byte_count);
duke@0 1010 if (NOLp == NULL)
duke@0 1011 __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, no_overlap_target);
duke@0 1012 else
duke@0 1013 __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, (*NOLp));
duke@0 1014 __ delayed()->cmp(to_from, byte_count);
duke@0 1015 if (NOLp == NULL)
duke@0 1016 __ brx(Assembler::greaterEqual, false, Assembler::pt, no_overlap_target);
duke@0 1017 else
duke@0 1018 __ brx(Assembler::greaterEqual, false, Assembler::pt, (*NOLp));
duke@0 1019 __ delayed()->nop();
duke@0 1020 }
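The two branches amount to the standard unsigned-distance disjointness test; as one C expression (a sketch that is merely conservative when to == from):

    // unsigned wraparound makes a single compare cover both the to <= from
    // case and the arrays-far-apart case:
    bool no_overlap =
        (uintptr_t)(to - from) >= ((uintptr_t)count << log2_elem_size);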
duke@0 1021
duke@0 1022 //
duke@0 1023 // Generate pre-write barrier for array.
duke@0 1024 //
duke@0 1025 // Input:
duke@0 1026 // addr - register containing starting address
duke@0 1027 // count - register containing element count
duke@0 1028 // tmp - scratch register
duke@0 1029 //
duke@0 1030 // The input registers are overwritten.
duke@0 1031 //
duke@0 1032 void gen_write_ref_array_pre_barrier(Register addr, Register count) {
duke@0 1033 BarrierSet* bs = Universe::heap()->barrier_set();
duke@0 1034 if (bs->has_write_ref_pre_barrier()) {
duke@0 1035 assert(bs->has_write_ref_array_pre_opt(),
duke@0 1036 "Else unsupported barrier set.");
duke@0 1037
duke@0 1038 __ save_frame(0);
duke@0 1039 // Save the necessary global regs... will be used after.
ysr@345 1040 if (addr->is_global()) {
ysr@345 1041 __ mov(addr, L0);
ysr@345 1042 }
ysr@345 1043 if (count->is_global()) {
ysr@345 1044 __ mov(count, L1);
ysr@345 1045 }
ysr@345 1046 __ mov(addr->after_save(), O0);
duke@0 1047 // Get the count into O1
duke@0 1048 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
ysr@345 1049 __ delayed()->mov(count->after_save(), O1);
ysr@345 1050 if (addr->is_global()) {
ysr@345 1051 __ mov(L0, addr);
ysr@345 1052 }
ysr@345 1053 if (count->is_global()) {
ysr@345 1054 __ mov(L1, count);
ysr@345 1055 }
duke@0 1056 __ restore();
duke@0 1057 }
duke@0 1058 }
duke@0 1059 //
duke@0 1060 // Generate post-write barrier for array.
duke@0 1061 //
duke@0 1062 // Input:
duke@0 1063 // addr - register containing starting address
duke@0 1064 // count - register containing element count
duke@0 1065 // tmp - scratch register
duke@0 1066 //
duke@0 1067 // The input registers are overwritten.
duke@0 1068 //
duke@0 1069 void gen_write_ref_array_post_barrier(Register addr, Register count,
duke@0 1070 Register tmp) {
duke@0 1071 BarrierSet* bs = Universe::heap()->barrier_set();
duke@0 1072
duke@0 1073 switch (bs->kind()) {
duke@0 1074 case BarrierSet::G1SATBCT:
duke@0 1075 case BarrierSet::G1SATBCTLogging:
duke@0 1076 {
duke@0 1077 // Get some new fresh output registers.
duke@0 1078 __ save_frame(0);
ysr@345 1079 __ mov(addr->after_save(), O0);
duke@0 1080 __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
ysr@345 1081 __ delayed()->mov(count->after_save(), O1);
duke@0 1082 __ restore();
duke@0 1083 }
duke@0 1084 break;
duke@0 1085 case BarrierSet::CardTableModRef:
duke@0 1086 case BarrierSet::CardTableExtension:
duke@0 1087 {
duke@0 1088 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
duke@0 1089 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
duke@0 1090 assert_different_registers(addr, count, tmp);
duke@0 1091
duke@0 1092 Label L_loop;
duke@0 1093
coleenp@113 1094 __ sll_ptr(count, LogBytesPerHeapOop, count);
coleenp@113 1095 __ sub(count, BytesPerHeapOop, count);
duke@0 1096 __ add(count, addr, count);
duke@0 1097 // Use two shifts to clear out those low order two bits! (Cannot be optimized into one.)
duke@0 1098 __ srl_ptr(addr, CardTableModRefBS::card_shift, addr);
duke@0 1099 __ srl_ptr(count, CardTableModRefBS::card_shift, count);
duke@0 1100 __ sub(count, addr, count);
twisti@765 1101 AddressLiteral rs(ct->byte_map_base);
twisti@765 1102 __ set(rs, tmp);
duke@0 1103 __ BIND(L_loop);
twisti@765 1104 __ stb(G0, tmp, addr);
duke@0 1105 __ subcc(count, 1, count);
duke@0 1106 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
duke@0 1107 __ delayed()->add(addr, 1, addr);
twisti@765 1108 }
duke@0 1109 break;
duke@0 1110 case BarrierSet::ModRef:
duke@0 1111 break;
twisti@765 1112 default:
duke@0 1113 ShouldNotReachHere();
duke@0 1114 }
duke@0 1115 }
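The card-table arm dirties every card spanned by the stored range; roughly, in C (0 being the dirty-card value, which is why the loop stores G0):

    jbyte* first = ct->byte_map_base
                   + ((uintptr_t)addr >> CardTableModRefBS::card_shift);
    jbyte* last  = ct->byte_map_base
                   + (((uintptr_t)addr + count * BytesPerHeapOop - BytesPerHeapOop)
                      >> CardTableModRefBS::card_shift);
    for (jbyte* p = first; p <= last; p++) {
      *p = 0;   // dirty the card
    }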
duke@0 1116
duke@0 1117
duke@0 1118 // Copy big chunks forward with shift
duke@0 1119 //
duke@0 1120 // Inputs:
duke@0 1121 // from - source arrays
duke@0 1122 // to - destination array aligned to 8-bytes
duke@0 1123 // count - element count to copy, at least the count equivalent to 16 bytes
duke@0 1124 // count_dec - decrement of 'count' equivalent to 16 bytes
duke@0 1125 // L_copy_bytes - copy exit label
duke@0 1126 //
duke@0 1127 void copy_16_bytes_forward_with_shift(Register from, Register to,
duke@0 1128 Register count, int count_dec, Label& L_copy_bytes) {
duke@0 1129 Label L_loop, L_aligned_copy, L_copy_last_bytes;
duke@0 1130
duke@0 1131 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
duke@0 1132 __ andcc(from, 7, G1); // misaligned bytes
duke@0 1133 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
duke@0 1134 __ delayed()->nop();
duke@0 1135
duke@0 1136 const Register left_shift = G1; // left shift bit counter
duke@0 1137 const Register right_shift = G5; // right shift bit counter
duke@0 1138
duke@0 1139 __ sll(G1, LogBitsPerByte, left_shift);
duke@0 1140 __ mov(64, right_shift);
duke@0 1141 __ sub(right_shift, left_shift, right_shift);
duke@0 1142
duke@0 1143 //
duke@0 1144 // Load 2 aligned 8-bytes chunks and use one from previous iteration
duke@0 1145 // to form 2 aligned 8-bytes chunks to store.
duke@0 1146 //
duke@0 1147 __ deccc(count, count_dec); // Pre-decrement 'count'
duke@0 1148 __ andn(from, 7, from); // Align address
duke@0 1149 __ ldx(from, 0, O3);
duke@0 1150 __ inc(from, 8);
kvn@1454 1151 __ align(OptoLoopAlignment);
duke@0 1152 __ BIND(L_loop);
duke@0 1153 __ ldx(from, 0, O4);
duke@0 1154 __ deccc(count, count_dec); // Can we do next iteration after this one?
duke@0 1155 __ ldx(from, 8, G4);
duke@0 1156 __ inc(to, 16);
duke@0 1157 __ inc(from, 16);
duke@0 1158 __ sllx(O3, left_shift, O3);
duke@0 1159 __ srlx(O4, right_shift, G3);
duke@0 1160 __ bset(G3, O3);
duke@0 1161 __ stx(O3, to, -16);
duke@0 1162 __ sllx(O4, left_shift, O4);
duke@0 1163 __ srlx(G4, right_shift, G3);
duke@0 1164 __ bset(G3, O4);
duke@0 1165 __ stx(O4, to, -8);
duke@0 1166 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
duke@0 1167 __ delayed()->mov(G4, O3);
duke@0 1168
duke@0 1169 __ inccc(count, count_dec>>1 ); // + 8 bytes
duke@0 1170 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
duke@0 1171 __ delayed()->inc(count, count_dec>>1); // restore 'count'
duke@0 1172
duke@0 1173 // copy 8 bytes, part of them already loaded in O3
duke@0 1174 __ ldx(from, 0, O4);
duke@0 1175 __ inc(to, 8);
duke@0 1176 __ inc(from, 8);
duke@0 1177 __ sllx(O3, left_shift, O3);
duke@0 1178 __ srlx(O4, right_shift, G3);
duke@0 1179 __ bset(O3, G3);
duke@0 1180 __ stx(G3, to, -8);
duke@0 1181
duke@0 1182 __ BIND(L_copy_last_bytes);
duke@0 1183 __ srl(right_shift, LogBitsPerByte, right_shift); // misaligned bytes
duke@0 1184 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
duke@0 1185 __ delayed()->sub(from, right_shift, from); // restore address
duke@0 1186
duke@0 1187 __ BIND(L_aligned_copy);
duke@0 1188 }
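Each iteration splices two aligned 8-byte loads into one aligned store; since SPARC is big-endian, the earlier word supplies the high bits. One step, in C:

    // left_shift = 8 * (from & 7), right_shift = 64 - left_shift
    uint64_t merged = (prev << left_shift) | (next >> right_shift); // sllx/srlx/bset
    *(uint64_t*)to = merged;                                        // stx
    prev = next;                                                    // delay slot: mov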
duke@0 1189
duke@0 1190 // Copy big chunks backward with shift
duke@0 1191 //
duke@0 1192 // Inputs:
duke@0 1193 // end_from - source arrays end address
duke@0 1194 // end_to - destination array end address aligned to 8-bytes
duke@0 1195 // count - element count to copy, at least the count equivalent to 16 bytes
duke@0 1196 // count_dec - decrement of 'count' equivalent to 16 bytes
duke@0 1197 // L_aligned_copy - aligned copy exit label
duke@0 1198 // L_copy_bytes - copy exit label
duke@0 1199 //
duke@0 1200 void copy_16_bytes_backward_with_shift(Register end_from, Register end_to,
duke@0 1201 Register count, int count_dec,
duke@0 1202 Label& L_aligned_copy, Label& L_copy_bytes) {
duke@0 1203 Label L_loop, L_copy_last_bytes;
duke@0 1204
duke@0 1205 // if both arrays have the same alignment mod 8, do 8 bytes aligned copy
duke@0 1206 __ andcc(end_from, 7, G1); // misaligned bytes
duke@0 1207 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
duke@0 1208 __ delayed()->deccc(count, count_dec); // Pre-decrement 'count'
duke@0 1209
duke@0 1210 const Register left_shift = G1; // left shift bit counter
duke@0 1211 const Register right_shift = G5; // right shift bit counter
duke@0 1212
duke@0 1213 __ sll(G1, LogBitsPerByte, left_shift);
duke@0 1214 __ mov(64, right_shift);
duke@0 1215 __ sub(right_shift, left_shift, right_shift);
duke@0 1216
duke@0 1217 //
duke@0 1218 // Load 2 aligned 8-bytes chunks and use one from previous iteration
duke@0 1219 // to form 2 aligned 8-bytes chunks to store.
duke@0 1220 //
duke@0 1221 __ andn(end_from, 7, end_from); // Align address
duke@0 1222 __ ldx(end_from, 0, O3);
kvn@1454 1223 __ align(OptoLoopAlignment);
duke@0 1224 __ BIND(L_loop);
duke@0 1225 __ ldx(end_from, -8, O4);
duke@0 1226 __ deccc(count, count_dec); // Can we do next iteration after this one?
duke@0 1227 __ ldx(end_from, -16, G4);
duke@0 1228 __ dec(end_to, 16);
duke@0 1229 __ dec(end_from, 16);
duke@0 1230 __ srlx(O3, right_shift, O3);
duke@0 1231 __ sllx(O4, left_shift, G3);
duke@0 1232 __ bset(G3, O3);
duke@0 1233 __ stx(O3, end_to, 8);
duke@0 1234 __ srlx(O4, right_shift, O4);
duke@0 1235 __ sllx(G4, left_shift, G3);
duke@0 1236 __ bset(G3, O4);
duke@0 1237 __ stx(O4, end_to, 0);
duke@0 1238 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
duke@0 1239 __ delayed()->mov(G4, O3);
duke@0 1240
duke@0 1241 __ inccc(count, count_dec>>1 ); // + 8 bytes
duke@0 1242 __ brx(Assembler::negative, true, Assembler::pn, L_copy_last_bytes);
duke@0 1243 __ delayed()->inc(count, count_dec>>1); // restore 'count'
duke@0 1244
duke@0 1245 // copy 8 bytes, part of them already loaded in O3
duke@0 1246 __ ldx(end_from, -8, O4);
duke@0 1247 __ dec(end_to, 8);
duke@0 1248 __ dec(end_from, 8);
duke@0 1249 __ srlx(O3, right_shift, O3);
duke@0 1250 __ sllx(O4, left_shift, G3);
duke@0 1251 __ bset(O3, G3);
duke@0 1252 __ stx(G3, end_to, 0);
duke@0 1253
duke@0 1254 __ BIND(L_copy_last_bytes);
duke@0 1255 __ srl(left_shift, LogBitsPerByte, left_shift); // misaligned bytes
duke@0 1256 __ br(Assembler::always, false, Assembler::pt, L_copy_bytes);
duke@0 1257 __ delayed()->add(end_from, left_shift, end_from); // restore address
duke@0 1258 }
duke@0 1259
duke@0 1260 //
duke@0 1261 // Generate stub for disjoint byte copy. If "aligned" is true, the
duke@0 1262 // "from" and "to" addresses are assumed to be heapword aligned.
duke@0 1263 //
duke@0 1264 // Arguments for generated stub:
duke@0 1265 // from: O0
duke@0 1266 // to: O1
duke@0 1267 // count: O2 treated as signed
duke@0 1268 //
duke@0 1269 address generate_disjoint_byte_copy(bool aligned, const char * name) {
duke@0 1270 __ align(CodeEntryAlignment);
duke@0 1271 StubCodeMark mark(this, "StubRoutines", name);
duke@0 1272 address start = __ pc();
duke@0 1273
duke@0 1274 Label L_skip_alignment, L_align;
duke@0 1275 Label L_copy_byte, L_copy_byte_loop, L_exit;
duke@0 1276
duke@0 1277 const Register from = O0; // source array address
duke@0 1278 const Register to = O1; // destination array address
duke@0 1279 const Register count = O2; // elements count
duke@0 1280 const Register offset = O5; // offset from start of arrays
duke@0 1281 // O3, O4, G3, G4 are used as temp registers
duke@0 1282
duke@0 1283 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@0 1284
duke@0 1285 if (!aligned) disjoint_byte_copy_entry = __ pc();
duke@0 1286 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
duke@0 1287 if (!aligned) BLOCK_COMMENT("Entry:");
duke@0 1288
duke@0 1289 // for short arrays, just do single element copy
duke@0 1290 __ cmp(count, 23); // 16 + 7
duke@0 1291 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
duke@0 1292 __ delayed()->mov(G0, offset);
duke@0 1293
duke@0 1294 if (aligned) {
duke@0 1295 // 'aligned' == true when it is known statically during compilation
duke@0 1296 // of this arraycopy call site that both 'from' and 'to' addresses
duke@0 1297 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
duke@0 1298 //
duke@0 1299 // Aligned arrays have 4-byte alignment in the 32-bit VM
duke@0 1300 // and 8-byte alignment in the 64-bit VM, so this step is needed only in the 32-bit VM.
duke@0 1301 //
duke@0 1302 #ifndef _LP64
duke@0 1303 // copy a 4-bytes word if necessary to align 'to' to 8 bytes
duke@0 1304 __ andcc(to, 7, G0);
duke@0 1305 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment);
duke@0 1306 __ delayed()->ld(from, 0, O3);
duke@0 1307 __ inc(from, 4);
duke@0 1308 __ inc(to, 4);
duke@0 1309 __ dec(count, 4);
duke@0 1310 __ st(O3, to, -4);
duke@0 1311 __ BIND(L_skip_alignment);
duke@0 1312 #endif
duke@0 1313 } else {
duke@0 1314 // copy bytes to align 'to' on 8 byte boundary
duke@0 1315 __ andcc(to, 7, G1); // misaligned bytes
duke@0 1316 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@0 1317 __ delayed()->neg(G1);
duke@0 1318 __ inc(G1, 8); // bytes need to copy to next 8-bytes alignment
duke@0 1319 __ sub(count, G1, count);
duke@0 1320 __ BIND(L_align);
duke@0 1321 __ ldub(from, 0, O3);
duke@0 1322 __ deccc(G1);
duke@0 1323 __ inc(from);
duke@0 1324 __ stb(O3, to, 0);
duke@0 1325 __ br(Assembler::notZero, false, Assembler::pt, L_align);
duke@0 1326 __ delayed()->inc(to);
duke@0 1327 __ BIND(L_skip_alignment);
duke@0 1328 }
duke@0 1329 #ifdef _LP64
duke@0 1330 if (!aligned)
duke@0 1331 #endif
duke@0 1332 {
duke@0 1333 // Copy with shift 16 bytes per iteration if arrays do not have
duke@0 1334 // the same alignment mod 8, otherwise fall through to the next
duke@0 1335 // code for aligned copy.
duke@0 1336 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
duke@0 1337 // Also jump over aligned copy after the copy with shift completed.
duke@0 1338
duke@0 1339 copy_16_bytes_forward_with_shift(from, to, count, 16, L_copy_byte);
duke@0 1340 }
duke@0 1341
duke@0 1342 // Both arrays are 8-byte aligned, copy 16 bytes at a time
duke@0 1343 __ and3(count, 7, G4); // Save count
duke@0 1344 __ srl(count, 3, count);
duke@0 1345 generate_disjoint_long_copy_core(aligned);
duke@0 1346 __ mov(G4, count); // Restore count
duke@0 1347
duke@0 1348 // copy trailing bytes
duke@0 1349 __ BIND(L_copy_byte);
duke@0 1350 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
duke@0 1351 __ delayed()->nop();
kvn@1454 1352 __ align(OptoLoopAlignment);
duke@0 1353 __ BIND(L_copy_byte_loop);
duke@0 1354 __ ldub(from, offset, O3);
duke@0 1355 __ deccc(count);
duke@0 1356 __ stb(O3, to, offset);
duke@0 1357 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
duke@0 1358 __ delayed()->inc(offset);
duke@0 1359
duke@0 1360 __ BIND(L_exit);
duke@0 1361 // O3, O4 are used as temp registers
duke@0 1362 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
duke@0 1363 __ retl();
duke@0 1364 __ delayed()->mov(G0, O0); // return 0
duke@0 1365 return start;
duke@0 1366 }
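Put together, the stub's strategy reduces to the following C outline; a simplified sketch that only spells out the aligned bulk path (the stub takes the shifted 16-byte loop when 'from' and 'to' differ mod 8):

    #include <stdint.h>
    #include <string.h>

    static void disjoint_byte_copy_sketch(unsigned char* from, unsigned char* to, size_t count) {
      if (count >= 23) {                                   // same threshold as the stub
        while ((uintptr_t)to & 7) {                        // align 'to' to 8 bytes
          *to++ = *from++; --count;
        }
        if ((((uintptr_t)from ^ (uintptr_t)to) & 7) == 0) {
          while (count >= 8) {                             // aligned bulk path
            memcpy(to, from, 8);                           // one 8-byte move (ldx/stx)
            to += 8; from += 8; count -= 8;
          }
        }
        // differing alignment mod 8 takes copy_16_bytes_forward_with_shift instead
      }
      while (count--) *to++ = *from++;                     // trailing bytes
    }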
duke@0 1367
duke@0 1368 //
duke@0 1369 // Generate stub for conjoint byte copy. If "aligned" is true, the
duke@0 1370 // "from" and "to" addresses are assumed to be heapword aligned.
duke@0 1371 //
duke@0 1372 // Arguments for generated stub:
duke@0 1373 // from: O0
duke@0 1374 // to: O1
duke@0 1375 // count: O2 treated as signed
duke@0 1376 //
duke@0 1377 address generate_conjoint_byte_copy(bool aligned, const char * name) {
duke@0 1378 // Do reverse copy.
duke@0 1379
duke@0 1380 __ align(CodeEntryAlignment);
duke@0 1381 StubCodeMark mark(this, "StubRoutines", name);
duke@0 1382 address start = __ pc();
duke@0 1383 address nooverlap_target = aligned ?
duke@0 1384 StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
duke@0 1385 disjoint_byte_copy_entry;
duke@0 1386
duke@0 1387 Label L_skip_alignment, L_align, L_aligned_copy;
duke@0 1388 Label L_copy_byte, L_copy_byte_loop, L_exit;
duke@0 1389
duke@0 1390 const Register from = O0; // source array address
duke@0 1391 const Register to = O1; // destination array address
duke@0 1392 const Register count = O2; // elements count
duke@0 1393 const Register end_from = from; // source array end address
duke@0 1394 const Register end_to = to; // destination array end address
duke@0 1395
duke@0 1396 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@0 1397
duke@0 1398 if (!aligned) byte_copy_entry = __ pc();
duke@0 1399 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
duke@0 1400 if (!aligned) BLOCK_COMMENT("Entry:");
duke@0 1401
duke@0 1402 array_overlap_test(nooverlap_target, 0);
duke@0 1403
duke@0 1404 __ add(to, count, end_to); // offset after last copied element
duke@0 1405
duke@0 1406 // for short arrays, just do single element copy
duke@0 1407 __ cmp(count, 23); // 16 (min for shifted copy) + 7 (max alignment bytes)
duke@0 1408 __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
duke@0 1409 __ delayed()->add(from, count, end_from);
duke@0 1410
duke@0 1411 {
duke@0 1412 // Align the ends of the arrays, since they may be unaligned even
duke@0 1413 // when the arrays themselves are aligned.
duke@0 1414
duke@0 1415 // copy bytes to align 'end_to' on an 8-byte boundary
duke@0 1416 __ andcc(end_to, 7, G1); // misaligned bytes
duke@0 1417 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@0 1418 __ delayed()->nop();
duke@0 1419 __ sub(count, G1, count);
duke@0 1420 __ BIND(L_align);
duke@0 1421 __ dec(end_from);
duke@0 1422 __ dec(end_to);
duke@0 1423 __ ldub(end_from, 0, O3);
duke@0 1424 __ deccc(G1);
duke@0 1425 __ brx(Assembler::notZero, false, Assembler::pt, L_align);
duke@0 1426 __ delayed()->stb(O3, end_to, 0);
duke@0 1427 __ BIND(L_skip_alignment);
duke@0 1428 }
duke@0 1429 #ifdef _LP64
duke@0 1430 if (aligned) {
duke@0 1431 // Both arrays are aligned to 8 bytes in the 64-bit VM.
duke@0 1432 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
duke@0 1433 // in the unaligned case.
duke@0 1434 __ dec(count, 16);
duke@0 1435 } else
duke@0 1436 #endif
duke@0 1437 {
duke@0 1438 // Copy with shift, 16 bytes per iteration, if the arrays do not have
duke@0 1439 // the same alignment mod 8; otherwise jump to the next
duke@0 1440 // code for aligned copy (subtracting 16 from 'count' before the jump).
duke@0 1441 // The compare above (count >= 23) guarantees 'count' >= 16 bytes.
duke@0 1442 // Also jump over the aligned copy once the copy with shift has completed.
duke@0 1443
duke@0 1444 copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
duke@0 1445 L_aligned_copy, L_copy_byte);
duke@0 1446 }
duke@0 1447 // copy 16 elements (16 bytes) at a time
kvn@1454 1448 __ align(OptoLoopAlignment);
duke@0 1449 __ BIND(L_aligned_copy);
duke@0 1450 __ dec(end_from, 16);
duke@0 1451 __ ldx(end_from, 8, O3);
duke@0 1452 __ ldx(end_from, 0, O4);
duke@0 1453 __ dec(end_to, 16);
duke@0 1454 __ deccc(count, 16);
duke@0 1455 __ stx(O3, end_to, 8);
duke@0 1456 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
duke@0 1457 __ delayed()->stx(O4, end_to, 0);
duke@0 1458 __ inc(count, 16);
duke@0 1459
duke@0 1460 // copy 1 element (1 byte) at a time
duke@0 1461 __ BIND(L_copy_byte);
duke@0 1462 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
duke@0 1463 __ delayed()->nop();
kvn@1454 1464 __ align(OptoLoopAlignment);
duke@0 1465 __ BIND(L_copy_byte_loop);
duke@0 1466 __ dec(end_from);
duke@0 1467 __ dec(end_to);
duke@0 1468 __ ldub(end_from, 0, O4);
duke@0 1469 __ deccc(count);
duke@0 1470 __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
duke@0 1471 __ delayed()->stb(O4, end_to, 0);
duke@0 1472
duke@0 1473 __ BIND(L_exit);
duke@0 1474 // O3, O4 are used as temp registers
duke@0 1475 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr, O3, O4);
duke@0 1476 __ retl();
duke@0 1477 __ delayed()->mov(G0, O0); // return 0
duke@0 1478 return start;
duke@0 1479 }
duke@0 1480
duke@0 1481 //
duke@0 1482 // Generate stub for disjoint short copy. If "aligned" is true, the
duke@0 1483 // "from" and "to" addresses are assumed to be heapword aligned.
duke@0 1484 //
duke@0 1485 // Arguments for generated stub:
duke@0 1486 // from: O0
duke@0 1487 // to: O1
duke@0 1488 // count: O2 treated as signed
duke@0 1489 //
duke@0 1490 address generate_disjoint_short_copy(bool aligned, const char * name) {
duke@0 1491 __ align(CodeEntryAlignment);
duke@0 1492 StubCodeMark mark(this, "StubRoutines", name);
duke@0 1493 address start = __ pc();
duke@0 1494
duke@0 1495 Label L_skip_alignment, L_skip_alignment2;
duke@0 1496 Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
duke@0 1497
duke@0 1498 const Register from = O0; // source array address
duke@0 1499 const Register to = O1; // destination array address
duke@0 1500 const Register count = O2; // elements count
duke@0 1501 const Register offset = O5; // offset from start of arrays
duke@0 1502 // O3, O4, G3, G4 are used as temp registers
duke@0 1503
duke@0 1504 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@0 1505
duke@0 1506 if (!aligned) disjoint_short_copy_entry = __ pc();
duke@0 1507 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
duke@0 1508 if (!aligned) BLOCK_COMMENT("Entry:");
duke@0 1509
duke@0 1510 // for short arrays, just do single element copy
duke@0 1511 __ cmp(count, 11); // 8 + 3 (22 bytes)
duke@0 1512 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
duke@0 1513 __ delayed()->mov(G0, offset);
duke@0 1514
duke@0 1515 if (aligned) {
duke@0 1516 // 'aligned' == true when it is known statically during compilation
duke@0 1517 // of this arraycopy call site that both 'from' and 'to' addresses
duke@0 1518 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
duke@0 1519 //
duke@0 1520 // Aligned arrays have 4-byte alignment in the 32-bit VM
duke@0 1521 // and 8-byte alignment in the 64-bit VM.
duke@0 1522 //
duke@0 1523 #ifndef _LP64
duke@0 1524 // copy a 2-element word (4 bytes) if necessary to align 'to' to 8 bytes
duke@0 1525 __ andcc(to, 7, G0);
duke@0 1526 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@0 1527 __ delayed()->ld(from, 0, O3);
duke@0 1528 __ inc(from, 4);
duke@0 1529 __ inc(to, 4);
duke@0 1530 __ dec(count, 2);
duke@0 1531 __ st(O3, to, -4);
duke@0 1532 __ BIND(L_skip_alignment);
duke@0 1533 #endif
duke@0 1534 } else {
duke@0 1535 // copy 1 element if necessary to align 'to' on a 4-byte boundary
duke@0 1536 __ andcc(to, 3, G0);
duke@0 1537 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@0 1538 __ delayed()->lduh(from, 0, O3);
duke@0 1539 __ inc(from, 2);
duke@0 1540 __ inc(to, 2);
duke@0 1541 __ dec(count);
duke@0 1542 __ sth(O3, to, -2);
duke@0 1543 __ BIND(L_skip_alignment);
duke@0 1544
duke@0 1545 // copy 2 elements to align 'to' on an 8-byte boundary
duke@0 1546 __ andcc(to, 7, G0);
duke@0 1547 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
duke@0 1548 __ delayed()->lduh(from, 0, O3);
duke@0 1549 __ dec(count, 2);
duke@0 1550 __ lduh(from, 2, O4);
duke@0 1551 __ inc(from, 4);
duke@0 1552 __ inc(to, 4);
duke@0 1553 __ sth(O3, to, -4);
duke@0 1554 __ sth(O4, to, -2);
duke@0 1555 __ BIND(L_skip_alignment2);
duke@0 1556 }
duke@0 1557 #ifdef _LP64
duke@0 1558 if (!aligned)
duke@0 1559 #endif
duke@0 1560 {
duke@0 1561 // Copy with shift, 16 bytes per iteration, if the arrays do not have
duke@0 1562 // the same alignment mod 8; otherwise fall through to the next
duke@0 1563 // code for aligned copy.
duke@0 1564 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
duke@0 1565 // Also jump over the aligned copy once the copy with shift has completed.
duke@0 1566
duke@0 1567 copy_16_bytes_forward_with_shift(from, to, count, 8, L_copy_2_bytes);
duke@0 1568 }
duke@0 1569
duke@0 1570 // Both arrays are 8-byte aligned; copy 16 bytes at a time
duke@0 1571 __ and3(count, 3, G4); // Save
duke@0 1572 __ srl(count, 2, count);
duke@0 1573 generate_disjoint_long_copy_core(aligned);
duke@0 1574 __ mov(G4, count); // restore
duke@0 1575
duke@0 1576 // copy 1 element at a time
duke@0 1577 __ BIND(L_copy_2_bytes);
duke@0 1578 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
duke@0 1579 __ delayed()->nop();
kvn@1454 1580 __ align(OptoLoopAlignment);
duke@0 1581 __ BIND(L_copy_2_bytes_loop);
duke@0 1582 __ lduh(from, offset, O3);
duke@0 1583 __ deccc(count);
duke@0 1584 __ sth(O3, to, offset);
duke@0 1585 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
duke@0 1586 __ delayed()->inc(offset, 2);
duke@0 1587
duke@0 1588 __ BIND(L_exit);
duke@0 1589 // O3, O4 are used as temp registers
duke@0 1590 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
duke@0 1591 __ retl();
duke@0 1592 __ delayed()->mov(G0, O0); // return 0
duke@0 1593 return start;
duke@0 1594 }
duke@0 1595
duke@0 1596 //
duke@0 1597 // Generate stub for conjoint short copy. If "aligned" is true, the
duke@0 1598 // "from" and "to" addresses are assumed to be heapword aligned.
duke@0 1599 //
duke@0 1600 // Arguments for generated stub:
duke@0 1601 // from: O0
duke@0 1602 // to: O1
duke@0 1603 // count: O2 treated as signed
duke@0 1604 //
duke@0 1605 address generate_conjoint_short_copy(bool aligned, const char * name) {
duke@0 1606 // Do reverse copy.
duke@0 1607
duke@0 1608 __ align(CodeEntryAlignment);
duke@0 1609 StubCodeMark mark(this, "StubRoutines", name);
duke@0 1610 address start = __ pc();
duke@0 1611 address nooverlap_target = aligned ?
duke@0 1612 StubRoutines::arrayof_jshort_disjoint_arraycopy() :
duke@0 1613 disjoint_short_copy_entry;
duke@0 1614
duke@0 1615 Label L_skip_alignment, L_skip_alignment2, L_aligned_copy;
duke@0 1616 Label L_copy_2_bytes, L_copy_2_bytes_loop, L_exit;
duke@0 1617
duke@0 1618 const Register from = O0; // source array address
duke@0 1619 const Register to = O1; // destination array address
duke@0 1620 const Register count = O2; // elements count
duke@0 1621 const Register end_from = from; // source array end address
duke@0 1622 const Register end_to = to; // destination array end address
duke@0 1623
duke@0 1624 const Register byte_count = O3; // bytes count to copy
duke@0 1625
duke@0 1626 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@0 1627
duke@0 1628 if (!aligned) short_copy_entry = __ pc();
duke@0 1629 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
duke@0 1630 if (!aligned) BLOCK_COMMENT("Entry:");
duke@0 1631
duke@0 1632 array_overlap_test(nooverlap_target, 1);
duke@0 1633
duke@0 1634 __ sllx(count, LogBytesPerShort, byte_count);
duke@0 1635 __ add(to, byte_count, end_to); // offset after last copied element
duke@0 1636
duke@0 1637 // for short arrays, just do single element copy
duke@0 1638 __ cmp(count, 11); // 8 + 3 (22 bytes)
duke@0 1639 __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
duke@0 1640 __ delayed()->add(from, byte_count, end_from);
duke@0 1641
duke@0 1642 {
duke@0 1643 // Align the ends of the arrays, since they may be unaligned even
duke@0 1644 // when the arrays themselves are aligned.
duke@0 1645
duke@0 1646 // copy 1 element if necessary to align 'end_to' on a 4-byte boundary
duke@0 1647 __ andcc(end_to, 3, G0);
duke@0 1648 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@0 1649 __ delayed()->lduh(end_from, -2, O3);
duke@0 1650 __ dec(end_from, 2);
duke@0 1651 __ dec(end_to, 2);
duke@0 1652 __ dec(count);
duke@0 1653 __ sth(O3, end_to, 0);
duke@0 1654 __ BIND(L_skip_alignment);
duke@0 1655
duke@0 1656 // copy 2 elements to align 'end_to' on an 8-byte boundary
duke@0 1657 __ andcc(end_to, 7, G0);
duke@0 1658 __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
duke@0 1659 __ delayed()->lduh(end_from, -2, O3);
duke@0 1660 __ dec(count, 2);
duke@0 1661 __ lduh(end_from, -4, O4);
duke@0 1662 __ dec(end_from, 4);
duke@0 1663 __ dec(end_to, 4);
duke@0 1664 __ sth(O3, end_to, 2);
duke@0 1665 __ sth(O4, end_to, 0);
duke@0 1666 __ BIND(L_skip_alignment2);
duke@0 1667 }
duke@0 1668 #ifdef _LP64
duke@0 1669 if (aligned) {
duke@0 1670 // Both arrays are aligned to 8 bytes in the 64-bit VM.
duke@0 1671 // The 'count' is decremented in copy_16_bytes_backward_with_shift()
duke@0 1672 // in the unaligned case.
duke@0 1673 __ dec(count, 8);
duke@0 1674 } else
duke@0 1675 #endif
duke@0 1676 {
duke@0 1677 // Copy with shift, 16 bytes per iteration, if the arrays do not have
duke@0 1678 // the same alignment mod 8; otherwise jump to the next
duke@0 1679 // code for aligned copy (subtracting 8 from 'count' before the jump).
duke@0 1680 // The compare above (count >= 11) guarantees 'count' >= 16 bytes.
duke@0 1681 // Also jump over the aligned copy once the copy with shift has completed.
duke@0 1682
duke@0 1683 copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
duke@0 1684 L_aligned_copy, L_copy_2_bytes);
duke@0 1685 }
duke@0 1686 // copy 8 elements (16 bytes) at a time
kvn@1454 1687 __ align(OptoLoopAlignment);
duke@0 1688 __ BIND(L_aligned_copy);
duke@0 1689 __ dec(end_from, 16);
duke@0 1690 __ ldx(end_from, 8, O3);
duke@0 1691 __ ldx(end_from, 0, O4);
duke@0 1692 __ dec(end_to, 16);
duke@0 1693 __ deccc(count, 8);
duke@0 1694 __ stx(O3, end_to, 8);
duke@0 1695 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
duke@0 1696 __ delayed()->stx(O4, end_to, 0);
duke@0 1697 __ inc(count, 8);
duke@0 1698
duke@0 1699 // copy 1 element (2 bytes) at a time
duke@0 1700 __ BIND(L_copy_2_bytes);
duke@0 1701 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
duke@0 1702 __ delayed()->nop();
duke@0 1703 __ BIND(L_copy_2_bytes_loop);
duke@0 1704 __ dec(end_from, 2);
duke@0 1705 __ dec(end_to, 2);
duke@0 1706 __ lduh(end_from, 0, O4);
duke@0 1707 __ deccc(count);
duke@0 1708 __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
duke@0 1709 __ delayed()->sth(O4, end_to, 0);
duke@0 1710
duke@0 1711 __ BIND(L_exit);
duke@0 1712 // O3, O4 are used as temp registers
duke@0 1713 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
duke@0 1714 __ retl();
duke@0 1715 __ delayed()->mov(G0, O0); // return 0
duke@0 1716 return start;
duke@0 1717 }
duke@0 1718
duke@0 1719 //
duke@0 1720 // Generate core code for disjoint int copy (and oop copy on 32-bit).
duke@0 1721 // If "aligned" is true, the "from" and "to" addresses are assumed
duke@0 1722 // to be heapword aligned.
duke@0 1723 //
duke@0 1724 // Arguments:
duke@0 1725 // from: O0
duke@0 1726 // to: O1
duke@0 1727 // count: O2 treated as signed
duke@0 1728 //
duke@0 1729 void generate_disjoint_int_copy_core(bool aligned) {
duke@0 1730
duke@0 1731 Label L_skip_alignment, L_aligned_copy;
duke@0 1732 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
duke@0 1733
duke@0 1734 const Register from = O0; // source array address
duke@0 1735 const Register to = O1; // destination array address
duke@0 1736 const Register count = O2; // elements count
duke@0 1737 const Register offset = O5; // offset from start of arrays
duke@0 1738 // O3, O4, G3, G4 are used as temp registers
duke@0 1739
duke@0 1740 // 'aligned' == true when it is known statically during compilation
duke@0 1741 // of this arraycopy call site that both 'from' and 'to' addresses
duke@0 1742 // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
duke@0 1743 //
duke@0 1744 // Aligned arrays have 4-byte alignment in the 32-bit VM
duke@0 1745 // and 8-byte alignment in the 64-bit VM.
duke@0 1746 //
duke@0 1747 #ifdef _LP64
duke@0 1748 if (!aligned)
duke@0 1749 #endif
duke@0 1750 {
duke@0 1751 // The next check could be put under 'ifndef' since the code in
duke@0 1752 // generate_disjoint_long_copy_core() has its own checks and sets 'offset'.
duke@0 1753
duke@0 1754 // for short arrays, just do single element copy
duke@0 1755 __ cmp(count, 5); // 4 + 1 (20 bytes)
duke@0 1756 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
duke@0 1757 __ delayed()->mov(G0, offset);
duke@0 1758
duke@0 1759 // copy 1 element to align 'to' on an 8 byte boundary
duke@0 1760 __ andcc(to, 7, G0);
duke@0 1761 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@0 1762 __ delayed()->ld(from, 0, O3);
duke@0 1763 __ inc(from, 4);
duke@0 1764 __ inc(to, 4);
duke@0 1765 __ dec(count);
duke@0 1766 __ st(O3, to, -4);
duke@0 1767 __ BIND(L_skip_alignment);
duke@0 1768
duke@0 1769 // if arrays have same alignment mod 8, do 4 elements copy
duke@0 1770 __ andcc(from, 7, G0);
duke@0 1771 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
duke@0 1772 __ delayed()->ld(from, 0, O3);
duke@0 1773
duke@0 1774 //
duke@0 1775 // Load two aligned 8-byte chunks and use one from the previous iteration
duke@0 1776 // to form two aligned 8-byte chunks to store.
duke@0 1777 //
duke@0 1778 // copy_16_bytes_forward_with_shift() is not used here since this
duke@0 1779 // code is more efficient.
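// Worked example of the merge below, as a C sketch (big-endian; after the
// alignment step 'from' % 8 == 4, so from + 4 is 8-byte aligned; 'prev',
// 'w0', 'w1' are invented names for O3, O4, G4):
//   prev = *(juint*)from;           // initial misaligned word (O3)
//   loop:
//     w0 = *(julong*)(from + 4);    // aligned load (O4)
//     w1 = *(julong*)(from + 12);   // aligned load (G4)
//     *(julong*)(to)     = ((julong)prev << 32) | (w0 >> 32);
//     *(julong*)(to + 8) = (w0 << 32) | (w1 >> 32);
//     prev = (juint)w1; from += 16; to += 16;   // mov(G4, O3)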
duke@0 1780
duke@0 1781 // copy with shift 4 elements (16 bytes) at a time
duke@0 1782 __ dec(count, 4); // The cmp at the beginning guarantees count >= 4
duke@0 1783
kvn@1454 1784 __ align(OptoLoopAlignment);
duke@0 1785 __ BIND(L_copy_16_bytes);
duke@0 1786 __ ldx(from, 4, O4);
duke@0 1787 __ deccc(count, 4); // Can we do next iteration after this one?
duke@0 1788 __ ldx(from, 12, G4);
duke@0 1789 __ inc(to, 16);
duke@0 1790 __ inc(from, 16);
duke@0 1791 __ sllx(O3, 32, O3);
duke@0 1792 __ srlx(O4, 32, G3);
duke@0 1793 __ bset(G3, O3);
duke@0 1794 __ stx(O3, to, -16);
duke@0 1795 __ sllx(O4, 32, O4);
duke@0 1796 __ srlx(G4, 32, G3);
duke@0 1797 __ bset(G3, O4);
duke@0 1798 __ stx(O4, to, -8);
duke@0 1799 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
duke@0 1800 __ delayed()->mov(G4, O3);
duke@0 1801
duke@0 1802 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
duke@0 1803 __ delayed()->inc(count, 4); // restore 'count'
duke@0 1804
duke@0 1805 __ BIND(L_aligned_copy);
duke@0 1806 }
duke@0 1807 // copy 4 elements (16 bytes) at a time
duke@0 1808 __ and3(count, 1, G4); // Save
duke@0 1809 __ srl(count, 1, count);
duke@0 1810 generate_disjoint_long_copy_core(aligned);
duke@0 1811 __ mov(G4, count); // Restore
duke@0 1812
duke@0 1813 // copy 1 element at a time
duke@0 1814 __ BIND(L_copy_4_bytes);
duke@0 1815 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
duke@0 1816 __ delayed()->nop();
duke@0 1817 __ BIND(L_copy_4_bytes_loop);
duke@0 1818 __ ld(from, offset, O3);
duke@0 1819 __ deccc(count);
duke@0 1820 __ st(O3, to, offset);
duke@0 1821 __ brx(Assembler::notZero, false, Assembler::pt, L_copy_4_bytes_loop);
duke@0 1822 __ delayed()->inc(offset, 4);
duke@0 1823 __ BIND(L_exit);
duke@0 1824 }
duke@0 1825
duke@0 1826 //
duke@0 1827 // Generate stub for disjoint int copy. If "aligned" is true, the
duke@0 1828 // "from" and "to" addresses are assumed to be heapword aligned.
duke@0 1829 //
duke@0 1830 // Arguments for generated stub:
duke@0 1831 // from: O0
duke@0 1832 // to: O1
duke@0 1833 // count: O2 treated as signed
duke@0 1834 //
duke@0 1835 address generate_disjoint_int_copy(bool aligned, const char * name) {
duke@0 1836 __ align(CodeEntryAlignment);
duke@0 1837 StubCodeMark mark(this, "StubRoutines", name);
duke@0 1838 address start = __ pc();
duke@0 1839
duke@0 1840 const Register count = O2;
duke@0 1841 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@0 1842
duke@0 1843 if (!aligned) disjoint_int_copy_entry = __ pc();
duke@0 1844 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
duke@0 1845 if (!aligned) BLOCK_COMMENT("Entry:");
duke@0 1846
duke@0 1847 generate_disjoint_int_copy_core(aligned);
duke@0 1848
duke@0 1849 // O3, O4 are used as temp registers
duke@0 1850 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
duke@0 1851 __ retl();
duke@0 1852 __ delayed()->mov(G0, O0); // return 0
duke@0 1853 return start;
duke@0 1854 }
duke@0 1855
duke@0 1856 //
duke@0 1857 // Generate core code for conjoint int copy (and oop copy on 32-bit).
duke@0 1858 // If "aligned" is true, the "from" and "to" addresses are assumed
duke@0 1859 // to be heapword aligned.
duke@0 1860 //
duke@0 1861 // Arguments:
duke@0 1862 // from: O0
duke@0 1863 // to: O1
duke@0 1864 // count: O2 treated as signed
duke@0 1865 //
duke@0 1866 void generate_conjoint_int_copy_core(bool aligned) {
duke@0 1867 // Do reverse copy.
duke@0 1868
duke@0 1869 Label L_skip_alignment, L_aligned_copy;
duke@0 1870 Label L_copy_16_bytes, L_copy_4_bytes, L_copy_4_bytes_loop, L_exit;
duke@0 1871
duke@0 1872 const Register from = O0; // source array address
duke@0 1873 const Register to = O1; // destination array address
duke@0 1874 const Register count = O2; // elements count
duke@0 1875 const Register end_from = from; // source array end address
duke@0 1876 const Register end_to = to; // destination array end address
duke@0 1877 // O3, O4, O5, G3 are used as temp registers
duke@0 1878
duke@0 1879 const Register byte_count = O3; // bytes count to copy
duke@0 1880
duke@0 1881 __ sllx(count, LogBytesPerInt, byte_count);
duke@0 1882 __ add(to, byte_count, end_to); // offset after last copied element
duke@0 1883
duke@0 1884 __ cmp(count, 5); // for short arrays, just do single element copy
duke@0 1885 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_4_bytes);
duke@0 1886 __ delayed()->add(from, byte_count, end_from);
duke@0 1887
duke@0 1888 // copy 1 element to align 'to' on an 8 byte boundary
duke@0 1889 __ andcc(end_to, 7, G0);
duke@0 1890 __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
duke@0 1891 __ delayed()->nop();
duke@0 1892 __ dec(count);
duke@0 1893 __ dec(end_from, 4);
duke@0 1894 __ dec(end_to, 4);
duke@0 1895 __ ld(end_from, 0, O4);
duke@0 1896 __ st(O4, end_to, 0);
duke@0 1897 __ BIND(L_skip_alignment);
duke@0 1898
duke@0 1899 // Check if 'end_from' and 'end_to' have the same alignment.
duke@0 1900 __ andcc(end_from, 7, G0);
duke@0 1901 __ br(Assembler::zero, false, Assembler::pt, L_aligned_copy);
duke@0 1902 __ delayed()->dec(count, 4); // The cmp at the start guarantees count >= 4
duke@0 1903
duke@0 1904 // copy with shift 4 elements (16 bytes) at a time
duke@0 1905 //
duke@0 1906 // Load two aligned 8-byte chunks and use one from the previous iteration
duke@0 1907 // to form two aligned 8-byte chunks to store.
duke@0 1908 //
duke@0 1909 __ ldx(end_from, -4, O3);
kvn@1454 1910 __ align(OptoLoopAlignment);
duke@0 1911 __ BIND(L_copy_16_bytes);
duke@0 1912 __ ldx(end_from, -12, O4);
duke@0 1913 __ deccc(count, 4);
duke@0 1914 __ ldx(end_from, -20, O5);
duke@0 1915 __ dec(end_to, 16);
duke@0 1916 __ dec(end_from, 16);
duke@0 1917 __ srlx(O3, 32, O3);
duke@0 1918 __ sllx(O4, 32, G3);
duke@0 1919 __ bset(G3, O3);
duke@0 1920 __ stx(O3, end_to, 8);
duke@0 1921 __ srlx(O4, 32, O4);
duke@0 1922 __ sllx(O5, 32, G3);
duke@0 1923 __ bset(O4, G3);
duke@0 1924 __ stx(G3, end_to, 0);
duke@0 1925 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
duke@0 1926 __ delayed()->mov(O5, O3);
duke@0 1927
duke@0 1928 __ br(Assembler::always, false, Assembler::pt, L_copy_4_bytes);
duke@0 1929 __ delayed()->inc(count, 4);
duke@0 1930
duke@0 1931 // copy 4 elements (16 bytes) at a time
kvn@1454 1932 __ align(OptoLoopAlignment);
duke@0 1933 __ BIND(L_aligned_copy);
duke@0 1934 __ dec(end_from, 16);
duke@0 1935 __ ldx(end_from, 8, O3);
duke@0 1936 __ ldx(end_from, 0, O4);
duke@0 1937 __ dec(end_to, 16);
duke@0 1938 __ deccc(count, 4);
duke@0 1939 __ stx(O3, end_to, 8);
duke@0 1940 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
duke@0 1941 __ delayed()->stx(O4, end_to, 0);
duke@0 1942 __ inc(count, 4);
duke@0 1943
duke@0 1944 // copy 1 element (4 bytes) at a time
duke@0 1945 __ BIND(L_copy_4_bytes);
duke@0 1946 __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
duke@0 1947 __ delayed()->nop();
duke@0 1948 __ BIND(L_copy_4_bytes_loop);
duke@0 1949 __ dec(end_from, 4);
duke@0 1950 __ dec(end_to, 4);
duke@0 1951 __ ld(end_from, 0, O4);
duke@0 1952 __ deccc(count);
duke@0 1953 __ brx(Assembler::greater, false, Assembler::pt, L_copy_4_bytes_loop);
duke@0 1954 __ delayed()->st(O4, end_to, 0);
duke@0 1955 __ BIND(L_exit);
duke@0 1956 }
duke@0 1957
duke@0 1958 //
duke@0 1959 // Generate stub for conjoint int copy. If "aligned" is true, the
duke@0 1960 // "from" and "to" addresses are assumed to be heapword aligned.
duke@0 1961 //
duke@0 1962 // Arguments for generated stub:
duke@0 1963 // from: O0
duke@0 1964 // to: O1
duke@0 1965 // count: O2 treated as signed
duke@0 1966 //
duke@0 1967 address generate_conjoint_int_copy(bool aligned, const char * name) {
duke@0 1968 __ align(CodeEntryAlignment);
duke@0 1969 StubCodeMark mark(this, "StubRoutines", name);
duke@0 1970 address start = __ pc();
duke@0 1971
duke@0 1972 address nooverlap_target = aligned ?
duke@0 1973 StubRoutines::arrayof_jint_disjoint_arraycopy() :
duke@0 1974 disjoint_int_copy_entry;
duke@0 1975
duke@0 1976 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
duke@0 1977
duke@0 1978 if (!aligned) int_copy_entry = __ pc();
duke@0 1979 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
duke@0 1980 if (!aligned) BLOCK_COMMENT("Entry:");
duke@0 1981
duke@0 1982 array_overlap_test(nooverlap_target, 2);
duke@0 1983
duke@0 1984 generate_conjoint_int_copy_core(aligned);
duke@0 1985
duke@0 1986 // O3, O4 are used as temp registers
duke@0 1987 inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
duke@0 1988 __ retl();
duke@0 1989 __ delayed()->mov(G0, O0); // return 0
duke@0 1990 return start;
duke@0 1991 }
duke@0 1992
duke@0 1993 //
duke@0 1994 // Generate core code for disjoint long copy (and oop copy on 64-bit).
duke@0 1995 // "aligned" is ignored, because we must make the stronger
duke@0 1996 // assumption that both addresses are always 64-bit aligned.
duke@0 1997 //
duke@0 1998 // Arguments:
duke@0 1999 // from: O0
duke@0 2000 // to: O1
duke@0 2001 // count: O2 treated as signed
duke@0 2002 //
kvn@1453 2003 // count -= 2;
kvn@1453 2004 // if ( count >= 0 ) { // >= 2 elements
kvn@1453 2005 // if ( count > 6) { // >= 8 elements
kvn@1453 2006 // count -= 6; // original count - 8
kvn@1453 2007 // do {
kvn@1453 2008 // copy_8_elements;
kvn@1453 2009 // count -= 8;
kvn@1453 2010 // } while ( count >= 0 );
kvn@1453 2011 // count += 6;
kvn@1453 2012 // }
kvn@1453 2013 // if ( count >= 0 ) { // >= 2 elements
kvn@1453 2014 // do {
kvn@1453 2015 // copy_2_elements;
kvn@1453 2016 // } while ( (count=count-2) >= 0 );
kvn@1453 2017 // }
kvn@1453 2018 // }
kvn@1453 2019 // count += 2;
kvn@1453 2020 // if ( count != 0 ) { // 1 element left
kvn@1453 2021 // copy_1_element;
kvn@1453 2022 // }
kvn@1453 2023 //
duke@0 2024 void generate_disjoint_long_copy_core(bool aligned) {
duke@0 2025 Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
duke@0 2026 const Register from = O0; // source array address
duke@0 2027 const Register to = O1; // destination array address
duke@0 2028 const Register count = O2; // elements count
duke@0 2029 const Register offset0 = O4; // element offset
duke@0 2030 const Register offset8 = O5; // next element offset
duke@0 2031
duke@0 2032 __ deccc(count, 2);
duke@0 2033 __ mov(G0, offset0); // offset from start of arrays (0)
duke@0 2034 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
duke@0 2035 __ delayed()->add(offset0, 8, offset8);
kvn@1453 2036
kvn@1453 2037 // Copy in 64-byte chunks
kvn@1453 2038 Label L_copy_64_bytes;
kvn@1453 2039 const Register from64 = O3; // source address
kvn@1453 2040 const Register to64 = G3; // destination address
kvn@1453 2041 __ subcc(count, 6, O3);
kvn@1453 2042 __ brx(Assembler::negative, false, Assembler::pt, L_copy_16_bytes );
kvn@1453 2043 __ delayed()->mov(to, to64);
kvn@1453 2044 // Now we can use O4(offset0), O5(offset8) as temps
kvn@1453 2045 __ mov(O3, count);
kvn@1453 2046 __ mov(from, from64);
kvn@1453 2047
kvn@1454 2048 __ align(OptoLoopAlignment);
kvn@1453 2049 __ BIND(L_copy_64_bytes);
kvn@1453 2050 for( int off = 0; off < 64; off += 16 ) {
kvn@1453 2051 __ ldx(from64, off+0, O4);
kvn@1453 2052 __ ldx(from64, off+8, O5);
kvn@1453 2053 __ stx(O4, to64, off+0);
kvn@1453 2054 __ stx(O5, to64, off+8);
kvn@1453 2055 }
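// The C++ for-loop above is generator-time unrolling: it emits four
// 16-byte ldx/ldx/stx/stx groups, so each iteration of the emitted
// loop moves 64 bytes before the deccc/brx back-branch.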
kvn@1453 2056 __ deccc(count, 8);
kvn@1453 2057 __ inc(from64, 64);
kvn@1453 2058 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_64_bytes);
kvn@1453 2059 __ delayed()->inc(to64, 64);
kvn@1453 2060
kvn@1453 2061 // Restore O4(offset0), O5(offset8)
kvn@1453 2062 __ sub(from64, from, offset0);
kvn@1453 2063 __ inccc(count, 6);
kvn@1453 2064 __ brx(Assembler::negative, false, Assembler::pn, L_copy_8_bytes );
kvn@1453 2065 __ delayed()->add(offset0, 8, offset8);
kvn@1453 2066
kvn@1453 2067 // Copy in 16-byte chunks
kvn@1454 2068 __ align(OptoLoopAlignment);
duke@0 2069 __ BIND(L_copy_16_bytes);
duke@0 2070 __ ldx(from, offset0, O3);
duke@0 2071 __ ldx(from, offset8, G3);
duke@0 2072 __ deccc(count, 2);
duke@0 2073 __ stx(O3, to, offset0);
duke@0 2074 __ inc(offset0, 16);
duke@0 2075 __ stx(G3, to, offset8);
duke@0 2076 __ brx(Assembler::greaterEqual, false, Assembler::pt, L_copy_16_bytes);
duke@0 2077 __ delayed()->inc(offset8, 16);
duke@0 2078
kvn@1453 2079 // Copy last 8 bytes
duke@0 2080 __ BIND(L_copy_8_bytes);
duke@0 2081 __ inccc(count, 2);
duke@0 2082 __ brx(Assembler::zero, true, Assembler::pn, L_exit );
duke@0 2083 __ delayed()->mov(offset0, offset8); // Set O5 used by other stubs
duke@0 2084 __ ldx(from, offset0, O3);
duke@0 2085 __ stx(O3, to, offset0);
duke@0 2086 __ BIND(L_exit);
duke@0 2087 }
duke@0 2088
duke@0 2089 //
duke@0 2090 // Generate stub for disjoint long copy.
duke@0 2091 // "aligned" is ignored, because we must make the stronger
duke@0 2092 // assumption that both addresses are always 64-bit aligned.
duke@0 2093 //
duke@0 2094 // Arguments for generated stub:
duke@0 2095 // from: O0
duke@0 2096 // to: O1
duke@0 2097 // count: O2 treated as signed
duke@0 2098 //
duke@0 2099 address generate_disjoint_long_copy(bool aligned, const char * name) {
duke@0 2100 __ align(CodeEntryAlignment);
duke@0 2101 StubCodeMark mark(this, "StubRoutines", name);
duke@0 2102 address start = __ pc();
duke@0 2103
duke@0 2104 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
duke@0 2105
duke@0 2106 if (!aligned) disjoint_long_copy_entry = __ pc();
duke@0 2107 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
duke@0 2108 if (!aligned) BLOCK_COMMENT("Entry:");
duke@0 2109
duke@0 2110 generate_disjoint_long_copy_core(aligned);
duke@0 2111
duke@0 2112 // O3, O4 are used as temp registers
duke@0 2113 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
duke@0 2114 __ retl();
duke@0 2115 __ delayed()->mov(G0, O0); // return 0
duke@0 2116 return start;
duke@0 2117 }
duke@0 2118
duke@0 2119 //
duke@0 2120 // Generate core code for conjoint long copy (and oop copy on 64-bit).
duke@0 2121 // "aligned" is ignored, because we must make the stronger
duke@0 2122 // assumption that both addresses are always 64-bit aligned.
duke@0 2123 //
duke@0 2124 // Arguments:
duke@0 2125 // from: O0
duke@0 2126 // to: O1
duke@0 2127 // count: O2 treated as signed
duke@0 2128 //
duke@0 2129 void generate_conjoint_long_copy_core(bool aligned) {
duke@0 2130 // Do reverse copy.
duke@0 2131 Label L_copy_8_bytes, L_copy_16_bytes, L_exit;
duke@0 2132 const Register from = O0; // source array address
duke@0 2133 const Register to = O1; // destination array address
duke@0 2134 const Register count = O2; // elements count
duke@0 2135 const Register offset8 = O4; // element offset
duke@0 2136 const Register offset0 = O5; // previous element offset
duke@0 2137
duke@0 2138 __ subcc(count, 1, count);
duke@0 2139 __ brx(Assembler::lessEqual, false, Assembler::pn, L_copy_8_bytes );
duke@0 2140 __ delayed()->sllx(count, LogBytesPerLong, offset8);
duke@0 2141 __ sub(offset8, 8, offset0);
kvn@1454 2142 __ align(OptoLoopAlignment);
duke@0 2143 __ BIND(L_copy_16_bytes);
duke@0 2144 __ ldx(from, offset8, O2);
duke@0 2145 __ ldx(from, offset0, O3);
duke@0 2146 __ stx(O2, to, offset8);
duke@0 2147 __ deccc(offset8, 16); // use offset8 as counter
duke@0 2148 __ stx(O3, to, offset0);
duke@0 2149 __ brx(Assembler::greater, false, Assembler::pt, L_copy_16_bytes);
duke@0 2150 __ delayed()->dec(offset0, 16);
duke@0 2151
duke@0 2152 __ BIND(L_copy_8_bytes);
duke@0 2153 __ brx(Assembler::negative, false, Assembler::pn, L_exit );
duke@0 2154 __ delayed()->nop();
duke@0 2155 __ ldx(from, 0, O3);
duke@0 2156 __ stx(O3, to, 0);
duke@0 2157 __ BIND(L_exit);
duke@0 2158 }
duke@0 2159
duke@0 2160 // Generate stub for conjoint long copy.
duke@0 2161 // "aligned" is ignored, because we must make the stronger
duke@0 2162 // assumption that both addresses are always 64-bit aligned.
duke@0 2163 //
duke@0 2164 // Arguments for generated stub:
duke@0 2165 // from: O0
duke@0 2166 // to: O1
duke@0 2167 // count: O2 treated as signed
duke@0 2168 //
duke@0 2169 address generate_conjoint_long_copy(bool aligned, const char * name) {
duke@0 2170 __ align(CodeEntryAlignment);
duke@0 2171 StubCodeMark mark(this, "StubRoutines", name);
duke@0 2172 address start = __ pc();
duke@0 2173
duke@0 2174 assert(!aligned, "usage");
duke@0 2175 address nooverlap_target = disjoint_long_copy_entry;
duke@0 2176
duke@0 2177 assert_clean_int(O2, O3); // Make sure 'count' is clean int.
duke@0 2178
duke@0 2179 if (!aligned) long_copy_entry = __ pc();
duke@0 2180 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
duke@0 2181 if (!aligned) BLOCK_COMMENT("Entry:");
duke@0 2182
duke@0 2183 array_overlap_test(nooverlap_target, 3);
duke@0 2184
duke@0 2185 generate_conjoint_long_copy_core(aligned);
duke@0 2186
duke@0 2187 // O3, O4 are used as temp registers
duke@0 2188 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
duke@0 2189 __ retl();
duke@0 2190 __ delayed()->mov(G0, O0); // return 0
duke@0 2191 return start;
duke@0 2192 }
duke@0 2193
duke@0 2194 // Generate stub for disjoint oop copy. If "aligned" is true, the
duke@0 2195 // "from" and "to" addresses are assumed to be heapword aligned.
duke@0 2196 //
duke@0 2197 // Arguments for generated stub:
duke@0 2198 // from: O0
duke@0 2199 // to: O1
duke@0 2200 // count: O2 treated as signed
duke@0 2201 //
duke@0 2202 address generate_disjoint_oop_copy(bool aligned, const char * name) {
duke@0 2203
duke@0 2204 const Register from = O0; // source array address
duke@0 2205 const Register to = O1; // destination array address
duke@0 2206 const Register count = O2; // elements count
duke@0 2207
duke@0 2208 __ align(CodeEntryAlignment);
duke@0 2209 StubCodeMark mark(this, "StubRoutines", name);
duke@0 2210 address start = __ pc();
duke@0 2211
duke@0 2212 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@0 2213
duke@0 2214 if (!aligned) disjoint_oop_copy_entry = __ pc();
duke@0 2215 // caller can pass a 64-bit byte count here
duke@0 2216 if (!aligned) BLOCK_COMMENT("Entry:");
duke@0 2217
duke@0 2218 // save arguments for barrier generation
duke@0 2219 __ mov(to, G1);
duke@0 2220 __ mov(count, G5);
duke@0 2221 gen_write_ref_array_pre_barrier(G1, G5);
duke@0 2222 #ifdef _LP64
coleenp@113 2223 assert_clean_int(count, O3); // Make sure 'count' is clean int.
coleenp@113 2224 if (UseCompressedOops) {
coleenp@113 2225 generate_disjoint_int_copy_core(aligned);
coleenp@113 2226 } else {
coleenp@113 2227 generate_disjoint_long_copy_core(aligned);
coleenp@113 2228 }
duke@0 2229 #else
duke@0 2230 generate_disjoint_int_copy_core(aligned);
duke@0 2231 #endif
duke@0 2232 // O0 is used as temp register
duke@0 2233 gen_write_ref_array_post_barrier(G1, G5, O0);
duke@0 2234
duke@0 2235 // O3, O4 are used as temp registers
duke@0 2236 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
duke@0 2237 __ retl();
duke@0 2238 __ delayed()->mov(G0, O0); // return 0
duke@0 2239 return start;
duke@0 2240 }
duke@0 2241
duke@0 2242 // Generate stub for conjoint oop copy. If "aligned" is true, the
duke@0 2243 // "from" and "to" addresses are assumed to be heapword aligned.
duke@0 2244 //
duke@0 2245 // Arguments for generated stub:
duke@0 2246 // from: O0
duke@0 2247 // to: O1
duke@0 2248 // count: O2 treated as signed
duke@0 2249 //
duke@0 2250 address generate_conjoint_oop_copy(bool aligned, const char * name) {
duke@0 2251
duke@0 2252 const Register from = O0; // source array address
duke@0 2253 const Register to = O1; // destination array address
duke@0 2254 const Register count = O2; // elements count
duke@0 2255
duke@0 2256 __ align(CodeEntryAlignment);
duke@0 2257 StubCodeMark mark(this, "StubRoutines", name);
duke@0 2258 address start = __ pc();
duke@0 2259
duke@0 2260 assert_clean_int(count, O3); // Make sure 'count' is clean int.
duke@0 2261
duke@0 2262 if (!aligned) oop_copy_entry = __ pc();
duke@0 2263 // caller can pass a 64-bit byte count here
duke@0 2264 if (!aligned) BLOCK_COMMENT("Entry:");
duke@0 2265
duke@0 2266 // save arguments for barrier generation
duke@0 2267 __ mov(to, G1);
duke@0 2268 __ mov(count, G5);
duke@0 2269
duke@0 2270 gen_write_ref_array_pre_barrier(G1, G5);
duke@0 2271
duke@0 2272 address nooverlap_target = aligned ?
duke@0 2273 StubRoutines::arrayof_oop_disjoint_arraycopy() :
duke@0 2274 disjoint_oop_copy_entry;
duke@0 2275
coleenp@113 2276 array_overlap_test(nooverlap_target, LogBytesPerHeapOop);
duke@0 2277
duke@0 2278 #ifdef _LP64
coleenp@113 2279 if (UseCompressedOops) {
coleenp@113 2280 generate_conjoint_int_copy_core(aligned);
coleenp@113 2281 } else {
coleenp@113 2282 generate_conjoint_long_copy_core(aligned);
coleenp@113 2283 }
duke@0 2284 #else
duke@0 2285 generate_conjoint_int_copy_core(aligned);
duke@0 2286 #endif
duke@0 2287
duke@0 2288 // O0 is used as temp register
duke@0 2289 gen_write_ref_array_post_barrier(G1, G5, O0);
duke@0 2290
duke@0 2291 // O3, O4 are used as temp registers
duke@0 2292 inc_counter_np(SharedRuntime::_oop_array_copy_ctr, O3, O4);
duke@0 2293 __ retl();
duke@0 2294 __ delayed()->mov(G0, O0); // return 0
duke@0 2295 return start;
duke@0 2296 }
duke@0 2297
duke@0 2298
duke@0 2299 // Helper for generating a dynamic type check.
duke@0 2300 // Smashes only the given temp registers.
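// Rough shape of the emitted check, as a hedged sketch (the fast path is
// check_klass_subtype_fast_path(); the slow path scans the secondary
// supers; 'sc_offset' stands for the secondary super cache offset):
//   if (sub_klass == super_klass)                         goto L_success;
//   if (*(sub_klass + super_check_offset) == super_klass) goto L_success;
//   if (super_check_offset != sc_offset)                  goto L_miss;
//   scan sub_klass->secondary_supers for super_klass      (slow path)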
duke@0 2301 void generate_type_check(Register sub_klass,
duke@0 2302 Register super_check_offset,
duke@0 2303 Register super_klass,
duke@0 2304 Register temp,
jrose@682 2305 Label& L_success) {
duke@0 2306 assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
duke@0 2307
duke@0 2308 BLOCK_COMMENT("type_check:");
duke@0 2309
jrose@682 2310 Label L_miss, L_pop_to_miss;
duke@0 2311
duke@0 2312 assert_clean_int(super_check_offset, temp);
duke@0 2313
jrose@682 2314 __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
jrose@682 2315 &L_success, &L_miss, NULL,
jrose@682 2316 super_check_offset);
jrose@682 2317
jrose@682 2318 BLOCK_COMMENT("type_check_slow_path:");
duke@0 2319 __ save_frame(0);
jrose@682 2320 __ check_klass_subtype_slow_path(sub_klass->after_save(),
jrose@682 2321 super_klass->after_save(),
jrose@682 2322 L0, L1, L2, L4,
jrose@682 2323 NULL, &L_pop_to_miss);
jrose@682 2324 __ ba(false, L_success);
jrose@682 2325 __ delayed()->restore();
jrose@682 2326
jrose@682 2327 __ bind(L_pop_to_miss);
duke@0 2328 __ restore();
duke@0 2329
duke@0 2330 // Fall through on failure!
duke@0 2331 __ BIND(L_miss);
duke@0 2332 }
duke@0 2333
duke@0 2334
duke@0 2335 // Generate stub for checked oop copy.
duke@0 2336 //
duke@0 2337 // Arguments for generated stub:
duke@0 2338 // from: O0
duke@0 2339 // to: O1
duke@0 2340 // count: O2 treated as signed
duke@0 2341 // ckoff: O3 (super_check_offset)
duke@0 2342 // ckval: O4 (super_klass)
duke@0 2343 // ret: O0 zero for success; (-1^K) where K is partial transfer count
duke@0 2344 //
duke@0 2345 address generate_checkcast_copy(const char* name) {
duke@0 2346
duke@0 2347 const Register O0_from = O0; // source array address
duke@0 2348 const Register O1_to = O1; // destination array address
duke@0 2349 const Register O2_count = O2; // elements count
duke@0 2350 const Register O3_ckoff = O3; // super_check_offset
duke@0 2351 const Register O4_ckval = O4; // super_klass
duke@0 2352
duke@0 2353 const Register O5_offset = O5; // loop var, with stride wordSize
duke@0 2354 const Register G1_remain = G1; // loop var, with stride -1
duke@0 2355 const Register G3_oop = G3; // actual oop copied
duke@0 2356 const Register G4_klass = G4; // oop._klass
duke@0 2357 const Register G5_super = G5; // oop._klass._primary_supers[ckval]
duke@0 2358
duke@0 2359 __ align(CodeEntryAlignment);
duke@0 2360 StubCodeMark mark(this, "StubRoutines", name);
duke@0 2361 address start = __ pc();
duke@0 2362
ysr@345 2363 gen_write_ref_array_pre_barrier(O1, O2);
duke@0 2364
duke@0 2365 #ifdef ASSERT
jrose@682 2366 // We sometimes save a frame (see generate_type_check below).
duke@0 2367 // If this will cause trouble, let's fail now instead of later.
duke@0 2368 __ save_frame(0);
duke@0 2369 __ restore();
duke@0 2370 #endif
duke@0 2371
duke@0 2372 #ifdef ASSERT
duke@0 2373 // caller guarantees that the arrays really are different
duke@0 2374 // otherwise, we would have to make conjoint checks
duke@0 2375 { Label L;
duke@0 2376 __ mov(O3, G1); // spill: overlap test smashes O3
duke@0 2377 __ mov(O4, G4); // spill: overlap test smashes O4
coleenp@113 2378 array_overlap_test(L, LogBytesPerHeapOop);
duke@0 2379 __ stop("checkcast_copy within a single array");
duke@0 2380 __ bind(L);
duke@0 2381 __ mov(G1, O3);
duke@0 2382 __ mov(G4, O4);
duke@0 2383 }
duke@0 2384 #endif //ASSERT
duke@0 2385
duke@0 2386 assert_clean_int(O2_count, G1); // Make sure 'count' is clean int.
duke@0 2387
duke@0 2388 checkcast_copy_entry = __ pc();
duke@0 2389 // caller can pass a 64-bit byte count here (from generic stub)
duke@0 2390 BLOCK_COMMENT("Entry:");
duke@0 2391
duke@0 2392 Label load_element, store_element, do_card_marks, fail, done;
duke@0 2393 __ addcc(O2_count, 0, G1_remain); // initialize loop index, and test it
duke@0 2394 __ brx(Assembler::notZero, false, Assembler::pt, load_element);
duke@0 2395 __ delayed()->mov(G0, O5_offset); // offset from start of arrays
duke@0 2396
duke@0 2397 // Empty array: Nothing to do.
duke@0 2398 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
duke@0 2399 __ retl();
duke@0 2400 __ delayed()->set(0, O0); // return 0 on (trivial) success
duke@0 2401
duke@0 2402 // ======== begin loop ========
duke@0 2403 // (Loop is rotated; its entry is load_element.)
duke@0 2404 // Loop variables:
duke@0 2405 // (O5 = 0; ; O5 += wordSize) --- offset from src, dest arrays
duke@0 2406 // (O2 = len; O2 != 0; O2--) --- number of oops *remaining*
duke@0 2407 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
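// Hedged C sketch of the rotated loop ('subtype_of' stands in for the
// generate_type_check call below):
//   for (offset = 0, remain = count; remain != 0; remain--) {
//     oop el = *(from + offset);                                // load_element
//     if (el != NULL && !subtype_of(el->klass(), ckval)) break; // -> fail
//     *(to + offset) = el;                                      // store_element
//     offset += heapOopSize;
//   }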
kvn@1454 2408 __ align(OptoLoopAlignment);
duke@0 2409
jrose@682 2410 __ BIND(store_element);
jrose@682 2411 __ deccc(G1_remain); // decrement the count
coleenp@113 2412 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
coleenp@113 2413 __ inc(O5_offset, heapOopSize); // step to next offset
duke@0 2414 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
duke@0 2415 __ delayed()->set(0, O0); // return 0 on success
duke@0 2416
duke@0 2417 // ======== loop entry is here ========
jrose@682 2418 __ BIND(load_element);
coleenp@113 2419 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
duke@0 2420 __ br_null(G3_oop, true, Assembler::pt, store_element);
jrose@682 2421 __ delayed()->nop();
duke@0 2422
coleenp@113 2423 __ load_klass(G3_oop, G4_klass); // query the object klass
duke@0 2424
duke@0 2425 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
duke@0 2426 // branch to this on success:
jrose@682 2427 store_element);
duke@0 2428 // ======== end loop ========
duke@0 2429
duke@0 2430 // It was a real error; we must depend on the caller to finish the job.
duke@0 2431 // Register G1 has number of *remaining* oops, O2 number of *total* oops.
duke@0 2432 // Emit GC store barriers for the oops we have copied (O2 minus G1),
duke@0 2433 // and report their number to the caller.
jrose@682 2434 __ BIND(fail);
duke@0 2435 __ subcc(O2_count, G1_remain, O2_count);
duke@0 2436 __ brx(Assembler::zero, false, Assembler::pt, done);
duke@0 2437 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller
duke@0 2438
jrose@682 2439 __ BIND(do_card_marks);
duke@0 2440 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
duke@0 2441
jrose@682 2442 __ BIND(done);
duke@0 2443 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
duke@0 2444 __ retl();
duke@0 2445 __ delayed()->nop(); // return value in O0
duke@0 2446
duke@0 2447 return start;
duke@0 2448 }
duke@0 2449
duke@0 2450
duke@0 2451 // Generate 'unsafe' array copy stub
duke@0 2452 // Though just as safe as the other stubs, it takes an unscaled
duke@0 2453 // size_t argument instead of an element count.
duke@0 2454 //
duke@0 2455 // Arguments for generated stub:
duke@0 2456 // from: O0
duke@0 2457 // to: O1
duke@0 2458 // count: O2 byte count, treated as ssize_t, can be zero
duke@0 2459 //
duke@0 2460 // Examines the alignment of the operands and dispatches
duke@0 2461 // to a long, int, short, or byte copy loop.
duke@0 2462 //
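// Dispatch sketch: the widest power-of-two unit that divides 'from', 'to'
// and 'count' wins; the byte count is scaled to an element count in the
// branch delay slot on the way out (helper names are illustrative):
//   bits = from | to | count;
//   if      ((bits & (BytesPerLong -1)) == 0) jlong_copy (from, to, count >> LogBytesPerLong);
//   else if ((bits & (BytesPerInt  -1)) == 0) jint_copy  (from, to, count >> LogBytesPerInt);
//   else if ((bits & (BytesPerShort-1)) == 0) jshort_copy(from, to, count >> LogBytesPerShort);
//   else                                      jbyte_copy (from, to, count);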
duke@0 2463 address generate_unsafe_copy(const char* name) {
duke@0 2464
duke@0 2465 const Register O0_from = O0; // source array address
duke@0 2466 const Register O1_to = O1; // destination array address
duke@0 2467 const Register O2_count = O2; // elements count
duke@0 2468
duke@0 2469 const Register G1_bits = G1; // test copy of low bits
duke@0 2470
duke@0 2471 __ align(CodeEntryAlignment);
duke@0 2472 StubCodeMark mark(this, "StubRoutines", name);
duke@0 2473 address start = __ pc();
duke@0 2474
duke@0 2475 // bump this on entry, not on exit:
duke@0 2476 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, G1, G3);
duke@0 2477
duke@0 2478 __ or3(O0_from, O1_to, G1_bits);
duke@0 2479 __ or3(O2_count, G1_bits, G1_bits);
duke@0 2480
duke@0 2481 __ btst(BytesPerLong-1, G1_bits);
duke@0 2482 __ br(Assembler::zero, true, Assembler::pt,
duke@0 2483 long_copy_entry, relocInfo::runtime_call_type);
duke@0 2484 // scale the count on the way out:
duke@0 2485 __ delayed()->srax(O2_count, LogBytesPerLong, O2_count);
duke@0 2486
duke@0 2487 __ btst(BytesPerInt-1, G1_bits);
duke@0 2488 __ br(Assembler::zero, true, Assembler::pt,
duke@0 2489 int_copy_entry, relocInfo::runtime_call_type);
duke@0 2490 // scale the count on the way out:
duke@0 2491 __ delayed()->srax(O2_count, LogBytesPerInt, O2_count);
duke@0 2492
duke@0 2493 __ btst(BytesPerShort-1, G1_bits);
duke@0 2494 __ br(Assembler::zero, true, Assembler::pt,
duke@0 2495 short_copy_entry, relocInfo::runtime_call_type);
duke@0 2496 // scale the count on the way out:
duke@0 2497 __ delayed()->srax(O2_count, LogBytesPerShort, O2_count);
duke@0 2498
duke@0 2499 __ br(Assembler::always, false, Assembler::pt,
duke@0 2500 byte_copy_entry, relocInfo::runtime_call_type);
duke@0 2501 __ delayed()->nop();
duke@0 2502
duke@0 2503 return start;
duke@0 2504 }
duke@0 2505
duke@0 2506
duke@0 2507 // Perform range checks on the proposed arraycopy.
duke@0 2508 // Kills the two temps, but nothing else.
duke@0 2509 // Also cleans the sign bits of src_pos and dst_pos.
duke@0 2510 void arraycopy_range_checks(Register src, // source array oop (O0)
duke@0 2511 Register src_pos, // source position (O1)
duke@0 2512 Register dst, // destination array oop (O2)
duke@0 2513 Register dst_pos, // destination position (O3)
duke@0 2514 Register length, // length of copy (O4)
duke@0 2515 Register temp1, Register temp2,
duke@0 2516 Label& L_failed) {
duke@0 2517 BLOCK_COMMENT("arraycopy_range_checks:");
duke@0 2518
duke@0 2519 // if (src_pos + length > arrayOop(src)->length() ) FAIL;
duke@0 2520
duke@0 2521 const Register array_length = temp1; // scratch
duke@0 2522 const Register end_pos = temp2; // scratch
duke@0 2523
duke@0 2524 // Note: This next instruction may be in the delay slot of a branch:
duke@0 2525 __ add(length, src_pos, end_pos); // src_pos + length
duke@0 2526 __ lduw(src, arrayOopDesc::length_offset_in_bytes(), array_length);
duke@0 2527 __ cmp(end_pos, array_length);
duke@0 2528 __ br(Assembler::greater, false, Assembler::pn, L_failed);
duke@0 2529
duke@0 2530 // if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
duke@0 2531 __ delayed()->add(length, dst_pos, end_pos); // dst_pos + length
duke@0 2532 __ lduw(dst, arrayOopDesc::length_offset_in_bytes(), array_length);
duke@0 2533 __ cmp(end_pos, array_length);
duke@0 2534 __ br(Assembler::greater, false, Assembler::pn, L_failed);
duke@0 2535
duke@0 2536 // We have to clean up the high 32 bits of 'src_pos' and 'dst_pos'.
duke@0 2537 // A move with sign extension can be used since they are positive.
duke@0 2538 __ delayed()->signx(src_pos, src_pos);
duke@0 2539 __ signx(dst_pos, dst_pos);
duke@0 2540
duke@0 2541 BLOCK_COMMENT("arraycopy_range_checks done");
duke@0 2542 }
duke@0 2543
duke@0 2544
duke@0 2545 //
duke@0 2546 // Generate generic array copy stubs
duke@0 2547 //
duke@0 2548 // Input:
duke@0 2549 // O0 - src oop
duke@0 2550 // O1 - src_pos
duke@0 2551 // O2 - dst oop
duke@0 2552 // O3 - dst_pos
duke@0 2553 // O4 - element count
duke@0 2554 //
duke@0 2555 // Output:
duke@0 2556 // O0 == 0 - success
duke@0 2557 // O0 == -1 - need to call System.arraycopy
duke@0 2558 //
duke@0 2559 address generate_generic_copy(const char *name) {
duke@0 2560
duke@0 2561 Label L_failed, L_objArray;
duke@0 2562
duke@0 2563 // Input registers
duke@0 2564 const Register src = O0; // source array oop
duke@0 2565 const Register src_pos = O1; // source position
duke@0 2566 const Register dst = O2; // destination array oop
duke@0 2567 const Register dst_pos = O3; // destination position
duke@0 2568 const Register length = O4; // elements count
duke@0 2569
duke@0 2570 // registers used as temp
duke@0 2571 const Register G3_src_klass = G3; // source array klass
duke@0 2572 const Register G4_dst_klass = G4; // destination array klass
duke@0 2573 const Register G5_lh = G5; // layout handler
duke@0 2574 const Register O5_temp = O5;
duke@0 2575
duke@0 2576 __ align(CodeEntryAlignment);
duke@0 2577 StubCodeMark mark(this, "StubRoutines", name);
duke@0 2578 address start = __ pc();
duke@0 2579
duke@0 2580 // bump this on entry, not on exit:
duke@0 2581 inc_counter_np(SharedRuntime::_generic_array_copy_ctr, G1, G3);
duke@0 2582
duke@0 2583 // In principle, the int arguments could be dirty.
duke@0 2584 //assert_clean_int(src_pos, G1);
duke@0 2585 //assert_clean_int(dst_pos, G1);
duke@0 2586 //assert_clean_int(length, G1);
duke@0 2587
duke@0 2588 //-----------------------------------------------------------------------
duke@0 2589 // Assembler stubs will be used for this call to arraycopy
duke@0 2590 // if the following conditions are met:
duke@0 2591 //
duke@0 2592 // (1) src and dst must not be null.
duke@0 2593 // (2) src_pos must not be negative.
duke@0 2594 // (3) dst_pos must not be negative.
duke@0 2595 // (4) length must not be negative.
duke@0 2596 // (5) src klass and dst klass should be the same and not NULL.
duke@0 2597 // (6) src and dst should be arrays.
duke@0 2598 // (7) src_pos + length must not exceed length of src.
duke@0 2599 // (8) dst_pos + length must not exceed length of dst.
duke@0 2600 BLOCK_COMMENT("arraycopy initial argument checks");
duke@0 2601
duke@0 2602 // if (src == NULL) return -1;
duke@0 2603 __ br_null(src, false, Assembler::pn, L_failed);
duke@0 2604
duke@0 2605 // if (src_pos < 0) return -1;
duke@0 2606 __ delayed()->tst(src_pos);
duke@0 2607 __ br(Assembler::negative, false, Assembler::pn, L_failed);
duke@0 2608 __ delayed()->nop();
duke@0 2609
duke@0 2610 // if (dst == NULL) return -1;
duke@0 2611 __ br_null(dst, false, Assembler::pn, L_failed);
duke@0 2612
duke@0 2613 // if (dst_pos < 0) return -1;
duke@0 2614 __ delayed()->tst(dst_pos);
duke@0 2615 __ br(Assembler::negative, false, Assembler::pn, L_failed);
duke@0 2616
duke@0 2617 // if (length < 0) return -1;
duke@0 2618 __ delayed()->tst(length);
duke@0 2619 __ br(Assembler::negative, false, Assembler::pn, L_failed);
duke@0 2620
duke@0 2621 BLOCK_COMMENT("arraycopy argument klass checks");
duke@0 2622 // get src->klass()
coleenp@113 2623 if (UseCompressedOops) {
coleenp@113 2624 __ delayed()->nop(); // ??? not good
coleenp@113 2625 __ load_klass(src, G3_src_klass);
coleenp@113 2626 } else {
coleenp@113 2627 __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), G3_src_klass);
coleenp@113 2628 }
duke@0 2629
duke@0 2630 #ifdef ASSERT
duke@0 2631 // assert(src->klass() != NULL);
duke@0 2632 BLOCK_COMMENT("assert klasses not null");
duke@0 2633 { Label L_a, L_b;
duke@0 2634 __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
coleenp@113 2635 __ delayed()->nop();
duke@0 2636 __ bind(L_a);
duke@0 2637 __ stop("broken null klass");
duke@0 2638 __ bind(L_b);
coleenp@113 2639 __ load_klass(dst, G4_dst_klass);
duke@0 2640 __ br_null(G4_dst_klass, false, Assembler::pn, L_a); // this would be broken also
duke@0 2641 __ delayed()->mov(G0, G4_dst_klass); // scribble the temp
duke@0 2642 BLOCK_COMMENT("assert done");
duke@0 2643 }
duke@0 2644 #endif
duke@0 2645
duke@0 2646 // Load layout helper
duke@0 2647 //
duke@0 2648 // |array_tag| | header_size | element_type | |log2_element_size|
duke@0 2649 // 32 30 24 16 8 2 0
duke@0 2650 //
duke@0 2651 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
duke@0 2652 //
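// Field-decoding sketch matching the layout above (shifts and masks are
// the Klass::_lh_* constants; some are used directly below):
//   tag        = lh >> Klass::_lh_array_tag_shift;           // 0x3 / 0x2 / 0x0
//   hsize      = (lh >> Klass::_lh_header_size_shift) & Klass::_lh_header_size_mask;
//   etype      = (lh >> Klass::_lh_element_type_shift) & Klass::_lh_element_type_mask;
//   log2_esize = lh & Klass::_lh_log2_element_size_mask;     // 0 for byte[], 3 for long[]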
duke@0 2653
duke@0 2654 int lh_offset = klassOopDesc::header_size() * HeapWordSize +
duke@0 2655 Klass::layout_helper_offset_in_bytes();
duke@0 2656
duke@0 2657 // Load the 32-bit signed value. Use the br() instruction with it to check icc.
duke@0 2658 __ lduw(G3_src_klass, lh_offset, G5_lh);
duke@0 2659
coleenp@113 2660 if (UseCompressedOops) {
coleenp@113 2661 __ load_klass(dst, G4_dst_klass);
coleenp@113 2662 }
duke@0 2663 // Handle objArrays completely differently...
duke@0 2664 juint objArray_lh = Klass::array_layout_helper(T_OBJECT);
duke@0 2665 __ set(objArray_lh, O5_temp);
duke@0 2666 __ cmp(G5_lh, O5_temp);
duke@0 2667 __ br(Assembler::equal, false, Assembler::pt, L_objArray);
coleenp@113 2668 if (UseCompressedOops) {
coleenp@113 2669 __ delayed()->nop();
coleenp@113 2670 } else {
coleenp@113 2671 __ delayed()->ld_ptr(dst, oopDesc::klass_offset_in_bytes(), G4_dst_klass);
coleenp@113 2672 }
duke@0 2673
duke@0 2674 // if (src->klass() != dst->klass()) return -1;
duke@0 2675 __ cmp(G3_src_klass, G4_dst_klass);
duke@0 2676 __ brx(Assembler::notEqual, false, Assembler::pn, L_failed);
duke@0 2677 __ delayed()->nop();
duke@0 2678
duke@0 2679 // if (!src->is_Array()) return -1;
duke@0 2680 __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
duke@0 2681 __ br(Assembler::greaterEqual, false, Assembler::pn, L_failed);
duke@0 2682
duke@0 2683 // At this point, it is known to be a typeArray (array_tag 0x3).
duke@0 2684 #ifdef ASSERT
duke@0 2685 __ delayed()->nop();
duke@0 2686 { Label L;
duke@0 2687 jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
duke@0 2688 __ set(lh_prim_tag_in_place, O5_temp);
duke@0 2689 __ cmp(G5_lh, O5_temp);
duke@0 2690 __ br(Assembler::greaterEqual, false, Assembler::pt, L);
duke@0 2691 __ delayed()->nop();
duke@0 2692 __ stop("must be a primitive array");
duke@0 2693 __ bind(L);
duke@0 2694 }
duke@0 2695 #else
duke@0 2696 __ delayed(); // match next insn to prev branch
duke@0 2697 #endif
duke@0 2698
duke@0 2699 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
duke@0 2700 O5_temp, G4_dst_klass, L_failed);
duke@0 2701
duke@0 2702 // typeArrayKlass
duke@0 2703 //
duke@0 2704 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
duke@0 2705 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
duke@0 2706 //
duke@0 2707
duke@0 2708 const Register G4_offset = G4_dst_klass; // array offset
duke@0 2709 const Register G3_elsize = G3_src_klass; // log2 element size
duke@0 2710
duke@0 2711 __ srl(G5_lh, Klass::_lh_header_size_shift, G4_offset);
duke@0 2712 __ and3(G4_offset, Klass::_lh_header_size_mask, G4_offset); // array_offset
duke@0 2713 __ add(src, G4_offset, src); // src array offset
duke@0 2714 __ add(dst, G4_offset, dst); // dst array offset
duke@0 2715 __ and3(G5_lh, Klass::_lh_log2_element_size_mask, G3_elsize); // log2 element size
duke@0 2716
duke@0 2717 // the following registers must be set before the jump to the corresponding stub
duke@0 2718 const Register from = O0; // source array address
duke@0 2719 const Register to = O1; // destination array address
duke@0 2720 const Register count = O2; // elements count
duke@0 2721
duke@0 2722 // The 'from', 'to' and 'count' registers must be set in this order
duke@0 2723 // since they alias 'src', 'src_pos' and 'dst' respectively.
duke@0 2724
duke@0 2725 BLOCK_COMMENT("scale indexes to element size");
duke@0 2726 __ sll_ptr(src_pos, G3_elsize, src_pos);
duke@0 2727 __ sll_ptr(dst_pos, G3_elsize, dst_pos);
duke@0 2728 __ add(src, src_pos, from); // src_addr
duke@0 2729 __ add(dst, dst_pos, to); // dst_addr
duke@0 2730
duke@0 2731 BLOCK_COMMENT("choose copy loop based on element size");
duke@0 2732 __ cmp(G3_elsize, 0);
duke@0 2733 __ br(Assembler::equal, true, Assembler::pt, StubRoutines::_jbyte_arraycopy);
duke@0 2734 __ delayed()->signx(length, count); // length
duke@0 2735
duke@0 2736 __ cmp(G3_elsize, LogBytesPerShort);
duke@0 2737 __ br(Assembler::equal, true, Assembler::pt, StubRoutines::_jshort_arraycopy);
duke@0 2738 __ delayed()->signx(length, count); // length
duke@0 2739
duke@0 2740 __ cmp(G3_elsize, LogBytesPerInt);
duke@0 2741 __ br(Assembler::equal, true, Assembler::pt, StubRoutines::_jint_arraycopy);
duke@0 2742 __ delayed()->signx(length, count); // length
duke@0 2743 #ifdef ASSERT
duke@0 2744 { Label L;
duke@0 2745 __ cmp(G3_elsize, LogBytesPerLong);
duke@0 2746 __ br(Assembler::equal, false, Assembler::pt, L);
duke@0 2747 __ delayed()->nop();
duke@0 2748 __ stop("must be long copy, but elsize is wrong");
duke@0 2749 __ bind(L);
duke@0 2750 }
duke@0 2751 #endif
duke@0 2752 __ br(Assembler::always, false, Assembler::pt, StubRoutines::_jlong_arraycopy);
duke@0 2753 __ delayed()->signx(length, count); // length
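// In C terms the dispatch above is roughly (the switch itself is a sketch;
// the stub names are the real entry points):
//
//   switch (log2_elsize) {
//     case 0:  goto jbyte_arraycopy;
//     case 1:  goto jshort_arraycopy;
//     case 2:  goto jint_arraycopy;
//     default: goto jlong_arraycopy;   // asserted above to be log2(8)
//   }
//
// with 'count' materialized from 'length' in each branch's delay slot.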
duke@0 2754
duke@0 2755 // objArrayKlass
duke@0 2756 __ BIND(L_objArray);
duke@0 2757 // live at this point: G3_src_klass, G4_dst_klass, src[_pos], dst[_pos], length
duke@0 2758
duke@0 2759 Label L_plain_copy, L_checkcast_copy;
duke@0 2760 // test array classes for subtyping
duke@0 2761 __ cmp(G3_src_klass, G4_dst_klass); // usual case is exact equality
duke@0 2762 __ brx(Assembler::notEqual, true, Assembler::pn, L_checkcast_copy);
duke@0 2763 __ delayed()->lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted from below
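// (The dst layout-helper load executes in the branch's delay slot, so
// O5_temp is valid on both the fall-through path and at L_checkcast_copy.)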
duke@0 2764
duke@0 2765 // Identically typed arrays can be copied without element-wise checks.
duke@0 2766 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
duke@0 2767 O5_temp, G5_lh, L_failed);
duke@0 2768
duke@0 2769 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); // src offset
duke@0 2770 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); // dst offset
coleenp@113 2771 __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
coleenp@113 2772 __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
duke@0 2773 __ add(src, src_pos, from); // src_addr
duke@0 2774 __ add(dst, dst_pos, to); // dst_addr
duke@0 2775 __ BIND(L_plain_copy);
duke@0 2776 __ br(Assembler::always, false, Assembler::pt, StubRoutines::_oop_arraycopy);
duke@0 2777 __ delayed()->signx(length, count); // length
duke@0 2778
duke@0 2779 __ BIND(L_checkcast_copy);
duke@0 2780 // live at this point: G3_src_klass, G4_dst_klass
duke@0 2781 {
duke@0 2782 // Before looking at dst.length, make sure dst is also an objArray.
duke@0 2783 // lduw(G4_dst_klass, lh_offset, O5_temp); // hoisted to delay slot
duke@0 2784 __ cmp(G5_lh, O5_temp);
duke@0 2785 __ br(Assembler::notEqual, false, Assembler::pn, L_failed);
duke@0 2786
duke@0 2787 // It is safe to examine both src.length and dst.length.
duke@0 2788 __ delayed(); // match next insn to prev branch
duke@0 2789 arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
duke@0 2790 O5_temp, G5_lh, L_failed);
duke@0 2791
duke@0 2792 // Marshal the base address arguments now, freeing registers.
duke@0 2793 __ add(src, arrayOopDesc::base_offset_in_bytes(T_OBJECT), src); // src offset
duke@0 2794 __ add(dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT), dst); // dst offset
coleenp@113 2795 __ sll_ptr(src_pos, LogBytesPerHeapOop, src_pos);
coleenp@113 2796 __ sll_ptr(dst_pos, LogBytesPerHeapOop, dst_pos);
duke@0 2797 __ add(src, src_pos, from); // src_addr
duke@0 2798 __ add(dst, dst_pos, to); // dst_addr
duke@0 2799 __ signx(length, count); // length (reloaded)
duke@0 2800
duke@0 2801 Register sco_temp = O3; // this register is free now
duke@0 2802 assert_different_registers(from, to, count, sco_temp,
duke@0 2803 G4_dst_klass, G3_src_klass);
duke@0 2804
duke@0 2805 // Generate the type check.
duke@0 2806 int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
duke@0 2807 Klass::super_check_offset_offset_in_bytes());
duke@0 2808 __ lduw(G4_dst_klass, sco_offset, sco_temp);
duke@0 2809 generate_type_check(G3_src_klass, sco_temp, G4_dst_klass,
duke@0 2810 O5_temp, L_plain_copy);
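// Rough shape of the fast path generated by generate_type_check (a
// sketch, not the literal emitted instruction sequence):
//
//   if (*(Klass**)((address)src_klass + sco) == dst_klass)
//     goto L_plain_copy;   // src[] elements are assignable to dst[]
//
// Failures fall through to the element-wise checkcast copy below.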
duke@0 2811
duke@0 2812 // Fetch destination element klass from the objArrayKlass header.
duke@0 2813 int ek_offset = (klassOopDesc::header_size() * HeapWordSize +
duke@0 2814 objArrayKlass::element_klass_offset_in_bytes());
duke@0 2815
duke@0 2816 // the checkcast_copy loop needs two extra arguments:
duke@0 2817 __ ld_ptr(G4_dst_klass, ek_offset, O4); // dest elem klass
duke@0 2818 // lduw(O4, sco_offset, O3); // sco of elem klass
duke@0 2819
duke@0 2820 __ br(Assembler::always, false, Assembler::pt, checkcast_copy_entry);
duke@0 2821 __ delayed()->lduw(O4, sco_offset, O3);
duke@0 2822 }
duke@0 2823
duke@0 2824 __ BIND(L_failed);
duke@0 2825 __ retl();
duke@0 2826 __ delayed()->sub(G0, 1, O0); // return -1
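// Caller contract (sketch): O0 == -1 tells the caller to fall back to a
// slower checked path; successful copies return O0 == 0 through the
// tail-called type-specific stub.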
duke@0 2827 return start;
duke@0 2828 }
duke@0 2829
duke@0 2830 void generate_arraycopy_stubs() {
duke@0 2831
duke@0 2832 // Note: the disjoint stubs must be generated first, since some of
duke@0 2833 // the conjoint stubs use them.
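// (Each conjoint stub first tests whether src and dst overlap and, when
// they do not, branches straight to the corresponding disjoint entry, so
// that entry point must already exist.)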
duke@0 2834 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
duke@0 2835 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
duke@0 2836 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
duke@0 2837 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
duke@0 2838 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy");
duke@0 2839 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
duke@0 2840 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
duke@0 2841 StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy");
duke@0 2842 StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
duke@0 2843 StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy");
duke@0 2844
duke@0 2845 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy");
duke@0 2846 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
duke@0 2847 StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, "jint_arraycopy");
duke@0 2848 StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy");
duke@0 2849 StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy(false, "oop_arraycopy");
duke@0 2850 StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, "arrayof_jbyte_arraycopy");
duke@0 2851 StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
duke@0 2852 #ifdef _LP64
duke@0 2853 // since sizeof(jint) < sizeof(HeapWord), there's a different flavor:
duke@0 2854 StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, "arrayof_jint_arraycopy");
duke@0 2855 #else
duke@0 2856 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
duke@0 2857 #endif
duke@0 2858 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
duke@0 2859 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
duke@0 2860
duke@0 2861 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
duke@0 2862 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy");
duke@0 2863 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");
duke@0 2864 }
duke@0 2865
duke@0 2866 void generate_initial() {
duke@0 2867 // Generates the initial stubs and initializes their entry points
duke@0 2868
duke@0 2869 //------------------------------------------------------------------------------------------------------------------------
duke@0 2870 // entry points that exist in all platforms
duke@0 2871 // Note: This is code that could be shared among different platforms; however, the benefit seems to be smaller than
duke@0 2872 // the disadvantage of having a much more complicated generator structure. See also the comment in stubRoutines.hpp.
duke@0 2873 StubRoutines::_forward_exception_entry = generate_forward_exception();
duke@0 2874
duke@0 2875 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address);
duke@0 2876 StubRoutines::_catch_exception_entry = generate_catch_exception();
duke@0 2877
duke@0 2878 //------------------------------------------------------------------------------------------------------------------------
duke@0 2879 // entry points that are platform specific
duke@0 2880 StubRoutines::Sparc::_test_stop_entry = generate_test_stop();
duke@0 2881
duke@0 2882 StubRoutines::Sparc::_stop_subroutine_entry = generate_stop_subroutine();
duke@0 2883 StubRoutines::Sparc::_flush_callers_register_windows_entry = generate_flush_callers_register_windows();
duke@0 2884
duke@0 2885 #if !defined(COMPILER2) && !defined(_LP64)
duke@0 2886 StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
duke@0 2887 StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg();
duke@0 2888 StubRoutines::_atomic_add_entry = generate_atomic_add();
duke@0 2889 StubRoutines::_atomic_xchg_ptr_entry = StubRoutines::_atomic_xchg_entry;
duke@0 2890 StubRoutines::_atomic_cmpxchg_ptr_entry = StubRoutines::_atomic_cmpxchg_entry;
duke@0 2891 StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
duke@0 2892 StubRoutines::_atomic_add_ptr_entry = StubRoutines::_atomic_add_entry;
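// On 32-bit SPARC a pointer is one word, so the _ptr entries can simply
// alias the 32-bit flavors generated above.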
duke@0 2893 #endif // !COMPILER2 && !_LP64
duke@0 2894 }
duke@0 2895
duke@0 2896
duke@0 2897 void generate_all() {
duke@0 2898 // Generates all stubs and initializes the entry points
duke@0 2899
kvn@680 2900 // Generate partial_subtype_check first here since its code depends on
kvn@680 2901 // UseZeroBaseCompressedOops which is defined after heap initialization.
kvn@680 2902 StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check();
duke@0 2903 // These entry points require SharedInfo::stack0 to be set up in non-core builds
duke@0 2904 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false);
dcubed@16 2905 StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false);
duke@0 2906 StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true);
duke@0 2907 StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true);
duke@0 2908 StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
duke@0 2909 StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false);
duke@0 2910
duke@0 2911 StubRoutines::_handler_for_unsafe_access_entry =
duke@0 2912 generate_handler_for_unsafe_access();
duke@0 2913
duke@0 2914 // support for verify_oop (must happen after universe_init)
duke@0 2915 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine();
duke@0 2916
duke@0 2917 // arraycopy stubs used by compilers
duke@0 2918 generate_arraycopy_stubs();
never@1252 2919
never@1252 2920 // Don't initialize the platform math functions since sparc
never@1252 2921 // doesn't have intrinsics for these operations.
duke@0 2922 }
duke@0 2923
duke@0 2924
duke@0 2925 public:
duke@0 2926 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
duke@0 2927 // replace the standard masm with a special one:
duke@0 2928 _masm = new MacroAssembler(code);
duke@0 2929
duke@0 2930 _stub_count = !all ? 0x100 : 0x200;
duke@0 2931 if (all) {
duke@0 2932 generate_all();
duke@0 2933 } else {
duke@0 2934 generate_initial();
duke@0 2935 }
duke@0 2936
duke@0 2937 // make sure this stub is available for all local calls
duke@0 2938 if (_atomic_add_stub.is_unbound()) {
duke@0 2939 // generate a second time, if necessary
duke@0 2940 (void) generate_atomic_add();
duke@0 2941 }
duke@0 2942 }
duke@0 2943
duke@0 2944
duke@0 2945 private:
duke@0 2946 int _stub_count;
duke@0 2947 void stub_prolog(StubCodeDesc* cdesc) {
duke@0 2948 # ifdef ASSERT
duke@0 2949 // put extra information in the stub code, to make it more readable
duke@0 2950 #ifdef _LP64
duke@0 2951 // Write the high part of the address
duke@0 2952 // [RGV] Check if there is a dependency on the size of this prolog
duke@0 2953 __ emit_data((intptr_t)cdesc >> 32, relocInfo::none);
duke@0 2954 #endif
duke@0 2955 __ emit_data((intptr_t)cdesc, relocInfo::none);
duke@0 2956 __ emit_data(++_stub_count, relocInfo::none);
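// In a debug build each stub is thus preceded by the cdesc address
// (two 32-bit words on LP64) and a running stub count, which is handy
// when inspecting raw code memory.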
duke@0 2957 # endif
duke@0 2958 align(true);
duke@0 2959 }
duke@0 2960
duke@0 2961 void align(bool at_header = false) {
duke@0 2962 // %%%%% move this constant somewhere else
duke@0 2963 // UltraSPARC cache line size is 8 instructions:
duke@0 2964 const unsigned int icache_line_size = 32;
duke@0 2965 const unsigned int icache_half_line_size = 16;
duke@0 2966
duke@0 2967 if (at_header) {
duke@0 2968 while ((intptr_t)(__ pc()) % icache_line_size != 0) {
duke@0 2969 __ emit_data(0, relocInfo::none);
duke@0 2970 }
duke@0 2971 } else {
duke@0 2972 while ((intptr_t)(__ pc()) % icache_half_line_size != 0) {
duke@0 2973 __ nop();
duke@0 2974 }
duke@0 2975 }
duke@0 2976 }
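// Example: if __ pc() ends in 0x...24, stub alignment emits three nops
// (12 bytes) to reach the next 16-byte half-line boundary at 0x...30.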
duke@0 2977
duke@0 2978 }; // end class declaration
duke@0 2979
duke@0 2980
duke@0 2981 address StubGenerator::disjoint_byte_copy_entry = NULL;
duke@0 2982 address StubGenerator::disjoint_short_copy_entry = NULL;
duke@0 2983 address StubGenerator::disjoint_int_copy_entry = NULL;
duke@0 2984 address StubGenerator::disjoint_long_copy_entry = NULL;
duke@0 2985 address StubGenerator::disjoint_oop_copy_entry = NULL;
duke@0 2986
duke@0 2987 address StubGenerator::byte_copy_entry = NULL;
duke@0 2988 address StubGenerator::short_copy_entry = NULL;
duke@0 2989 address StubGenerator::int_copy_entry = NULL;
duke@0 2990 address StubGenerator::long_copy_entry = NULL;
duke@0 2991 address StubGenerator::oop_copy_entry = NULL;
duke@0 2992
duke@0 2993 address StubGenerator::checkcast_copy_entry = NULL;
duke@0 2994
duke@0 2995 void StubGenerator_generate(CodeBuffer* code, bool all) {
duke@0 2996 StubGenerator g(code, all);
duke@0 2997 }