OpenJDK / valhalla / valhalla
changeset 57415:114caf2fa7b9 lworld
Merge
line wrap: on
line diff
--- a/.hgtags Thu Oct 10 14:11:18 2019 +0200 +++ b/.hgtags Fri Oct 11 10:39:58 2019 +0200 @@ -595,3 +595,4 @@ 9c250a7600e12bdb1e611835250af3204d4aa152 jdk-13-ga 778fc2dcbdaa8981e07e929a2cacef979c72261e jdk-14+15 d29f0181ba424a95d881aba5eabf2e393abcc70f jdk-14+16 +5c83830390baafb76a1fbe33443c57620bd45fb9 jdk-14+17
--- a/make/CreateJmods.gmk Thu Oct 10 14:11:18 2019 +0200 +++ b/make/CreateJmods.gmk Fri Oct 11 10:39:58 2019 +0200 @@ -86,16 +86,18 @@ # from there. These files were explicitly filtered or modified in <module>-copy # targets. For the rest, just pick up everything from the source legal dirs. LEGAL_NOTICES := \ - $(SUPPORT_OUTPUTDIR)/modules_legal/common \ + $(wildcard $(SUPPORT_OUTPUTDIR)/modules_legal/common) \ $(if $(wildcard $(SUPPORT_OUTPUTDIR)/modules_legal/$(MODULE)), \ $(wildcard $(SUPPORT_OUTPUTDIR)/modules_legal/$(MODULE)), \ $(call FindModuleLegalSrcDirs, $(MODULE)) \ ) -LEGAL_NOTICES_PATH := $(call PathList, $(LEGAL_NOTICES)) -DEPS += $(call FindFiles, $(LEGAL_NOTICES)) +ifneq ($(strip $(LEGAL_NOTICES)), ) + LEGAL_NOTICES_PATH := $(call PathList, $(LEGAL_NOTICES)) + DEPS += $(call FindFiles, $(LEGAL_NOTICES)) -JMOD_FLAGS += --legal-notices $(LEGAL_NOTICES_PATH) + JMOD_FLAGS += --legal-notices $(LEGAL_NOTICES_PATH) +endif ifeq ($(filter-out jdk.incubator.%, $(MODULE)), ) JMOD_FLAGS += --do-not-resolve-by-default
--- a/make/gensrc/Gensrc-jdk.internal.vm.compiler.management.gmk Thu Oct 10 14:11:18 2019 +0200 +++ b/make/gensrc/Gensrc-jdk.internal.vm.compiler.management.gmk Fri Oct 11 10:39:58 2019 +0200 @@ -73,7 +73,7 @@ ($(CD) $(GENSRC_DIR)/META-INF/providers && \ p=""; \ impl=""; \ - for i in $$($(GREP) '^' * | $(SORT) -t ':' -k 2 | $(SED) 's/:.*//'); do \ + for i in $$($(NAWK) '$$0=FILENAME" "$$0' * | $(SORT) -k 2 | $(SED) 's/ .*//'); do \ c=$$($(CAT) $$i | $(TR) -d '\n\r'); \ if test x$$p != x$$c; then \ if test x$$p != x; then \
--- a/make/lib/CoreLibraries.gmk Thu Oct 10 14:11:18 2019 +0200 +++ b/make/lib/CoreLibraries.gmk Fri Oct 11 10:39:58 2019 +0200 @@ -23,8 +23,6 @@ # questions. # -WIN_VERIFY_LIB := $(SUPPORT_OUTPUTDIR)/native/$(MODULE)/libverify/verify.lib - # Hook to include the corresponding custom file, if present. $(eval $(call IncludeCustomExtension, lib/CoreLibraries.gmk)) @@ -110,14 +108,14 @@ LDFLAGS_macosx := -L$(SUPPORT_OUTPUTDIR)/native/$(MODULE)/, \ LDFLAGS_windows := -delayload:shell32.dll, \ LIBS := $(BUILD_LIBFDLIBM_TARGET), \ - LIBS_unix := -ljvm -lverify, \ + LIBS_unix := -ljvm, \ LIBS_linux := $(LIBDL), \ LIBS_solaris := -lsocket -lnsl -lscf $(LIBDL), \ LIBS_aix := $(LIBDL) $(LIBM),\ LIBS_macosx := -framework CoreFoundation \ -framework Foundation \ -framework SystemConfiguration, \ - LIBS_windows := jvm.lib $(WIN_VERIFY_LIB) \ + LIBS_windows := jvm.lib \ shell32.lib delayimp.lib \ advapi32.lib version.lib, \ ))
--- a/src/hotspot/cpu/aarch64/aarch64.ad Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/aarch64/aarch64.ad Fri Oct 11 10:39:58 2019 +0200 @@ -2526,17 +2526,8 @@ __ INSN(REG, as_Register(BASE)); \ } -typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr); -typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr); -typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, - MacroAssembler::SIMD_RegVariant T, const Address &adr); - - // Used for all non-volatile memory accesses. The use of - // $mem->opcode() to discover whether this pattern uses sign-extended - // offsets is something of a kludge. - static void loadStore(MacroAssembler masm, mem_insn insn, - Register reg, int opcode, - Register base, int index, int size, int disp) + +static Address mem2address(int opcode, Register base, int index, int size, int disp) { Address::extend scale; @@ -2555,16 +2546,34 @@ } if (index == -1) { - (masm.*insn)(reg, Address(base, disp)); + return Address(base, disp); } else { assert(disp == 0, "unsupported address mode: disp = %d", disp); - (masm.*insn)(reg, Address(base, as_Register(index), scale)); + return Address(base, as_Register(index), scale); } } + +typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr); +typedef void (MacroAssembler::* mem_insn2)(Register Rt, Register adr); +typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr); +typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, + MacroAssembler::SIMD_RegVariant T, const Address &adr); + + // Used for all non-volatile memory accesses. The use of + // $mem->opcode() to discover whether this pattern uses sign-extended + // offsets is something of a kludge. + static void loadStore(MacroAssembler masm, mem_insn insn, + Register reg, int opcode, + Register base, int index, int size, int disp) + { + Address addr = mem2address(opcode, base, index, size, disp); + (masm.*insn)(reg, addr); + } + static void loadStore(MacroAssembler masm, mem_float_insn insn, - FloatRegister reg, int opcode, - Register base, int index, int size, int disp) + FloatRegister reg, int opcode, + Register base, int index, int size, int disp) { Address::extend scale; @@ -2586,8 +2595,8 @@ } static void loadStore(MacroAssembler masm, mem_vector_insn insn, - FloatRegister reg, MacroAssembler::SIMD_RegVariant T, - int opcode, Register base, int index, int size, int disp) + FloatRegister reg, MacroAssembler::SIMD_RegVariant T, + int opcode, Register base, int index, int size, int disp) { if (index == -1) { (masm.*insn)(reg, T, Address(base, disp)); @@ -3804,7 +3813,7 @@ static const int hi[Op_RegL + 1] = { // enum name 0, // Op_Node 0, // Op_Set - OptoReg::Bad, // Op_RegN + OptoReg::Bad, // Op_RegN OptoReg::Bad, // Op_RegI R0_H_num, // Op_RegP OptoReg::Bad, // Op_RegF @@ -6936,7 +6945,7 @@ instruct loadP(iRegPNoSp dst, memory mem) %{ match(Set dst (LoadP mem)); - predicate(!needs_acquiring_load(n)); + predicate(!needs_acquiring_load(n) && (n->as_Load()->barrier_data() == 0)); ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# ptr" %} @@ -7629,6 +7638,7 @@ instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem) %{ match(Set dst (LoadP mem)); + predicate(n->as_Load()->barrier_data() == 0); ins_cost(VOLATILE_REF_COST); format %{ "ldar $dst, $mem\t# ptr" %} @@ -8611,6 +8621,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + predicate(n->as_LoadStore()->barrier_data() == 0); ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); @@ -8724,7 +8735,7 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0)); match(Set res (CompareAndSwapP mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); @@ -8855,6 +8866,7 @@ %} instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr); @@ -8954,7 +8966,7 @@ %} instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0)); match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr); @@ -9055,6 +9067,7 @@ %} instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); @@ -9162,8 +9175,8 @@ %} instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0)); ins_cost(VOLATILE_REF_COST); effect(KILL cr); format %{ @@ -9213,6 +9226,7 @@ %} instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set prev (GetAndSetP mem newv)); ins_cost(2 * VOLATILE_REF_COST); format %{ "atomic_xchg $prev, $newv, [$mem]" %} @@ -9256,7 +9270,7 @@ %} instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0)); match(Set prev (GetAndSetP mem newv)); ins_cost(VOLATILE_REF_COST); format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %}
--- a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -162,16 +162,12 @@ // Creation also verifies the object. NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::instruction_size); -#ifndef PRODUCT + +#ifdef ASSERT NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + verify_mt_safe(callee, entry, method_holder, jump); +#endif - // read the value once - volatile intptr_t data = method_holder->data(); - assert(data == 0 || data == (intptr_t)callee(), - "a) MT-unsafe modification of inline cache"); - assert(data == 0 || jump->jump_destination() == entry, - "b) MT-unsafe modification of inline cache"); -#endif // Update stub. method_holder->set_data((intptr_t)callee()); NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry);
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -24,22 +24,23 @@ #include "precompiled.hpp" #include "asm/macroAssembler.inline.hpp" #include "code/codeBlob.hpp" +#include "code/vmreg.inline.hpp" #include "gc/z/zBarrier.inline.hpp" #include "gc/z/zBarrierSet.hpp" #include "gc/z/zBarrierSetAssembler.hpp" #include "gc/z/zBarrierSetRuntime.hpp" +#include "gc/z/zThreadLocalData.hpp" #include "memory/resourceArea.hpp" +#include "runtime/sharedRuntime.hpp" +#include "utilities/macros.hpp" #ifdef COMPILER1 #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/z/c1/zBarrierSetC1.hpp" #endif // COMPILER1 - -#include "gc/z/zThreadLocalData.hpp" - -ZBarrierSetAssembler::ZBarrierSetAssembler() : - _load_barrier_slow_stub(), - _load_barrier_weak_slow_stub() {} +#ifdef COMPILER2 +#include "gc/z/c2/zBarrierSetC2.hpp" +#endif // COMPILER2 #ifdef PRODUCT #define BLOCK_COMMENT(str) /* nothing */ @@ -66,7 +67,7 @@ assert_different_registers(rscratch1, rscratch2, src.base()); assert_different_registers(rscratch1, rscratch2, dst); - RegSet savedRegs = RegSet::range(r0,r28) - RegSet::of(dst, rscratch1, rscratch2); + RegSet savedRegs = RegSet::range(r0, r28) - RegSet::of(dst, rscratch1, rscratch2); Label done; @@ -207,7 +208,8 @@ // The Address offset is too large to direct load - -784. Our range is +127, -128. __ mov(tmp, (long int)(in_bytes(ZThreadLocalData::address_bad_mask_offset()) - - in_bytes(JavaThread::jni_environment_offset()))); + in_bytes(JavaThread::jni_environment_offset()))); + // Load address bad mask __ add(tmp, jni_env, tmp); __ ldr(tmp, Address(tmp)); @@ -295,12 +297,12 @@ __ prologue("zgc_load_barrier stub", false); // We don't use push/pop_clobbered_registers() - we need to pull out the result from r0. - for (int i = 0; i < 32; i +=2) { - __ stpd(as_FloatRegister(i), as_FloatRegister(i+1), Address(__ pre(sp,-16))); + for (int i = 0; i < 32; i += 2) { + __ stpd(as_FloatRegister(i), as_FloatRegister(i + 1), Address(__ pre(sp,-16))); } - RegSet saveRegs = RegSet::range(r0,r28) - RegSet::of(r0); - __ push(saveRegs, sp); + const RegSet save_regs = RegSet::range(r1, r28); + __ push(save_regs, sp); // Setup arguments __ load_parameter(0, c_rarg0); @@ -308,98 +310,161 @@ __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), 2); - __ pop(saveRegs, sp); + __ pop(save_regs, sp); - for (int i = 30; i >0; i -=2) { - __ ldpd(as_FloatRegister(i), as_FloatRegister(i+1), Address(__ post(sp, 16))); - } + for (int i = 30; i >= 0; i -= 2) { + __ ldpd(as_FloatRegister(i), as_FloatRegister(i + 1), Address(__ post(sp, 16))); + } __ epilogue(); } #endif // COMPILER1 -#undef __ -#define __ cgen->assembler()-> +#ifdef COMPILER2 -// Generates a register specific stub for calling -// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or -// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded(). -// -// The raddr register serves as both input and output for this stub. When the stub is -// called the raddr register contains the object field address (oop*) where the bad oop -// was loaded from, which caused the slow path to be taken. On return from the stub the -// raddr register contains the good/healed oop returned from -// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or -// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded(). -static address generate_load_barrier_stub(StubCodeGenerator* cgen, Register raddr, DecoratorSet decorators) { - // Don't generate stub for invalid registers - if (raddr == zr || raddr == r29 || raddr == r30) { - return NULL; +OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { + if (!OptoReg::is_reg(opto_reg)) { + return OptoReg::Bad; } - // Create stub name - char name[64]; - const bool weak = (decorators & ON_WEAK_OOP_REF) != 0; - os::snprintf(name, sizeof(name), "zgc_load_barrier%s_stub_%s", weak ? "_weak" : "", raddr->name()); - - __ align(CodeEntryAlignment); - StubCodeMark mark(cgen, "StubRoutines", os::strdup(name, mtCode)); - address start = __ pc(); - - // Save live registers - RegSet savedRegs = RegSet::range(r0,r18) - RegSet::of(raddr); - - __ enter(); - __ push(savedRegs, sp); - - // Setup arguments - if (raddr != c_rarg1) { - __ mov(c_rarg1, raddr); + const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); + if (vm_reg->is_FloatRegister()) { + return opto_reg & ~1; } - __ ldr(c_rarg0, Address(raddr)); + return opto_reg; +} - // Call barrier function - __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), c_rarg0, c_rarg1); +#undef __ +#define __ _masm-> - // Move result returned in r0 to raddr, if needed - if (raddr != r0) { - __ mov(raddr, r0); +class ZSaveLiveRegisters { +private: + MacroAssembler* const _masm; + RegSet _gp_regs; + RegSet _fp_regs; + +public: + void initialize(ZLoadBarrierStubC2* stub) { + // Create mask of live registers + RegMask live = stub->live(); + + // Record registers that needs to be saved/restored + while (live.is_NotEmpty()) { + const OptoReg::Name opto_reg = live.find_first_elem(); + live.Remove(opto_reg); + if (OptoReg::is_reg(opto_reg)) { + const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); + if (vm_reg->is_Register()) { + _gp_regs += RegSet::of(vm_reg->as_Register()); + } else if (vm_reg->is_FloatRegister()) { + _fp_regs += RegSet::of((Register)vm_reg->as_FloatRegister()); + } else { + fatal("Unknown register type"); + } + } + } + + // Remove C-ABI SOE registers, scratch regs and _ref register that will be updated + _gp_regs -= RegSet::range(r19, r30) + RegSet::of(r8, r9, stub->ref()); } - __ pop(savedRegs, sp); - __ leave(); - __ ret(lr); + ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : + _masm(masm), + _gp_regs(), + _fp_regs() { - return start; + // Figure out what registers to save/restore + initialize(stub); + + // Save registers + __ push(_gp_regs, sp); + __ push_fp(_fp_regs, sp); + } + + ~ZSaveLiveRegisters() { + // Restore registers + __ pop_fp(_fp_regs, sp); + __ pop(_gp_regs, sp); + } +}; + +#undef __ +#define __ _masm-> + +class ZSetupArguments { +private: + MacroAssembler* const _masm; + const Register _ref; + const Address _ref_addr; + +public: + ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : + _masm(masm), + _ref(stub->ref()), + _ref_addr(stub->ref_addr()) { + + // Setup arguments + if (_ref_addr.base() == noreg) { + // No self healing + if (_ref != c_rarg0) { + __ mov(c_rarg0, _ref); + } + __ mov(c_rarg1, 0); + } else { + // Self healing + if (_ref == c_rarg0) { + // _ref is already at correct place + __ lea(c_rarg1, _ref_addr); + } else if (_ref != c_rarg1) { + // _ref is in wrong place, but not in c_rarg1, so fix it first + __ lea(c_rarg1, _ref_addr); + __ mov(c_rarg0, _ref); + } else if (_ref_addr.base() != c_rarg0 && _ref_addr.index() != c_rarg0) { + assert(_ref == c_rarg1, "Mov ref first, vacating c_rarg0"); + __ mov(c_rarg0, _ref); + __ lea(c_rarg1, _ref_addr); + } else { + assert(_ref == c_rarg1, "Need to vacate c_rarg1 and _ref_addr is using c_rarg0"); + if (_ref_addr.base() == c_rarg0 || _ref_addr.index() == c_rarg0) { + __ mov(rscratch2, c_rarg1); + __ lea(c_rarg1, _ref_addr); + __ mov(c_rarg0, rscratch2); + } else { + ShouldNotReachHere(); + } + } + } + } + + ~ZSetupArguments() { + // Transfer result + if (_ref != r0) { + __ mov(_ref, r0); + } + } +}; + +#undef __ +#define __ masm-> + +void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { + BLOCK_COMMENT("ZLoadBarrierStubC2"); + + // Stub entry + __ bind(*stub->entry()); + + { + ZSaveLiveRegisters save_live_registers(masm, stub); + ZSetupArguments setup_arguments(masm, stub); + __ mov(rscratch1, stub->slow_path()); + __ blr(rscratch1); + } + + // Stub exit + __ b(*stub->continuation()); } #undef __ -static void barrier_stubs_init_inner(const char* label, const DecoratorSet decorators, address* stub) { - const int nregs = 28; // Exclude FP, XZR, SP from calculation. - const int code_size = nregs * 254; // Rough estimate of code size - - ResourceMark rm; - - CodeBuffer buf(BufferBlob::create(label, code_size)); - StubCodeGenerator cgen(&buf); - - for (int i = 0; i < nregs; i++) { - const Register reg = as_Register(i); - stub[i] = generate_load_barrier_stub(&cgen, reg, decorators); - } -} - -void ZBarrierSetAssembler::barrier_stubs_init() { - barrier_stubs_init_inner("zgc_load_barrier_stubs", ON_STRONG_OOP_REF, _load_barrier_slow_stub); - barrier_stubs_init_inner("zgc_load_barrier_weak_stubs", ON_WEAK_OOP_REF, _load_barrier_weak_slow_stub); -} - -address ZBarrierSetAssembler::load_barrier_slow_stub(Register reg) { - return _load_barrier_slow_stub[reg->encoding()]; -} - -address ZBarrierSetAssembler::load_barrier_weak_slow_stub(Register reg) { - return _load_barrier_weak_slow_stub[reg->encoding()]; -} +#endif // COMPILER2
--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -24,6 +24,12 @@ #ifndef CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP #define CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP +#include "code/vmreg.hpp" +#include "oops/accessDecorators.hpp" +#ifdef COMPILER2 +#include "opto/optoreg.hpp" +#endif // COMPILER2 + #ifdef COMPILER1 class LIR_Assembler; class LIR_OprDesc; @@ -32,14 +38,13 @@ class ZLoadBarrierStubC1; #endif // COMPILER1 +#ifdef COMPILER2 +class Node; +class ZLoadBarrierStubC2; +#endif // COMPILER2 + class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { -private: - address _load_barrier_slow_stub[RegisterImpl::number_of_registers]; - address _load_barrier_weak_slow_stub[RegisterImpl::number_of_registers]; - public: - ZBarrierSetAssembler(); - virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, @@ -84,10 +89,13 @@ DecoratorSet decorators) const; #endif // COMPILER1 - virtual void barrier_stubs_init(); +#ifdef COMPILER2 + OptoReg::Name refine_register(const Node* node, + OptoReg::Name opto_reg); - address load_barrier_slow_stub(Register reg); - address load_barrier_weak_slow_stub(Register reg); + void generate_c2_load_barrier_stub(MacroAssembler* masm, + ZLoadBarrierStubC2* stub) const; +#endif // COMPILER2 }; #endif // CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP
--- a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -40,7 +40,7 @@ // +--------------------------------+ 0x0000014000000000 (20TB) // | Remapped View | // +--------------------------------+ 0x0000010000000000 (16TB) -// | (Reserved, but unused) | +// . . // +--------------------------------+ 0x00000c0000000000 (12TB) // | Marked1 View | // +--------------------------------+ 0x0000080000000000 (8TB) @@ -75,7 +75,7 @@ // +--------------------------------+ 0x0000280000000000 (40TB) // | Remapped View | // +--------------------------------+ 0x0000200000000000 (32TB) -// | (Reserved, but unused) | +// . . // +--------------------------------+ 0x0000180000000000 (24TB) // | Marked1 View | // +--------------------------------+ 0x0000100000000000 (16TB) @@ -110,7 +110,7 @@ // +--------------------------------+ 0x0000500000000000 (80TB) // | Remapped View | // +--------------------------------+ 0x0000400000000000 (64TB) -// | (Reserved, but unused) | +// . . // +--------------------------------+ 0x0000300000000000 (48TB) // | Marked1 View | // +--------------------------------+ 0x0000200000000000 (32TB)
--- a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -36,7 +36,6 @@ // ------------------------------------------------------------------ // const size_t ZPlatformGranuleSizeShift = 21; // 2MB -const size_t ZPlatformMaxHeapSizeShift = 46; // 16TB const size_t ZPlatformNMethodDisarmedOffset = 4; const size_t ZPlatformCacheLineSize = 64;
--- a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad Fri Oct 11 10:39:58 2019 +0200 @@ -24,155 +24,244 @@ source_hpp %{ #include "gc/z/c2/zBarrierSetC2.hpp" +#include "gc/z/zThreadLocalData.hpp" %} source %{ -#include "gc/z/zBarrierSetAssembler.hpp" +static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { + ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); + __ ldr(tmp, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); + __ andr(tmp, tmp, ref); + __ cbnz(tmp, *stub->entry()); + __ bind(*stub->continuation()); +} -static void z_load_barrier_slow_reg(MacroAssembler& _masm, Register dst, - Register base, int index, int scale, - int disp, bool weak) { - const address stub = weak ? ZBarrierSet::assembler()->load_barrier_weak_slow_stub(dst) - : ZBarrierSet::assembler()->load_barrier_slow_stub(dst); - - if (index == -1) { - if (disp != 0) { - __ lea(dst, Address(base, disp)); - } else { - __ mov(dst, base); - } - } else { - Register index_reg = as_Register(index); - if (disp == 0) { - __ lea(dst, Address(base, index_reg, Address::lsl(scale))); - } else { - __ lea(dst, Address(base, disp)); - __ lea(dst, Address(dst, index_reg, Address::lsl(scale))); - } - } - - __ far_call(RuntimeAddress(stub)); +static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { + ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); + __ b(*stub->entry()); + __ bind(*stub->continuation()); } %} -// -// Execute ZGC load barrier (strong) slow path -// -instruct loadBarrierSlowReg(iRegP dst, memory src, rFlagsReg cr, - vRegD_V0 v0, vRegD_V1 v1, vRegD_V2 v2, vRegD_V3 v3, vRegD_V4 v4, - vRegD_V5 v5, vRegD_V6 v6, vRegD_V7 v7, vRegD_V8 v8, vRegD_V9 v9, - vRegD_V10 v10, vRegD_V11 v11, vRegD_V12 v12, vRegD_V13 v13, vRegD_V14 v14, - vRegD_V15 v15, vRegD_V16 v16, vRegD_V17 v17, vRegD_V18 v18, vRegD_V19 v19, - vRegD_V20 v20, vRegD_V21 v21, vRegD_V22 v22, vRegD_V23 v23, vRegD_V24 v24, - vRegD_V25 v25, vRegD_V26 v26, vRegD_V27 v27, vRegD_V28 v28, vRegD_V29 v29, - vRegD_V30 v30, vRegD_V31 v31) %{ - match(Set dst (LoadBarrierSlowReg src dst)); - predicate(!n->as_LoadBarrierSlowReg()->is_weak()); +// Load Pointer +instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr) +%{ + match(Set dst (LoadP mem)); + predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierStrong)); + effect(TEMP dst, KILL cr); - effect(KILL cr, - KILL v0, KILL v1, KILL v2, KILL v3, KILL v4, KILL v5, KILL v6, KILL v7, - KILL v8, KILL v9, KILL v10, KILL v11, KILL v12, KILL v13, KILL v14, - KILL v15, KILL v16, KILL v17, KILL v18, KILL v19, KILL v20, KILL v21, - KILL v22, KILL v23, KILL v24, KILL v25, KILL v26, KILL v27, KILL v28, - KILL v29, KILL v30, KILL v31); + ins_cost(4 * INSN_COST); - format %{ "lea $dst, $src\n\t" - "call #ZLoadBarrierSlowPath" %} + format %{ "ldr $dst, $mem" %} ins_encode %{ - z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$base$$Register, - $src$$index, $src$$scale, $src$$disp, false); + const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ ldr($dst$$Register, ref_addr); + if (barrier_data() != ZLoadBarrierElided) { + z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, false /* weak */); + } %} - ins_pipe(pipe_slow); + + ins_pipe(iload_reg_mem); %} -// -// Execute ZGC load barrier (weak) slow path -// -instruct loadBarrierWeakSlowReg(iRegP dst, memory src, rFlagsReg cr, - vRegD_V0 v0, vRegD_V1 v1, vRegD_V2 v2, vRegD_V3 v3, vRegD_V4 v4, - vRegD_V5 v5, vRegD_V6 v6, vRegD_V7 v7, vRegD_V8 v8, vRegD_V9 v9, - vRegD_V10 v10, vRegD_V11 v11, vRegD_V12 v12, vRegD_V13 v13, vRegD_V14 v14, - vRegD_V15 v15, vRegD_V16 v16, vRegD_V17 v17, vRegD_V18 v18, vRegD_V19 v19, - vRegD_V20 v20, vRegD_V21 v21, vRegD_V22 v22, vRegD_V23 v23, vRegD_V24 v24, - vRegD_V25 v25, vRegD_V26 v26, vRegD_V27 v27, vRegD_V28 v28, vRegD_V29 v29, - vRegD_V30 v30, vRegD_V31 v31) %{ - match(Set dst (LoadBarrierSlowReg src dst)); - predicate(n->as_LoadBarrierSlowReg()->is_weak()); +// Load Weak Pointer +instruct zLoadWeakP(iRegPNoSp dst, memory mem, rFlagsReg cr) +%{ + match(Set dst (LoadP mem)); + predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierWeak)); + effect(TEMP dst, KILL cr); - effect(KILL cr, - KILL v0, KILL v1, KILL v2, KILL v3, KILL v4, KILL v5, KILL v6, KILL v7, - KILL v8, KILL v9, KILL v10, KILL v11, KILL v12, KILL v13, KILL v14, - KILL v15, KILL v16, KILL v17, KILL v18, KILL v19, KILL v20, KILL v21, - KILL v22, KILL v23, KILL v24, KILL v25, KILL v26, KILL v27, KILL v28, - KILL v29, KILL v30, KILL v31); + ins_cost(4 * INSN_COST); - format %{ "lea $dst, $src\n\t" - "call #ZLoadBarrierSlowPath" %} + format %{ "ldr $dst, $mem" %} ins_encode %{ - z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$base$$Register, - $src$$index, $src$$scale, $src$$disp, true); + const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ ldr($dst$$Register, ref_addr); + z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, true /* weak */); %} - ins_pipe(pipe_slow); + + ins_pipe(iload_reg_mem); %} +// Load Pointer Volatile +instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg cr) +%{ + match(Set dst (LoadP mem)); + predicate(UseZGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() == ZLoadBarrierStrong); + effect(TEMP dst, KILL cr); -// Specialized versions of compareAndExchangeP that adds a keepalive that is consumed -// but doesn't affect output. + ins_cost(VOLATILE_REF_COST); -instruct z_compareAndExchangeP(iRegPNoSp res, indirect mem, - iRegP oldval, iRegP newval, iRegP keepalive, - rFlagsReg cr) %{ - match(Set res (ZCompareAndExchangeP (Binary mem keepalive) (Binary oldval newval))); - ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP_DEF res, KILL cr); - format %{ - "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval" + format %{ "ldar $dst, $mem\t" %} + + ins_encode %{ + __ ldar($dst$$Register, $mem$$Register); + if (barrier_data() != ZLoadBarrierElided) { + z_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, false /* weak */); + } %} - ins_encode %{ - __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, - Assembler::xword, /*acquire*/ false, /*release*/ true, - /*weak*/ false, $res$$Register); - %} - ins_pipe(pipe_slow); + + ins_pipe(pipe_serial); %} -instruct z_compareAndSwapP(iRegINoSp res, - indirect mem, - iRegP oldval, iRegP newval, iRegP keepalive, - rFlagsReg cr) %{ - - match(Set res (ZCompareAndSwapP (Binary mem keepalive) (Binary oldval newval))); - match(Set res (ZWeakCompareAndSwapP (Binary mem keepalive) (Binary oldval newval))); +instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + effect(KILL cr, TEMP_DEF res); ins_cost(2 * VOLATILE_REF_COST); - effect(KILL cr); + format %{ "cmpxchg $mem, $oldval, $newval\n\t" + "cset $res, EQ" %} - format %{ - "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" - "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" - %} - - ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval), - aarch64_enc_cset_eq(res)); + ins_encode %{ + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + false /* acquire */, true /* release */, false /* weak */, rscratch2); + __ cset($res$$Register, Assembler::EQ); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); + __ andr(rscratch1, rscratch1, rscratch2); + __ cbz(rscratch1, good); + z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + false /* acquire */, true /* release */, false /* weak */, rscratch2); + __ cset($res$$Register, Assembler::EQ); + __ bind(good); + } + %} ins_pipe(pipe_slow); %} - -instruct z_get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev, - iRegP keepalive) %{ - match(Set prev (ZGetAndSetP mem (Binary newv keepalive))); +instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); + effect(KILL cr, TEMP_DEF res); ins_cost(2 * VOLATILE_REF_COST); + + format %{ "cmpxchg $mem, $oldval, $newval\n\t" + "cset $res, EQ" %} + + ins_encode %{ + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + true /* acquire */, true /* release */, false /* weak */, rscratch2); + __ cset($res$$Register, Assembler::EQ); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); + __ andr(rscratch1, rscratch1, rscratch2); + __ cbz(rscratch1, good); + z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ ); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + true /* acquire */, true /* release */, false /* weak */, rscratch2); + __ cset($res$$Register, Assembler::EQ); + __ bind(good); + } + %} + + ins_pipe(pipe_slow); +%} + +instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + effect(TEMP_DEF res, KILL cr); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ "cmpxchg $res = $mem, $oldval, $newval" %} + + ins_encode %{ + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + false /* acquire */, true /* release */, false /* weak */, $res$$Register); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); + __ andr(rscratch1, rscratch1, $res$$Register); + __ cbz(rscratch1, good); + z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + false /* acquire */, true /* release */, false /* weak */, $res$$Register); + __ bind(good); + } + %} + + ins_pipe(pipe_slow); +%} + +instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + predicate(UseZGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + effect(TEMP_DEF res, KILL cr); + + ins_cost(2 * VOLATILE_REF_COST); + + format %{ "cmpxchg $res = $mem, $oldval, $newval" %} + + ins_encode %{ + guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + true /* acquire */, true /* release */, false /* weak */, $res$$Register); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); + __ andr(rscratch1, rscratch1, $res$$Register); + __ cbz(rscratch1, good); + z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + true /* acquire */, true /* release */, false /* weak */, $res$$Register); + __ bind(good); + } + %} + + ins_pipe(pipe_slow); +%} + +instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ + match(Set prev (GetAndSetP mem newv)); + predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + effect(TEMP_DEF prev, KILL cr); + + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchg $prev, $newv, [$mem]" %} + ins_encode %{ - __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + __ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register); + if (barrier_data() != ZLoadBarrierElided) { + z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); + } + %} + + ins_pipe(pipe_serial); +%} + +instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ + match(Set prev (GetAndSetP mem newv)); + predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); + effect(TEMP_DEF prev, KILL cr); + + ins_cost(VOLATILE_REF_COST); + + format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} + + ins_encode %{ + __ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register); + if (barrier_data() != ZLoadBarrierElided) { + z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); + } %} ins_pipe(pipe_serial); %}
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -2178,6 +2178,65 @@ return count; } + +// Push lots of registers in the bit set supplied. Don't push sp. +// Return the number of words pushed +int MacroAssembler::push_fp(unsigned int bitset, Register stack) { + int words_pushed = 0; + + // Scan bitset to accumulate register pairs + unsigned char regs[32]; + int count = 0; + for (int reg = 0; reg <= 31; reg++) { + if (1 & bitset) + regs[count++] = reg; + bitset >>= 1; + } + regs[count++] = zr->encoding_nocheck(); + count &= ~1; // Only push an even number of regs + + // Always pushing full 128 bit registers. + if (count) { + stpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -count * wordSize * 2))); + words_pushed += 2; + } + for (int i = 2; i < count; i += 2) { + stpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); + words_pushed += 2; + } + + assert(words_pushed == count, "oops, pushed != count"); + return count; +} + +int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { + int words_pushed = 0; + + // Scan bitset to accumulate register pairs + unsigned char regs[32]; + int count = 0; + for (int reg = 0; reg <= 31; reg++) { + if (1 & bitset) + regs[count++] = reg; + bitset >>= 1; + } + regs[count++] = zr->encoding_nocheck(); + count &= ~1; + + for (int i = 2; i < count; i += 2) { + ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); + words_pushed += 2; + } + if (count) { + ldpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, count * wordSize * 2))); + words_pushed += 2; + } + + assert(words_pushed == count, "oops, pushed != count"); + + return count; +} + #ifdef ASSERT void MacroAssembler::verify_heapbase(const char* msg) { #if 0
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -447,12 +447,18 @@ int push(unsigned int bitset, Register stack); int pop(unsigned int bitset, Register stack); + int push_fp(unsigned int bitset, Register stack); + int pop_fp(unsigned int bitset, Register stack); + void mov(Register dst, Address a); public: void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); } void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); } + void push_fp(RegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } + void pop_fp(RegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } + // Push and pop everything that might be clobbered by a native // runtime call except rscratch1 and rscratch2. (They are always // scratch, so we don't have to protect them.) Only save the lower
--- a/src/hotspot/cpu/aarch64/register_aarch64.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -230,6 +230,11 @@ return *this; } + RegSet &operator-=(const RegSet aSet) { + *this = *this - aSet; + return *this; + } + static RegSet of(Register r1) { return RegSet(r1); }
--- a/src/hotspot/cpu/arm/compiledIC_arm.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/arm/compiledIC_arm.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -115,16 +115,7 @@ // Creation also verifies the object. NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); - -#ifdef ASSERT - // read the value once - volatile intptr_t data = method_holder->data(); - volatile address destination = jump->jump_destination(); - assert(data == 0 || data == (intptr_t)callee(), - "a) MT-unsafe modification of inline cache"); - assert(destination == (address)-1 || destination == entry, - "b) MT-unsafe modification of inline cache"); -#endif + verify_mt_safe(callee, entry, method_holder, jump); // Update stub. method_holder->set_data((intptr_t)callee());
--- a/src/hotspot/cpu/ppc/compiledIC_ppc.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/ppc/compiledIC_ppc.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -178,15 +178,7 @@ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub); NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); -#ifdef ASSERT - // read the value once - volatile intptr_t data = method_holder->data(); - volatile address destination = jump->jump_destination(); - assert(data == 0 || data == (intptr_t)callee(), - "a) MT-unsafe modification of inline cache"); - assert(destination == (address)-1 || destination == entry, - "b) MT-unsafe modification of inline cache"); -#endif + verify_mt_safe(callee, entry, method_holder, jump); // Update stub. method_holder->set_data((intptr_t)callee());
--- a/src/hotspot/cpu/s390/compiledIC_s390.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/s390/compiledIC_s390.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -104,19 +104,7 @@ // Creation also verifies the object. NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeCall::get_IC_pos_in_java_to_interp_stub()); NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); - -#ifdef ASSERT - // A generated lambda form might be deleted from the Lambdaform - // cache in MethodTypeForm. If a jit compiled lambdaform method - // becomes not entrant and the cache access returns null, the new - // resolve will lead to a new generated LambdaForm. - volatile intptr_t data = method_holder->data(); - volatile address destination = jump->jump_destination(); - assert(data == 0 || data == (intptr_t)callee() || callee->is_compiled_lambda_form(), - "a) MT-unsafe modification of inline cache"); - assert(destination == (address)-1 || destination == entry, - "b) MT-unsafe modification of inline cache"); -#endif + verify_mt_safe(callee, entry, method_holder, jump); // Update stub. method_holder->set_data((intptr_t)callee(), relocInfo::metadata_type);
--- a/src/hotspot/cpu/sparc/compiledIC_sparc.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/sparc/compiledIC_sparc.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -104,16 +104,7 @@ // Creation also verifies the object. NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); - -#ifdef ASSERT - // read the value once - volatile intptr_t data = method_holder->data(); - volatile address destination = jump->jump_destination(); - assert(data == 0 || data == (intptr_t)callee(), - "a) MT-unsafe modification of inline cache"); - assert(destination == (address)-1 || destination == entry, - "b) MT-unsafe modification of inline cache"); -#endif + verify_mt_safe(callee, entry, method_holder, jump); // Update stub. method_holder->set_data((intptr_t)callee());
--- a/src/hotspot/cpu/x86/compiledIC_x86.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/compiledIC_x86.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -157,16 +157,7 @@ // Creation also verifies the object. NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); - -#ifdef ASSERT - Method* old_method = reinterpret_cast<Method*>(method_holder->data()); - address destination = jump->jump_destination(); - assert(old_method == NULL || old_method == callee() || - !old_method->method_holder()->is_loader_alive(), - "a) MT-unsafe modification of inline cache"); - assert(destination == (address)-1 || destination == entry, - "b) MT-unsafe modification of inline cache"); -#endif + verify_mt_safe(callee, entry, method_holder, jump); // Update stub. method_holder->set_data((intptr_t)callee());
--- a/src/hotspot/cpu/x86/gc/z/zArguments_x86.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/gc/z/zArguments_x86.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -23,20 +23,7 @@ #include "precompiled.hpp" #include "gc/z/zArguments.hpp" -#include "runtime/globals.hpp" -#include "runtime/globals_extension.hpp" -#include "utilities/debug.hpp" void ZArguments::initialize_platform() { -#ifdef COMPILER2 - // The C2 barrier slow path expects vector registers to be least - // 16 bytes wide, which is the minimum width available on all - // x86-64 systems. However, the user could have speficied a lower - // number on the command-line, in which case we print a warning - // and raise it to 16. - if (MaxVectorSize < 16) { - warning("ZGC requires MaxVectorSize to be at least 16"); - FLAG_SET_DEFAULT(MaxVectorSize, 16); - } -#endif + // Does nothing }
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -24,22 +24,22 @@ #include "precompiled.hpp" #include "asm/macroAssembler.inline.hpp" #include "code/codeBlob.hpp" +#include "code/vmreg.inline.hpp" #include "gc/z/zBarrier.inline.hpp" #include "gc/z/zBarrierSet.hpp" #include "gc/z/zBarrierSetAssembler.hpp" #include "gc/z/zBarrierSetRuntime.hpp" #include "memory/resourceArea.hpp" -#include "runtime/stubCodeGenerator.hpp" +#include "runtime/sharedRuntime.hpp" #include "utilities/macros.hpp" #ifdef COMPILER1 #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/z/c1/zBarrierSetC1.hpp" #endif // COMPILER1 - -ZBarrierSetAssembler::ZBarrierSetAssembler() : - _load_barrier_slow_stub(), - _load_barrier_weak_slow_stub() {} +#ifdef COMPILER2 +#include "gc/z/c2/zBarrierSetC2.hpp" +#endif // COMPILER2 #ifdef PRODUCT #define BLOCK_COMMENT(str) /* nothing */ @@ -346,137 +346,327 @@ #endif // COMPILER1 -#undef __ -#define __ cgen->assembler()-> +#ifdef COMPILER2 -// Generates a register specific stub for calling -// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or -// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded(). -// -// The raddr register serves as both input and output for this stub. When the stub is -// called the raddr register contains the object field address (oop*) where the bad oop -// was loaded from, which caused the slow path to be taken. On return from the stub the -// raddr register contains the good/healed oop returned from -// ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded() or -// ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded(). -static address generate_load_barrier_stub(StubCodeGenerator* cgen, Register raddr, DecoratorSet decorators) { - // Don't generate stub for invalid registers - if (raddr == rsp || raddr == r15) { - return NULL; +OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { + if (!OptoReg::is_reg(opto_reg)) { + return OptoReg::Bad; } - // Create stub name - char name[64]; - const bool weak = (decorators & ON_WEAK_OOP_REF) != 0; - os::snprintf(name, sizeof(name), "zgc_load_barrier%s_stub_%s", weak ? "_weak" : "", raddr->name()); - - __ align(CodeEntryAlignment); - StubCodeMark mark(cgen, "StubRoutines", os::strdup(name, mtCode)); - address start = __ pc(); - - // Save live registers - if (raddr != rax) { - __ push(rax); - } - if (raddr != rcx) { - __ push(rcx); - } - if (raddr != rdx) { - __ push(rdx); - } - if (raddr != rsi) { - __ push(rsi); - } - if (raddr != rdi) { - __ push(rdi); - } - if (raddr != r8) { - __ push(r8); - } - if (raddr != r9) { - __ push(r9); - } - if (raddr != r10) { - __ push(r10); - } - if (raddr != r11) { - __ push(r11); + const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); + if (vm_reg->is_XMMRegister()) { + opto_reg &= ~15; + switch (node->ideal_reg()) { + case Op_VecX: + opto_reg |= 2; + break; + case Op_VecY: + opto_reg |= 4; + break; + case Op_VecZ: + opto_reg |= 8; + break; + default: + opto_reg |= 1; + break; + } } - // Setup arguments - if (raddr != c_rarg1) { - __ movq(c_rarg1, raddr); - } - __ movq(c_rarg0, Address(raddr, 0)); + return opto_reg; +} - // Call barrier function - __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), c_rarg0, c_rarg1); +// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel +extern int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st); - // Move result returned in rax to raddr, if needed - if (raddr != rax) { - __ movq(raddr, rax); +#undef __ +#define __ _masm-> + +class ZSaveLiveRegisters { +private: + struct XMMRegisterData { + XMMRegister _reg; + int _size; + + // Used by GrowableArray::find() + bool operator == (const XMMRegisterData& other) { + return _reg == other._reg; + } + }; + + MacroAssembler* const _masm; + GrowableArray<Register> _gp_registers; + GrowableArray<XMMRegisterData> _xmm_registers; + int _spill_size; + int _spill_offset; + + static int xmm_compare_register_size(XMMRegisterData* left, XMMRegisterData* right) { + if (left->_size == right->_size) { + return 0; + } + + return (left->_size < right->_size) ? -1 : 1; } - // Restore saved registers - if (raddr != r11) { - __ pop(r11); - } - if (raddr != r10) { - __ pop(r10); - } - if (raddr != r9) { - __ pop(r9); - } - if (raddr != r8) { - __ pop(r8); - } - if (raddr != rdi) { - __ pop(rdi); - } - if (raddr != rsi) { - __ pop(rsi); - } - if (raddr != rdx) { - __ pop(rdx); - } - if (raddr != rcx) { - __ pop(rcx); - } - if (raddr != rax) { - __ pop(rax); + static int xmm_slot_size(OptoReg::Name opto_reg) { + // The low order 4 bytes denote what size of the XMM register is live + return (opto_reg & 15) << 3; } - __ ret(0); + static uint xmm_ideal_reg_for_size(int reg_size) { + switch (reg_size) { + case 8: + return Op_VecD; + case 16: + return Op_VecX; + case 32: + return Op_VecY; + case 64: + return Op_VecZ; + default: + fatal("Invalid register size %d", reg_size); + return 0; + } + } - return start; + bool xmm_needs_vzeroupper() const { + return _xmm_registers.is_nonempty() && _xmm_registers.at(0)._size > 16; + } + + void xmm_register_save(const XMMRegisterData& reg_data) { + const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg()); + const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size); + _spill_offset -= reg_data._size; + vec_spill_helper(__ code(), false /* do_size */, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty); + } + + void xmm_register_restore(const XMMRegisterData& reg_data) { + const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg()); + const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size); + vec_spill_helper(__ code(), false /* do_size */, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty); + _spill_offset += reg_data._size; + } + + void gp_register_save(Register reg) { + _spill_offset -= 8; + __ movq(Address(rsp, _spill_offset), reg); + } + + void gp_register_restore(Register reg) { + __ movq(reg, Address(rsp, _spill_offset)); + _spill_offset += 8; + } + + void initialize(ZLoadBarrierStubC2* stub) { + // Create mask of caller saved registers that need to + // be saved/restored if live + RegMask caller_saved; + caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg())); + caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg())); + caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg())); + caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg())); + caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg())); + caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg())); + caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg())); + caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg())); + caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg())); + caller_saved.Remove(OptoReg::as_OptoReg(stub->ref()->as_VMReg())); + + // Create mask of live registers + RegMask live = stub->live(); + if (stub->tmp() != noreg) { + live.Insert(OptoReg::as_OptoReg(stub->tmp()->as_VMReg())); + } + + int gp_spill_size = 0; + int xmm_spill_size = 0; + + // Record registers that needs to be saved/restored + while (live.is_NotEmpty()) { + const OptoReg::Name opto_reg = live.find_first_elem(); + const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); + + live.Remove(opto_reg); + + if (vm_reg->is_Register()) { + if (caller_saved.Member(opto_reg)) { + _gp_registers.append(vm_reg->as_Register()); + gp_spill_size += 8; + } + } else if (vm_reg->is_XMMRegister()) { + // We encode in the low order 4 bits of the opto_reg, how large part of the register is live + const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15); + const int reg_size = xmm_slot_size(opto_reg); + const XMMRegisterData reg_data = { vm_reg_base->as_XMMRegister(), reg_size }; + const int reg_index = _xmm_registers.find(reg_data); + if (reg_index == -1) { + // Not previously appended + _xmm_registers.append(reg_data); + xmm_spill_size += reg_size; + } else { + // Previously appended, update size + const int reg_size_prev = _xmm_registers.at(reg_index)._size; + if (reg_size > reg_size_prev) { + _xmm_registers.at_put(reg_index, reg_data); + xmm_spill_size += reg_size - reg_size_prev; + } + } + } else { + fatal("Unexpected register type"); + } + } + + // Sort by size, largest first + _xmm_registers.sort(xmm_compare_register_size); + + // Stack pointer must be 16 bytes aligned for the call + _spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size, 16); + } + +public: + ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : + _masm(masm), + _gp_registers(), + _xmm_registers(), + _spill_size(0), + _spill_offset(0) { + + // + // Stack layout after registers have been spilled: + // + // | ... | original rsp, 16 bytes aligned + // ------------------ + // | zmm0 high | + // | ... | + // | zmm0 low | 16 bytes aligned + // | ... | + // | ymm1 high | + // | ... | + // | ymm1 low | 16 bytes aligned + // | ... | + // | xmmN high | + // | ... | + // | xmmN low | 8 bytes aligned + // | reg0 | 8 bytes aligned + // | reg1 | + // | ... | + // | regN | new rsp, if 16 bytes aligned + // | <padding> | else new rsp, 16 bytes aligned + // ------------------ + // + + // Figure out what registers to save/restore + initialize(stub); + + // Allocate stack space + if (_spill_size > 0) { + __ subptr(rsp, _spill_size); + } + + // Save XMM/YMM/ZMM registers + for (int i = 0; i < _xmm_registers.length(); i++) { + xmm_register_save(_xmm_registers.at(i)); + } + + if (xmm_needs_vzeroupper()) { + __ vzeroupper(); + } + + // Save general purpose registers + for (int i = 0; i < _gp_registers.length(); i++) { + gp_register_save(_gp_registers.at(i)); + } + } + + ~ZSaveLiveRegisters() { + // Restore general purpose registers + for (int i = _gp_registers.length() - 1; i >= 0; i--) { + gp_register_restore(_gp_registers.at(i)); + } + + __ vzeroupper(); + + // Restore XMM/YMM/ZMM registers + for (int i = _xmm_registers.length() - 1; i >= 0; i--) { + xmm_register_restore(_xmm_registers.at(i)); + } + + // Free stack space + if (_spill_size > 0) { + __ addptr(rsp, _spill_size); + } + } +}; + +class ZSetupArguments { +private: + MacroAssembler* const _masm; + const Register _ref; + const Address _ref_addr; + +public: + ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) : + _masm(masm), + _ref(stub->ref()), + _ref_addr(stub->ref_addr()) { + + // Setup arguments + if (_ref_addr.base() == noreg) { + // No self healing + if (_ref != c_rarg0) { + __ movq(c_rarg0, _ref); + } + __ xorq(c_rarg1, c_rarg1); + } else { + // Self healing + if (_ref == c_rarg0) { + __ lea(c_rarg1, _ref_addr); + } else if (_ref != c_rarg1) { + __ lea(c_rarg1, _ref_addr); + __ movq(c_rarg0, _ref); + } else if (_ref_addr.base() != c_rarg0 && _ref_addr.index() != c_rarg0) { + __ movq(c_rarg0, _ref); + __ lea(c_rarg1, _ref_addr); + } else { + __ xchgq(c_rarg0, c_rarg1); + if (_ref_addr.base() == c_rarg0) { + __ lea(c_rarg1, Address(c_rarg1, _ref_addr.index(), _ref_addr.scale(), _ref_addr.disp())); + } else if (_ref_addr.index() == c_rarg0) { + __ lea(c_rarg1, Address(_ref_addr.base(), c_rarg1, _ref_addr.scale(), _ref_addr.disp())); + } else { + ShouldNotReachHere(); + } + } + } + } + + ~ZSetupArguments() { + // Transfer result + if (_ref != rax) { + __ movq(_ref, rax); + } + } +}; + +#undef __ +#define __ masm-> + +void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { + BLOCK_COMMENT("ZLoadBarrierStubC2"); + + // Stub entry + __ bind(*stub->entry()); + + { + ZSaveLiveRegisters save_live_registers(masm, stub); + ZSetupArguments setup_arguments(masm, stub); + __ call(RuntimeAddress(stub->slow_path())); + } + + // Stub exit + __ jmp(*stub->continuation()); } #undef __ -static void barrier_stubs_init_inner(const char* label, const DecoratorSet decorators, address* stub) { - const int nregs = RegisterImpl::number_of_registers; - const int code_size = nregs * 128; // Rough estimate of code size - - ResourceMark rm; - - CodeBuffer buf(BufferBlob::create(label, code_size)); - StubCodeGenerator cgen(&buf); - - for (int i = 0; i < nregs; i++) { - const Register reg = as_Register(i); - stub[i] = generate_load_barrier_stub(&cgen, reg, decorators); - } -} - -void ZBarrierSetAssembler::barrier_stubs_init() { - barrier_stubs_init_inner("zgc_load_barrier_stubs", ON_STRONG_OOP_REF, _load_barrier_slow_stub); - barrier_stubs_init_inner("zgc_load_barrier_weak_stubs", ON_WEAK_OOP_REF, _load_barrier_weak_slow_stub); -} - -address ZBarrierSetAssembler::load_barrier_slow_stub(Register reg) { - return _load_barrier_slow_stub[reg->encoding()]; -} - -address ZBarrierSetAssembler::load_barrier_weak_slow_stub(Register reg) { - return _load_barrier_weak_slow_stub[reg->encoding()]; -} +#endif // COMPILER2
--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -24,6 +24,14 @@ #ifndef CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP #define CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP +#include "code/vmreg.hpp" +#include "oops/accessDecorators.hpp" +#ifdef COMPILER2 +#include "opto/optoreg.hpp" +#endif // COMPILER2 + +class MacroAssembler; + #ifdef COMPILER1 class LIR_Assembler; class LIR_OprDesc; @@ -32,14 +40,13 @@ class ZLoadBarrierStubC1; #endif // COMPILER1 +#ifdef COMPILER2 +class Node; +class ZLoadBarrierStubC2; +#endif // COMPILER2 + class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { -private: - address _load_barrier_slow_stub[RegisterImpl::number_of_registers]; - address _load_barrier_weak_slow_stub[RegisterImpl::number_of_registers]; - public: - ZBarrierSetAssembler(); - virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, @@ -83,10 +90,13 @@ DecoratorSet decorators) const; #endif // COMPILER1 - virtual void barrier_stubs_init(); +#ifdef COMPILER2 + OptoReg::Name refine_register(const Node* node, + OptoReg::Name opto_reg); - address load_barrier_slow_stub(Register reg); - address load_barrier_weak_slow_stub(Register reg); + void generate_c2_load_barrier_stub(MacroAssembler* masm, + ZLoadBarrierStubC2* stub) const; +#endif // COMPILER2 }; #endif // CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP
--- a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -40,7 +40,7 @@ // +--------------------------------+ 0x0000014000000000 (20TB) // | Remapped View | // +--------------------------------+ 0x0000010000000000 (16TB) -// | (Reserved, but unused) | +// . . // +--------------------------------+ 0x00000c0000000000 (12TB) // | Marked1 View | // +--------------------------------+ 0x0000080000000000 (8TB) @@ -75,7 +75,7 @@ // +--------------------------------+ 0x0000280000000000 (40TB) // | Remapped View | // +--------------------------------+ 0x0000200000000000 (32TB) -// | (Reserved, but unused) | +// . . // +--------------------------------+ 0x0000180000000000 (24TB) // | Marked1 View | // +--------------------------------+ 0x0000100000000000 (16TB) @@ -110,7 +110,7 @@ // +--------------------------------+ 0x0000500000000000 (80TB) // | Remapped View | // +--------------------------------+ 0x0000400000000000 (64TB) -// | (Reserved, but unused) | +// . . // +--------------------------------+ 0x0000300000000000 (48TB) // | Marked1 View | // +--------------------------------+ 0x0000200000000000 (32TB)
--- a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -36,7 +36,6 @@ // ------------------------------------------------------------------ // const size_t ZPlatformGranuleSizeShift = 21; // 2MB -const size_t ZPlatformMaxHeapSizeShift = 46; // 16TB const size_t ZPlatformNMethodDisarmedOffset = 4; const size_t ZPlatformCacheLineSize = 64;
--- a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad Fri Oct 11 10:39:58 2019 +0200 @@ -24,190 +24,144 @@ source_hpp %{ #include "gc/z/c2/zBarrierSetC2.hpp" +#include "gc/z/zThreadLocalData.hpp" %} source %{ -#include "gc/z/zBarrierSetAssembler.hpp" +static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { + ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); + __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); + __ jcc(Assembler::notZero, *stub->entry()); + __ bind(*stub->continuation()); +} -static void z_load_barrier_slow_reg(MacroAssembler& _masm, Register dst, Address src, bool weak) { - assert(dst != rsp, "Invalid register"); - assert(dst != r15, "Invalid register"); - - const address stub = weak ? ZBarrierSet::assembler()->load_barrier_weak_slow_stub(dst) - : ZBarrierSet::assembler()->load_barrier_slow_stub(dst); - __ lea(dst, src); - __ call(RuntimeAddress(stub)); +static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { + ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); + __ jmp(*stub->entry()); + __ bind(*stub->continuation()); } %} -// For XMM and YMM enabled processors -instruct zLoadBarrierSlowRegXmmAndYmm(rRegP dst, memory src, rFlagsReg cr, - rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3, - rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, - rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, - rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{ - match(Set dst (LoadBarrierSlowReg src dst)); - predicate(UseAVX <= 2 && !n->as_LoadBarrierSlowReg()->is_weak()); +// Load Pointer +instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr) +%{ + predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong); + match(Set dst (LoadP mem)); + effect(KILL cr, TEMP dst); - effect(KILL cr, - KILL x0, KILL x1, KILL x2, KILL x3, - KILL x4, KILL x5, KILL x6, KILL x7, - KILL x8, KILL x9, KILL x10, KILL x11, - KILL x12, KILL x13, KILL x14, KILL x15); + ins_cost(125); - format %{ "lea $dst, $src\n\t" - "call #ZLoadBarrierSlowPath" %} + format %{ "movq $dst, $mem" %} ins_encode %{ - z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, false /* weak */); + __ movptr($dst$$Register, $mem$$Address); + if (barrier_data() != ZLoadBarrierElided) { + z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */); + } %} - ins_pipe(pipe_slow); + + ins_pipe(ialu_reg_mem); %} -// For ZMM enabled processors -instruct zLoadBarrierSlowRegZmm(rRegP dst, memory src, rFlagsReg cr, - rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3, - rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, - rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, - rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15, - rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19, - rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23, - rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27, - rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{ +// Load Weak Pointer +instruct zLoadWeakP(rRegP dst, memory mem, rFlagsReg cr) +%{ + predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierWeak); + match(Set dst (LoadP mem)); + effect(KILL cr, TEMP dst); - match(Set dst (LoadBarrierSlowReg src dst)); - predicate(UseAVX == 3 && !n->as_LoadBarrierSlowReg()->is_weak()); + ins_cost(125); - effect(KILL cr, - KILL x0, KILL x1, KILL x2, KILL x3, - KILL x4, KILL x5, KILL x6, KILL x7, - KILL x8, KILL x9, KILL x10, KILL x11, - KILL x12, KILL x13, KILL x14, KILL x15, - KILL x16, KILL x17, KILL x18, KILL x19, - KILL x20, KILL x21, KILL x22, KILL x23, - KILL x24, KILL x25, KILL x26, KILL x27, - KILL x28, KILL x29, KILL x30, KILL x31); - - format %{ "lea $dst, $src\n\t" - "call #ZLoadBarrierSlowPath" %} + format %{ "movq $dst, $mem" %} ins_encode %{ - z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, false /* weak */); + __ movptr($dst$$Register, $mem$$Address); + z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, true /* weak */); %} - ins_pipe(pipe_slow); + + ins_pipe(ialu_reg_mem); %} -// For XMM and YMM enabled processors -instruct zLoadBarrierWeakSlowRegXmmAndYmm(rRegP dst, memory src, rFlagsReg cr, - rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3, - rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, - rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, - rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{ - match(Set dst (LoadBarrierSlowReg src dst)); - predicate(UseAVX <= 2 && n->as_LoadBarrierSlowReg()->is_weak()); +instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr) %{ + match(Set oldval (CompareAndExchangeP mem (Binary oldval newval))); + predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + effect(KILL cr, TEMP tmp); - effect(KILL cr, - KILL x0, KILL x1, KILL x2, KILL x3, - KILL x4, KILL x5, KILL x6, KILL x7, - KILL x8, KILL x9, KILL x10, KILL x11, - KILL x12, KILL x13, KILL x14, KILL x15); - - format %{ "lea $dst, $src\n\t" - "call #ZLoadBarrierSlowPath" %} + format %{ "lock\n\t" + "cmpxchgq $newval, $mem" %} ins_encode %{ - z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, true /* weak */); + if (barrier_data() != ZLoadBarrierElided) { + __ movptr($tmp$$Register, $oldval$$Register); + } + __ lock(); + __ cmpxchgptr($newval$$Register, $mem$$Address); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); + __ jcc(Assembler::zero, good); + z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); + __ movptr($oldval$$Register, $tmp$$Register); + __ lock(); + __ cmpxchgptr($newval$$Register, $mem$$Address); + __ bind(good); + } %} - ins_pipe(pipe_slow); + + ins_pipe(pipe_cmpxchg); %} -// For ZMM enabled processors -instruct zLoadBarrierWeakSlowRegZmm(rRegP dst, memory src, rFlagsReg cr, - rxmm0 x0, rxmm1 x1, rxmm2 x2, rxmm3 x3, - rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, - rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, - rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15, - rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19, - rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23, - rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27, - rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{ +instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlagsReg cr, rax_RegP oldval) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + effect(KILL cr, KILL oldval, TEMP tmp); - match(Set dst (LoadBarrierSlowReg src dst)); - predicate(UseAVX == 3 && n->as_LoadBarrierSlowReg()->is_weak()); - - effect(KILL cr, - KILL x0, KILL x1, KILL x2, KILL x3, - KILL x4, KILL x5, KILL x6, KILL x7, - KILL x8, KILL x9, KILL x10, KILL x11, - KILL x12, KILL x13, KILL x14, KILL x15, - KILL x16, KILL x17, KILL x18, KILL x19, - KILL x20, KILL x21, KILL x22, KILL x23, - KILL x24, KILL x25, KILL x26, KILL x27, - KILL x28, KILL x29, KILL x30, KILL x31); - - format %{ "lea $dst, $src\n\t" - "call #ZLoadBarrierSlowPath" %} + format %{ "lock\n\t" + "cmpxchgq $newval, $mem\n\t" + "sete $res\n\t" + "movzbl $res, $res" %} ins_encode %{ - z_load_barrier_slow_reg(_masm, $dst$$Register, $src$$Address, true /* weak */); + if (barrier_data() != ZLoadBarrierElided) { + __ movptr($tmp$$Register, $oldval$$Register); + } + __ lock(); + __ cmpxchgptr($newval$$Register, $mem$$Address); + if (barrier_data() != ZLoadBarrierElided) { + Label good; + __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); + __ jcc(Assembler::zero, good); + z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); + __ movptr($oldval$$Register, $tmp$$Register); + __ lock(); + __ cmpxchgptr($newval$$Register, $mem$$Address); + __ bind(good); + __ cmpptr($tmp$$Register, $oldval$$Register); + } + __ setb(Assembler::equal, $res$$Register); + __ movzbl($res$$Register, $res$$Register); %} - ins_pipe(pipe_slow); + + ins_pipe(pipe_cmpxchg); %} -// Specialized versions of compareAndExchangeP that adds a keepalive that is consumed -// but doesn't affect output. +instruct zXChgP(memory mem, rRegP newval, rFlagsReg cr) %{ + match(Set newval (GetAndSetP mem newval)); + predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); + effect(KILL cr); -instruct z_compareAndExchangeP( - memory mem_ptr, - rax_RegP oldval, rRegP newval, rRegP keepalive, - rFlagsReg cr) %{ - predicate(VM_Version::supports_cx8()); - match(Set oldval (ZCompareAndExchangeP (Binary mem_ptr keepalive) (Binary oldval newval))); - effect(KILL cr); + format %{ "xchgq $newval, $mem" %} - format %{ "cmpxchgq $mem_ptr,$newval\t# " - "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} - opcode(0x0F, 0xB1); - ins_encode(lock_prefix, - REX_reg_mem_wide(newval, mem_ptr), - OpcP, OpcS, - reg_mem(newval, mem_ptr) // lock cmpxchg - ); - ins_pipe( pipe_cmpxchg ); + ins_encode %{ + __ xchgptr($newval$$Register, $mem$$Address); + if (barrier_data() != ZLoadBarrierElided) { + z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */); + } + %} + + ins_pipe(pipe_cmpxchg); %} - -instruct z_compareAndSwapP(rRegI res, - memory mem_ptr, - rax_RegP oldval, rRegP newval, rRegP keepalive, - rFlagsReg cr) %{ - predicate(VM_Version::supports_cx8()); - match(Set res (ZCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval))); - match(Set res (ZWeakCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval))); - effect(KILL cr, KILL oldval); - - format %{ "cmpxchgq $mem_ptr,$newval\t# " - "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" - "sete $res\n\t" - "movzbl $res, $res" %} - opcode(0x0F, 0xB1); - ins_encode(lock_prefix, - REX_reg_mem_wide(newval, mem_ptr), - OpcP, OpcS, - reg_mem(newval, mem_ptr), - REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete - REX_reg_breg(res, res), // movzbl - Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); - ins_pipe( pipe_cmpxchg ); -%} - -instruct z_xchgP( memory mem, rRegP newval, rRegP keepalive) %{ - match(Set newval (ZGetAndSetP mem (Binary newval keepalive))); - format %{ "XCHGQ $newval,[$mem]" %} - ins_encode %{ - __ xchgq($newval$$Register, $mem$$Address); - %} - ins_pipe( pipe_cmpxchg ); -%}
--- a/src/hotspot/cpu/x86/globals_x86.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/globals_x86.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -214,5 +214,15 @@ "Use BMI2 instructions") \ \ diagnostic(bool, UseLibmIntrinsic, true, \ - "Use Libm Intrinsics") + "Use Libm Intrinsics") \ + \ + /* Minimum array size in bytes to use AVX512 intrinsics */ \ + /* for copy, inflate and fill which don't bail out early based on any */ \ + /* condition. When this value is set to zero compare operations like */ \ + /* compare, vectorizedMismatch, compress can also use AVX512 intrinsics.*/\ + diagnostic(int, AVX3Threshold, 4096, \ + "Minimum array size in bytes to use AVX512 intrinsics" \ + "for copy, inflate and fill. When this value is set as zero" \ + "compare operations can also use AVX512 intrinsics.") \ + range(0, max_jint) #endif // CPU_X86_GLOBALS_X86_HPP
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -7061,7 +7061,7 @@ bind(COMPARE_WIDE_VECTORS_LOOP); #ifdef _LP64 - if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop + if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop cmpl(cnt2, stride2x2); jccb(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2); testl(cnt2, stride2x2-1); // cnt2 holds the vector count @@ -7321,7 +7321,7 @@ testl(len, len); jcc(Assembler::zero, FALSE_LABEL); - if ((UseAVX > 2) && // AVX512 + if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512 VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()) { @@ -7394,7 +7394,7 @@ } else { movl(result, len); // copy - if (UseAVX == 2 && UseSSE >= 2) { + if (UseAVX >= 2 && UseSSE >= 2) { // With AVX2, use 32-byte vector compare Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; @@ -7567,14 +7567,12 @@ lea(ary2, Address(ary2, limit, Address::times_1)); negptr(limit); - bind(COMPARE_WIDE_VECTORS); - #ifdef _LP64 - if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop + if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop Label COMPARE_WIDE_VECTORS_LOOP_AVX2, COMPARE_WIDE_VECTORS_LOOP_AVX3; cmpl(limit, -64); - jccb(Assembler::greater, COMPARE_WIDE_VECTORS_LOOP_AVX2); + jcc(Assembler::greater, COMPARE_WIDE_VECTORS_LOOP_AVX2); bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop @@ -7607,7 +7605,7 @@ }//if (VM_Version::supports_avx512vlbw()) #endif //_LP64 - + bind(COMPARE_WIDE_VECTORS); vmovdqu(vec1, Address(ary1, limit, Address::times_1)); vmovdqu(vec2, Address(ary2, limit, Address::times_1)); vpxor(vec1, vec2); @@ -7833,32 +7831,33 @@ assert( UseSSE >= 2, "supported cpu only" ); Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; movdl(xtmp, value); - if (UseAVX > 2 && UseUnalignedLoadStores) { + if (UseAVX >= 2 && UseUnalignedLoadStores) { + Label L_check_fill_32_bytes; + if (UseAVX > 2) { + // Fill 64-byte chunks + Label L_fill_64_bytes_loop_avx3, L_check_fill_64_bytes_avx2; + + // If number of bytes to fill < AVX3Threshold, perform fill using AVX2 + cmpl(count, AVX3Threshold); + jccb(Assembler::below, L_check_fill_64_bytes_avx2); + + vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit); + + subl(count, 16 << shift); + jccb(Assembler::less, L_check_fill_32_bytes); + align(16); + + BIND(L_fill_64_bytes_loop_avx3); + evmovdqul(Address(to, 0), xtmp, Assembler::AVX_512bit); + addptr(to, 64); + subl(count, 16 << shift); + jcc(Assembler::greaterEqual, L_fill_64_bytes_loop_avx3); + jmpb(L_check_fill_32_bytes); + + BIND(L_check_fill_64_bytes_avx2); + } // Fill 64-byte chunks - Label L_fill_64_bytes_loop, L_check_fill_32_bytes; - vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit); - - subl(count, 16 << shift); - jcc(Assembler::less, L_check_fill_32_bytes); - align(16); - - BIND(L_fill_64_bytes_loop); - evmovdqul(Address(to, 0), xtmp, Assembler::AVX_512bit); - addptr(to, 64); - subl(count, 16 << shift); - jcc(Assembler::greaterEqual, L_fill_64_bytes_loop); - - BIND(L_check_fill_32_bytes); - addl(count, 8 << shift); - jccb(Assembler::less, L_check_fill_8_bytes); - vmovdqu(Address(to, 0), xtmp); - addptr(to, 32); - subl(count, 8 << shift); - - BIND(L_check_fill_8_bytes); - } else if (UseAVX == 2 && UseUnalignedLoadStores) { - // Fill 64-byte chunks - Label L_fill_64_bytes_loop, L_check_fill_32_bytes; + Label L_fill_64_bytes_loop; vpbroadcastd(xtmp, xtmp, Assembler::AVX_256bit); subl(count, 16 << shift); @@ -8572,12 +8571,13 @@ shlq(length); xorq(result, result); - if ((UseAVX > 2) && + if ((AVX3Threshold == 0) && (UseAVX > 2) && VM_Version::supports_avx512vlbw()) { Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL; cmpq(length, 64); jcc(Assembler::less, VECTOR32_TAIL); + movq(tmp1, length); andq(tmp1, 0x3F); // tail count andq(length, ~(0x3F)); //vector count @@ -10034,7 +10034,7 @@ // save length for return push(len); - if ((UseAVX > 2) && // AVX512 + if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512 VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()) { @@ -10226,7 +10226,7 @@ // } void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, XMMRegister tmp1, Register tmp2) { - Label copy_chars_loop, done, below_threshold; + Label copy_chars_loop, done, below_threshold, avx3_threshold; // rsi: src // rdi: dst // rdx: len @@ -10236,7 +10236,7 @@ // rdi holds start addr of destination char[] // rdx holds length assert_different_registers(src, dst, len, tmp2); - + movl(tmp2, len); if ((UseAVX > 2) && // AVX512 VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()) { @@ -10248,9 +10248,11 @@ testl(len, -16); jcc(Assembler::zero, below_threshold); + testl(len, -1 * AVX3Threshold); + jcc(Assembler::zero, avx3_threshold); + // In order to use only one arithmetic operation for the main loop we use // this pre-calculation - movl(tmp2, len); andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop andl(len, -32); // vector count jccb(Assembler::zero, copy_tail); @@ -10281,12 +10283,11 @@ evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit); jmp(done); + bind(avx3_threshold); } if (UseSSE42Intrinsics) { Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail; - movl(tmp2, len); - if (UseAVX > 1) { andl(tmp2, (16 - 1)); andl(len, -16); @@ -10311,13 +10312,7 @@ bind(below_threshold); bind(copy_new_tail); - if ((UseAVX > 2) && - VM_Version::supports_avx512vlbw() && - VM_Version::supports_bmi2()) { - movl(tmp2, len); - } else { - movl(len, tmp2); - } + movl(len, tmp2); andl(tmp2, 0x00000007); andl(len, 0xFFFFFFF8); jccb(Assembler::zero, copy_tail);
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -1304,30 +1304,58 @@ if (UseUnalignedLoadStores) { Label L_end; // Copy 64-bytes per iteration - __ BIND(L_loop); if (UseAVX > 2) { + Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold; + + __ BIND(L_copy_bytes); + __ cmpptr(qword_count, (-1 * AVX3Threshold / 8)); + __ jccb(Assembler::less, L_above_threshold); + __ jmpb(L_below_threshold); + + __ bind(L_loop_avx512); __ evmovdqul(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit); __ evmovdqul(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit); - } else if (UseAVX == 2) { + __ bind(L_above_threshold); + __ addptr(qword_count, 8); + __ jcc(Assembler::lessEqual, L_loop_avx512); + __ jmpb(L_32_byte_head); + + __ bind(L_loop_avx2); __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24)); __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1); + __ bind(L_below_threshold); + __ addptr(qword_count, 8); + __ jcc(Assembler::lessEqual, L_loop_avx2); + + __ bind(L_32_byte_head); + __ subptr(qword_count, 4); // sub(8) and add(4) + __ jccb(Assembler::greater, L_end); } else { - __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); - __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); - __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40)); - __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1); - __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24)); - __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2); - __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8)); - __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3); + __ BIND(L_loop); + if (UseAVX == 2) { + __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); + __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); + __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24)); + __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1); + } else { + __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); + __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); + __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40)); + __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1); + __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24)); + __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2); + __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8)); + __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3); + } + + __ BIND(L_copy_bytes); + __ addptr(qword_count, 8); + __ jcc(Assembler::lessEqual, L_loop); + __ subptr(qword_count, 4); // sub(8) and add(4) + __ jccb(Assembler::greater, L_end); } - __ BIND(L_copy_bytes); - __ addptr(qword_count, 8); - __ jcc(Assembler::lessEqual, L_loop); - __ subptr(qword_count, 4); // sub(8) and add(4) - __ jccb(Assembler::greater, L_end); // Copy trailing 32 bytes if (UseAVX >= 2) { __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); @@ -1384,31 +1412,59 @@ if (UseUnalignedLoadStores) { Label L_end; // Copy 64-bytes per iteration - __ BIND(L_loop); if (UseAVX > 2) { + Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold; + + __ BIND(L_copy_bytes); + __ cmpptr(qword_count, (AVX3Threshold / 8)); + __ jccb(Assembler::greater, L_above_threshold); + __ jmpb(L_below_threshold); + + __ BIND(L_loop_avx512); __ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 0), Assembler::AVX_512bit); __ evmovdqul(Address(dest, qword_count, Address::times_8, 0), xmm0, Assembler::AVX_512bit); - } else if (UseAVX == 2) { + __ bind(L_above_threshold); + __ subptr(qword_count, 8); + __ jcc(Assembler::greaterEqual, L_loop_avx512); + __ jmpb(L_32_byte_head); + + __ bind(L_loop_avx2); __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32)); __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0); - __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); - __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); + __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); + __ bind(L_below_threshold); + __ subptr(qword_count, 8); + __ jcc(Assembler::greaterEqual, L_loop_avx2); + + __ bind(L_32_byte_head); + __ addptr(qword_count, 4); // add(8) and sub(4) + __ jccb(Assembler::less, L_end); } else { - __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48)); - __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0); - __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32)); - __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1); - __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16)); - __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2); - __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0)); - __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3); + __ BIND(L_loop); + if (UseAVX == 2) { + __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0); + __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); + __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); + } else { + __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48)); + __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0); + __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32)); + __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1); + __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16)); + __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2); + __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0)); + __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3); + } + + __ BIND(L_copy_bytes); + __ subptr(qword_count, 8); + __ jcc(Assembler::greaterEqual, L_loop); + + __ addptr(qword_count, 4); // add(8) and sub(4) + __ jccb(Assembler::less, L_end); } - __ BIND(L_copy_bytes); - __ subptr(qword_count, 8); - __ jcc(Assembler::greaterEqual, L_loop); - - __ addptr(qword_count, 4); // add(8) and sub(4) - __ jccb(Assembler::less, L_end); // Copy trailing 32 bytes if (UseAVX >= 2) { __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -381,6 +381,10 @@ __ cmpl(rax, 0xE0); __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported + __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); + __ movl(rax, Address(rsi, 0)); + __ cmpl(rax, 0x50654); // If it is Skylake + __ jcc(Assembler::equal, legacy_setup); // If UseAVX is unitialized or is set by the user to include EVEX if (use_evex) { // EVEX setup: run in lowest evex mode @@ -465,6 +469,11 @@ __ cmpl(rax, 0xE0); __ jcc(Assembler::notEqual, legacy_save_restore); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); + __ movl(rax, Address(rsi, 0)); + __ cmpl(rax, 0x50654); // If it is Skylake + __ jcc(Assembler::equal, legacy_save_restore); + // If UseAVX is unitialized or is set by the user to include EVEX if (use_evex) { // EVEX check: run in lowest evex mode @@ -660,6 +669,9 @@ } if (FLAG_IS_DEFAULT(UseAVX)) { FLAG_SET_DEFAULT(UseAVX, use_avx_limit); + if (is_intel_family_core() && _model == CPU_MODEL_SKYLAKE && _stepping < 5) { + FLAG_SET_DEFAULT(UseAVX, 2); //Set UseAVX=2 for Skylake + } } else if (UseAVX > use_avx_limit) { warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", (int) UseAVX, use_avx_limit); FLAG_SET_DEFAULT(UseAVX, use_avx_limit); @@ -1059,6 +1071,13 @@ } #endif // COMPILER2 && ASSERT + if (!FLAG_IS_DEFAULT(AVX3Threshold)) { + if (!is_power_of_2(AVX3Threshold)) { + warning("AVX3Threshold must be a power of 2"); + FLAG_SET_DEFAULT(AVX3Threshold, 4096); + } + } + #ifdef _LP64 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { UseMultiplyToLenIntrinsic = true;
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/vm_version_x86.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -366,7 +366,7 @@ CPU_MODEL_HASWELL_E3 = 0x3c, CPU_MODEL_HASWELL_E7 = 0x3f, CPU_MODEL_BROADWELL = 0x3d, - CPU_MODEL_SKYLAKE = CPU_MODEL_HASWELL_E3 + CPU_MODEL_SKYLAKE = 0x55 }; // cpuid information block. All info derived from executing cpuid with
--- a/src/hotspot/cpu/x86/x86.ad Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/x86.ad Fri Oct 11 10:39:58 2019 +0200 @@ -1097,138 +1097,6 @@ reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); -reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); -reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h); -reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p); - -reg_class xmm1_reg(XMM1, XMM1b, XMM1c, XMM1d); -reg_class ymm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h); -reg_class zmm1_reg(XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p); - -reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d); -reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h); -reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p); - -reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d); -reg_class ymm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h); -reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p); - -reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d); -reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h); -reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p); - -reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d); -reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h); -reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p); - -reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d); -reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h); -reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p); - -reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d); -reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h); -reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p); - -#ifdef _LP64 - -reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d); -reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h); -reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p); - -reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d); -reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h); -reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p); - -reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d); -reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h); -reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p); - -reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d); -reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h); -reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p); - -reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d); -reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h); -reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p); - -reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d); -reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h); -reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p); - -reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d); -reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h); -reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p); - -reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d); -reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); -reg_class zmm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); - -reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d); -reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h); -reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p); - -reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d); -reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h); -reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p); - -reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d); -reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h); -reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p); - -reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d); -reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h); -reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p); - -reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d); -reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h); -reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p); - -reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d); -reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h); -reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p); - -reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d); -reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h); -reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p); - -reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d); -reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h); -reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p); - -reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d); -reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h); -reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p); - -reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d); -reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h); -reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p); - -reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d); -reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h); -reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p); - -reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d); -reg_class ymm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h); -reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p); - -reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d); -reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h); -reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p); - -reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d); -reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h); -reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p); - -reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d); -reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h); -reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p); - -reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d); -reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h); -reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); - -#endif - %} @@ -1800,8 +1668,8 @@ return (UseAVX > 2) ? 6 : 4; } -static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, - int stack_offset, int reg, uint ireg, outputStream* st) { +int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st) { // In 64-bit VM size calculation is very complex. Emitting instructions // into scratch buffer is used to get size in 64-bit VM. LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) @@ -2863,6 +2731,7 @@ %} +#ifdef _LP64 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ predicate(UseSSE>=4); match(Set dst (RoundDoubleMode src rmode)); @@ -2963,6 +2832,7 @@ %} ins_pipe( pipe_slow ); %} +#endif // _LP64 instruct onspinwait() %{ match(OnSpinWait); @@ -3859,7 +3729,7 @@ %} instruct Repl2F_zero(vecD dst, immF0 zero) %{ - predicate(n->as_Vector()->length() == 2 && UseAVX < 3); + predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateF zero)); format %{ "xorps $dst,$dst\t! replicate2F zero" %} ins_encode %{ @@ -3869,7 +3739,7 @@ %} instruct Repl4F_zero(vecX dst, immF0 zero) %{ - predicate(n->as_Vector()->length() == 4 && UseAVX < 3); + predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateF zero)); format %{ "xorps $dst,$dst\t! replicate4F zero" %} ins_encode %{ @@ -3879,7 +3749,7 @@ %} instruct Repl8F_zero(vecY dst, immF0 zero) %{ - predicate(n->as_Vector()->length() == 8 && UseAVX < 3); + predicate(n->as_Vector()->length() == 8 && UseAVX > 0); match(Set dst (ReplicateF zero)); format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} ins_encode %{ @@ -3953,7 +3823,7 @@ // Replicate double (8 byte) scalar zero to be vector instruct Repl2D_zero(vecX dst, immD0 zero) %{ - predicate(n->as_Vector()->length() == 2 && UseAVX < 3); + predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateD zero)); format %{ "xorpd $dst,$dst\t! replicate2D zero" %} ins_encode %{ @@ -3963,7 +3833,7 @@ %} instruct Repl4D_zero(vecY dst, immD0 zero) %{ - predicate(n->as_Vector()->length() == 4 && UseAVX < 3); + predicate(n->as_Vector()->length() == 4 && UseAVX > 0); match(Set dst (ReplicateD zero)); format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} ins_encode %{ @@ -4888,42 +4758,6 @@ ins_pipe( pipe_slow ); %} -instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ - predicate(n->as_Vector()->length() == 2 && UseAVX > 2); - match(Set dst (ReplicateF zero)); - format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %} - ins_encode %{ - // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation - int vector_len = 2; - __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); - %} - ins_pipe( fpu_reg_reg ); -%} - -instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ - predicate(n->as_Vector()->length() == 4 && UseAVX > 2); - match(Set dst (ReplicateF zero)); - format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} - ins_encode %{ - // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation - int vector_len = 2; - __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); - %} - ins_pipe( fpu_reg_reg ); -%} - -instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ - predicate(n->as_Vector()->length() == 8 && UseAVX > 2); - match(Set dst (ReplicateF zero)); - format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} - ins_encode %{ - // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation - int vector_len = 2; - __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); - %} - ins_pipe( fpu_reg_reg ); -%} - instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ predicate(n->as_Vector()->length() == 16 && UseAVX > 2); match(Set dst (ReplicateF zero)); @@ -4980,30 +4814,6 @@ ins_pipe( pipe_slow ); %} -instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ - predicate(n->as_Vector()->length() == 2 && UseAVX > 2); - match(Set dst (ReplicateD zero)); - format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} - ins_encode %{ - // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation - int vector_len = 2; - __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); - %} - ins_pipe( fpu_reg_reg ); -%} - -instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ - predicate(n->as_Vector()->length() == 4 && UseAVX > 2); - match(Set dst (ReplicateD zero)); - format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} - ins_encode %{ - // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation - int vector_len = 2; - __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); - %} - ins_pipe( fpu_reg_reg ); -%} - instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateD zero));
--- a/src/hotspot/cpu/x86/x86_64.ad Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/cpu/x86/x86_64.ad Fri Oct 11 10:39:58 2019 +0200 @@ -1035,8 +1035,8 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, uint ireg, outputStream* st); -static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, - int stack_offset, int reg, uint ireg, outputStream* st); +int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st); static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, int dst_offset, uint ireg, outputStream* st) { @@ -4270,200 +4270,6 @@ %} %} -// Operands for bound floating pointer register arguments -operand rxmm0() %{ - constraint(ALLOC_IN_RC(xmm0_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm1() %{ - constraint(ALLOC_IN_RC(xmm1_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm2() %{ - constraint(ALLOC_IN_RC(xmm2_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm3() %{ - constraint(ALLOC_IN_RC(xmm3_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm4() %{ - constraint(ALLOC_IN_RC(xmm4_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm5() %{ - constraint(ALLOC_IN_RC(xmm5_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm6() %{ - constraint(ALLOC_IN_RC(xmm6_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm7() %{ - constraint(ALLOC_IN_RC(xmm7_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm8() %{ - constraint(ALLOC_IN_RC(xmm8_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm9() %{ - constraint(ALLOC_IN_RC(xmm9_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm10() %{ - constraint(ALLOC_IN_RC(xmm10_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm11() %{ - constraint(ALLOC_IN_RC(xmm11_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm12() %{ - constraint(ALLOC_IN_RC(xmm12_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm13() %{ - constraint(ALLOC_IN_RC(xmm13_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm14() %{ - constraint(ALLOC_IN_RC(xmm14_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm15() %{ - constraint(ALLOC_IN_RC(xmm15_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm16() %{ - constraint(ALLOC_IN_RC(xmm16_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm17() %{ - constraint(ALLOC_IN_RC(xmm17_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm18() %{ - constraint(ALLOC_IN_RC(xmm18_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm19() %{ - constraint(ALLOC_IN_RC(xmm19_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm20() %{ - constraint(ALLOC_IN_RC(xmm20_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm21() %{ - constraint(ALLOC_IN_RC(xmm21_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm22() %{ - constraint(ALLOC_IN_RC(xmm22_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm23() %{ - constraint(ALLOC_IN_RC(xmm23_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm24() %{ - constraint(ALLOC_IN_RC(xmm24_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm25() %{ - constraint(ALLOC_IN_RC(xmm25_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm26() %{ - constraint(ALLOC_IN_RC(xmm26_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm27() %{ - constraint(ALLOC_IN_RC(xmm27_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm28() %{ - constraint(ALLOC_IN_RC(xmm28_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm29() %{ - constraint(ALLOC_IN_RC(xmm29_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm30() %{ - constraint(ALLOC_IN_RC(xmm30_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} -operand rxmm31() %{ - constraint(ALLOC_IN_RC(xmm31_reg)); - match(VecX); - format%{%} - interface(REG_INTER); -%} - //----------OPERAND CLASSES---------------------------------------------------- // Operand Classes are groups of operands that are used as to simplify // instruction definitions by not requiring the AD writer to specify separate @@ -5356,6 +5162,7 @@ instruct loadP(rRegP dst, memory mem) %{ match(Set dst (LoadP mem)); + predicate(n->as_Load()->barrier_data() == 0); ins_cost(125); // XXX format %{ "movq $dst, $mem\t# ptr" %} @@ -7844,6 +7651,7 @@ rax_RegP oldval, rRegP newval, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) " @@ -7895,7 +7703,7 @@ rax_RegP oldval, rRegP newval, rFlagsReg cr) %{ - predicate(VM_Version::supports_cx8()); + predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); @@ -8137,7 +7945,7 @@ rax_RegP oldval, rRegP newval, rFlagsReg cr) %{ - predicate(VM_Version::supports_cx8()); + predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0); match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); effect(KILL cr); @@ -8282,6 +8090,7 @@ instruct xchgP( memory mem, rRegP newval) %{ match(Set newval (GetAndSetP mem newval)); + predicate(n->as_LoadStore()->barrier_data() == 0); format %{ "XCHGQ $newval,[$mem]" %} ins_encode %{ __ xchgq($newval$$Register, $mem$$Address); @@ -12136,6 +11945,7 @@ instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) %{ match(Set cr (CmpP op1 (LoadP op2))); + predicate(n->in(2)->as_Load()->barrier_data() == 0); ins_cost(500); // XXX format %{ "cmpq $op1, $op2\t# ptr" %} @@ -12161,7 +11971,8 @@ // and raw pointers have no anti-dependencies. instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) %{ - predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none); + predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none && + n->in(2)->as_Load()->barrier_data() == 0); match(Set cr (CmpP op1 (LoadP op2))); format %{ "cmpq $op1, $op2\t# raw ptr" %} @@ -12186,7 +11997,8 @@ // any compare to a zero should be eq/neq. instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) %{ - predicate(!UseCompressedOops || (CompressedOops::base() != NULL)); + predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) && + n->in(1)->as_Load()->barrier_data() == 0); match(Set cr (CmpP (LoadP op) zero)); ins_cost(500); // XXX @@ -12199,7 +12011,9 @@ instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) %{ - predicate(UseCompressedOops && (CompressedOops::base() == NULL) && (CompressedKlassPointers::base() == NULL)); + predicate(UseCompressedOops && (CompressedOops::base() == NULL) && + (CompressedKlassPointers::base() == NULL) && + n->in(1)->as_Load()->barrier_data() == 0); match(Set cr (CmpP (LoadP mem) zero)); format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
--- a/src/hotspot/os/aix/os_aix.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/os/aix/os_aix.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -132,18 +132,6 @@ #define ERROR_MP_VMGETINFO_CLAIMS_NO_SUPPORT_FOR_64K 103 // excerpts from systemcfg.h that might be missing on older os levels -#ifndef PV_5_Compat - #define PV_5_Compat 0x0F8000 /* Power PC 5 */ -#endif -#ifndef PV_6 - #define PV_6 0x100000 /* Power PC 6 */ -#endif -#ifndef PV_6_1 - #define PV_6_1 0x100001 /* Power PC 6 DD1.x */ -#endif -#ifndef PV_6_Compat - #define PV_6_Compat 0x108000 /* Power PC 6 */ -#endif #ifndef PV_7 #define PV_7 0x200000 /* Power PC 7 */ #endif @@ -156,6 +144,13 @@ #ifndef PV_8_Compat #define PV_8_Compat 0x308000 /* Power PC 8 */ #endif +#ifndef PV_9 + #define PV_9 0x400000 /* Power PC 9 */ +#endif +#ifndef PV_9_Compat + #define PV_9_Compat 0x408000 /* Power PC 9 */ +#endif + static address resolve_function_descriptor_to_code_pointer(address p); @@ -1386,15 +1381,7 @@ void os::print_os_info(outputStream* st) { st->print("OS:"); - st->print("uname:"); - struct utsname name; - uname(&name); - st->print(name.sysname); st->print(" "); - st->print(name.nodename); st->print(" "); - st->print(name.release); st->print(" "); - st->print(name.version); st->print(" "); - st->print(name.machine); - st->cr(); + os::Posix::print_uname_info(st); uint32_t ver = os::Aix::os_version(); st->print_cr("AIX kernel version %u.%u.%u.%u", @@ -1402,16 +1389,12 @@ os::Posix::print_rlimit_info(st); + os::Posix::print_load_average(st); + // _SC_THREAD_THREADS_MAX is the maximum number of threads within a process. long tmax = sysconf(_SC_THREAD_THREADS_MAX); st->print_cr("maximum #threads within a process:%ld", tmax); - // load average - st->print("load average:"); - double loadavg[3] = {-1.L, -1.L, -1.L}; - os::loadavg(loadavg, 3); - st->print_cr("%0.02f %0.02f %0.02f", loadavg[0], loadavg[1], loadavg[2]); - // print wpar info libperfstat::wparinfo_t wi; if (libperfstat::get_wparinfo(&wi)) { @@ -1504,6 +1487,9 @@ void os::get_summary_cpu_info(char* buf, size_t buflen) { // read _system_configuration.version switch (_system_configuration.version) { + case PV_9: + strncpy(buf, "Power PC 9", buflen); + break; case PV_8: strncpy(buf, "Power PC 8", buflen); break; @@ -1537,6 +1523,9 @@ case PV_8_Compat: strncpy(buf, "PV_8_Compat", buflen); break; + case PV_9_Compat: + strncpy(buf, "PV_9_Compat", buflen); + break; default: strncpy(buf, "unknown", buflen); }
--- a/src/hotspot/os/posix/gc/z/zVirtualMemory_posix.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/os/posix/gc/z/zVirtualMemory_posix.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -43,7 +43,7 @@ if ((uintptr_t)res != start) { // Failed to reserve memory at the requested address - unmap(start, size); + unmap((uintptr_t)res, size); return false; }
--- a/src/hotspot/os/posix/os_posix.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/os/posix/os_posix.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -373,8 +373,12 @@ void os::Posix::print_load_average(outputStream* st) { st->print("load average:"); double loadavg[3]; - os::loadavg(loadavg, 3); - st->print("%0.02f %0.02f %0.02f", loadavg[0], loadavg[1], loadavg[2]); + int res = os::loadavg(loadavg, 3); + if (res != -1) { + st->print("%0.02f %0.02f %0.02f", loadavg[0], loadavg[1], loadavg[2]); + } else { + st->print(" Unavailable"); + } st->cr(); }
--- a/src/hotspot/os/windows/os_windows.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/os/windows/os_windows.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -4189,8 +4189,6 @@ if (::isalpha(buf[0]) && !::IsDBCSLeadByte(buf[0]) && buf[1] == ':' && buf[2] == '\\') { prefix = L"\\\\?\\"; } else if (buf[0] == '\\' && buf[1] == '\\') { - assert(buf[2] != '\\'); - if (buf[2] == '?' && buf[3] == '\\') { prefix = L""; needs_fullpath = false;
--- a/src/hotspot/share/adlc/formssel.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/adlc/formssel.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -774,11 +774,6 @@ !strcmp(_matrule->_rChild->_opType,"CheckCastPP") || !strcmp(_matrule->_rChild->_opType,"GetAndSetP") || !strcmp(_matrule->_rChild->_opType,"GetAndSetN") || -#if INCLUDE_ZGC - !strcmp(_matrule->_rChild->_opType,"ZGetAndSetP") || - !strcmp(_matrule->_rChild->_opType,"ZCompareAndExchangeP") || - !strcmp(_matrule->_rChild->_opType,"LoadBarrierSlowReg") || -#endif #if INCLUDE_SHENANDOAHGC !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeP") || !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeN") || @@ -3512,9 +3507,6 @@ "StoreCM", "GetAndSetB", "GetAndSetS", "GetAndAddI", "GetAndSetI", "GetAndSetP", "GetAndAddB", "GetAndAddS", "GetAndAddL", "GetAndSetL", "GetAndSetN", -#if INCLUDE_ZGC - "ZGetAndSetP", "ZCompareAndSwapP", "ZCompareAndExchangeP", "ZWeakCompareAndSwapP", -#endif "ClearArray" }; int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*);
--- a/src/hotspot/share/ci/ciEnv.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/ci/ciEnv.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -239,6 +239,7 @@ _jvmti_can_access_local_variables = JvmtiExport::can_access_local_variables(); _jvmti_can_post_on_exceptions = JvmtiExport::can_post_on_exceptions(); _jvmti_can_pop_frame = JvmtiExport::can_pop_frame(); + _jvmti_can_get_owned_monitor_info = JvmtiExport::can_get_owned_monitor_info(); } bool ciEnv::jvmti_state_changed() const { @@ -263,6 +264,10 @@ JvmtiExport::can_pop_frame()) { return true; } + if (!_jvmti_can_get_owned_monitor_info && + JvmtiExport::can_get_owned_monitor_info()) { + return true; + } return false; }
--- a/src/hotspot/share/ci/ciEnv.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/ci/ciEnv.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -73,6 +73,7 @@ bool _jvmti_can_access_local_variables; bool _jvmti_can_post_on_exceptions; bool _jvmti_can_pop_frame; + bool _jvmti_can_get_owned_monitor_info; // includes can_get_owned_monitor_stack_depth_info // Cache DTrace flags bool _dtrace_extended_probes; @@ -353,6 +354,7 @@ } bool jvmti_can_hotswap_or_post_breakpoint() const { return _jvmti_can_hotswap_or_post_breakpoint; } bool jvmti_can_post_on_exceptions() const { return _jvmti_can_post_on_exceptions; } + bool jvmti_can_get_owned_monitor_info() const { return _jvmti_can_get_owned_monitor_info; } // Cache DTrace flags void cache_dtrace_flags();
--- a/src/hotspot/share/classfile/classFileParser.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/classFileParser.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -3077,7 +3077,7 @@ // We temporarily use the vtable_index field in the Method* to store the // class file index, so we can read in after calling qsort. // Put the method ordering in the shared archive. - if (JvmtiExport::can_maintain_original_method_order() || DumpSharedSpaces) { + if (JvmtiExport::can_maintain_original_method_order() || Arguments::is_dumping_archive()) { for (int index = 0; index < length; index++) { Method* const m = methods->at(index); assert(!m->valid_vtable_index(), "vtable index should not be set"); @@ -3091,7 +3091,7 @@ intArray* method_ordering = NULL; // If JVMTI original method ordering or sharing is enabled construct int // array remembering the original ordering - if (JvmtiExport::can_maintain_original_method_order() || DumpSharedSpaces) { + if (JvmtiExport::can_maintain_original_method_order() || Arguments::is_dumping_archive()) { method_ordering = new intArray(length, length, -1); for (int index = 0; index < length; index++) { Method* const m = methods->at(index);
--- a/src/hotspot/share/classfile/classLoader.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/classLoader.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -462,7 +462,7 @@ #if INCLUDE_CDS void ClassLoader::exit_with_path_failure(const char* error, const char* message) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "only called at dump time"); + Arguments::assert_is_dumping_archive(); tty->print_cr("Hint: enable -Xlog:class+path=info to diagnose the failure"); vm_exit_during_initialization(error, message); } @@ -532,7 +532,7 @@ #if INCLUDE_CDS void ClassLoader::setup_app_search_path(const char *class_path) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "Sanity"); + Arguments::assert_is_dumping_archive(); ResourceMark rm; ClasspathStream cp_stream(class_path); @@ -546,7 +546,7 @@ void ClassLoader::add_to_module_path_entries(const char* path, ClassPathEntry* entry) { assert(entry != NULL, "ClassPathEntry should not be NULL"); - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only"); + Arguments::assert_is_dumping_archive(); // The entry does not exist, add to the list if (_module_path_entries == NULL) { @@ -560,7 +560,7 @@ // Add a module path to the _module_path_entries list. void ClassLoader::update_module_path_entry_list(const char *path, TRAPS) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only"); + Arguments::assert_is_dumping_archive(); struct stat st; if (os::stat(path, &st) != 0) { tty->print_cr("os::stat error %d (%s). CDS dump aborted (path was \"%s\").", @@ -656,7 +656,7 @@ bool set_base_piece = true; #if INCLUDE_CDS - if (DumpSharedSpaces || DynamicDumpSharedSpaces) { + if (Arguments::is_dumping_archive()) { if (!Arguments::has_jimage()) { vm_exit_during_initialization("CDS is not supported in exploded JDK build", NULL); } @@ -1360,7 +1360,7 @@ // Record the shared classpath index and loader type for classes loaded // by the builtin loaders at dump time. void ClassLoader::record_result(InstanceKlass* ik, const ClassFileStream* stream, TRAPS) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "sanity"); + Arguments::assert_is_dumping_archive(); assert(stream != NULL, "sanity"); if (ik->is_unsafe_anonymous()) { @@ -1537,13 +1537,13 @@ #if INCLUDE_CDS void ClassLoader::initialize_shared_path() { - if (DumpSharedSpaces || DynamicDumpSharedSpaces) { + if (Arguments::is_dumping_archive()) { ClassLoaderExt::setup_search_paths(); } } void ClassLoader::initialize_module_path(TRAPS) { - if (DumpSharedSpaces || DynamicDumpSharedSpaces) { + if (Arguments::is_dumping_archive()) { ClassLoaderExt::setup_module_paths(THREAD); FileMapInfo::allocate_shared_path_table(); }
--- a/src/hotspot/share/classfile/classLoader.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/classLoader.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -26,6 +26,7 @@ #define SHARE_CLASSFILE_CLASSLOADER_HPP #include "jimage.hpp" +#include "runtime/arguments.hpp" #include "runtime/handles.hpp" #include "runtime/perfData.hpp" #include "utilities/exceptions.hpp" @@ -236,6 +237,8 @@ CDS_ONLY(static ClassPathEntry* app_classpath_entries() {return _app_classpath_entries;}) CDS_ONLY(static ClassPathEntry* module_path_entries() {return _module_path_entries;}) + static bool has_bootclasspath_append() { return _first_append_entry != NULL; } + protected: // Initialization: // - setup the boot loader's system class path @@ -395,8 +398,7 @@ // Helper function used by CDS code to get the number of module path // entries during shared classpath setup time. static int num_module_path_entries() { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, - "Should only be called at CDS dump time"); + Arguments::assert_is_dumping_archive(); int num_entries = 0; ClassPathEntry* e= ClassLoader::_module_path_entries; while (e != NULL) {
--- a/src/hotspot/share/classfile/classLoader.inline.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/classLoader.inline.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -62,8 +62,7 @@ // entries during shared classpath setup time. inline int ClassLoader::num_boot_classpath_entries() { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, - "Should only be called at CDS dump time"); + Arguments::assert_is_dumping_archive(); assert(has_jrt_entry(), "must have a java runtime image"); int num_entries = 1; // count the runtime image ClassPathEntry* e = ClassLoader::_first_append_entry; @@ -85,8 +84,7 @@ // Helper function used by CDS code to get the number of app classpath // entries during shared classpath setup time. inline int ClassLoader::num_app_classpath_entries() { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, - "Should only be called at CDS dump time"); + Arguments::assert_is_dumping_archive(); int num_entries = 0; ClassPathEntry* e= ClassLoader::_app_classpath_entries; while (e != NULL) {
--- a/src/hotspot/share/classfile/classLoaderExt.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/classLoaderExt.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -62,8 +62,7 @@ } void ClassLoaderExt::setup_app_search_path() { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, - "this function is only used at CDS dump time"); + Arguments::assert_is_dumping_archive(); _app_class_paths_start_index = ClassLoader::num_boot_classpath_entries(); char* app_class_path = os::strdup(Arguments::get_appclasspath()); @@ -92,8 +91,7 @@ } } void ClassLoaderExt::setup_module_paths(TRAPS) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, - "this function is only used with CDS dump time"); + Arguments::assert_is_dumping_archive(); _app_module_paths_start_index = ClassLoader::num_boot_classpath_entries() + ClassLoader::num_app_classpath_entries(); Handle system_class_loader (THREAD, SystemDictionary::java_system_loader()); @@ -231,7 +229,7 @@ void ClassLoaderExt::record_result(const s2 classpath_index, InstanceKlass* result, TRAPS) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "Sanity"); + Arguments::assert_is_dumping_archive(); // We need to remember where the class comes from during dumping. oop loader = result->class_loader();
--- a/src/hotspot/share/classfile/compactHashtable.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/compactHashtable.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -42,7 +42,7 @@ // CompactHashtableWriter::CompactHashtableWriter(int num_entries, CompactHashtableStats* stats) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump-time only"); + Arguments::assert_is_dumping_archive(); assert(num_entries >= 0, "sanity"); _num_buckets = calculate_num_buckets(num_entries); assert(_num_buckets > 0, "no buckets");
--- a/src/hotspot/share/classfile/dictionary.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/dictionary.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -246,7 +246,7 @@ // Used to scan and relocate the classes during CDS archive dump. void Dictionary::classes_do(MetaspaceClosure* it) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump-time only"); + Arguments::assert_is_dumping_archive(); for (int index = 0; index < table_size(); index++) { for (DictionaryEntry* probe = bucket(index); probe != NULL;
--- a/src/hotspot/share/classfile/javaClasses.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/javaClasses.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -379,7 +379,7 @@ if (_to_java_string_fn == NULL) { void *lib_handle = os::native_java_library(); - _to_java_string_fn = CAST_TO_FN_PTR(to_java_string_fn_t, os::dll_lookup(lib_handle, "NewStringPlatform")); + _to_java_string_fn = CAST_TO_FN_PTR(to_java_string_fn_t, os::dll_lookup(lib_handle, "JNU_NewStringPlatform")); if (_to_java_string_fn == NULL) { fatal("NewStringPlatform missing"); }
--- a/src/hotspot/share/classfile/javaClasses.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/javaClasses.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -202,7 +202,6 @@ // Conversion between '.' and '/' formats static Handle externalize_classname(Handle java_string, TRAPS) { return char_converter(java_string, '/', '.', THREAD); } - static Handle internalize_classname(Handle java_string, TRAPS) { return char_converter(java_string, '.', '/', THREAD); } // Conversion static Symbol* as_symbol(oop java_string);
--- a/src/hotspot/share/classfile/klassFactory.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/klassFactory.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -218,7 +218,7 @@ JFR_ONLY(ON_KLASS_CREATION(result, parser, THREAD);) #if INCLUDE_CDS - if (DumpSharedSpaces || DynamicDumpSharedSpaces) { + if (Arguments::is_dumping_archive()) { ClassLoader::record_result(result, stream, THREAD); } #endif // INCLUDE_CDS
--- a/src/hotspot/share/classfile/symbolTable.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/symbolTable.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -220,7 +220,7 @@ assert (len <= Symbol::max_length(), "should be checked by caller"); Symbol* sym; - if (DumpSharedSpaces || DynamicDumpSharedSpaces) { + if (Arguments::is_dumping_archive()) { c_heap = false; } if (c_heap) { @@ -283,7 +283,7 @@ }; void SymbolTable::metaspace_pointers_do(MetaspaceClosure* it) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "called only during dump time"); + Arguments::assert_is_dumping_archive(); MetaspacePointersDo mpd(it); _local_table->do_safepoint_scan(mpd); }
--- a/src/hotspot/share/classfile/systemDictionary.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/systemDictionary.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -1251,10 +1251,8 @@ TempNewSymbol pkg_name = NULL; PackageEntry* pkg_entry = NULL; ModuleEntry* mod_entry = NULL; - const char* pkg_string = NULL; pkg_name = InstanceKlass::package_from_name(class_name, CHECK_false); if (pkg_name != NULL) { - pkg_string = pkg_name->as_C_string(); if (loader_data != NULL) { pkg_entry = loader_data->packages()->lookup_only(pkg_name); } @@ -1291,7 +1289,7 @@ // 3. or, the class is from an unamed module if (!ent->is_modules_image() && ik->is_shared_boot_class()) { // the class is from the -Xbootclasspath/a - if (pkg_string == NULL || + if (pkg_name == NULL || pkg_entry == NULL || pkg_entry->in_unnamed_module()) { assert(mod_entry == NULL || @@ -1303,8 +1301,7 @@ return false; } else { bool res = SystemDictionaryShared::is_shared_class_visible_for_classloader( - ik, class_loader, pkg_string, pkg_name, - pkg_entry, mod_entry, CHECK_(false)); + ik, class_loader, pkg_name, pkg_entry, mod_entry, CHECK_(false)); return res; } } @@ -1478,6 +1475,11 @@ // a named package within the unnamed module. In all cases, // limit visibility to search for the class only in the boot // loader's append path. + if (!ClassLoader::has_bootclasspath_append()) { + // If there is no bootclasspath append entry, no need to continue + // searching. + return NULL; + } search_only_bootloader_append = true; } }
--- a/src/hotspot/share/classfile/systemDictionaryShared.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/systemDictionaryShared.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -657,7 +657,6 @@ bool SystemDictionaryShared::is_shared_class_visible_for_classloader( InstanceKlass* ik, Handle class_loader, - const char* pkg_string, Symbol* pkg_name, PackageEntry* pkg_entry, ModuleEntry* mod_entry, @@ -684,7 +683,7 @@ } } else if (SystemDictionary::is_system_class_loader(class_loader())) { assert(ent != NULL, "shared class for system loader should have valid SharedClassPathEntry"); - if (pkg_string == NULL) { + if (pkg_name == NULL) { // The archived class is in the unnamed package. Currently, the boot image // does not contain any class in the unnamed package. assert(!ent->is_modules_image(), "Class in the unnamed package must be from the classpath"); @@ -1029,7 +1028,7 @@ } void SystemDictionaryShared::set_shared_class_misc_info(InstanceKlass* k, ClassFileStream* cfs) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "only when dumping"); + Arguments::assert_is_dumping_archive(); assert(!is_builtin(k), "must be unregistered class"); DumpTimeSharedClassInfo* info = find_or_allocate_info_for(k); info->_clsfile_size = cfs->length(); @@ -1185,7 +1184,7 @@ bool SystemDictionaryShared::is_excluded_class(InstanceKlass* k) { assert(_no_class_loading_should_happen, "sanity"); - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "only when dumping"); + Arguments::assert_is_dumping_archive(); return find_or_allocate_info_for(k)->is_excluded(); } @@ -1209,7 +1208,7 @@ bool SystemDictionaryShared::add_verification_constraint(InstanceKlass* k, Symbol* name, Symbol* from_name, bool from_field_is_protected, bool from_is_array, bool from_is_object) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "called at dump time only"); + Arguments::assert_is_dumping_archive(); DumpTimeSharedClassInfo* info = find_or_allocate_info_for(k); info->add_verification_constraint(k, name, from_name, from_field_is_protected, from_is_array, from_is_object);
--- a/src/hotspot/share/classfile/systemDictionaryShared.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/systemDictionaryShared.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -240,7 +240,6 @@ static bool is_sharing_possible(ClassLoaderData* loader_data); static bool is_shared_class_visible_for_classloader(InstanceKlass* ik, Handle class_loader, - const char* pkg_string, Symbol* pkg_name, PackageEntry* pkg_entry, ModuleEntry* mod_entry,
--- a/src/hotspot/share/classfile/verificationType.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/verificationType.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -95,7 +95,7 @@ return true; } - if (DumpSharedSpaces || DynamicDumpSharedSpaces) { + if (Arguments::is_dumping_archive()) { if (SystemDictionaryShared::add_verification_constraint(klass, name(), from.name(), from_field_is_protected, from.is_array(), from.is_object())) {
--- a/src/hotspot/share/classfile/verifier.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/classfile/verifier.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -64,29 +64,39 @@ #define VALUETYPE_MAJOR_VERSION 56 #define MAX_ARRAY_DIMENSIONS 255 -// Access to external entry for VerifyClassCodes - old byte code verifier +// Access to external entry for VerifyClassForMajorVersion - old byte code verifier extern "C" { - typedef jboolean (*verify_byte_codes_fn_t)(JNIEnv *, jclass, char *, jint); - typedef jboolean (*verify_byte_codes_fn_new_t)(JNIEnv *, jclass, char *, jint, jint); + typedef jboolean (*verify_byte_codes_fn_t)(JNIEnv *, jclass, char *, jint, jint); } -static void* volatile _verify_byte_codes_fn = NULL; +static verify_byte_codes_fn_t volatile _verify_byte_codes_fn = NULL; -static volatile jint _is_new_verify_byte_codes_fn = (jint) true; +static verify_byte_codes_fn_t verify_byte_codes_fn() { -static void* verify_byte_codes_fn() { - if (OrderAccess::load_acquire(&_verify_byte_codes_fn) == NULL) { - void *lib_handle = os::native_java_library(); - void *func = os::dll_lookup(lib_handle, "VerifyClassCodesForMajorVersion"); - OrderAccess::release_store(&_verify_byte_codes_fn, func); - if (func == NULL) { - _is_new_verify_byte_codes_fn = false; - func = os::dll_lookup(lib_handle, "VerifyClassCodes"); - OrderAccess::release_store(&_verify_byte_codes_fn, func); - } - } - return (void*)_verify_byte_codes_fn; + if (_verify_byte_codes_fn != NULL) + return _verify_byte_codes_fn; + + MutexLocker locker(Verify_lock); + + if (_verify_byte_codes_fn != NULL) + return _verify_byte_codes_fn; + + // Load verify dll + char buffer[JVM_MAXPATHLEN]; + char ebuf[1024]; + if (!os::dll_locate_lib(buffer, sizeof(buffer), Arguments::get_dll_dir(), "verify")) + return NULL; // Caller will throw VerifyError + + void *lib_handle = os::dll_load(buffer, ebuf, sizeof(ebuf)); + if (lib_handle == NULL) + return NULL; // Caller will throw VerifyError + + void *fn = os::dll_lookup(lib_handle, "VerifyClassForMajorVersion"); + if (fn == NULL) + return NULL; // Caller will throw VerifyError + + return _verify_byte_codes_fn = CAST_TO_FN_PTR(verify_byte_codes_fn_t, fn); } @@ -283,7 +293,7 @@ JavaThread* thread = (JavaThread*)THREAD; JNIEnv *env = thread->jni_environment(); - void* verify_func = verify_byte_codes_fn(); + verify_byte_codes_fn_t verify_func = verify_byte_codes_fn(); if (verify_func == NULL) { jio_snprintf(message, message_len, "Could not link verifier"); @@ -302,16 +312,7 @@ // ThreadToNativeFromVM takes care of changing thread_state, so safepoint // code knows that we have left the VM - if (_is_new_verify_byte_codes_fn) { - verify_byte_codes_fn_new_t func = - CAST_TO_FN_PTR(verify_byte_codes_fn_new_t, verify_func); - result = (*func)(env, cls, message, (int)message_len, - klass->major_version()); - } else { - verify_byte_codes_fn_t func = - CAST_TO_FN_PTR(verify_byte_codes_fn_t, verify_func); - result = (*func)(env, cls, message, (int)message_len); - } + result = (*verify_func)(env, cls, message, (int)message_len, klass->major_version()); } JNIHandles::destroy_local(cls);
--- a/src/hotspot/share/code/compiledIC.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/code/compiledIC.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -756,4 +756,22 @@ tty->cr(); } +void CompiledDirectStaticCall::verify_mt_safe(const methodHandle& callee, address entry, + NativeMovConstReg* method_holder, + NativeJump* jump) { + // A generated lambda form might be deleted from the Lambdaform + // cache in MethodTypeForm. If a jit compiled lambdaform method + // becomes not entrant and the cache access returns null, the new + // resolve will lead to a new generated LambdaForm. + Method* old_method = reinterpret_cast<Method*>(method_holder->data()); + assert(old_method == NULL || old_method == callee() || + callee->is_compiled_lambda_form() || + !old_method->method_holder()->is_loader_alive() || + old_method->is_old(), // may be race patching deoptimized nmethod due to redefinition. + "a) MT-unsafe modification of inline cache"); + + address destination = jump->jump_destination(); + assert(destination == (address)-1 || destination == entry, + "b) MT-unsafe modification of inline cache"); +} #endif // !PRODUCT
--- a/src/hotspot/share/code/compiledIC.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/code/compiledIC.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -402,6 +402,9 @@ // Also used by CompiledIC void set_to_interpreted(const methodHandle& callee, address entry); + void verify_mt_safe(const methodHandle& callee, address entry, + NativeMovConstReg* method_holder, + NativeJump* jump) PRODUCT_RETURN; #if INCLUDE_AOT void set_to_far(const methodHandle& callee, address entry); #endif
--- a/src/hotspot/share/compiler/compilerDirectives.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/compiler/compilerDirectives.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -66,8 +66,7 @@ cflags(VectorizeDebug, uintx, 0, VectorizeDebug) \ cflags(CloneMapDebug, bool, false, CloneMapDebug) \ cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLevel) \ - cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit) \ -ZGC_ONLY(cflags(ZTraceLoadBarriers, bool, false, ZTraceLoadBarriers)) + cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit) #else #define compilerdirectives_c2_flags(cflags) #endif
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1Analytics.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -78,6 +78,8 @@ _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _prev_collection_pause_end_ms(0.0), _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)), + _concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _logged_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _cost_per_logged_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)), _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), @@ -102,6 +104,10 @@ int index = MIN2(ParallelGCThreads - 1, 7u); _rs_length_diff_seq->add(rs_length_diff_defaults[index]); + // Start with inverse of maximum STW cost. + _concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]); + // Some applications have very low rates for logging cards. + _logged_cards_rate_ms_seq->add(0.0); _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms_defaults[index]); _cost_scan_hcc_seq->add(0.0); _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]); @@ -159,6 +165,14 @@ (pause_time_ms * _recent_prev_end_times_for_all_gcs_sec->num()) / interval_ms; } +void G1Analytics::report_concurrent_refine_rate_ms(double cards_per_ms) { + _concurrent_refine_rate_ms_seq->add(cards_per_ms); +} + +void G1Analytics::report_logged_cards_rate_ms(double cards_per_ms) { + _logged_cards_rate_ms_seq->add(cards_per_ms); +} + void G1Analytics::report_cost_per_logged_card_ms(double cost_per_logged_card_ms) { _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms); } @@ -223,6 +237,14 @@ return get_new_prediction(_alloc_rate_ms_seq); } +double G1Analytics::predict_concurrent_refine_rate_ms() const { + return get_new_prediction(_concurrent_refine_rate_ms_seq); +} + +double G1Analytics::predict_logged_cards_rate_ms() const { + return get_new_prediction(_logged_cards_rate_ms_seq); +} + double G1Analytics::predict_cost_per_logged_card_ms() const { return get_new_prediction(_cost_per_logged_card_ms_seq); }
--- a/src/hotspot/share/gc/g1/g1Analytics.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1Analytics.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -46,6 +46,8 @@ double _prev_collection_pause_end_ms; TruncatedSeq* _rs_length_diff_seq; + TruncatedSeq* _concurrent_refine_rate_ms_seq; + TruncatedSeq* _logged_cards_rate_ms_seq; TruncatedSeq* _cost_per_logged_card_ms_seq; TruncatedSeq* _cost_scan_hcc_seq; TruncatedSeq* _young_cards_per_entry_ratio_seq; @@ -99,6 +101,8 @@ void report_concurrent_mark_remark_times_ms(double ms); void report_concurrent_mark_cleanup_times_ms(double ms); void report_alloc_rate_ms(double alloc_rate); + void report_concurrent_refine_rate_ms(double cards_per_ms); + void report_logged_cards_rate_ms(double cards_per_ms); void report_cost_per_logged_card_ms(double cost_per_logged_card_ms); void report_cost_scan_hcc(double cost_scan_hcc); void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc); @@ -116,6 +120,8 @@ double predict_alloc_rate_ms() const; int num_alloc_rate_ms() const; + double predict_concurrent_refine_rate_ms() const; + double predict_logged_cards_rate_ms() const; double predict_cost_per_logged_card_ms() const; double predict_scan_hcc_ms() const;
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -409,7 +409,7 @@ guarantee(target_pause_time_ms > 0.0, "target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms); - size_t pending_cards = _policy->pending_cards(); + size_t pending_cards = _policy->pending_cards_at_gc_start(); double base_time_ms = _policy->predict_base_elapsed_time_ms(pending_cards); double time_remaining_ms = MAX2(target_pause_time_ms - base_time_ms, 0.0);
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -412,6 +412,22 @@ dcqs.notify_if_necessary(); } +G1ConcurrentRefine::RefinementStats G1ConcurrentRefine::total_refinement_stats() const { + struct CollectData : public ThreadClosure { + Tickspan _total_time; + size_t _total_cards; + CollectData() : _total_time(), _total_cards(0) {} + virtual void do_thread(Thread* t) { + G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t); + _total_time += crt->total_refinement_time(); + _total_cards += crt->total_refined_cards(); + } + } collector; + // Cast away const so we can call non-modifying closure on threads. + const_cast<G1ConcurrentRefine*>(this)->threads_do(&collector); + return RefinementStats(collector._total_time, collector._total_cards); +} + size_t G1ConcurrentRefine::activation_threshold(uint worker_id) const { Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id); return activation_level(thresholds); @@ -432,7 +448,8 @@ } } -bool G1ConcurrentRefine::do_refinement_step(uint worker_id) { +bool G1ConcurrentRefine::do_refinement_step(uint worker_id, + size_t* total_refined_cards) { G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); size_t curr_cards = dcqs.num_cards(); @@ -448,5 +465,6 @@ // Process the next buffer, if there are enough left. return dcqs.refine_completed_buffer_concurrently(worker_id + worker_id_offset(), - deactivation_threshold(worker_id)); + deactivation_threshold(worker_id), + total_refined_cards); }
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -27,6 +27,7 @@ #include "memory/allocation.hpp" #include "utilities/globalDefinitions.hpp" +#include "utilities/ticks.hpp" // Forward decl class G1ConcurrentRefine; @@ -118,11 +119,22 @@ // Adjust refinement thresholds based on work done during the pause and the goal time. void adjust(double logged_cards_scan_time, size_t processed_logged_cards, double goal_ms); + struct RefinementStats { + Tickspan _time; + size_t _cards; + RefinementStats(Tickspan time, size_t cards) : _time(time), _cards(cards) {} + }; + + RefinementStats total_refinement_stats() const; + // Cards in the dirty card queue set. size_t activation_threshold(uint worker_id) const; size_t deactivation_threshold(uint worker_id) const; - // Perform a single refinement step. Called by the refinement threads when woken up. - bool do_refinement_step(uint worker_id); + + // Perform a single refinement step; called by the refinement + // threads. Returns true if there was refinement work available. + // Increments *total_refined_cards. + bool do_refinement_step(uint worker_id, size_t* total_refined_cards); // Iterate over all concurrent refinement threads applying the given closure. void threads_do(ThreadClosure *tc);
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -37,6 +37,8 @@ ConcurrentGCThread(), _vtime_start(0.0), _vtime_accum(0.0), + _total_refinement_time(), + _total_refined_cards(0), _worker_id(worker_id), _active(false), _monitor(NULL), @@ -101,11 +103,12 @@ break; } - size_t buffers_processed = 0; log_debug(gc, refine)("Activated worker %d, on threshold: " SIZE_FORMAT ", current: " SIZE_FORMAT, _worker_id, _cr->activation_threshold(_worker_id), G1BarrierSet::dirty_card_queue_set().num_cards()); + size_t start_total_refined_cards = _total_refined_cards; // For logging. + { SuspendibleThreadSetJoiner sts_join; @@ -115,20 +118,22 @@ continue; // Re-check for termination after yield delay. } - if (!_cr->do_refinement_step(_worker_id)) { - break; + Ticks start_time = Ticks::now(); + if (!_cr->do_refinement_step(_worker_id, &_total_refined_cards)) { + break; // No cards to process. } - ++buffers_processed; + _total_refinement_time += (Ticks::now() - start_time); } } deactivate(); log_debug(gc, refine)("Deactivated worker %d, off threshold: " SIZE_FORMAT - ", current: " SIZE_FORMAT ", buffers processed: " - SIZE_FORMAT, + ", current: " SIZE_FORMAT ", refined cards: " + SIZE_FORMAT ", total refined cards: " SIZE_FORMAT, _worker_id, _cr->deactivation_threshold(_worker_id), G1BarrierSet::dirty_card_queue_set().num_cards(), - buffers_processed); + _total_refined_cards - start_total_refined_cards, + _total_refined_cards); if (os::supports_vtime()) { _vtime_accum = (os::elapsedVTime() - _vtime_start);
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -26,6 +26,7 @@ #define SHARE_GC_G1_G1CONCURRENTREFINETHREAD_HPP #include "gc/shared/concurrentGCThread.hpp" +#include "utilities/ticks.hpp" // Forward Decl. class G1ConcurrentRefine; @@ -38,6 +39,10 @@ double _vtime_start; // Initial virtual time. double _vtime_accum; // Accumulated virtual time. + + Tickspan _total_refinement_time; + size_t _total_refined_cards; + uint _worker_id; bool _active; @@ -61,6 +66,9 @@ // Activate this thread. void activate(); + Tickspan total_refinement_time() const { return _total_refinement_time; } + size_t total_refined_cards() const { return _total_refined_cards; } + // Total virtual time so far. double vtime_accum() { return _vtime_accum; } };
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -37,6 +37,7 @@ #include "runtime/atomic.hpp" #include "runtime/flags/flagSetting.hpp" #include "runtime/mutexLocker.hpp" +#include "runtime/os.hpp" #include "runtime/safepoint.hpp" #include "runtime/thread.inline.hpp" #include "runtime/threadSMR.hpp" @@ -62,6 +63,9 @@ } } +// Assumed to be zero by concurrent threads. +static uint par_ids_start() { return 0; } + G1DirtyCardQueueSet::G1DirtyCardQueueSet(Monitor* cbl_mon, BufferNode::Allocator* allocator) : PtrQueueSet(allocator), @@ -73,15 +77,16 @@ _process_completed_buffers(false), _max_cards(MaxCardsUnlimited), _max_cards_padding(0), - _free_ids(0, num_par_ids()), - _processed_buffers_mut(0), - _processed_buffers_rs_thread(0) + _free_ids(par_ids_start(), num_par_ids()), + _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC)) { + ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t)); _all_active = true; } G1DirtyCardQueueSet::~G1DirtyCardQueueSet() { abandon_completed_buffers(); + FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters); } // Determines how many mutator threads can process the buffers in parallel. @@ -89,6 +94,14 @@ return (uint)os::initial_active_processor_count(); } +size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const { + size_t sum = 0; + for (uint i = 0; i < num_par_ids(); ++i) { + sum += _mutator_refined_cards_counters[i]; + } + return sum; +} + void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) { G1ThreadLocalData::dirty_card_queue(t).handle_zero_index(); } @@ -213,7 +226,9 @@ return result; } -bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, uint worker_id) { +bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, + uint worker_id, + size_t* total_refined_cards) { G1RemSet* rem_set = G1CollectedHeap::heap()->rem_set(); size_t size = buffer_size(); void** buffer = BufferNode::make_buffer_from_node(node); @@ -223,6 +238,7 @@ CardTable::CardValue* cp = static_cast<CardTable::CardValue*>(buffer[i]); rem_set->refine_card_concurrently(cp, worker_id); } + *total_refined_cards += (i - node->index()); node->set_index(i); return i == size; } @@ -260,25 +276,27 @@ bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) { uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id - bool result = refine_buffer(node, worker_id); + uint counter_index = worker_id - par_ids_start(); + size_t* counter = &_mutator_refined_cards_counters[counter_index]; + bool result = refine_buffer(node, worker_id, counter); _free_ids.release_par_id(worker_id); // release the id if (result) { assert_fully_consumed(node, buffer_size()); - Atomic::inc(&_processed_buffers_mut); } return result; } -bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id, size_t stop_at) { +bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id, + size_t stop_at, + size_t* total_refined_cards) { BufferNode* node = get_completed_buffer(stop_at); if (node == NULL) { return false; - } else if (refine_buffer(node, worker_id)) { + } else if (refine_buffer(node, worker_id, total_refined_cards)) { assert_fully_consumed(node, buffer_size()); // Done with fully processed buffer. deallocate_buffer(node); - Atomic::inc(&_processed_buffers_rs_thread); return true; } else { // Return partially processed buffer to the queue.
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -78,14 +78,15 @@ void abandon_completed_buffers(); - // Refine the cards in "node" from it's index to buffer_size. + // Refine the cards in "node" from its index to buffer_size. // Stops processing if SuspendibleThreadSet::should_yield() is true. // Returns true if the entire buffer was processed, false if there // is a pending yield request. The node's index is updated to exclude // the processed elements, e.g. up to the element before processing // stopped, or one past the last element if the entire buffer was - // processed. - bool refine_buffer(BufferNode* node, uint worker_id); + // processed. Increments *total_refined_cards by the number of cards + // processed and removed from the buffer. + bool refine_buffer(BufferNode* node, uint worker_id, size_t* total_refined_cards); bool mut_process_buffer(BufferNode* node); @@ -97,10 +98,9 @@ G1FreeIdSet _free_ids; - // The number of completed buffers processed by mutator and rs thread, - // respectively. - jint _processed_buffers_mut; - jint _processed_buffers_rs_thread; + // Array of cumulative dirty cards refined by mutator threads. + // Array has an entry per id in _free_ids. + size_t* _mutator_refined_cards_counters; public: G1DirtyCardQueueSet(Monitor* cbl_mon, BufferNode::Allocator* allocator); @@ -158,7 +158,12 @@ // Stops processing a buffer if SuspendibleThreadSet::should_yield(), // returning the incompletely processed buffer to the completed buffer // list, for later processing of the remainder. - bool refine_completed_buffer_concurrently(uint worker_id, size_t stop_at); + // + // Increments *total_refined_cards by the number of cards processed and + // removed from the buffer. + bool refine_completed_buffer_concurrently(uint worker_id, + size_t stop_at, + size_t* total_refined_cards); // If a full collection is happening, reset partial logs, and release // completed ones: the full collection will make them all irrelevant. @@ -181,13 +186,8 @@ return _max_cards_padding; } - jint processed_buffers_mut() { - return _processed_buffers_mut; - } - jint processed_buffers_rs_thread() { - return _processed_buffers_rs_thread; - } - + // Total dirty cards refined by mutator threads. + size_t total_mutator_refined_cards() const; }; inline G1DirtyCardQueueSet* G1DirtyCardQueue::dirty_card_qset() const {
--- a/src/hotspot/share/gc/g1/g1Policy.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1Policy.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -70,7 +70,11 @@ _free_regions_at_end_of_collection(0), _max_rs_length(0), _rs_length_prediction(0), - _pending_cards(0), + _pending_cards_at_gc_start(0), + _pending_cards_at_prev_gc_end(0), + _total_mutator_refined_cards(0), + _total_concurrent_refined_cards(0), + _total_concurrent_refinement_time(), _bytes_allocated_in_old_since_last_gc(0), _initial_mark_to_mixed(), _collection_set(NULL), @@ -442,6 +446,7 @@ collector_state()->set_in_young_only_phase(false); collector_state()->set_in_full_gc(true); _collection_set->clear_candidates(); + record_concurrent_refinement_data(true /* is_full_collection */); } void G1Policy::record_full_collection_end() { @@ -472,12 +477,67 @@ _survivor_surv_rate_group->reset(); update_young_list_max_and_target_length(); update_rs_length_prediction(); + _pending_cards_at_prev_gc_end = _g1h->pending_card_num(); _bytes_allocated_in_old_since_last_gc = 0; record_pause(FullGC, _full_collection_start_sec, end_sec); } +void G1Policy::record_concurrent_refinement_data(bool is_full_collection) { + _pending_cards_at_gc_start = _g1h->pending_card_num(); + + // Record info about concurrent refinement thread processing. + G1ConcurrentRefine* cr = _g1h->concurrent_refine(); + G1ConcurrentRefine::RefinementStats cr_stats = cr->total_refinement_stats(); + + Tickspan cr_time = cr_stats._time - _total_concurrent_refinement_time; + _total_concurrent_refinement_time = cr_stats._time; + + size_t cr_cards = cr_stats._cards - _total_concurrent_refined_cards; + _total_concurrent_refined_cards = cr_stats._cards; + + // Don't update rate if full collection. We could be in an implicit full + // collection after a non-full collection failure, in which case there + // wasn't any mutator/cr-thread activity since last recording. And if + // we're in an explicit full collection, the time since the last GC can + // be arbitrarily short, so not a very good sample. Similarly, don't + // update the rate if the current sample is empty or time is zero. + if (!is_full_collection && (cr_cards > 0) && (cr_time > Tickspan())) { + double rate = cr_cards / (cr_time.seconds() * MILLIUNITS); + _analytics->report_concurrent_refine_rate_ms(rate); + } + + // Record info about mutator thread processing. + G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); + size_t mut_total_cards = dcqs.total_mutator_refined_cards(); + size_t mut_cards = mut_total_cards - _total_mutator_refined_cards; + _total_mutator_refined_cards = mut_total_cards; + + // Record mutator's card logging rate. + // Don't update if full collection; see above. + if (!is_full_collection) { + size_t total_cards = _pending_cards_at_gc_start + cr_cards + mut_cards; + assert(_pending_cards_at_prev_gc_end <= total_cards, + "untracked cards: last pending: " SIZE_FORMAT + ", pending: " SIZE_FORMAT ", conc refine: " SIZE_FORMAT + ", mut refine:" SIZE_FORMAT, + _pending_cards_at_prev_gc_end, _pending_cards_at_gc_start, + cr_cards, mut_cards); + size_t logged_cards = total_cards - _pending_cards_at_prev_gc_end; + double logging_start_time = _analytics->prev_collection_pause_end_ms(); + double logging_end_time = Ticks::now().seconds() * MILLIUNITS; + double logging_time = logging_end_time - logging_start_time; + // Unlike above for conc-refine rate, here we should not require a + // non-empty sample, since an application could go some time with only + // young-gen or filtered out writes. But we'll ignore unusually short + // sample periods, as they may just pollute the predictions. + if (logging_time > 1.0) { // Require > 1ms sample time. + _analytics->report_logged_cards_rate_ms(logged_cards / logging_time); + } + } +} + void G1Policy::record_collection_pause_start(double start_time_sec) { // We only need to do this here as the policy will only be applied // to the GC we're about to start. so, no point is calculating this @@ -490,7 +550,8 @@ assert_used_and_recalculate_used_equal(_g1h); phase_times()->record_cur_collection_start_sec(start_time_sec); - _pending_cards = _g1h->pending_card_num(); + + record_concurrent_refinement_data(false /* is_full_collection */); _collection_set->reset_bytes_used_before(); _bytes_copied_during_gc = 0; @@ -744,7 +805,7 @@ // after the mixed gc phase. // During mixed gc we do not use them for young gen sizing. if (this_pause_was_young_only) { - _analytics->report_pending_cards((double) _pending_cards); + _analytics->report_pending_cards((double) _pending_cards_at_gc_start); _analytics->report_rs_length((double) _max_rs_length); } } @@ -798,6 +859,7 @@ scan_logged_cards_time_goal_ms -= scan_hcc_time_ms; } + _pending_cards_at_prev_gc_end = _g1h->pending_card_num(); double const logged_cards_time = logged_cards_processing_time(); log_debug(gc, ergo, refine)("Concurrent refinement times: Logged Cards Scan time goal: %1.2fms Logged Cards Scan time: %1.2fms HCC time: %1.2fms",
--- a/src/hotspot/share/gc/g1/g1Policy.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1Policy.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -100,7 +100,11 @@ size_t _rs_length_prediction; - size_t _pending_cards; + size_t _pending_cards_at_gc_start; + size_t _pending_cards_at_prev_gc_end; + size_t _total_mutator_refined_cards; + size_t _total_concurrent_refined_cards; + Tickspan _total_concurrent_refinement_time; // The amount of allocated bytes in old gen during the last mutator and the following // young GC phase. @@ -244,7 +248,15 @@ uint base_free_regions, double target_pause_time_ms) const; public: - size_t pending_cards() const { return _pending_cards; } + size_t pending_cards_at_gc_start() const { return _pending_cards_at_gc_start; } + + size_t total_concurrent_refined_cards() const { + return _total_concurrent_refined_cards; + } + + size_t total_mutator_refined_cards() const { + return _total_mutator_refined_cards; + } // Calculate the minimum number of old regions we'll add to the CSet // during a mixed GC. @@ -283,6 +295,9 @@ void record_pause(PauseKind kind, double start, double end); // Indicate that we aborted marking before doing any mixed GCs. void abort_time_to_mixed_tracking(); + + void record_concurrent_refinement_data(bool is_full_collection); + public: G1Policy(STWGCTimer* gc_timer);
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1RemSet.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -489,7 +489,6 @@ _scan_state(new G1RemSetScanState()), _prev_period_summary(), _g1h(g1h), - _num_conc_refined_cards(0), _ct(ct), _g1p(_g1h->policy()), _hot_card_cache(hot_card_cache) { @@ -1377,7 +1376,6 @@ G1ConcurrentRefineOopClosure conc_refine_cl(_g1h, worker_id); if (r->oops_on_memregion_seq_iterate_careful<false>(dirty_region, &conc_refine_cl) != NULL) { - _num_conc_refined_cards++; // Unsynchronized update, only used for logging. return; }
--- a/src/hotspot/share/gc/g1/g1RemSet.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1RemSet.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -61,7 +61,6 @@ G1RemSetSummary _prev_period_summary; G1CollectedHeap* _g1h; - size_t _num_conc_refined_cards; // Number of cards refined concurrently to the mutator. G1CardTable* _ct; G1Policy* _g1p; @@ -125,8 +124,6 @@ // Print accumulated summary info from the last time called. void print_periodic_summary_info(const char* header, uint period_count); - size_t num_conc_refined_cards() const { return _num_conc_refined_cards; } - // Rebuilds the remembered set by scanning from bottom to TARS for all regions // using the given work gang. void rebuild_rem_set(G1ConcurrentMark* cm, WorkGang* workers, uint worker_id_offset);
--- a/src/hotspot/share/gc/g1/g1RemSetSummary.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1RemSetSummary.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -27,6 +27,7 @@ #include "gc/g1/g1ConcurrentRefine.hpp" #include "gc/g1/g1ConcurrentRefineThread.hpp" #include "gc/g1/g1DirtyCardQueue.hpp" +#include "gc/g1/g1Policy.hpp" #include "gc/g1/g1RemSet.hpp" #include "gc/g1/g1RemSetSummary.hpp" #include "gc/g1/g1YoungRemSetSamplingThread.hpp" @@ -53,18 +54,17 @@ }; void G1RemSetSummary::update() { - _num_conc_refined_cards = _rem_set->num_conc_refined_cards(); - G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set(); - _num_processed_buf_mutator = dcqs.processed_buffers_mut(); - _num_processed_buf_rs_threads = dcqs.processed_buffers_rs_thread(); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + + const G1Policy* policy = g1h->policy(); + _total_mutator_refined_cards = policy->total_mutator_refined_cards(); + _total_concurrent_refined_cards = policy->total_concurrent_refined_cards(); _num_coarsenings = HeapRegionRemSet::n_coarsenings(); - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - G1ConcurrentRefine* cg1r = g1h->concurrent_refine(); if (_rs_threads_vtimes != NULL) { GetRSThreadVTimeClosure p(this); - cg1r->threads_do(&p); + g1h->concurrent_refine()->threads_do(&p); } set_sampling_thread_vtime(g1h->sampling_thread()->vtime_accum()); } @@ -83,9 +83,8 @@ G1RemSetSummary::G1RemSetSummary() : _rem_set(NULL), - _num_conc_refined_cards(0), - _num_processed_buf_mutator(0), - _num_processed_buf_rs_threads(0), + _total_mutator_refined_cards(0), + _total_concurrent_refined_cards(0), _num_coarsenings(0), _num_vtimes(G1ConcurrentRefine::max_num_threads()), _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)), @@ -96,9 +95,8 @@ G1RemSetSummary::G1RemSetSummary(G1RemSet* rem_set) : _rem_set(rem_set), - _num_conc_refined_cards(0), - _num_processed_buf_mutator(0), - _num_processed_buf_rs_threads(0), + _total_mutator_refined_cards(0), + _total_concurrent_refined_cards(0), _num_coarsenings(0), _num_vtimes(G1ConcurrentRefine::max_num_threads()), _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)), @@ -114,12 +112,10 @@ assert(other != NULL, "just checking"); assert(_num_vtimes == other->_num_vtimes, "just checking"); - _num_conc_refined_cards = other->num_conc_refined_cards(); + _total_mutator_refined_cards = other->total_mutator_refined_cards(); + _total_concurrent_refined_cards = other->total_concurrent_refined_cards(); - _num_processed_buf_mutator = other->num_processed_buf_mutator(); - _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads(); - - _num_coarsenings = other->_num_coarsenings; + _num_coarsenings = other->num_coarsenings(); memcpy(_rs_threads_vtimes, other->_rs_threads_vtimes, sizeof(double) * _num_vtimes); @@ -130,10 +126,8 @@ assert(other != NULL, "just checking"); assert(_num_vtimes == other->_num_vtimes, "just checking"); - _num_conc_refined_cards = other->num_conc_refined_cards() - _num_conc_refined_cards; - - _num_processed_buf_mutator = other->num_processed_buf_mutator() - _num_processed_buf_mutator; - _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads() - _num_processed_buf_rs_threads; + _total_mutator_refined_cards = other->total_mutator_refined_cards() - _total_mutator_refined_cards; + _total_concurrent_refined_cards = other->total_concurrent_refined_cards() - _total_concurrent_refined_cards; _num_coarsenings = other->num_coarsenings() - _num_coarsenings; @@ -356,16 +350,15 @@ void G1RemSetSummary::print_on(outputStream* out) { out->print_cr(" Recent concurrent refinement statistics"); - out->print_cr(" Processed " SIZE_FORMAT " cards concurrently", num_conc_refined_cards()); - out->print_cr(" Of " SIZE_FORMAT " completed buffers:", num_processed_buf_total()); - out->print_cr(" " SIZE_FORMAT_W(8) " (%5.1f%%) by concurrent RS threads.", - num_processed_buf_total(), - percent_of(num_processed_buf_rs_threads(), num_processed_buf_total())); + out->print_cr(" Of " SIZE_FORMAT " refined cards:", total_refined_cards()); + out->print_cr(" " SIZE_FORMAT_W(8) " (%5.1f%%) by concurrent refinement threads.", + total_concurrent_refined_cards(), + percent_of(total_concurrent_refined_cards(), total_refined_cards())); out->print_cr(" " SIZE_FORMAT_W(8) " (%5.1f%%) by mutator threads.", - num_processed_buf_mutator(), - percent_of(num_processed_buf_mutator(), num_processed_buf_total())); + total_mutator_refined_cards(), + percent_of(total_mutator_refined_cards(), total_refined_cards())); out->print_cr(" Did " SIZE_FORMAT " coarsenings.", num_coarsenings()); - out->print_cr(" Concurrent RS threads times (s)"); + out->print_cr(" Concurrent refinement threads times (s)"); out->print(" "); for (uint i = 0; i < _num_vtimes; i++) { out->print(" %5.2f", rs_thread_vtime(i));
--- a/src/hotspot/share/gc/g1/g1RemSetSummary.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/g1/g1RemSetSummary.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -38,9 +38,8 @@ G1RemSet* _rem_set; - size_t _num_conc_refined_cards; - size_t _num_processed_buf_mutator; - size_t _num_processed_buf_rs_threads; + size_t _total_mutator_refined_cards; + size_t _total_concurrent_refined_cards; size_t _num_coarsenings; @@ -76,20 +75,16 @@ return _sampling_thread_vtime; } - size_t num_conc_refined_cards() const { - return _num_conc_refined_cards; + size_t total_mutator_refined_cards() const { + return _total_mutator_refined_cards; } - size_t num_processed_buf_mutator() const { - return _num_processed_buf_mutator; + size_t total_concurrent_refined_cards() const { + return _total_concurrent_refined_cards; } - size_t num_processed_buf_rs_threads() const { - return _num_processed_buf_rs_threads; - } - - size_t num_processed_buf_total() const { - return num_processed_buf_mutator() + num_processed_buf_rs_threads(); + size_t total_refined_cards() const { + return total_mutator_refined_cards() + total_concurrent_refined_cards(); } size_t num_coarsenings() const {
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -268,7 +268,7 @@ virtual void clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const; // Support for GC barriers emitted during parsing - virtual bool has_load_barriers() const { return false; } + virtual bool has_load_barrier_nodes() const { return false; } virtual bool is_gc_barrier_node(Node* node) const { return false; } virtual Node* step_over_gc_barrier(Node* c) const { return c; } virtual Node* step_over_gc_barrier_ctrl(Node* c) const { return c; } @@ -291,13 +291,9 @@ virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return false; } virtual bool has_special_unique_user(const Node* node) const { return false; } - virtual bool needs_anti_dependence_check(const Node* node) const { return true; } - - virtual void barrier_insertion_phase(Compile* C, PhaseIterGVN &igvn) const { } enum CompilePhase { BeforeOptimize, - BeforeLateInsertion, BeforeMacroExpand, BeforeCodeGen }; @@ -324,6 +320,10 @@ virtual Node* split_if_pre(PhaseIdealLoop* phase, Node* n) const { return NULL; } virtual bool build_loop_late_post(PhaseIdealLoop* phase, Node* n) const { return false; } virtual bool sink_node(PhaseIdealLoop* phase, Node* n, Node* x, Node* x_ctrl, Node* n_ctrl) const { return false; } + + virtual void late_barrier_analysis() const { } + virtual int estimate_stub_size() const { return 0; } + virtual void emit_stubs(CodeBuffer& cb) const { } }; #endif // SHARE_GC_SHARED_C2_BARRIERSETC2_HPP
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -103,7 +103,7 @@ static const TypeFunc* write_ref_field_pre_entry_Type(); static const TypeFunc* shenandoah_clone_barrier_Type(); static const TypeFunc* shenandoah_load_reference_barrier_Type(); - virtual bool has_load_barriers() const { return true; } + virtual bool has_load_barrier_nodes() const { return true; } // This is the entry-point for the backend to perform accesses through the Access API. virtual void clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const;
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -1275,12 +1275,32 @@ // The rethrow call may have too many projections to be // properly handled here. Given there's no reason for a // barrier to depend on the call, move it above the call - if (phase->get_ctrl(val) == ctrl) { - assert(val->Opcode() == Op_DecodeN, "unexpected node"); - assert(phase->is_dominator(phase->get_ctrl(val->in(1)), call->in(0)), "Load is too low"); - phase->set_ctrl(val, call->in(0)); - } - phase->set_ctrl(lrb, call->in(0)); + stack.push(lrb, 0); + do { + Node* n = stack.node(); + uint idx = stack.index(); + if (idx < n->req()) { + Node* in = n->in(idx); + stack.set_index(idx+1); + if (in != NULL) { + if (phase->has_ctrl(in)) { + if (phase->is_dominator(call, phase->get_ctrl(in))) { +#ifdef ASSERT + for (uint i = 0; i < stack.size(); i++) { + assert(stack.node_at(i) != in, "node shouldn't have been seen yet"); + } +#endif + stack.push(in, 0); + } + } else { + assert(phase->is_dominator(in, call->in(0)), "no dependency on the call"); + } + } + } else { + phase->set_ctrl(n, call->in(0)); + stack.pop(); + } + } while(stack.size() > 0); continue; } CallProjections projs = call->extract_projections(false, false);
--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -69,7 +69,8 @@ // enough, but we also do not want to steal too much CPU from the concurrently running // application. Using 1/4 of available threads for concurrent GC seems a good // compromise here. - if (FLAG_IS_DEFAULT(ConcGCThreads)) { + bool ergo_conc = FLAG_IS_DEFAULT(ConcGCThreads); + if (ergo_conc) { FLAG_SET_DEFAULT(ConcGCThreads, MAX2(1, os::processor_count() / 4)); } @@ -82,7 +83,8 @@ // that will overwhelm the OS scheduler. Using 1/2 of available threads seems to be a fair // compromise here. Due to implementation constraints, it should not be lower than // the number of concurrent threads. - if (FLAG_IS_DEFAULT(ParallelGCThreads)) { + bool ergo_parallel = FLAG_IS_DEFAULT(ParallelGCThreads); + if (ergo_parallel) { FLAG_SET_DEFAULT(ParallelGCThreads, MAX2(1, os::processor_count() / 2)); } @@ -90,9 +92,21 @@ vm_exit_during_initialization("Shenandoah expects ParallelGCThreads > 0, check -XX:ParallelGCThreads=#"); } + // Make sure ergonomic decisions do not break the thread count invariants. + // This may happen when user overrides one of the flags, but not the other. + // When that happens, we want to adjust the setting that was set ergonomically. if (ParallelGCThreads < ConcGCThreads) { - warning("Shenandoah expects ConcGCThreads <= ParallelGCThreads, adjusting ParallelGCThreads automatically"); - FLAG_SET_DEFAULT(ParallelGCThreads, ConcGCThreads); + if (ergo_conc && !ergo_parallel) { + FLAG_SET_DEFAULT(ConcGCThreads, ParallelGCThreads); + } else if (!ergo_conc && ergo_parallel) { + FLAG_SET_DEFAULT(ParallelGCThreads, ConcGCThreads); + } else if (ergo_conc && ergo_parallel) { + // Should not happen, check the ergonomic computation above. Fail with relevant error. + vm_exit_during_initialization("Shenandoah thread count ergonomic error"); + } else { + // User settings error, report and ask user to rectify. + vm_exit_during_initialization("Shenandoah expects ConcGCThreads <= ParallelGCThreads, check -XX:ParallelGCThreads, -XX:ConcGCThreads"); + } } if (FLAG_IS_DEFAULT(ParallelRefProcEnabled)) {
--- a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -314,9 +314,6 @@ diagnostic(bool, ShenandoahTerminationTrace, false, \ "Tracing task termination timings") \ \ - develop(bool, ShenandoahVerifyObjectEquals, false, \ - "Verify that == and != are not used on oops. Only in fastdebug") \ - \ diagnostic(bool, ShenandoahAlwaysPreTouch, false, \ "Pre-touch heap memory, overrides global AlwaysPreTouch") \ \
--- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -22,451 +22,157 @@ */ #include "precompiled.hpp" -#include "opto/castnode.hpp" +#include "classfile/javaClasses.hpp" +#include "gc/z/c2/zBarrierSetC2.hpp" +#include "gc/z/zBarrierSet.hpp" +#include "gc/z/zBarrierSetAssembler.hpp" +#include "gc/z/zBarrierSetRuntime.hpp" +#include "opto/block.hpp" #include "opto/compile.hpp" -#include "opto/escape.hpp" #include "opto/graphKit.hpp" -#include "opto/loopnode.hpp" #include "opto/machnode.hpp" -#include "opto/macro.hpp" #include "opto/memnode.hpp" -#include "opto/movenode.hpp" #include "opto/node.hpp" -#include "opto/phase.hpp" -#include "opto/phaseX.hpp" +#include "opto/regalloc.hpp" #include "opto/rootnode.hpp" -#include "opto/type.hpp" -#include "utilities/copy.hpp" #include "utilities/growableArray.hpp" #include "utilities/macros.hpp" -#include "gc/z/zBarrierSet.hpp" -#include "gc/z/c2/zBarrierSetC2.hpp" -#include "gc/z/zThreadLocalData.hpp" -#include "gc/z/zBarrierSetRuntime.hpp" -ZBarrierSetC2State::ZBarrierSetC2State(Arena* comp_arena) : - _load_barrier_nodes(new (comp_arena) GrowableArray<LoadBarrierNode*>(comp_arena, 8, 0, NULL)) {} +class ZBarrierSetC2State : public ResourceObj { +private: + GrowableArray<ZLoadBarrierStubC2*>* _stubs; + Node_Array _live; -int ZBarrierSetC2State::load_barrier_count() const { - return _load_barrier_nodes->length(); +public: + ZBarrierSetC2State(Arena* arena) : + _stubs(new (arena) GrowableArray<ZLoadBarrierStubC2*>(arena, 8, 0, NULL)), + _live(arena) {} + + GrowableArray<ZLoadBarrierStubC2*>* stubs() { + return _stubs; + } + + RegMask* live(const Node* node) { + if (!node->is_Mach()) { + // Don't need liveness for non-MachNodes + return NULL; + } + + const MachNode* const mach = node->as_Mach(); + if (mach->barrier_data() != ZLoadBarrierStrong && + mach->barrier_data() != ZLoadBarrierWeak) { + // Don't need liveness data for nodes without barriers + return NULL; + } + + RegMask* live = (RegMask*)_live[node->_idx]; + if (live == NULL) { + live = new (Compile::current()->comp_arena()->Amalloc_D(sizeof(RegMask))) RegMask(); + _live.map(node->_idx, (Node*)live); + } + + return live; + } +}; + +static ZBarrierSetC2State* barrier_set_state() { + return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state()); } -void ZBarrierSetC2State::add_load_barrier_node(LoadBarrierNode * n) { - assert(!_load_barrier_nodes->contains(n), " duplicate entry in expand list"); - _load_barrier_nodes->append(n); +ZLoadBarrierStubC2* ZLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { + ZLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZLoadBarrierStubC2(node, ref_addr, ref, tmp, weak); + if (!Compile::current()->in_scratch_emit_size()) { + barrier_set_state()->stubs()->append(stub); + } + + return stub; } -void ZBarrierSetC2State::remove_load_barrier_node(LoadBarrierNode * n) { - // this function may be called twice for a node so check - // that the node is in the array before attempting to remove it - if (_load_barrier_nodes->contains(n)) { - _load_barrier_nodes->remove(n); - } +ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) : + _node(node), + _ref_addr(ref_addr), + _ref(ref), + _tmp(tmp), + _weak(weak), + _entry(), + _continuation() { + assert_different_registers(ref, ref_addr.base()); + assert_different_registers(ref, ref_addr.index()); } -LoadBarrierNode* ZBarrierSetC2State::load_barrier_node(int idx) const { - return _load_barrier_nodes->at(idx); +Address ZLoadBarrierStubC2::ref_addr() const { + return _ref_addr; +} + +Register ZLoadBarrierStubC2::ref() const { + return _ref; +} + +Register ZLoadBarrierStubC2::tmp() const { + return _tmp; +} + +address ZLoadBarrierStubC2::slow_path() const { + const DecoratorSet decorators = _weak ? ON_WEAK_OOP_REF : ON_STRONG_OOP_REF; + return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators); +} + +RegMask& ZLoadBarrierStubC2::live() const { + return *barrier_set_state()->live(_node); +} + +Label* ZLoadBarrierStubC2::entry() { + // The _entry will never be bound when in_scratch_emit_size() is true. + // However, we still need to return a label that is not bound now, but + // will eventually be bound. Any lable will do, as it will only act as + // a placeholder, so we return the _continuation label. + return Compile::current()->in_scratch_emit_size() ? &_continuation : &_entry; +} + +Label* ZLoadBarrierStubC2::continuation() { + return &_continuation; } void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const { - return new(comp_arena) ZBarrierSetC2State(comp_arena); + return new (comp_arena) ZBarrierSetC2State(comp_arena); } -ZBarrierSetC2State* ZBarrierSetC2::state() const { - return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state()); +void ZBarrierSetC2::late_barrier_analysis() const { + analyze_dominating_barriers(); + compute_liveness_at_stubs(); } -bool ZBarrierSetC2::is_gc_barrier_node(Node* node) const { - // 1. This step follows potential oop projections of a load barrier before expansion - if (node->is_Proj()) { - node = node->in(0); +void ZBarrierSetC2::emit_stubs(CodeBuffer& cb) const { + MacroAssembler masm(&cb); + GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs(); + + for (int i = 0; i < stubs->length(); i++) { + // Make sure there is enough space in the code buffer + if (cb.insts()->maybe_expand_to_ensure_remaining(Compile::MAX_inst_size) && cb.blob() == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + + ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i)); } - // 2. This step checks for unexpanded load barriers - if (node->is_LoadBarrier()) { - return true; + masm.flush(); +} + +int ZBarrierSetC2::estimate_stub_size() const { + Compile* const C = Compile::current(); + BufferBlob* const blob = C->scratch_buffer_blob(); + GrowableArray<ZLoadBarrierStubC2*>* const stubs = barrier_set_state()->stubs(); + int size = 0; + + for (int i = 0; i < stubs->length(); i++) { + CodeBuffer cb(blob->content_begin(), (address)C->scratch_locs_memory() - blob->content_begin()); + MacroAssembler masm(&cb); + ZBarrierSet::assembler()->generate_c2_load_barrier_stub(&masm, stubs->at(i)); + size += cb.insts_size(); } - // 3. This step checks for the phi corresponding to an optimized load barrier expansion - if (node->is_Phi()) { - PhiNode* phi = node->as_Phi(); - Node* n = phi->in(1); - if (n != NULL && n->is_LoadBarrierSlowReg()) { - return true; - } - } - - return false; -} - -void ZBarrierSetC2::register_potential_barrier_node(Node* node) const { - if (node->is_LoadBarrier()) { - state()->add_load_barrier_node(node->as_LoadBarrier()); - } -} - -void ZBarrierSetC2::unregister_potential_barrier_node(Node* node) const { - if (node->is_LoadBarrier()) { - state()->remove_load_barrier_node(node->as_LoadBarrier()); - } -} - -void ZBarrierSetC2::eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const { - // Remove useless LoadBarrier nodes - ZBarrierSetC2State* s = state(); - for (int i = s->load_barrier_count()-1; i >= 0; i--) { - LoadBarrierNode* n = s->load_barrier_node(i); - if (!useful.member(n)) { - unregister_potential_barrier_node(n); - } - } -} - -void ZBarrierSetC2::enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const { - if (node->is_LoadBarrier() && !node->as_LoadBarrier()->has_true_uses()) { - igvn->_worklist.push(node); - } -} - -const uint NoBarrier = 0; -const uint RequireBarrier = 1; -const uint WeakBarrier = 2; -const uint ExpandedBarrier = 4; - -static bool load_require_barrier(LoadNode* load) { return (load->barrier_data() & RequireBarrier) == RequireBarrier; } -static bool load_has_weak_barrier(LoadNode* load) { return (load->barrier_data() & WeakBarrier) == WeakBarrier; } -static bool load_has_expanded_barrier(LoadNode* load) { return (load->barrier_data() & ExpandedBarrier) == ExpandedBarrier; } -static void load_set_expanded_barrier(LoadNode* load) { return load->set_barrier_data(ExpandedBarrier); } - -static void load_set_barrier(LoadNode* load, bool weak) { - if (weak) { - load->set_barrier_data(RequireBarrier | WeakBarrier); - } else { - load->set_barrier_data(RequireBarrier); - } -} - -// == LoadBarrierNode == - -LoadBarrierNode::LoadBarrierNode(Compile* C, - Node* c, - Node* mem, - Node* val, - Node* adr, - bool weak) : - MultiNode(Number_of_Inputs), - _weak(weak) { - init_req(Control, c); - init_req(Memory, mem); - init_req(Oop, val); - init_req(Address, adr); - init_req(Similar, C->top()); - - init_class_id(Class_LoadBarrier); - BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); - bs->register_potential_barrier_node(this); -} - -uint LoadBarrierNode::size_of() const { - return sizeof(*this); -} - -bool LoadBarrierNode::cmp(const Node& n) const { - ShouldNotReachHere(); - return false; -} - -const Type *LoadBarrierNode::bottom_type() const { - const Type** floadbarrier = (const Type **)(Compile::current()->type_arena()->Amalloc_4((Number_of_Outputs)*sizeof(Type*))); - Node* in_oop = in(Oop); - floadbarrier[Control] = Type::CONTROL; - floadbarrier[Memory] = Type::MEMORY; - floadbarrier[Oop] = in_oop == NULL ? Type::TOP : in_oop->bottom_type(); - return TypeTuple::make(Number_of_Outputs, floadbarrier); -} - -const TypePtr* LoadBarrierNode::adr_type() const { - ShouldNotReachHere(); - return NULL; -} - -const Type *LoadBarrierNode::Value(PhaseGVN *phase) const { - const Type** floadbarrier = (const Type **)(phase->C->type_arena()->Amalloc_4((Number_of_Outputs)*sizeof(Type*))); - const Type* val_t = phase->type(in(Oop)); - floadbarrier[Control] = Type::CONTROL; - floadbarrier[Memory] = Type::MEMORY; - floadbarrier[Oop] = val_t; - return TypeTuple::make(Number_of_Outputs, floadbarrier); -} - -bool LoadBarrierNode::is_dominator(PhaseIdealLoop* phase, bool linear_only, Node *d, Node *n) { - if (phase != NULL) { - return phase->is_dominator(d, n); - } - - for (int i = 0; i < 10 && n != NULL; i++) { - n = IfNode::up_one_dom(n, linear_only); - if (n == d) { - return true; - } - } - - return false; -} - -LoadBarrierNode* LoadBarrierNode::has_dominating_barrier(PhaseIdealLoop* phase, bool linear_only, bool look_for_similar) { - if (is_weak()) { - // Weak barriers can't be eliminated - return NULL; - } - - Node* val = in(LoadBarrierNode::Oop); - if (in(Similar)->is_Proj() && in(Similar)->in(0)->is_LoadBarrier()) { - LoadBarrierNode* lb = in(Similar)->in(0)->as_LoadBarrier(); - assert(lb->in(Address) == in(Address), ""); - // Load barrier on Similar edge dominates so if it now has the Oop field it can replace this barrier. - if (lb->in(Oop) == in(Oop)) { - return lb; - } - // Follow chain of load barrier through Similar edges - while (!lb->in(Similar)->is_top()) { - lb = lb->in(Similar)->in(0)->as_LoadBarrier(); - assert(lb->in(Address) == in(Address), ""); - } - if (lb != in(Similar)->in(0)) { - return lb; - } - } - for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) { - Node* u = val->fast_out(i); - if (u != this && u->is_LoadBarrier() && u->in(Oop) == val && u->as_LoadBarrier()->has_true_uses()) { - Node* this_ctrl = in(LoadBarrierNode::Control); - Node* other_ctrl = u->in(LoadBarrierNode::Control); - if (is_dominator(phase, linear_only, other_ctrl, this_ctrl)) { - return u->as_LoadBarrier(); - } - } - } - - if (can_be_eliminated()) { - return NULL; - } - - if (!look_for_similar) { - return NULL; - } - - Node* addr = in(LoadBarrierNode::Address); - for (DUIterator_Fast imax, i = addr->fast_outs(imax); i < imax; i++) { - Node* u = addr->fast_out(i); - if (u != this && u->is_LoadBarrier() && u->as_LoadBarrier()->has_true_uses()) { - Node* this_ctrl = in(LoadBarrierNode::Control); - Node* other_ctrl = u->in(LoadBarrierNode::Control); - if (is_dominator(phase, linear_only, other_ctrl, this_ctrl)) { - ResourceMark rm; - Unique_Node_List wq; - wq.push(in(LoadBarrierNode::Control)); - bool ok = true; - bool dom_found = false; - for (uint next = 0; next < wq.size(); ++next) { - Node *n = wq.at(next); - if (n->is_top()) { - return NULL; - } - assert(n->is_CFG(), ""); - if (n->is_SafePoint()) { - ok = false; - break; - } - if (n == u) { - dom_found = true; - continue; - } - if (n->is_Region()) { - for (uint i = 1; i < n->req(); i++) { - Node* m = n->in(i); - if (m != NULL) { - wq.push(m); - } - } - } else { - Node* m = n->in(0); - if (m != NULL) { - wq.push(m); - } - } - } - if (ok) { - assert(dom_found, ""); - return u->as_LoadBarrier(); - } - break; - } - } - } - - return NULL; -} - -void LoadBarrierNode::push_dominated_barriers(PhaseIterGVN* igvn) const { - // Change to that barrier may affect a dominated barrier so re-push those - assert(!is_weak(), "sanity"); - Node* val = in(LoadBarrierNode::Oop); - - for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) { - Node* u = val->fast_out(i); - if (u != this && u->is_LoadBarrier() && u->in(Oop) == val) { - Node* this_ctrl = in(Control); - Node* other_ctrl = u->in(Control); - if (is_dominator(NULL, false, this_ctrl, other_ctrl)) { - igvn->_worklist.push(u); - } - } - - Node* addr = in(LoadBarrierNode::Address); - for (DUIterator_Fast imax, i = addr->fast_outs(imax); i < imax; i++) { - Node* u = addr->fast_out(i); - if (u != this && u->is_LoadBarrier() && u->in(Similar)->is_top()) { - Node* this_ctrl = in(Control); - Node* other_ctrl = u->in(Control); - if (is_dominator(NULL, false, this_ctrl, other_ctrl)) { - igvn->_worklist.push(u); - } - } - } - } -} - -Node *LoadBarrierNode::Identity(PhaseGVN *phase) { - LoadBarrierNode* dominating_barrier = has_dominating_barrier(NULL, true, false); - if (dominating_barrier != NULL) { - assert(!is_weak(), "Weak barriers cant be eliminated"); - assert(dominating_barrier->in(Oop) == in(Oop), ""); - return dominating_barrier; - } - - return this; -} - -Node *LoadBarrierNode::Ideal(PhaseGVN *phase, bool can_reshape) { - if (remove_dead_region(phase, can_reshape)) { - return this; - } - - Node *val = in(Oop); - Node *mem = in(Memory); - Node *ctrl = in(Control); - - assert(val->Opcode() != Op_LoadN, ""); - assert(val->Opcode() != Op_DecodeN, ""); - - if (mem->is_MergeMem()) { - Node *new_mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw); - set_req(Memory, new_mem); - if (mem->outcnt() == 0 && can_reshape) { - phase->is_IterGVN()->_worklist.push(mem); - } - return this; - } - - LoadBarrierNode *dominating_barrier = NULL; - if (!is_weak()) { - dominating_barrier = has_dominating_barrier(NULL, !can_reshape, !phase->C->major_progress()); - if (dominating_barrier != NULL && dominating_barrier->in(Oop) != in(Oop)) { - assert(in(Address) == dominating_barrier->in(Address), ""); - set_req(Similar, dominating_barrier->proj_out(Oop)); - return this; - } - } - - bool eliminate = can_reshape && (dominating_barrier != NULL || !has_true_uses()); - if (eliminate) { - if (can_reshape) { - PhaseIterGVN* igvn = phase->is_IterGVN(); - Node* out_ctrl = proj_out_or_null(Control); - Node* out_res = proj_out_or_null(Oop); - - if (out_ctrl != NULL) { - igvn->replace_node(out_ctrl, ctrl); - } - - // That transformation may cause the Similar edge on the load barrier to be invalid - fix_similar_in_uses(igvn); - if (out_res != NULL) { - if (dominating_barrier != NULL) { - assert(!is_weak(), "Sanity"); - igvn->replace_node(out_res, dominating_barrier->proj_out(Oop)); - } else { - igvn->replace_node(out_res, val); - } - } - } - return new ConINode(TypeInt::ZERO); - } - - // If the Similar edge is no longer a load barrier, clear it - Node* similar = in(Similar); - if (!similar->is_top() && !(similar->is_Proj() && similar->in(0)->is_LoadBarrier())) { - set_req(Similar, phase->C->top()); - return this; - } - - if (can_reshape && !is_weak()) { - // If this barrier is linked through the Similar edge by a - // dominated barrier and both barriers have the same Oop field, - // the dominated barrier can go away, so push it for reprocessing. - // We also want to avoid a barrier to depend on another dominating - // barrier through its Similar edge that itself depend on another - // barrier through its Similar edge and rather have the first - // depend on the third. - PhaseIterGVN* igvn = phase->is_IterGVN(); - Node* out_res = proj_out(Oop); - for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) { - Node* u = out_res->fast_out(i); - if (u->is_LoadBarrier() && u->in(Similar) == out_res && - (u->in(Oop) == val || !u->in(Similar)->is_top())) { - assert(!u->as_LoadBarrier()->is_weak(), "Sanity"); - igvn->_worklist.push(u); - } - } - push_dominated_barriers(igvn); - } - - return NULL; -} - -uint LoadBarrierNode::match_edge(uint idx) const { - ShouldNotReachHere(); - return 0; -} - -void LoadBarrierNode::fix_similar_in_uses(PhaseIterGVN* igvn) { - Node* out_res = proj_out_or_null(Oop); - if (out_res == NULL) { - return; - } - - for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) { - Node* u = out_res->fast_out(i); - if (u->is_LoadBarrier() && u->in(Similar) == out_res) { - igvn->replace_input_of(u, Similar, igvn->C->top()); - --i; - --imax; - } - } -} - -bool LoadBarrierNode::has_true_uses() const { - Node* out_res = proj_out_or_null(Oop); - if (out_res != NULL) { - for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) { - Node *u = out_res->fast_out(i); - if (!u->is_LoadBarrier() || u->in(Similar) != out_res) { - return true; - } - } - } - return false; + return size; } static bool barrier_needed(C2Access& access) { @@ -474,1223 +180,252 @@ } Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { - Node* p = BarrierSetC2::load_at_resolved(access, val_type); - if (!barrier_needed(access)) { - return p; + Node* result = BarrierSetC2::load_at_resolved(access, val_type); + if (barrier_needed(access) && access.raw_access()->is_Mem()) { + if ((access.decorators() & ON_WEAK_OOP_REF) != 0) { + access.raw_access()->as_Load()->set_barrier_data(ZLoadBarrierWeak); + } else { + access.raw_access()->as_Load()->set_barrier_data(ZLoadBarrierStrong); + } } - bool weak = (access.decorators() & ON_WEAK_OOP_REF) != 0; - if (p->isa_Load()) { - load_set_barrier(p->as_Load(), weak); - } - return p; + return result; } Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val, Node* new_val, const Type* val_type) const { Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type); - LoadStoreNode* lsn = result->as_LoadStore(); if (barrier_needed(access)) { - lsn->set_has_barrier(); + access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); } - return lsn; + return result; } Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val, Node* new_val, const Type* value_type) const { Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); - LoadStoreNode* lsn = result->as_LoadStore(); if (barrier_needed(access)) { - lsn->set_has_barrier(); + access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); } - return lsn; + return result; } Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const { Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type); - LoadStoreNode* lsn = result->as_LoadStore(); if (barrier_needed(access)) { - lsn->set_has_barrier(); + access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); } - return lsn; + return result; } -// == Macro Expansion == - -// Optimized, low spill, loadbarrier variant using stub specialized on register used -void ZBarrierSetC2::expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const { - PhaseIterGVN &igvn = phase->igvn(); - float unlikely = PROB_UNLIKELY(0.999); - - Node* in_ctrl = barrier->in(LoadBarrierNode::Control); - Node* in_mem = barrier->in(LoadBarrierNode::Memory); - Node* in_val = barrier->in(LoadBarrierNode::Oop); - Node* in_adr = barrier->in(LoadBarrierNode::Address); - - Node* out_ctrl = barrier->proj_out(LoadBarrierNode::Control); - Node* out_res = barrier->proj_out(LoadBarrierNode::Oop); - - assert(barrier->in(LoadBarrierNode::Oop) != NULL, "oop to loadbarrier node cannot be null"); - - Node* jthread = igvn.transform(new ThreadLocalNode()); - Node* adr = phase->basic_plus_adr(jthread, in_bytes(ZThreadLocalData::address_bad_mask_offset())); - Node* bad_mask = igvn.transform(LoadNode::make(igvn, in_ctrl, in_mem, adr, - TypeRawPtr::BOTTOM, TypeX_X, TypeX_X->basic_type(), - MemNode::unordered)); - Node* cast = igvn.transform(new CastP2XNode(in_ctrl, in_val)); - Node* obj_masked = igvn.transform(new AndXNode(cast, bad_mask)); - Node* cmp = igvn.transform(new CmpXNode(obj_masked, igvn.zerocon(TypeX_X->basic_type()))); - Node *bol = igvn.transform(new BoolNode(cmp, BoolTest::ne))->as_Bool(); - IfNode* iff = igvn.transform(new IfNode(in_ctrl, bol, unlikely, COUNT_UNKNOWN))->as_If(); - Node* then = igvn.transform(new IfTrueNode(iff)); - Node* elsen = igvn.transform(new IfFalseNode(iff)); - - Node* new_loadp = igvn.transform(new LoadBarrierSlowRegNode(then, in_adr, in_val, - (const TypePtr*) in_val->bottom_type(), barrier->is_weak())); - - // Create the final region/phi pair to converge cntl/data paths to downstream code - Node* result_region = igvn.transform(new RegionNode(3)); - result_region->set_req(1, then); - result_region->set_req(2, elsen); - - Node* result_phi = igvn.transform(new PhiNode(result_region, TypeInstPtr::BOTTOM)); - result_phi->set_req(1, new_loadp); - result_phi->set_req(2, barrier->in(LoadBarrierNode::Oop)); - - igvn.replace_node(out_ctrl, result_region); - igvn.replace_node(out_res, result_phi); - - assert(barrier->outcnt() == 0,"LoadBarrier macro node has non-null outputs after expansion!"); - - igvn.remove_dead_node(barrier); - igvn.remove_dead_node(out_ctrl); - igvn.remove_dead_node(out_res); - - assert(is_gc_barrier_node(result_phi), "sanity"); - assert(step_over_gc_barrier(result_phi) == in_val, "sanity"); - - phase->C->print_method(PHASE_BARRIER_EXPANSION, 4, barrier->_idx); +bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, + bool is_clone, ArrayCopyPhase phase) const { + return type == T_OBJECT || type == T_ARRAY; } -bool ZBarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const { - ZBarrierSetC2State* s = state(); - if (s->load_barrier_count() > 0) { - PhaseMacroExpand macro(igvn); +// == Dominating barrier elision == - int skipped = 0; - while (s->load_barrier_count() > skipped) { - int load_barrier_count = s->load_barrier_count(); - LoadBarrierNode * n = s->load_barrier_node(load_barrier_count-1-skipped); - if (igvn.type(n) == Type::TOP || (n->in(0) != NULL && n->in(0)->is_top())) { - // Node is unreachable, so don't try to expand it - s->remove_load_barrier_node(n); - continue; - } - if (!n->can_be_eliminated()) { - skipped++; - continue; - } - expand_loadbarrier_node(¯o, n); - assert(s->load_barrier_count() < load_barrier_count, "must have deleted a node from load barrier list"); - if (C->failing()) { - return true; - } - } - while (s->load_barrier_count() > 0) { - int load_barrier_count = s->load_barrier_count(); - LoadBarrierNode* n = s->load_barrier_node(load_barrier_count - 1); - assert(!(igvn.type(n) == Type::TOP || (n->in(0) != NULL && n->in(0)->is_top())), "should have been processed already"); - assert(!n->can_be_eliminated(), "should have been processed already"); - expand_loadbarrier_node(¯o, n); - assert(s->load_barrier_count() < load_barrier_count, "must have deleted a node from load barrier list"); - if (C->failing()) { - return true; - } - } - igvn.set_delay_transform(false); - igvn.optimize(); - if (C->failing()) { +static bool block_has_safepoint(const Block* block, uint from, uint to) { + for (uint i = from; i < to; i++) { + if (block->get_node(i)->is_MachSafePoint()) { + // Safepoint found return true; } } + // Safepoint not found return false; } -Node* ZBarrierSetC2::step_over_gc_barrier(Node* c) const { - Node* node = c; +static bool block_has_safepoint(const Block* block) { + return block_has_safepoint(block, 0, block->number_of_nodes()); +} - // 1. This step follows potential oop projections of a load barrier before expansion - if (node->is_Proj()) { - node = node->in(0); +static uint block_index(const Block* block, const Node* node) { + for (uint j = 0; j < block->number_of_nodes(); ++j) { + if (block->get_node(j) == node) { + return j; + } } + ShouldNotReachHere(); + return 0; +} - // 2. This step checks for unexpanded load barriers - if (node->is_LoadBarrier()) { - return node->in(LoadBarrierNode::Oop); - } +void ZBarrierSetC2::analyze_dominating_barriers() const { + ResourceMark rm; + Compile* const C = Compile::current(); + PhaseCFG* const cfg = C->cfg(); + Block_List worklist; + Node_List mem_ops; + Node_List barrier_loads; - // 3. This step checks for the phi corresponding to an optimized load barrier expansion - if (node->is_Phi()) { - PhiNode* phi = node->as_Phi(); - Node* n = phi->in(1); - if (n != NULL && n->is_LoadBarrierSlowReg()) { - assert(c == node, "projections from step 1 should only be seen before macro expansion"); - return phi->in(2); + // Step 1 - Find accesses, and track them in lists + for (uint i = 0; i < cfg->number_of_blocks(); ++i) { + const Block* const block = cfg->get_block(i); + for (uint j = 0; j < block->number_of_nodes(); ++j) { + const Node* const node = block->get_node(j); + if (!node->is_Mach()) { + continue; + } + + MachNode* const mach = node->as_Mach(); + switch (mach->ideal_Opcode()) { + case Op_LoadP: + case Op_CompareAndExchangeP: + case Op_CompareAndSwapP: + case Op_GetAndSetP: + if (mach->barrier_data() == ZLoadBarrierStrong) { + barrier_loads.push(mach); + } + case Op_StoreP: + mem_ops.push(mach); + break; + + default: + break; + } } } - return c; -} + // Step 2 - Find dominating accesses for each load + for (uint i = 0; i < barrier_loads.size(); i++) { + MachNode* const load = barrier_loads.at(i)->as_Mach(); + const TypePtr* load_adr_type = NULL; + intptr_t load_offset = 0; + const Node* const load_obj = load->get_base_and_disp(load_offset, load_adr_type); + Block* const load_block = cfg->get_block_for_node(load); + const uint load_index = block_index(load_block, load); -Node* ZBarrierSetC2::step_over_gc_barrier_ctrl(Node* c) const { - Node* node = c; + for (uint j = 0; j < mem_ops.size(); j++) { + MachNode* mem = mem_ops.at(j)->as_Mach(); + const TypePtr* mem_adr_type = NULL; + intptr_t mem_offset = 0; + const Node* mem_obj = mem_obj = mem->get_base_and_disp(mem_offset, mem_adr_type); + Block* mem_block = cfg->get_block_for_node(mem); + uint mem_index = block_index(mem_block, mem); - // 1. This step follows potential ctrl projections of a load barrier before expansion - if (node->is_Proj()) { - node = node->in(0); - } + if (load_obj == NodeSentinel || mem_obj == NodeSentinel || + load_obj == NULL || mem_obj == NULL || + load_offset < 0 || mem_offset < 0) { + continue; + } - // 2. This step checks for unexpanded load barriers - if (node->is_LoadBarrier()) { - return node->in(LoadBarrierNode::Control); - } + if (mem_obj != load_obj || mem_offset != load_offset) { + // Not the same addresses, not a candidate + continue; + } - return c; -} + if (load_block == mem_block) { + // Earlier accesses in the same block + if (mem_index < load_index && !block_has_safepoint(mem_block, mem_index + 1, load_index)) { + load->set_barrier_data(ZLoadBarrierElided); + } + } else if (mem_block->dominates(load_block)) { + // Dominating block? Look around for safepoints + ResourceMark rm; + Block_List stack; + VectorSet visited(Thread::current()->resource_area()); + stack.push(load_block); + bool safepoint_found = block_has_safepoint(load_block); + while (!safepoint_found && stack.size() > 0) { + Block* block = stack.pop(); + if (visited.test_set(block->_pre_order)) { + continue; + } + if (block_has_safepoint(block)) { + safepoint_found = true; + break; + } + if (block == mem_block) { + continue; + } -bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const { - return is_reference_type(type); -} - -bool ZBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, uint opcode) const { - switch (opcode) { - case Op_LoadBarrier: - assert(0, "There should be no load barriers left"); - case Op_ZGetAndSetP: - case Op_ZCompareAndExchangeP: - case Op_ZCompareAndSwapP: - case Op_ZWeakCompareAndSwapP: -#ifdef ASSERT - if (VerifyOptoOopOffsets) { - MemNode *mem = n->as_Mem(); - // Check to see if address types have grounded out somehow. - const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr(); - ciInstanceKlass *k = tp->klass()->as_instance_klass(); - bool oop_offset_is_sane = k->contains_field_offset(tp->offset()); - assert(!tp || oop_offset_is_sane, ""); - } -#endif - return true; - default: - return false; - } -} - -bool ZBarrierSetC2::matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const { - switch(opcode) { - case Op_CallLeaf: - if (n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr() || - n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded_addr()) { - mem_op = true; - mem_addr_idx = TypeFunc::Parms + 1; - return true; - } - return false; - default: - return false; - } -} - -bool ZBarrierSetC2::matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const { - switch(opcode) { - case Op_ZCompareAndExchangeP: - case Op_ZCompareAndSwapP: - case Op_ZWeakCompareAndSwapP: { - Node *mem = n->in(MemNode::Address); - Node *keepalive = n->in(5); - Node *pair1 = new BinaryNode(mem, keepalive); - - Node *newval = n->in(MemNode::ValueIn); - Node *oldval = n->in(LoadStoreConditionalNode::ExpectedIn); - Node *pair2 = new BinaryNode(oldval, newval); - - n->set_req(MemNode::Address, pair1); - n->set_req(MemNode::ValueIn, pair2); - n->del_req(5); - n->del_req(LoadStoreConditionalNode::ExpectedIn); - return true; - } - case Op_ZGetAndSetP: { - Node *keepalive = n->in(4); - Node *newval = n->in(MemNode::ValueIn); - Node *pair = new BinaryNode(newval, keepalive); - n->set_req(MemNode::ValueIn, pair); - n->del_req(4); - return true; - } - - default: - return false; - } -} - -// == Verification == - -#ifdef ASSERT - -static void verify_slippery_safepoints_internal(Node* ctrl) { - // Given a CFG node, make sure it does not contain both safepoints and loads - // that have expanded barriers. - bool found_safepoint = false; - bool found_load = false; - - for (DUIterator_Fast imax, i = ctrl->fast_outs(imax); i < imax; i++) { - Node* node = ctrl->fast_out(i); - if (node->in(0) != ctrl) { - // Skip outgoing precedence edges from ctrl. - continue; - } - if (node->is_SafePoint()) { - found_safepoint = true; - } - if (node->is_Load() && load_require_barrier(node->as_Load()) && - load_has_expanded_barrier(node->as_Load())) { - found_load = true; - } - } - assert(!found_safepoint || !found_load, "found load and safepoint in same block"); -} - -static void verify_slippery_safepoints(Compile* C) { - ResourceArea *area = Thread::current()->resource_area(); - Unique_Node_List visited(area); - Unique_Node_List checked(area); - - // Recursively walk the graph. - visited.push(C->root()); - while (visited.size() > 0) { - Node* node = visited.pop(); - - Node* ctrl = node; - if (!node->is_CFG()) { - ctrl = node->in(0); - } - - if (ctrl != NULL && !checked.member(ctrl)) { - // For each block found in the graph, verify that it does not - // contain both a safepoint and a load requiring barriers. - verify_slippery_safepoints_internal(ctrl); - - checked.push(ctrl); - } - - checked.push(node); - - for (DUIterator_Fast imax, i = node->fast_outs(imax); i < imax; i++) { - Node* use = node->fast_out(i); - if (checked.member(use)) continue; - if (visited.member(use)) continue; - visited.push(use); - } - } -} - -void ZBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const { - switch(phase) { - case BarrierSetC2::BeforeOptimize: - case BarrierSetC2::BeforeLateInsertion: - assert(state()->load_barrier_count() == 0, "No barriers inserted yet"); - break; - case BarrierSetC2::BeforeMacroExpand: - // Barrier placement should be set by now. - verify_gc_barriers(false /*post_parse*/); - break; - case BarrierSetC2::BeforeCodeGen: - // Barriers has been fully expanded. - assert(state()->load_barrier_count() == 0, "No more macro barriers"); - verify_slippery_safepoints(compile); - break; - default: - assert(0, "Phase without verification"); - } -} - -// post_parse implies that there might be load barriers without uses after parsing -// That only applies when adding barriers at parse time. -void ZBarrierSetC2::verify_gc_barriers(bool post_parse) const { - ZBarrierSetC2State* s = state(); - Compile* C = Compile::current(); - ResourceMark rm; - VectorSet visited(Thread::current()->resource_area()); - - for (int i = 0; i < s->load_barrier_count(); i++) { - LoadBarrierNode* n = s->load_barrier_node(i); - - // The dominating barrier on the same address if it exists and - // this barrier must not be applied on the value from the same - // load otherwise the value is not reloaded before it's used the - // second time. - assert(n->in(LoadBarrierNode::Similar)->is_top() || - (n->in(LoadBarrierNode::Similar)->in(0)->is_LoadBarrier() && - n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Address) == n->in(LoadBarrierNode::Address) && - n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Oop) != n->in(LoadBarrierNode::Oop)), - "broken similar edge"); - - assert(n->as_LoadBarrier()->has_true_uses(), - "found unneeded load barrier"); - - // Several load barrier nodes chained through their Similar edge - // break the code that remove the barriers in final graph reshape. - assert(n->in(LoadBarrierNode::Similar)->is_top() || - (n->in(LoadBarrierNode::Similar)->in(0)->is_LoadBarrier() && - n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Similar)->is_top()), - "chain of Similar load barriers"); - - if (!n->in(LoadBarrierNode::Similar)->is_top()) { - ResourceMark rm; - Unique_Node_List wq; - Node* other = n->in(LoadBarrierNode::Similar)->in(0); - wq.push(n); - for (uint next = 0; next < wq.size(); ++next) { - Node *nn = wq.at(next); - assert(nn->is_CFG(), ""); - assert(!nn->is_SafePoint(), ""); - - if (nn == other) { - continue; + // Push predecessor blocks + for (uint p = 1; p < block->num_preds(); ++p) { + Block* pred = cfg->get_block_for_node(block->pred(p)); + stack.push(pred); + } } - if (nn->is_Region()) { - for (uint i = 1; i < nn->req(); i++) { - Node* m = nn->in(i); - if (m != NULL) { - wq.push(m); - } - } - } else { - Node* m = nn->in(0); - if (m != NULL) { - wq.push(m); - } + if (!safepoint_found) { + load->set_barrier_data(ZLoadBarrierElided); } } } } } -#endif // end verification code +// == Reduced spilling optimization == -// If a call is the control, we actually want its control projection -static Node* normalize_ctrl(Node* node) { - if (node->is_Call()) { - node = node->as_Call()->proj_out(TypeFunc::Control); - } - return node; -} +void ZBarrierSetC2::compute_liveness_at_stubs() const { + ResourceMark rm; + Compile* const C = Compile::current(); + Arena* const A = Thread::current()->resource_area(); + PhaseCFG* const cfg = C->cfg(); + PhaseRegAlloc* const regalloc = C->regalloc(); + RegMask* const live = NEW_ARENA_ARRAY(A, RegMask, cfg->number_of_blocks() * sizeof(RegMask)); + ZBarrierSetAssembler* const bs = ZBarrierSet::assembler(); + Block_List worklist; -static Node* get_ctrl_normalized(PhaseIdealLoop *phase, Node* node) { - return normalize_ctrl(phase->get_ctrl(node)); -} - -static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl); - -// This code is cloning all uses of a load that is between a call and the catch blocks, -// to each use. - -static bool fixup_uses_in_catch(PhaseIdealLoop *phase, Node *start_ctrl, Node *node) { - - if (!phase->has_ctrl(node)) { - // This node is floating - doesn't need to be cloned. - assert(node != start_ctrl, "check"); - return false; + for (uint i = 0; i < cfg->number_of_blocks(); ++i) { + new ((void*)(live + i)) RegMask(); + worklist.push(cfg->get_block(i)); } - Node* ctrl = get_ctrl_normalized(phase, node); - if (ctrl != start_ctrl) { - // We are in a successor block - the node is ok. - return false; // Unwind - } + while (worklist.size() > 0) { + const Block* const block = worklist.pop(); + RegMask& old_live = live[block->_pre_order]; + RegMask new_live; - // Process successor nodes - int outcnt = node->outcnt(); - for (int i = 0; i < outcnt; i++) { - Node* n = node->raw_out(0); - assert(!n->is_LoadBarrier(), "Sanity"); - // Calling recursively, visiting leafs first - fixup_uses_in_catch(phase, start_ctrl, n); - } + // Initialize to union of successors + for (uint i = 0; i < block->_num_succs; i++) { + const uint succ_id = block->_succs[i]->_pre_order; + new_live.OR(live[succ_id]); + } - // Now all successors are outside - // - Clone this node to both successors - assert(!node->is_Store(), "Stores not expected here"); + // Walk block backwards, computing liveness + for (int i = block->number_of_nodes() - 1; i >= 0; --i) { + const Node* const node = block->get_node(i); - // In some very rare cases a load that doesn't need a barrier will end up here - // Treat it as a LoadP and the insertion of phis will be done correctly. - if (node->is_Load()) { - call_catch_cleanup_one(phase, node->as_Load(), phase->get_ctrl(node)); - } else { - for (DUIterator_Fast jmax, i = node->fast_outs(jmax); i < jmax; i++) { - Node* use = node->fast_out(i); - Node* clone = node->clone(); - assert(clone->outcnt() == 0, ""); - - assert(use->find_edge(node) != -1, "check"); - phase->igvn().rehash_node_delayed(use); - use->replace_edge(node, clone); - - Node* new_ctrl; - if (use->is_block_start()) { - new_ctrl = use; - } else if (use->is_CFG()) { - new_ctrl = use->in(0); - assert (new_ctrl != NULL, ""); - } else { - new_ctrl = get_ctrl_normalized(phase, use); + // Remove def bits + const OptoReg::Name first = bs->refine_register(node, regalloc->get_reg_first(node)); + const OptoReg::Name second = bs->refine_register(node, regalloc->get_reg_second(node)); + if (first != OptoReg::Bad) { + new_live.Remove(first); + } + if (second != OptoReg::Bad) { + new_live.Remove(second); } - phase->set_ctrl(clone, new_ctrl); - - if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr(" Clone op %i as %i to control %i", node->_idx, clone->_idx, new_ctrl->_idx); - phase->igvn().register_new_node_with_optimizer(clone); - --i, --jmax; - } - assert(node->outcnt() == 0, "must be empty now"); - - // Node node is dead. - phase->igvn().remove_dead_node(node); - } - return true; // unwind - return if a use was processed -} - -// Clone a load to a specific catch_proj -static Node* clone_load_to_catchproj(PhaseIdealLoop* phase, Node* load, Node* catch_proj) { - Node* cloned_load = load->clone(); - cloned_load->set_req(0, catch_proj); // set explicit control - phase->set_ctrl(cloned_load, catch_proj); // update - if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr(" Clone LOAD %i as %i to control %i", load->_idx, cloned_load->_idx, catch_proj->_idx); - phase->igvn().register_new_node_with_optimizer(cloned_load); - return cloned_load; -} - -static Node* get_dominating_region(PhaseIdealLoop* phase, Node* node, Node* stop) { - Node* region = node; - while (!region->isa_Region()) { - Node *up = phase->idom(region); - assert(up != region, "Must not loop"); - assert(up != stop, "Must not find original control"); - region = up; - } - return region; -} - -// Clone this load to each catch block -static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) { - bool trace = phase->C->directive()->ZTraceLoadBarriersOption; - phase->igvn().set_delay_transform(true); - - // Verify pre conditions - assert(ctrl->isa_Proj() && ctrl->in(0)->isa_Call(), "Must be a call proj"); - assert(ctrl->raw_out(0)->isa_Catch(), "Must be a catch"); - - if (ctrl->raw_out(0)->isa_Catch()->outcnt() == 1) { - if (trace) tty->print_cr("Cleaning up catch: Skipping load %i, call with single catch", load->_idx); - return; - } - - // Process the loads successor nodes - if any is between - // the call and the catch blocks, they need to be cloned to. - // This is done recursively - for (uint i = 0; i < load->outcnt();) { - Node *n = load->raw_out(i); - assert(!n->is_LoadBarrier(), "Sanity"); - if (!fixup_uses_in_catch(phase, ctrl, n)) { - // if no successor was cloned, progress to next out. - i++; - } - } - - // Now all the loads uses has been cloned down - // Only thing left is to clone the loads, but they must end up - // first in the catch blocks. - - // We clone the loads oo the catch blocks only when needed. - // An array is used to map the catch blocks to each lazily cloned load. - // In that way no extra unnecessary loads are cloned. - - // Any use dominated by original block must have an phi and a region added - - Node* catch_node = ctrl->raw_out(0); - int number_of_catch_projs = catch_node->outcnt(); - Node** proj_to_load_mapping = NEW_RESOURCE_ARRAY(Node*, number_of_catch_projs); - Copy::zero_to_bytes(proj_to_load_mapping, sizeof(Node*) * number_of_catch_projs); - - // The phi_map is used to keep track of where phis have already been inserted - int phi_map_len = phase->C->unique(); - Node** phi_map = NEW_RESOURCE_ARRAY(Node*, phi_map_len); - Copy::zero_to_bytes(phi_map, sizeof(Node*) * phi_map_len); - - for (unsigned int i = 0; i < load->outcnt(); i++) { - Node* load_use_control = NULL; - Node* load_use = load->raw_out(i); - - if (phase->has_ctrl(load_use)) { - load_use_control = get_ctrl_normalized(phase, load_use); - assert(load_use_control != ctrl, "sanity"); - } else { - load_use_control = load_use->in(0); - } - assert(load_use_control != NULL, "sanity"); - if (trace) tty->print_cr(" Handling use: %i, with control: %i", load_use->_idx, load_use_control->_idx); - - // Some times the loads use is a phi. For them we need to determine from which catch block - // the use is defined. - bool load_use_is_phi = false; - unsigned int load_use_phi_index = 0; - Node* phi_ctrl = NULL; - if (load_use->is_Phi()) { - // Find phi input that matches load - for (unsigned int u = 1; u < load_use->req(); u++) { - if (load_use->in(u) == load) { - load_use_is_phi = true; - load_use_phi_index = u; - assert(load_use->in(0)->is_Region(), "Region or broken"); - phi_ctrl = load_use->in(0)->in(u); - assert(phi_ctrl->is_CFG(), "check"); - assert(phi_ctrl != load, "check"); - break; + // Add use bits + for (uint j = 1; j < node->req(); ++j) { + const Node* const use = node->in(j); + const OptoReg::Name first = bs->refine_register(use, regalloc->get_reg_first(use)); + const OptoReg::Name second = bs->refine_register(use, regalloc->get_reg_second(use)); + if (first != OptoReg::Bad) { + new_live.Insert(first); } - } - assert(load_use_is_phi, "must find"); - assert(load_use_phi_index > 0, "sanity"); - } - - // For each load use, see which catch projs dominates, create load clone lazily and reconnect - bool found_dominating_catchproj = false; - for (int c = 0; c < number_of_catch_projs; c++) { - Node* catchproj = catch_node->raw_out(c); - assert(catchproj != NULL && catchproj->isa_CatchProj(), "Sanity"); - - if (!phase->is_dominator(catchproj, load_use_control)) { - if (load_use_is_phi && phase->is_dominator(catchproj, phi_ctrl)) { - // The loads use is local to the catchproj. - // fall out and replace load with catch-local load clone. - } else { - continue; - } - } - assert(!found_dominating_catchproj, "Max one should match"); - - // Clone loads to catch projs - Node* load_clone = proj_to_load_mapping[c]; - if (load_clone == NULL) { - load_clone = clone_load_to_catchproj(phase, load, catchproj); - proj_to_load_mapping[c] = load_clone; - } - phase->igvn().rehash_node_delayed(load_use); - - if (load_use_is_phi) { - // phis are special - the load is defined from a specific control flow - load_use->set_req(load_use_phi_index, load_clone); - } else { - // Multipe edges can be replaced at once - on calls for example - load_use->replace_edge(load, load_clone); - } - --i; // more than one edge can have been removed, but the next is in later iterations - - // We could break the for-loop after finding a dominating match. - // But keep iterating to catch any bad idom early. - found_dominating_catchproj = true; - } - - // We found no single catchproj that dominated the use - The use is at a point after - // where control flow from multiple catch projs have merged. We will have to create - // phi nodes before the use and tie the output from the cloned loads together. It - // can be a single phi or a number of chained phis, depending on control flow - if (!found_dominating_catchproj) { - - // Use phi-control if use is a phi - if (load_use_is_phi) { - load_use_control = phi_ctrl; - } - assert(phase->is_dominator(ctrl, load_use_control), "Common use but no dominator"); - - // Clone a load on all paths - for (int c = 0; c < number_of_catch_projs; c++) { - Node* catchproj = catch_node->raw_out(c); - Node* load_clone = proj_to_load_mapping[c]; - if (load_clone == NULL) { - load_clone = clone_load_to_catchproj(phase, load, catchproj); - proj_to_load_mapping[c] = load_clone; + if (second != OptoReg::Bad) { + new_live.Insert(second); } } - // Move up dominator tree from use until dom front is reached - Node* next_region = get_dominating_region(phase, load_use_control, ctrl); - while (phase->idom(next_region) != catch_node) { - next_region = phase->idom(next_region); - if (trace) tty->print_cr("Moving up idom to region ctrl %i", next_region->_idx); - } - assert(phase->is_dominator(catch_node, next_region), "Sanity"); - - // Create or reuse phi node that collect all cloned loads and feed it to the use. - Node* test_phi = phi_map[next_region->_idx]; - if ((test_phi != NULL) && test_phi->is_Phi()) { - // Reuse an already created phi - if (trace) tty->print_cr(" Using cached Phi %i on load_use %i", test_phi->_idx, load_use->_idx); - phase->igvn().rehash_node_delayed(load_use); - load_use->replace_edge(load, test_phi); - // Now this use is done - } else { - // Otherwise we need to create one or more phis - PhiNode* next_phi = new PhiNode(next_region, load->type()); - phi_map[next_region->_idx] = next_phi; // cache new phi - phase->igvn().rehash_node_delayed(load_use); - load_use->replace_edge(load, next_phi); - - int dominators_of_region = 0; - do { - // New phi, connect to region and add all loads as in. - Node* region = next_region; - assert(region->isa_Region() && region->req() > 2, "Catch dead region nodes"); - PhiNode* new_phi = next_phi; - - if (trace) tty->print_cr("Created Phi %i on load %i with control %i", new_phi->_idx, load->_idx, region->_idx); - - // Need to add all cloned loads to the phi, taking care that the right path is matched - dominators_of_region = 0; // reset for new region - for (unsigned int reg_i = 1; reg_i < region->req(); reg_i++) { - Node* region_pred = region->in(reg_i); - assert(region_pred->is_CFG(), "check"); - bool pred_has_dominator = false; - for (int c = 0; c < number_of_catch_projs; c++) { - Node* catchproj = catch_node->raw_out(c); - if (phase->is_dominator(catchproj, region_pred)) { - new_phi->set_req(reg_i, proj_to_load_mapping[c]); - if (trace) tty->print_cr(" - Phi in(%i) set to load %i", reg_i, proj_to_load_mapping[c]->_idx); - pred_has_dominator = true; - dominators_of_region++; - break; - } - } - - // Sometimes we need to chain several phis. - if (!pred_has_dominator) { - assert(dominators_of_region <= 1, "More than one region can't require extra phi"); - if (trace) tty->print_cr(" - Region %i pred %i not dominated by catch proj", region->_idx, region_pred->_idx); - // Continue search on on this region_pred - // - walk up to next region - // - create a new phi and connect to first new_phi - next_region = get_dominating_region(phase, region_pred, ctrl); - - // Lookup if there already is a phi, create a new otherwise - Node* test_phi = phi_map[next_region->_idx]; - if ((test_phi != NULL) && test_phi->is_Phi()) { - next_phi = test_phi->isa_Phi(); - dominators_of_region++; // record that a match was found and that we are done - if (trace) tty->print_cr(" Using cached phi Phi %i on control %i", next_phi->_idx, next_region->_idx); - } else { - next_phi = new PhiNode(next_region, load->type()); - phi_map[next_region->_idx] = next_phi; - } - new_phi->set_req(reg_i, next_phi); - } - } - - new_phi->set_req(0, region); - phase->igvn().register_new_node_with_optimizer(new_phi); - phase->set_ctrl(new_phi, region); - - assert(dominators_of_region != 0, "Must have found one this iteration"); - } while (dominators_of_region == 1); - } - --i; - } - } // end of loop over uses - - assert(load->outcnt() == 0, "All uses should be handled"); - phase->igvn().remove_dead_node(load); - phase->C->print_method(PHASE_CALL_CATCH_CLEANUP, 4, load->_idx); - - // Now we should be home - phase->igvn().set_delay_transform(false); -} - -// Sort out the loads that are between a call ant its catch blocks -static void process_catch_cleanup_candidate(PhaseIdealLoop* phase, LoadNode* load, bool verify) { - bool trace = phase->C->directive()->ZTraceLoadBarriersOption; - - Node* ctrl = get_ctrl_normalized(phase, load); - if (!ctrl->is_Proj() || (ctrl->in(0) == NULL) || !ctrl->in(0)->isa_Call()) { - return; - } - - Node* catch_node = ctrl->isa_Proj()->raw_out(0); - if (catch_node->is_Catch()) { - if (catch_node->outcnt() > 1) { - assert(!verify, "All loads should already have been moved"); - call_catch_cleanup_one(phase, load, ctrl); - } else { - if (trace) tty->print_cr("Call catch cleanup with only one catch: load %i ", load->_idx); - } - } -} - -void ZBarrierSetC2::barrier_insertion_phase(Compile* C, PhaseIterGVN& igvn) const { - PhaseIdealLoop::optimize(igvn, LoopOptsZBarrierInsertion); - if (C->failing()) return; -} - -bool ZBarrierSetC2::optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const { - - if (mode == LoopOptsZBarrierInsertion) { - // First make sure all loads between call and catch are moved to the catch block - clean_catch_blocks(phase); - DEBUG_ONLY(clean_catch_blocks(phase, true /* verify */);) - - // Then expand barriers on all loads - insert_load_barriers(phase); - - // Handle all Unsafe that need barriers. - insert_barriers_on_unsafe(phase); - - phase->C->clear_major_progress(); - return true; - } else { - return false; - } -} - -static bool can_simplify_cas(LoadStoreNode* node) { - if (node->isa_LoadStoreConditional()) { - Node *expected_in = node->as_LoadStoreConditional()->in(LoadStoreConditionalNode::ExpectedIn); - return (expected_in->get_ptr_type() == TypePtr::NULL_PTR); - } else { - return false; - } -} - -static void insert_barrier_before_unsafe(PhaseIdealLoop* phase, LoadStoreNode* old_node) { - - Compile *C = phase->C; - PhaseIterGVN &igvn = phase->igvn(); - LoadStoreNode* zclone = NULL; - - Node *in_ctrl = old_node->in(MemNode::Control); - Node *in_mem = old_node->in(MemNode::Memory); - Node *in_adr = old_node->in(MemNode::Address); - Node *in_val = old_node->in(MemNode::ValueIn); - const TypePtr *adr_type = old_node->adr_type(); - const TypePtr* load_type = TypeOopPtr::BOTTOM; // The type for the load we are adding - - switch (old_node->Opcode()) { - case Op_CompareAndExchangeP: { - zclone = new ZCompareAndExchangePNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn), - adr_type, old_node->get_ptr_type(), ((CompareAndExchangeNode*)old_node)->order()); - load_type = old_node->bottom_type()->is_ptr(); - break; - } - case Op_WeakCompareAndSwapP: { - if (can_simplify_cas(old_node)) { - break; - } - zclone = new ZWeakCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn), - ((CompareAndSwapNode*)old_node)->order()); - adr_type = TypePtr::BOTTOM; - break; - } - case Op_CompareAndSwapP: { - if (can_simplify_cas(old_node)) { - break; - } - zclone = new ZCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn), - ((CompareAndSwapNode*)old_node)->order()); - adr_type = TypePtr::BOTTOM; - break; - } - case Op_GetAndSetP: { - zclone = new ZGetAndSetPNode(in_ctrl, in_mem, in_adr, in_val, old_node->adr_type(), old_node->get_ptr_type()); - load_type = old_node->bottom_type()->is_ptr(); - break; - } - } - if (zclone != NULL) { - igvn.register_new_node_with_optimizer(zclone, old_node); - - // Make load - LoadPNode *load = new LoadPNode(NULL, in_mem, in_adr, adr_type, load_type, MemNode::unordered, - LoadNode::DependsOnlyOnTest); - load_set_expanded_barrier(load); - igvn.register_new_node_with_optimizer(load); - igvn.replace_node(old_node, zclone); - - Node *barrier = new LoadBarrierNode(C, NULL, in_mem, load, in_adr, false /* weak */); - Node *barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop); - Node *barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control); - - igvn.register_new_node_with_optimizer(barrier); - igvn.register_new_node_with_optimizer(barrier_val); - igvn.register_new_node_with_optimizer(barrier_ctrl); - - // loop over all of in_ctrl usages and move to barrier_ctrl - for (DUIterator_Last imin, i = in_ctrl->last_outs(imin); i >= imin; --i) { - Node *use = in_ctrl->last_out(i); - uint l; - for (l = 0; use->in(l) != in_ctrl; l++) {} - igvn.replace_input_of(use, l, barrier_ctrl); - } - - load->set_req(MemNode::Control, in_ctrl); - barrier->set_req(LoadBarrierNode::Control, in_ctrl); - zclone->add_req(barrier_val); // add req as keep alive. - - C->print_method(PHASE_ADD_UNSAFE_BARRIER, 4, zclone->_idx); - } -} - -void ZBarrierSetC2::insert_barriers_on_unsafe(PhaseIdealLoop* phase) const { - Compile *C = phase->C; - PhaseIterGVN &igvn = phase->igvn(); - uint new_ids = C->unique(); - VectorSet visited(Thread::current()->resource_area()); - GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL); - nodeStack.push(C->root()); - visited.test_set(C->root()->_idx); - - // Traverse all nodes, visit all unsafe ops that require a barrier - while (nodeStack.length() > 0) { - Node *n = nodeStack.pop(); - - bool is_old_node = (n->_idx < new_ids); // don't process nodes that were created during cleanup - if (is_old_node) { - if (n->is_LoadStore()) { - LoadStoreNode* lsn = n->as_LoadStore(); - if (lsn->has_barrier()) { - BasicType bt = lsn->in(MemNode::Address)->bottom_type()->basic_type(); - assert (is_reference_type(bt), "Sanity test"); - insert_barrier_before_unsafe(phase, lsn); - } - } - } - for (uint i = 0; i < n->len(); i++) { - if (n->in(i)) { - if (!visited.test_set(n->in(i)->_idx)) { - nodeStack.push(n->in(i)); - } - } - } - } - - igvn.optimize(); - C->print_method(PHASE_ADD_UNSAFE_BARRIER, 2); -} - -// The purpose of ZBarrierSetC2::clean_catch_blocks is to prepare the IR for -// splicing in load barrier nodes. -// -// The problem is that we might have instructions between a call and its catch nodes. -// (This is usually handled in PhaseCFG:call_catch_cleanup, which clones mach nodes in -// already scheduled blocks.) We can't have loads that require barriers there, -// because we need to splice in new control flow, and that would violate the IR. -// -// clean_catch_blocks find all Loads that require a barrier and clone them and any -// dependent instructions to each use. The loads must be in the beginning of the catch block -// before any store. -// -// Sometimes the loads use will be at a place dominated by all catch blocks, then we need -// a load in each catch block, and a Phi at the dominated use. - -void ZBarrierSetC2::clean_catch_blocks(PhaseIdealLoop* phase, bool verify) const { - - Compile *C = phase->C; - uint new_ids = C->unique(); - PhaseIterGVN &igvn = phase->igvn(); - VectorSet visited(Thread::current()->resource_area()); - GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL); - nodeStack.push(C->root()); - visited.test_set(C->root()->_idx); - - // Traverse all nodes, visit all loads that require a barrier - while(nodeStack.length() > 0) { - Node *n = nodeStack.pop(); - - for (uint i = 0; i < n->len(); i++) { - if (n->in(i)) { - if (!visited.test_set(n->in(i)->_idx)) { - nodeStack.push(n->in(i)); - } + // If this node tracks liveness, update it + RegMask* const regs = barrier_set_state()->live(node); + if (regs != NULL) { + regs->OR(new_live); } } - bool is_old_node = (n->_idx < new_ids); // don't process nodes that were created during cleanup - if (n->is_Load() && is_old_node) { - LoadNode* load = n->isa_Load(); - // only care about loads that will have a barrier - if (load_require_barrier(load)) { - process_catch_cleanup_candidate(phase, load, verify); - } - } - } - - C->print_method(PHASE_CALL_CATCH_CLEANUP, 2); -} - -class DomDepthCompareClosure : public CompareClosure<LoadNode*> { - PhaseIdealLoop* _phase; - -public: - DomDepthCompareClosure(PhaseIdealLoop* phase) : _phase(phase) { } - - int do_compare(LoadNode* const &n1, LoadNode* const &n2) { - int d1 = _phase->dom_depth(_phase->get_ctrl(n1)); - int d2 = _phase->dom_depth(_phase->get_ctrl(n2)); - if (d1 == d2) { - // Compare index if the depth is the same, ensures all entries are unique. - return n1->_idx - n2->_idx; - } else { - return d2 - d1; - } - } -}; - -// Traverse graph and add all loadPs to list, sorted by dom depth -void gather_loadnodes_sorted(PhaseIdealLoop* phase, GrowableArray<LoadNode*>* loadList) { - - VectorSet visited(Thread::current()->resource_area()); - GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL); - DomDepthCompareClosure ddcc(phase); - - nodeStack.push(phase->C->root()); - while(nodeStack.length() > 0) { - Node *n = nodeStack.pop(); - if (visited.test(n->_idx)) { - continue; - } - - if (n->isa_Load()) { - LoadNode *load = n->as_Load(); - if (load_require_barrier(load)) { - assert(phase->get_ctrl(load) != NULL, "sanity"); - assert(phase->dom_depth(phase->get_ctrl(load)) != 0, "sanity"); - loadList->insert_sorted(&ddcc, load); - } - } - - visited.set(n->_idx); - for (uint i = 0; i < n->req(); i++) { - if (n->in(i)) { - if (!visited.test(n->in(i)->_idx)) { - nodeStack.push(n->in(i)); - } + // Now at block top, see if we have any changes + new_live.SUBTRACT(old_live); + if (new_live.is_NotEmpty()) { + // Liveness has refined, update and propagate to prior blocks + old_live.OR(new_live); + for (uint i = 1; i < block->num_preds(); ++i) { + Block* const pred = cfg->get_block_for_node(block->pred(i)); + worklist.push(pred); } } } } - -// Add LoadBarriers to all LoadPs -void ZBarrierSetC2::insert_load_barriers(PhaseIdealLoop* phase) const { - - bool trace = phase->C->directive()->ZTraceLoadBarriersOption; - GrowableArray<LoadNode *> loadList(Thread::current()->resource_area(), 0, 0, NULL); - gather_loadnodes_sorted(phase, &loadList); - - PhaseIterGVN &igvn = phase->igvn(); - int count = 0; - - for (GrowableArrayIterator<LoadNode *> loadIter = loadList.begin(); loadIter != loadList.end(); ++loadIter) { - LoadNode *load = *loadIter; - - if (load_has_expanded_barrier(load)) { - continue; - } - - do { - // Insert a barrier on a loadP - // if another load is found that needs to be expanded first, retry on that one - LoadNode* result = insert_one_loadbarrier(phase, load, phase->get_ctrl(load)); - while (result != NULL) { - result = insert_one_loadbarrier(phase, result, phase->get_ctrl(result)); - } - } while (!load_has_expanded_barrier(load)); - } - - phase->C->print_method(PHASE_INSERT_BARRIER, 2); -} - -void push_antidependent_stores(PhaseIdealLoop* phase, Node_Stack& nodestack, LoadNode* start_load) { - // push all stores on the same mem, that can_alias - // Any load found must be handled first - PhaseIterGVN &igvn = phase->igvn(); - int load_alias_idx = igvn.C->get_alias_index(start_load->adr_type()); - - Node *mem = start_load->in(1); - for (DUIterator_Fast imax, u = mem->fast_outs(imax); u < imax; u++) { - Node *mem_use = mem->fast_out(u); - - if (mem_use == start_load) continue; - if (!mem_use->is_Store()) continue; - if (!phase->has_ctrl(mem_use)) continue; - if (phase->get_ctrl(mem_use) != phase->get_ctrl(start_load)) continue; - - // add any aliasing store in this block - StoreNode *store = mem_use->isa_Store(); - const TypePtr *adr_type = store->adr_type(); - if (igvn.C->can_alias(adr_type, load_alias_idx)) { - nodestack.push(store, 0); - } - } -} - -LoadNode* ZBarrierSetC2::insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* start_load, Node* ctrl) const { - bool trace = phase->C->directive()->ZTraceLoadBarriersOption; - PhaseIterGVN &igvn = phase->igvn(); - - // Check for other loadPs at the same loop depth that is reachable by a DFS - // - if found - return it. It needs to be inserted first - // - otherwise proceed and insert barrier - - VectorSet visited(Thread::current()->resource_area()); - Node_Stack nodestack(100); - - nodestack.push(start_load, 0); - push_antidependent_stores(phase, nodestack, start_load); - - while(!nodestack.is_empty()) { - Node* n = nodestack.node(); // peek - nodestack.pop(); - if (visited.test(n->_idx)) { - continue; - } - - if (n->is_Load() && n != start_load && load_require_barrier(n->as_Load()) && !load_has_expanded_barrier(n->as_Load())) { - // Found another load that needs a barrier in the same block. Must expand later loads first. - if (trace) tty->print_cr(" * Found LoadP %i on DFS", n->_idx); - return n->as_Load(); // return node that should be expanded first - } - - if (!phase->has_ctrl(n)) continue; - if (phase->get_ctrl(n) != phase->get_ctrl(start_load)) continue; - if (n->is_Phi()) continue; - - visited.set(n->_idx); - // push all children - for (DUIterator_Fast imax, ii = n->fast_outs(imax); ii < imax; ii++) { - Node* c = n->fast_out(ii); - if (c != NULL) { - nodestack.push(c, 0); - } - } - } - - insert_one_loadbarrier_inner(phase, start_load, ctrl, visited); - return NULL; -} - -void ZBarrierSetC2::insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited2) const { - PhaseIterGVN &igvn = phase->igvn(); - Compile* C = igvn.C; - bool trace = C->directive()->ZTraceLoadBarriersOption; - - // create barrier - Node* barrier = new LoadBarrierNode(C, NULL, load->in(LoadNode::Memory), NULL, load->in(LoadNode::Address), load_has_weak_barrier(load)); - Node* barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop); - Node* barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control); - ctrl = normalize_ctrl(ctrl); - - if (trace) tty->print_cr("Insert load %i with barrier: %i and ctrl : %i", load->_idx, barrier->_idx, ctrl->_idx); - - // Splice control - // - insert barrier control diamond between loads ctrl and ctrl successor on path to block end. - // - If control successor is a catch, step over to next. - Node* ctrl_succ = NULL; - for (DUIterator_Fast imax, j = ctrl->fast_outs(imax); j < imax; j++) { - Node* tmp = ctrl->fast_out(j); - - // - CFG nodes is the ones we are going to splice (1 only!) - // - Phi nodes will continue to hang from the region node! - // - self loops should be skipped - if (tmp->is_Phi() || tmp == ctrl) { - continue; - } - - if (tmp->is_CFG()) { - assert(ctrl_succ == NULL, "There can be only one"); - ctrl_succ = tmp; - continue; - } - } - - // Now splice control - assert(ctrl_succ != load, "sanity"); - assert(ctrl_succ != NULL, "Broken IR"); - bool found = false; - for(uint k = 0; k < ctrl_succ->req(); k++) { - if (ctrl_succ->in(k) == ctrl) { - assert(!found, "sanity"); - if (trace) tty->print_cr(" Move CFG ctrl_succ %i to barrier_ctrl", ctrl_succ->_idx); - igvn.replace_input_of(ctrl_succ, k, barrier_ctrl); - found = true; - k--; - } - } - - // For all successors of ctrl - move all visited to become successors of barrier_ctrl instead - for (DUIterator_Fast imax, r = ctrl->fast_outs(imax); r < imax; r++) { - Node* tmp = ctrl->fast_out(r); - if (tmp->is_SafePoint() || (visited2.test(tmp->_idx) && (tmp != load))) { - if (trace) tty->print_cr(" Move ctrl_succ %i to barrier_ctrl", tmp->_idx); - igvn.replace_input_of(tmp, 0, barrier_ctrl); - --r; --imax; - } - } - - // Move the loads user to the barrier - for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) { - Node* u = load->fast_out(i); - if (u->isa_LoadBarrier()) { - continue; - } - - // find correct input - replace with iterator? - for(uint j = 0; j < u->req(); j++) { - if (u->in(j) == load) { - igvn.replace_input_of(u, j, barrier_val); - --i; --imax; // Adjust the iterator of the *outer* loop - break; // some nodes (calls) might have several uses from the same node - } - } - } - - // Connect barrier to load and control - barrier->set_req(LoadBarrierNode::Oop, load); - barrier->set_req(LoadBarrierNode::Control, ctrl); - - igvn.replace_input_of(load, MemNode::Control, ctrl); - load->pin(); - - igvn.rehash_node_delayed(load); - igvn.register_new_node_with_optimizer(barrier); - igvn.register_new_node_with_optimizer(barrier_val); - igvn.register_new_node_with_optimizer(barrier_ctrl); - load_set_expanded_barrier(load); - - C->print_method(PHASE_INSERT_BARRIER, 3, load->_idx); -} - -// The bad_mask in the ThreadLocalData shouldn't have an anti-dep-check. -// The bad_mask address if of type TypeRawPtr, but that will alias -// InitializeNodes until the type system is expanded. -bool ZBarrierSetC2::needs_anti_dependence_check(const Node* node) const { - MachNode* mnode = node->as_Mach(); - if (mnode != NULL) { - intptr_t offset = 0; - const TypePtr *adr_type2 = NULL; - const Node* base = mnode->get_base_and_disp(offset, adr_type2); - if ((base != NULL) && - (base->is_Mach() && base->as_Mach()->ideal_Opcode() == Op_ThreadLocal) && - (offset == in_bytes(ZThreadLocalData::address_bad_mask_offset()))) { - return false; - } - } - return true; -}
--- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -29,134 +29,38 @@ #include "opto/node.hpp" #include "utilities/growableArray.hpp" -class ZCompareAndSwapPNode : public CompareAndSwapPNode { -public: - ZCompareAndSwapPNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { } - virtual int Opcode() const; -}; +const uint8_t ZLoadBarrierStrong = 1; +const uint8_t ZLoadBarrierWeak = 2; +const uint8_t ZLoadBarrierElided = 3; -class ZWeakCompareAndSwapPNode : public WeakCompareAndSwapPNode { -public: - ZWeakCompareAndSwapPNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : WeakCompareAndSwapPNode(c, mem, adr, val, ex, mem_ord) { } - virtual int Opcode() const; -}; +class ZLoadBarrierStubC2 : public ResourceObj { +private: + const MachNode* _node; + const Address _ref_addr; + const Register _ref; + const Register _tmp; + const bool _weak; + Label _entry; + Label _continuation; -class ZCompareAndExchangePNode : public CompareAndExchangePNode { -public: - ZCompareAndExchangePNode(Node* c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, const Type* t, MemNode::MemOrd mem_ord) : CompareAndExchangePNode(c, mem, adr, val, ex, at, t, mem_ord) { } - virtual int Opcode() const; -}; - -class ZGetAndSetPNode : public GetAndSetPNode { -public: - ZGetAndSetPNode(Node* c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* t) : GetAndSetPNode(c, mem, adr, val, at, t) { } - virtual int Opcode() const; -}; - -class LoadBarrierNode : public MultiNode { -private: - bool _weak; // On strong or weak oop reference - static bool is_dominator(PhaseIdealLoop* phase, bool linear_only, Node *d, Node *n); - void push_dominated_barriers(PhaseIterGVN* igvn) const; + ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak); public: - enum { - Control, - Memory, - Oop, - Address, - Number_of_Outputs = Address, - Similar, - Number_of_Inputs - }; + static ZLoadBarrierStubC2* create(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak); - LoadBarrierNode(Compile* C, - Node* c, - Node* mem, - Node* val, - Node* adr, - bool weak); - - virtual int Opcode() const; - virtual uint size_of() const; - virtual bool cmp(const Node& n) const; - virtual const Type *bottom_type() const; - virtual const TypePtr* adr_type() const; - virtual const Type *Value(PhaseGVN *phase) const; - virtual Node *Identity(PhaseGVN *phase); - virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); - virtual uint match_edge(uint idx) const; - - LoadBarrierNode* has_dominating_barrier(PhaseIdealLoop* phase, - bool linear_only, - bool look_for_similar); - - void fix_similar_in_uses(PhaseIterGVN* igvn); - - bool has_true_uses() const; - - bool can_be_eliminated() const { - return !in(Similar)->is_top(); - } - - bool is_weak() const { - return _weak; - } -}; - -class LoadBarrierSlowRegNode : public TypeNode { -private: - bool _is_weak; -public: - LoadBarrierSlowRegNode(Node *c, - Node *adr, - Node *src, - const TypePtr* t, - bool weak) : - TypeNode(t, 3), _is_weak(weak) { - init_req(1, adr); - init_req(2, src); - init_class_id(Class_LoadBarrierSlowReg); - } - - virtual uint size_of() const { - return sizeof(*this); - } - - virtual const char * name() { - return "LoadBarrierSlowRegNode"; - } - - virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) { - return NULL; - } - - virtual int Opcode() const; - - bool is_weak() { return _is_weak; } -}; - -class ZBarrierSetC2State : public ResourceObj { -private: - // List of load barrier nodes which need to be expanded before matching - GrowableArray<LoadBarrierNode*>* _load_barrier_nodes; - -public: - ZBarrierSetC2State(Arena* comp_arena); - int load_barrier_count() const; - void add_load_barrier_node(LoadBarrierNode* n); - void remove_load_barrier_node(LoadBarrierNode* n); - LoadBarrierNode* load_barrier_node(int idx) const; + Address ref_addr() const; + Register ref() const; + Register tmp() const; + address slow_path() const; + RegMask& live() const; + Label* entry(); + Label* continuation(); }; class ZBarrierSetC2 : public BarrierSetC2 { private: - ZBarrierSetC2State* state() const; - void expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const; - -#ifdef ASSERT - void verify_gc_barriers(bool post_parse) const; -#endif + void compute_liveness_at_stubs() const; + void analyze_dominating_barriers() const; protected: virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const; @@ -174,43 +78,14 @@ public: virtual void* create_barrier_state(Arena* comp_arena) const; + virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, + BasicType type, + bool is_clone, + ArrayCopyPhase phase) const; - virtual bool has_load_barriers() const { return true; } - virtual bool is_gc_barrier_node(Node* node) const; - virtual Node* step_over_gc_barrier(Node* c) const; - virtual Node* step_over_gc_barrier_ctrl(Node* c) const; - - virtual void register_potential_barrier_node(Node* node) const; - virtual void unregister_potential_barrier_node(Node* node) const; - virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { } - virtual void enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const; - virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const; - - virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const; - - virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const; - virtual bool final_graph_reshaping(Compile* compile, Node* n, uint opcode) const; - virtual bool matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const; - virtual bool matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const; - virtual bool needs_anti_dependence_check(const Node* node) const; - -#ifdef ASSERT - virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const; -#endif - - // Load barrier insertion and expansion external - virtual void barrier_insertion_phase(Compile* C, PhaseIterGVN &igvn) const; - virtual bool optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const; - virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return (mode == LoopOptsZBarrierInsertion); } - virtual bool strip_mined_loops_expanded(LoopOptsMode mode) const { return mode == LoopOptsZBarrierInsertion; } - -private: - // Load barrier insertion and expansion internal - void insert_barriers_on_unsafe(PhaseIdealLoop* phase) const; - void clean_catch_blocks(PhaseIdealLoop* phase, bool verify = false) const; - void insert_load_barriers(PhaseIdealLoop* phase) const; - LoadNode* insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) const; - void insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited) const; + virtual void late_barrier_analysis() const; + virtual int estimate_stub_size() const; + virtual void emit_stubs(CodeBuffer& cb) const; }; #endif // SHARE_GC_Z_C2_ZBARRIERSETC2_HPP
--- a/src/hotspot/share/gc/z/zArguments.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/z/zArguments.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -37,11 +37,6 @@ void ZArguments::initialize() { GCArguments::initialize(); - // Check max heap size - if (MaxHeapSize > ZMaxHeapSize) { - vm_exit_during_initialization("Java heap too large"); - } - // Enable NUMA by default if (FLAG_IS_DEFAULT(UseNUMA)) { FLAG_SET_DEFAULT(UseNUMA, true);
--- a/src/hotspot/share/gc/z/zBarrierSetAssembler.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/z/zBarrierSetAssembler.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,10 +24,7 @@ #ifndef SHARE_GC_Z_ZBARRIERSETASSEMBLER_HPP #define SHARE_GC_Z_ZBARRIERSETASSEMBLER_HPP -#include "asm/macroAssembler.hpp" #include "gc/shared/barrierSetAssembler.hpp" -#include "oops/accessDecorators.hpp" -#include "utilities/globalDefinitions.hpp" #include "utilities/macros.hpp" class ZBarrierSetAssemblerBase : public BarrierSetAssembler {
--- a/src/hotspot/share/gc/z/zGlobals.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/z/zGlobals.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -44,10 +44,6 @@ const size_t ZGranuleSizeShift = ZPlatformGranuleSizeShift; const size_t ZGranuleSize = (size_t)1 << ZGranuleSizeShift; -// Max heap size shift/size -const size_t ZMaxHeapSizeShift = ZPlatformMaxHeapSizeShift; -const size_t ZMaxHeapSize = (size_t)1 << ZMaxHeapSizeShift; - // Page types const uint8_t ZPageTypeSmall = 0; const uint8_t ZPageTypeMedium = 1;
--- a/src/hotspot/share/gc/z/zVirtualMemory.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/gc/z/zVirtualMemory.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -31,6 +31,13 @@ _manager(), _initialized(false) { + // Check max supported heap size + if (max_capacity > ZAddressOffsetMax) { + log_error(gc)("Java heap too large (max supported heap size is " SIZE_FORMAT "G)", + ZAddressOffsetMax / G); + return; + } + log_info(gc, init)("Address Space: " SIZE_FORMAT "T", ZAddressOffsetMax / K / G); // Reserve address space
--- a/src/hotspot/share/include/jvm.h Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/include/jvm.h Fri Oct 11 10:39:58 2019 +0200 @@ -1044,19 +1044,6 @@ #include "classfile_constants.h" /* - * A function defined by the byte-code verifier and called by the VM. - * This is not a function implemented in the VM. - * - * Returns JNI_FALSE if verification fails. A detailed error message - * will be places in msg_buf, whose length is specified by buf_len. - */ -typedef jboolean (*verifier_fn_t)(JNIEnv *env, - jclass cb, - char * msg_buf, - jint buf_len); - - -/* * Support for a VM-independent class format checker. */ typedef struct { @@ -1086,28 +1073,6 @@ typedef jstring (*to_java_string_fn_t)(JNIEnv *env, char *str); -typedef char *(*to_c_string_fn_t)(JNIEnv *env, jstring s, jboolean *b); - -/* This is the function defined in libjava.so that performs class - * format checks. This functions fills in size information about - * the class file and returns: - * - * 0: good - * -1: out of memory - * -2: bad format - * -3: unsupported version - * -4: bad class name - */ - -typedef jint (*check_format_fn_t)(char *class_name, - unsigned char *data, - unsigned int data_size, - class_size_info *class_size, - char *message_buffer, - jint buffer_length, - jboolean measure_only, - jboolean check_relaxed); - #define JVM_RECOGNIZED_CLASS_MODIFIERS (JVM_ACC_PUBLIC | \ JVM_ACC_FINAL | \ JVM_ACC_SUPER | \
--- a/src/hotspot/share/jfr/recorder/jfrRecorder.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/jfr/recorder/jfrRecorder.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -168,7 +168,7 @@ static bool is_cds_dump_requested() { // we will not be able to launch recordings if a cds dump is being requested - if ((DumpSharedSpaces || DynamicDumpSharedSpaces) && (JfrOptionSet::startup_recording_options() != NULL)) { + if (Arguments::is_dumping_archive() && (JfrOptionSet::startup_recording_options() != NULL)) { warning("JFR will be disabled during CDS dumping"); teardown_startup_support(); return true;
--- a/src/hotspot/share/jfr/recorder/repository/jfrEmergencyDump.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/jfr/recorder/repository/jfrEmergencyDump.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -393,6 +393,10 @@ Service_lock->unlock(); } + if (UseNotificationThread && Notification_lock->owned_by_self()) { + Notification_lock->unlock(); + } + if (CodeCache_lock->owned_by_self()) { CodeCache_lock->unlock(); }
--- a/src/hotspot/share/memory/filemap.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/memory/filemap.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -263,7 +263,7 @@ void SharedClassPathEntry::init(bool is_modules_image, ClassPathEntry* cpe, TRAPS) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only"); + Arguments::assert_is_dumping_archive(); _timestamp = 0; _filesize = 0; _from_class_path_attr = false; @@ -397,7 +397,7 @@ } void FileMapInfo::allocate_shared_path_table() { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "Sanity"); + Arguments::assert_is_dumping_archive(); EXCEPTION_MARK; // The following calls should never throw, but would exit VM on error. ClassLoaderData* loader_data = ClassLoaderData::the_null_class_loader_data(); @@ -444,7 +444,7 @@ } void FileMapInfo::check_nonempty_dir_in_shared_path_table() { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only"); + Arguments::assert_is_dumping_archive(); bool has_nonempty_dir = false; @@ -471,7 +471,7 @@ } void FileMapInfo::record_non_existent_class_path_entry(const char* path) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only"); + Arguments::assert_is_dumping_archive(); log_info(class, path)("non-existent Class-Path entry %s", path); if (_non_existent_class_paths == NULL) { _non_existent_class_paths = new (ResourceObj::C_HEAP, mtInternal)GrowableArray<const char*>(10, true); @@ -480,7 +480,7 @@ } int FileMapInfo::num_non_existent_class_paths() { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only"); + Arguments::assert_is_dumping_archive(); if (_non_existent_class_paths != NULL) { return _non_existent_class_paths->length(); } else { @@ -1150,7 +1150,7 @@ void FileMapInfo::write_region(int region, char* base, size_t size, bool read_only, bool allow_exec) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "Dump time only"); + Arguments::assert_is_dumping_archive(); FileMapRegion* si = space_at(region); char* target_base = base;
--- a/src/hotspot/share/memory/metaspaceShared.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/memory/metaspaceShared.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -425,7 +425,7 @@ } void MetaspaceShared::commit_shared_space_to(char* newtop) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump-time only"); + Arguments::assert_is_dumping_archive(); char* base = _shared_rs.base(); size_t need_committed_size = newtop - base; size_t has_committed_size = _shared_vs.committed_size(); @@ -509,8 +509,7 @@ } uintx MetaspaceShared::object_delta_uintx(void* obj) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, - "supported only for dumping"); + Arguments::assert_is_dumping_archive(); if (DumpSharedSpaces) { assert(shared_rs()->contains(obj), "must be"); } else {
--- a/src/hotspot/share/memory/universe.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/memory/universe.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -713,7 +713,7 @@ } #if INCLUDE_CDS - if (DumpSharedSpaces || DynamicDumpSharedSpaces) { + if (Arguments::is_dumping_archive()) { MetaspaceShared::prepare_for_dumping(); } #endif
--- a/src/hotspot/share/oops/constMethod.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/oops/constMethod.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -26,6 +26,7 @@ #define SHARE_OOPS_CONSTMETHOD_HPP #include "oops/oop.hpp" +#include "runtime/arguments.hpp" #include "utilities/align.hpp" // An ConstMethod represents portions of a Java method which are not written to after @@ -293,7 +294,7 @@ _adapter = adapter; } void set_adapter_trampoline(AdapterHandlerEntry** trampoline) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "must be"); + Arguments::assert_is_dumping_archive(); if (DumpSharedSpaces) { assert(*trampoline == NULL, "must be NULL during dump time, to be initialized at run time");
--- a/src/hotspot/share/oops/cpCache.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/oops/cpCache.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -714,7 +714,7 @@ } void ConstantPoolCache::walk_entries_for_initialization(bool check_only) { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "sanity"); + Arguments::assert_is_dumping_archive(); // When dumping the archive, we want to clean up the ConstantPoolCache // to remove any effect of linking due to the execution of Java code -- // each ConstantPoolCacheEntry will have the same contents as if
--- a/src/hotspot/share/oops/instanceKlass.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/oops/instanceKlass.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -494,7 +494,7 @@ assert(is_instance_klass(), "is layout incorrect?"); assert(size_helper() == parser.layout_size(), "incorrect size_helper?"); - if (DumpSharedSpaces || DynamicDumpSharedSpaces) { + if (Arguments::is_dumping_archive()) { SystemDictionaryShared::init_dumptime_info(this); } } @@ -644,7 +644,7 @@ } set_annotations(NULL); - if (DumpSharedSpaces || DynamicDumpSharedSpaces) { + if (Arguments::is_dumping_archive()) { SystemDictionaryShared::remove_dumptime_info(this); } } @@ -2361,7 +2361,7 @@ // (1) We are running AOT to generate a shared library. return true; } - if (DumpSharedSpaces || DynamicDumpSharedSpaces) { + if (Arguments::is_dumping_archive()) { // (2) We are running -Xshare:dump or -XX:ArchiveClassesAtExit to create a shared archive return true; } @@ -2609,7 +2609,7 @@ // notify ClassLoadingService of class unload ClassLoadingService::notify_class_unloaded(ik); - if (DumpSharedSpaces || DynamicDumpSharedSpaces) { + if (Arguments::is_dumping_archive()) { SystemDictionaryShared::remove_dumptime_info(ik); }
--- a/src/hotspot/share/oops/klass.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/oops/klass.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -525,7 +525,7 @@ } void Klass::remove_unshareable_info() { - assert (DumpSharedSpaces || DynamicDumpSharedSpaces, + assert (Arguments::is_dumping_archive(), "only called during CDS dump time"); JFR_ONLY(REMOVE_ID(this);) if (log_is_enabled(Trace, cds, unshareable)) { @@ -543,7 +543,7 @@ } void Klass::remove_java_mirror() { - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "only called during CDS dump time"); + Arguments::assert_is_dumping_archive(); if (log_is_enabled(Trace, cds, unshareable)) { ResourceMark rm; log_trace(cds, unshareable)("remove java_mirror: %s", external_name());
--- a/src/hotspot/share/oops/klassVtable.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/oops/klassVtable.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -128,11 +128,6 @@ *vtable_length_ret = vtable_length; } -int klassVtable::index_of(Method* m, int len) const { - assert(m->has_vtable_index(), "do not ask this of non-vtable methods"); - return m->vtable_index(); -} - // Copy super class's vtable to the first part (prefix) of this class's vtable, // and return the number of entries copied. Expects that 'super' is the Java // super class (arrays can have "array" super classes that must be skipped). @@ -169,7 +164,6 @@ // Note: Arrays can have intermediate array supers. Use java_super to skip them. InstanceKlass* super = _klass->java_super(); - int nofNewEntries = 0; bool is_shared = _klass->is_shared(); @@ -1029,15 +1023,6 @@ } #endif // INCLUDE_JVMTI -// CDS/RedefineClasses support - clear vtables so they can be reinitialized -void klassVtable::clear_vtable() { - for (int i = 0; i < _length; i++) table()[i].clear(); -} - -bool klassVtable::is_initialized() { - return _length == 0 || table()[0].method() != NULL; -} - //----------------------------------------------------------------------------------------- // Itable code @@ -1481,31 +1466,6 @@ #endif } - -// inverse to itable_index -Method* klassItable::method_for_itable_index(InstanceKlass* intf, int itable_index) { - assert(intf->is_interface(), "sanity check"); - assert(intf->verify_itable_index(itable_index), ""); - Array<Method*>* methods = InstanceKlass::cast(intf)->methods(); - - if (itable_index < 0 || itable_index >= method_count_for_interface(intf)) - return NULL; // help caller defend against bad indices - - int index = itable_index; - Method* m = methods->at(index); - int index2 = -1; - while (!m->has_itable_index() || - (index2 = m->itable_index()) != itable_index) { - assert(index2 < itable_index, "monotonic"); - if (++index == methods->length()) - return NULL; - m = methods->at(index); - } - assert(m->itable_index() == itable_index, "correct inverse"); - - return m; -} - void klassVtable::verify(outputStream* st, bool forced) { // make sure table is initialized if (!Universe::is_fully_initialized()) return;
--- a/src/hotspot/share/oops/klassVtable.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/oops/klassVtable.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -48,13 +48,6 @@ int _verify_count; // to make verify faster #endif - // Ordering important, so greater_than (>) can be used as an merge operator. - enum AccessType { - acc_private = 0, - acc_package_private = 1, - acc_publicprotected = 2 - }; - public: klassVtable(Klass* klass, void* base, int length) : _klass(klass) { _tableOffset = (address)base - (address)klass; _length = length; @@ -66,22 +59,12 @@ int length() const { return _length; } inline Method* method_at(int i) const; inline Method* unchecked_method_at(int i) const; - inline Method** adr_method_at(int i) const; // searching; all methods return -1 if not found - int index_of(Method* m) const { return index_of(m, _length); } int index_of_miranda(Symbol* name, Symbol* signature); void initialize_vtable(bool checkconstraints, TRAPS); // initialize vtable of a new klass - // CDS/RedefineClasses support - clear vtables so they can be reinitialized - // at dump time. Clearing gives us an easy way to tell if the vtable has - // already been reinitialized at dump time (see dump.cpp). Vtables can - // be initialized at run time by RedefineClasses so dumping the right order - // is necessary. - void clear_vtable(); - bool is_initialized(); - // computes vtable length (in words) and the number of miranda methods static void compute_vtable_size_and_num_mirandas(int* vtable_length, int* num_new_mirandas, @@ -125,7 +108,6 @@ private: void copy_vtable_to(vtableEntry* start); int initialize_from_super(Klass* super); - int index_of(Method* m, int len) const; // same as index_of, but search only up to len void put_method_at(Method* m, int index); static bool needs_new_vtable_entry(const methodHandle& m, const Klass* super, @@ -223,12 +205,6 @@ return table()[i].method(); } -inline Method** klassVtable::adr_method_at(int i) const { - // Allow one past the last entry to be referenced; useful for loop bounds. - assert(i >= 0 && i <= _length, "index out of bounds"); - return (Method**)(address(table() + i) + vtableEntry::method_offset_in_bytes()); -} - // -------------------------------------------------------------------------------- class klassItable; class itableMethodEntry; @@ -336,9 +312,6 @@ static int compute_itable_size(Array<InstanceKlass*>* transitive_interfaces); static void setup_itable_offset_table(InstanceKlass* klass); - // Resolving of method to index - static Method* method_for_itable_index(InstanceKlass* klass, int itable_index); - // Debugging/Statistics static void print_statistics() PRODUCT_RETURN; private:
--- a/src/hotspot/share/oops/method.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/oops/method.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -1015,7 +1015,7 @@ void Method::unlink_method() { _code = NULL; - assert(DumpSharedSpaces || DynamicDumpSharedSpaces, "dump time only"); + Arguments::assert_is_dumping_archive(); // Set the values to what they should be at run time. Note that // this Method can no longer be executed during dump time. _i2i_entry = Interpreter::entry_for_cds_method(this);
--- a/src/hotspot/share/opto/c2compiler.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/c2compiler.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -102,7 +102,8 @@ assert(is_initialized(), "Compiler thread must be initialized"); bool subsume_loads = SubsumeLoads; - bool do_escape_analysis = DoEscapeAnalysis && !env->should_retain_local_variables(); + bool do_escape_analysis = DoEscapeAnalysis && !env->should_retain_local_variables() + && !env->jvmti_can_get_owned_monitor_info(); bool eliminate_boxing = EliminateAutoBox; while (!env->failing()) {
--- a/src/hotspot/share/opto/classes.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/classes.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -49,9 +49,6 @@ #include "opto/valuetypenode.hpp" #include "opto/vectornode.hpp" #include "utilities/macros.hpp" -#if INCLUDE_ZGC -#include "gc/z/c2/zBarrierSetC2.hpp" -#endif #if INCLUDE_SHENANDOAHGC #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" #endif
--- a/src/hotspot/share/opto/classes.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/classes.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -195,17 +195,6 @@ macro(LoadN) macro(LoadRange) macro(LoadS) -#if INCLUDE_ZGC -#define zgcmacro(x) macro(x) -#else -#define zgcmacro(x) optionalmacro(x) -#endif -zgcmacro(LoadBarrier) -zgcmacro(LoadBarrierSlowReg) -zgcmacro(ZCompareAndSwapP) -zgcmacro(ZWeakCompareAndSwapP) -zgcmacro(ZCompareAndExchangeP) -zgcmacro(ZGetAndSetP) macro(Lock) macro(Loop) macro(LoopLimit)
--- a/src/hotspot/share/opto/compile.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/compile.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -77,9 +77,6 @@ #include "utilities/align.hpp" #include "utilities/copy.hpp" #include "utilities/macros.hpp" -#if INCLUDE_ZGC -#include "gc/z/c2/zBarrierSetC2.hpp" -#endif // -------------------- Compile::mach_constant_base_node ----------------------- @@ -1026,6 +1023,7 @@ _has_method_handle_invokes(false), _clinit_barrier_on_entry(false), _comp_arena(mtCompiler), + _barrier_set_state(BarrierSet::barrier_set()->barrier_set_c2()->create_barrier_state(comp_arena())), _env(ci_env), _directive(directive), _log(ci_env->log()), @@ -2845,13 +2843,6 @@ print_method(PHASE_MACRO_EXPANSION, 2); } -#ifdef ASSERT - bs->verify_gc_barriers(this, BarrierSetC2::BeforeLateInsertion); -#endif - - bs->barrier_insertion_phase(C, igvn); - if (failing()) return; - { TracePhase tp("barrierExpand", &timers[_t_barrierExpand]); if (bs->expand_barriers(this, igvn)) {
--- a/src/hotspot/share/opto/compile.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/compile.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -56,7 +56,6 @@ class IdealGraphPrinter; class InlineTree; class Int_Array; -class LoadBarrierNode; class Matcher; class MachConstantNode; class MachConstantBaseNode; @@ -98,7 +97,6 @@ LoopOptsNone, LoopOptsShenandoahExpand, LoopOptsShenandoahPostExpand, - LoopOptsZBarrierInsertion, LoopOptsSkipSplitIf, LoopOptsVerify }; @@ -1211,11 +1209,7 @@ bool in_scratch_emit_size() const { return _in_scratch_emit_size; } enum ScratchBufferBlob { -#if defined(PPC64) MAX_inst_size = 2048, -#else - MAX_inst_size = 1024, -#endif MAX_locs_size = 128, // number of relocInfo elements MAX_const_size = 128, MAX_stubs_size = 128 @@ -1290,14 +1284,30 @@ // Process an OopMap Element while emitting nodes void Process_OopMap_Node(MachNode *mach, int code_offset); + class BufferSizingData { + public: + int _stub; + int _code; + int _const; + int _reloc; + + BufferSizingData() : + _stub(0), + _code(0), + _const(0), + _reloc(0) + { }; + }; + // Initialize code buffer - CodeBuffer* init_buffer(uint* blk_starts); + void estimate_buffer_size(int& const_req); + CodeBuffer* init_buffer(BufferSizingData& buf_sizes); // Write out basic block data to code buffer void fill_buffer(CodeBuffer* cb, uint* blk_starts); // Determine which variable sized branches can be shortened - void shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size); + void shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes); // Compute the size of first NumberOfLoopInstrToAlign instructions // at the head of a loop.
--- a/src/hotspot/share/opto/loopnode.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/loopnode.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -993,18 +993,6 @@ } } - if (UseZGC && !inner_out->in(0)->is_CountedLoopEnd()) { - // In some very special cases there can be a load that has no other uses than the - // counted loop safepoint. Then its loadbarrier will be placed between the inner - // loop exit and the safepoint. This is very rare - - Node* ifnode = inner_out->in(1)->in(0); - // Region->IfTrue->If == Region->Iffalse->If - if (ifnode == inner_out->in(2)->in(0)) { - inner_out = ifnode->in(0); - } - } - CountedLoopEndNode* cle = inner_out->in(0)->as_CountedLoopEnd(); assert(cle == inner->loopexit_or_null(), "mismatch"); bool has_skeleton = outer_le->in(1)->bottom_type()->singleton() && outer_le->in(1)->bottom_type()->is_int()->get_con() == 0; @@ -4049,28 +4037,32 @@ // dominated by early is considered a potentially interfering store. // This can produce false positives. if (n->is_Load() && LCA != early) { - Node_List worklist; - - Node *mem = n->in(MemNode::Memory); - for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { - Node* s = mem->fast_out(i); - worklist.push(s); - } - while(worklist.size() != 0 && LCA != early) { - Node* s = worklist.pop(); - if (s->is_Load() || s->Opcode() == Op_SafePoint || - (s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0)) { - continue; - } else if (s->is_MergeMem()) { - for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) { - Node* s1 = s->fast_out(i); - worklist.push(s1); - } - } else { - Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0); - assert(sctrl != NULL || s->outcnt() == 0, "must have control"); - if (sctrl != NULL && !sctrl->is_top() && is_dominator(early, sctrl)) { - LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n); + int load_alias_idx = C->get_alias_index(n->adr_type()); + if (C->alias_type(load_alias_idx)->is_rewritable()) { + + Node_List worklist; + + Node *mem = n->in(MemNode::Memory); + for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { + Node* s = mem->fast_out(i); + worklist.push(s); + } + while(worklist.size() != 0 && LCA != early) { + Node* s = worklist.pop(); + if (s->is_Load() || s->Opcode() == Op_SafePoint || + (s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0)) { + continue; + } else if (s->is_MergeMem()) { + for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) { + Node* s1 = s->fast_out(i); + worklist.push(s1); + } + } else { + Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0); + assert(sctrl != NULL || s->outcnt() == 0, "must have control"); + if (sctrl != NULL && !sctrl->is_top() && C->can_alias(s->adr_type(), load_alias_idx) && is_dominator(early, sctrl)) { + LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n); + } } } }
--- a/src/hotspot/share/opto/loopopts.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/loopopts.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -42,9 +42,6 @@ #include "opto/subnode.hpp" #include "opto/valuetypenode.hpp" #include "utilities/macros.hpp" -#if INCLUDE_ZGC -#include "gc/z/c2/zBarrierSetC2.hpp" -#endif //============================================================================= //------------------------------split_thru_phi--------------------------------- @@ -1082,26 +1079,21 @@ // uses. // A better fix for this problem can be found in the BugTraq entry, but // expediency for Mantis demands this hack. - // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop - // split_if_with_blocks from splitting a block because we could not move around - // the FastLockNode. +#ifdef _LP64 for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { Node* n = region->fast_out(i); if (n->is_Phi()) { for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { Node* m = n->fast_out(j); - if (m->is_FastLock()) - return false; -#ifdef _LP64 if (m->Opcode() == Op_ConvI2L) return false; if (m->is_CastII() && m->isa_CastII()->has_range_check()) { return false; } -#endif } } } +#endif return true; }
--- a/src/hotspot/share/opto/machnode.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/machnode.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -197,7 +197,7 @@ // ADLC inherit from this class. class MachNode : public Node { public: - MachNode() : Node((uint)0), _num_opnds(0), _opnds(NULL) { + MachNode() : Node((uint)0), _barrier(0), _num_opnds(0), _opnds(NULL) { init_class_id(Class_Mach); } // Required boilerplate @@ -211,6 +211,9 @@ // no constant base node input. virtual uint mach_constant_base_node_input() const { return (uint)-1; } + uint8_t barrier_data() const { return _barrier; } + void set_barrier_data(uint data) { _barrier = data; } + // Copy inputs and operands to new node of instruction. // Called from cisc_version() and short_branch_version(). // !!!! The method's body is defined in ad_<arch>.cpp file. @@ -255,6 +258,9 @@ // output have choices - but they must use the same choice. virtual uint two_adr( ) const { return 0; } + // The GC might require some barrier metadata for machine code emission. + uint8_t _barrier; + // Array of complex operand pointers. Each corresponds to zero or // more leafs. Must be set by MachNode constructor to point to an // internal array of MachOpers. The MachOper array is sized by
--- a/src/hotspot/share/opto/matcher.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/matcher.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -1813,6 +1813,13 @@ _shared_nodes.map(leaf->_idx, ex); } + // Have mach nodes inherit GC barrier data + if (leaf->is_LoadStore()) { + mach->set_barrier_data(leaf->as_LoadStore()->barrier_data()); + } else if (leaf->is_Mem()) { + mach->set_barrier_data(leaf->as_Mem()->barrier_data()); + } + return ex; }
--- a/src/hotspot/share/opto/memnode.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/memnode.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -50,9 +50,6 @@ #include "utilities/copy.hpp" #include "utilities/macros.hpp" #include "utilities/vmError.hpp" -#if INCLUDE_ZGC -#include "gc/z/c2/zBarrierSetC2.hpp" -#endif // Portions of code courtesy of Clifford Click @@ -3001,7 +2998,7 @@ : Node(required), _type(rt), _adr_type(at), - _has_barrier(false) + _barrier(0) { init_req(MemNode::Control, c ); init_req(MemNode::Memory , mem);
--- a/src/hotspot/share/opto/memnode.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/memnode.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -43,6 +43,8 @@ bool _unaligned_access; // Unaligned access from unsafe bool _mismatched_access; // Mismatched access from unsafe: byte read in integer array for instance bool _unsafe_access; // Access of unsafe origin. + uint8_t _barrier; // Bit field with barrier information + protected: #ifdef ASSERT const TypePtr* _adr_type; // What kind of memory is being addressed? @@ -62,18 +64,30 @@ unset // The memory ordering is not set (used for testing) } MemOrd; protected: - MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at ) - : Node(c0,c1,c2 ), _unaligned_access(false), _mismatched_access(false), _unsafe_access(false) { + MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at ) : + Node(c0,c1,c2), + _unaligned_access(false), + _mismatched_access(false), + _unsafe_access(false), + _barrier(0) { init_class_id(Class_Mem); debug_only(_adr_type=at; adr_type();) } - MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 ) - : Node(c0,c1,c2,c3), _unaligned_access(false), _mismatched_access(false), _unsafe_access(false) { + MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 ) : + Node(c0,c1,c2,c3), + _unaligned_access(false), + _mismatched_access(false), + _unsafe_access(false), + _barrier(0) { init_class_id(Class_Mem); debug_only(_adr_type=at; adr_type();) } - MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4) - : Node(c0,c1,c2,c3,c4), _unaligned_access(false), _mismatched_access(false), _unsafe_access(false) { + MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4) : + Node(c0,c1,c2,c3,c4), + _unaligned_access(false), + _mismatched_access(false), + _unsafe_access(false), + _barrier(0) { init_class_id(Class_Mem); debug_only(_adr_type=at; adr_type();) } @@ -129,6 +143,9 @@ #endif } + uint8_t barrier_data() { return _barrier; } + void set_barrier_data(uint8_t barrier_data) { _barrier = barrier_data; } + // Search through memory states which precede this node (load or store). // Look for an exact match for the address, with no intervening // aliased stores. @@ -185,8 +202,6 @@ // this field. const MemOrd _mo; - uint _barrier; // Bit field with barrier information - AllocateNode* is_new_object_mark_load(PhaseGVN *phase) const; protected: @@ -200,7 +215,7 @@ public: LoadNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt, MemOrd mo, ControlDependency control_dependency) - : MemNode(c,mem,adr,at), _control_dependency(control_dependency), _mo(mo), _barrier(0), _type(rt) { + : MemNode(c,mem,adr,at), _control_dependency(control_dependency), _mo(mo), _type(rt) { init_class_id(Class_Load); } inline bool is_unordered() const { return !is_acquire(); } @@ -269,10 +284,6 @@ Node* convert_to_unsigned_load(PhaseGVN& gvn); Node* convert_to_signed_load(PhaseGVN& gvn); - void copy_barrier_info(const Node* src) { _barrier = src->as_Load()->_barrier; } - uint barrier_data() { return _barrier; } - void set_barrier_data(uint barrier_data) { _barrier |= barrier_data; } - void pin() { _control_dependency = Pinned; } bool has_unknown_control_dependency() const { return _control_dependency == UnknownControl; } @@ -864,7 +875,7 @@ private: const Type* const _type; // What kind of value is loaded? const TypePtr* _adr_type; // What kind of memory is being addressed? - bool _has_barrier; + uint8_t _barrier; // Bit field with barrier information virtual uint size_of() const; // Size is bigger public: LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required ); @@ -877,8 +888,9 @@ bool result_not_used() const; MemBarNode* trailing_membar() const; - void set_has_barrier() { _has_barrier = true; }; - bool has_barrier() const { return _has_barrier; }; + + uint8_t barrier_data() { return _barrier; } + void set_barrier_data(uint8_t barrier_data) { _barrier = barrier_data; } }; class LoadStoreConditionalNode : public LoadStoreNode { @@ -930,6 +942,7 @@ MemNode::MemOrd order() const { return _mem_ord; } + virtual uint size_of() const { return sizeof(*this); } }; class CompareAndExchangeNode : public LoadStoreNode { @@ -947,6 +960,7 @@ MemNode::MemOrd order() const { return _mem_ord; } + virtual uint size_of() const { return sizeof(*this); } }; //------------------------------CompareAndSwapBNode---------------------------
--- a/src/hotspot/share/opto/node.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/node.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -546,9 +546,6 @@ if (n->is_SafePoint()) { n->as_SafePoint()->clone_replaced_nodes(); } - if (n->is_Load()) { - n->as_Load()->copy_barrier_info(this); - } if (n->is_ValueTypeBase()) { C->add_value_type(n); } @@ -1482,10 +1479,6 @@ if (req() < 2 || (_flags & Flag_needs_anti_dependence_check) == 0) { return false; } - BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); - if (!bs->needs_anti_dependence_check(this)) { - return false; - } return in(1)->bottom_type()->has_memory(); }
--- a/src/hotspot/share/opto/node.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/node.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -83,8 +83,6 @@ class JumpNode; class JumpProjNode; class LoadNode; -class LoadBarrierNode; -class LoadBarrierSlowRegNode; class LoadStoreNode; class LoadStoreConditionalNode; class LockNode; @@ -646,7 +644,6 @@ DEFINE_CLASS_ID(MemBar, Multi, 3) DEFINE_CLASS_ID(Initialize, MemBar, 0) DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1) - DEFINE_CLASS_ID(LoadBarrier, Multi, 4) DEFINE_CLASS_ID(Mach, Node, 1) DEFINE_CLASS_ID(MachReturn, Mach, 0) @@ -684,7 +681,6 @@ DEFINE_CLASS_ID(EncodeNarrowPtr, Type, 6) DEFINE_CLASS_ID(EncodeP, EncodeNarrowPtr, 0) DEFINE_CLASS_ID(EncodePKlass, EncodeNarrowPtr, 1) - DEFINE_CLASS_ID(LoadBarrierSlowReg, Type, 7) DEFINE_CLASS_ID(ValueTypeBase, Type, 8) DEFINE_CLASS_ID(ValueType, ValueTypeBase, 0) DEFINE_CLASS_ID(ValueTypePtr, ValueTypeBase, 1) @@ -844,8 +840,6 @@ DEFINE_CLASS_QUERY(Load) DEFINE_CLASS_QUERY(LoadStore) DEFINE_CLASS_QUERY(LoadStoreConditional) - DEFINE_CLASS_QUERY(LoadBarrier) - DEFINE_CLASS_QUERY(LoadBarrierSlowReg) DEFINE_CLASS_QUERY(Lock) DEFINE_CLASS_QUERY(Loop) DEFINE_CLASS_QUERY(Mach)
--- a/src/hotspot/share/opto/output.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/output.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -31,6 +31,8 @@ #include "compiler/compileBroker.hpp" #include "compiler/compilerDirectives.hpp" #include "compiler/oopMap.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/c2/barrierSetC2.hpp" #include "memory/allocation.inline.hpp" #include "opto/ad.hpp" #include "opto/callnode.hpp" @@ -124,15 +126,19 @@ } } + // Keeper of sizing aspects + BufferSizingData buf_sizes = BufferSizingData(); + + // Initialize code buffer + estimate_buffer_size(buf_sizes._const); + if (failing()) return; + + // Pre-compute the length of blocks and replace + // long branches with short if machine supports it. + // Must be done before ScheduleAndBundle due to SPARC delay slots uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1); blk_starts[0] = 0; - - // Initialize code buffer and process short branches. - CodeBuffer* cb = init_buffer(blk_starts); - - if (cb == NULL || failing()) { - return; - } + shorten_branches(blk_starts, buf_sizes); if (!is_osr_compilation() && _method && _method->has_scalarized_args()) { // Compute the offsets of the entry points required by the value type calling convention @@ -160,24 +166,18 @@ } ScheduleAndBundle(); - -#ifndef PRODUCT - if (trace_opto_output()) { - tty->print("\n---- After ScheduleAndBundle ----\n"); - for (uint i = 0; i < _cfg->number_of_blocks(); i++) { - tty->print("\nBB#%03d:\n", i); - Block* block = _cfg->get_block(i); - for (uint j = 0; j < block->number_of_nodes(); j++) { - Node* n = block->get_node(j); - OptoReg::Name reg = _regalloc->get_reg_first(n); - tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : ""); - n->dump(); - } - } + if (failing()) { + return; } -#endif - - if (failing()) { + + // Late barrier analysis must be done after schedule and bundle + // Otherwise liveness based spilling will fail + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); + bs->late_barrier_analysis(); + + // Complete sizing of codebuffer + CodeBuffer* cb = init_buffer(buf_sizes); + if (cb == NULL || failing()) { return; } @@ -258,7 +258,7 @@ // The architecture description provides short branch variants for some long // branch instructions. Replace eligible long branches with short branches. -void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) { +void Compile::shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes) { // Compute size of each block, method size, and relocation information size uint nblocks = _cfg->number_of_blocks(); @@ -276,11 +276,11 @@ bool has_short_branch_candidate = false; // Initialize the sizes to 0 - code_size = 0; // Size in bytes of generated code - stub_size = 0; // Size in bytes of all stub entries + int code_size = 0; // Size in bytes of generated code + int stub_size = 0; // Size in bytes of all stub entries // Size in bytes of all relocation entries, including those in local stubs. // Start with 2-bytes of reloc info for the unvalidated entry point - reloc_size = 1; // Number of relocation entries + int reloc_size = 1; // Number of relocation entries // Make three passes. The first computes pessimistic blk_starts, // relative jmp_offset and reloc_size information. The second performs @@ -516,6 +516,10 @@ // a relocation index. // The CodeBuffer will expand the locs array if this estimate is too low. reloc_size *= 10 / sizeof(relocInfo); + + buf_sizes._reloc = reloc_size; + buf_sizes._code = code_size; + buf_sizes._stub = stub_size; } //------------------------------FillLocArray----------------------------------- @@ -527,8 +531,8 @@ // This should never have accepted Bad before assert(OptoReg::is_valid(regnum), "location must be valid"); return (OptoReg::is_reg(regnum)) - ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) ) - : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum))); + ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) ) + : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum))); } @@ -647,12 +651,12 @@ } #endif //_LP64 else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) && - OptoReg::is_reg(regnum) ) { + OptoReg::is_reg(regnum) ) { array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double() - ? Location::float_in_dbl : Location::normal )); + ? Location::float_in_dbl : Location::normal )); } else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) { array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long - ? Location::int_in_long : Location::normal )); + ? Location::int_in_long : Location::normal )); } else if( t->base() == Type::NarrowOop ) { array->append(new_loc_value( _regalloc, regnum, Location::narrowoop )); } else { @@ -663,48 +667,48 @@ // No register. It must be constant data. switch (t->base()) { - case Type::Half: // Second half of a double - ShouldNotReachHere(); // Caller should skip 2nd halves - break; - case Type::AnyPtr: - array->append(new ConstantOopWriteValue(NULL)); - break; - case Type::AryPtr: - case Type::InstPtr: // fall through - array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding())); - break; - case Type::NarrowOop: - if (t == TypeNarrowOop::NULL_PTR) { + case Type::Half: // Second half of a double + ShouldNotReachHere(); // Caller should skip 2nd halves + break; + case Type::AnyPtr: array->append(new ConstantOopWriteValue(NULL)); - } else { - array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding())); + break; + case Type::AryPtr: + case Type::InstPtr: // fall through + array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->constant_encoding())); + break; + case Type::NarrowOop: + if (t == TypeNarrowOop::NULL_PTR) { + array->append(new ConstantOopWriteValue(NULL)); + } else { + array->append(new ConstantOopWriteValue(t->make_ptr()->isa_oopptr()->const_oop()->constant_encoding())); + } + break; + case Type::Int: + array->append(new ConstantIntValue(t->is_int()->get_con())); + break; + case Type::RawPtr: + // A return address (T_ADDRESS). + assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI"); +#ifdef _LP64 + // Must be restored to the full-width 64-bit stack slot. + array->append(new ConstantLongValue(t->is_ptr()->get_con())); +#else + array->append(new ConstantIntValue(t->is_ptr()->get_con())); +#endif + break; + case Type::FloatCon: { + float f = t->is_float_constant()->getf(); + array->append(new ConstantIntValue(jint_cast(f))); + break; } - break; - case Type::Int: - array->append(new ConstantIntValue(t->is_int()->get_con())); - break; - case Type::RawPtr: - // A return address (T_ADDRESS). - assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI"); + case Type::DoubleCon: { + jdouble d = t->is_double_constant()->getd(); #ifdef _LP64 - // Must be restored to the full-width 64-bit stack slot. - array->append(new ConstantLongValue(t->is_ptr()->get_con())); + array->append(new ConstantIntValue((jint)0)); + array->append(new ConstantDoubleValue(d)); #else - array->append(new ConstantIntValue(t->is_ptr()->get_con())); -#endif - break; - case Type::FloatCon: { - float f = t->is_float_constant()->getf(); - array->append(new ConstantIntValue(jint_cast(f))); - break; - } - case Type::DoubleCon: { - jdouble d = t->is_double_constant()->getd(); -#ifdef _LP64 - array->append(new ConstantIntValue((jint)0)); - array->append(new ConstantDoubleValue(d)); -#else - // Repack the double as two jints. + // Repack the double as two jints. // The convention the interpreter uses is that the second local // holds the first raw word of the native double representation. // This is actually reasonable, since locals and stack arrays @@ -716,15 +720,15 @@ array->append(new ConstantIntValue(acc.words[1])); array->append(new ConstantIntValue(acc.words[0])); #endif - break; - } - case Type::Long: { - jlong d = t->is_long()->get_con(); + break; + } + case Type::Long: { + jlong d = t->is_long()->get_con(); #ifdef _LP64 - array->append(new ConstantIntValue((jint)0)); - array->append(new ConstantLongValue(d)); + array->append(new ConstantIntValue((jint)0)); + array->append(new ConstantLongValue(d)); #else - // Repack the long as two jints. + // Repack the long as two jints. // The convention the interpreter uses is that the second local // holds the first raw word of the native double representation. // This is actually reasonable, since locals and stack arrays @@ -736,14 +740,14 @@ array->append(new ConstantIntValue(acc.words[1])); array->append(new ConstantIntValue(acc.words[0])); #endif - break; - } - case Type::Top: // Add an illegal value here - array->append(new LocationValue(Location())); - break; - default: - ShouldNotReachHere(); - break; + break; + } + case Type::Top: // Add an illegal value here + array->append(new LocationValue(Location())); + break; + default: + ShouldNotReachHere(); + break; } } @@ -912,58 +916,58 @@ // A simplified version of Process_OopMap_Node, to handle non-safepoints. class NonSafepointEmitter { - Compile* C; - JVMState* _pending_jvms; - int _pending_offset; - - void emit_non_safepoint(); + Compile* C; + JVMState* _pending_jvms; + int _pending_offset; + + void emit_non_safepoint(); public: - NonSafepointEmitter(Compile* compile) { - this->C = compile; - _pending_jvms = NULL; - _pending_offset = 0; - } - - void observe_instruction(Node* n, int pc_offset) { - if (!C->debug_info()->recording_non_safepoints()) return; - - Node_Notes* nn = C->node_notes_at(n->_idx); - if (nn == NULL || nn->jvms() == NULL) return; - if (_pending_jvms != NULL && - _pending_jvms->same_calls_as(nn->jvms())) { - // Repeated JVMS? Stretch it up here. - _pending_offset = pc_offset; - } else { + NonSafepointEmitter(Compile* compile) { + this->C = compile; + _pending_jvms = NULL; + _pending_offset = 0; + } + + void observe_instruction(Node* n, int pc_offset) { + if (!C->debug_info()->recording_non_safepoints()) return; + + Node_Notes* nn = C->node_notes_at(n->_idx); + if (nn == NULL || nn->jvms() == NULL) return; if (_pending_jvms != NULL && + _pending_jvms->same_calls_as(nn->jvms())) { + // Repeated JVMS? Stretch it up here. + _pending_offset = pc_offset; + } else { + if (_pending_jvms != NULL && + _pending_offset < pc_offset) { + emit_non_safepoint(); + } + _pending_jvms = NULL; + if (pc_offset > C->debug_info()->last_pc_offset()) { + // This is the only way _pending_jvms can become non-NULL: + _pending_jvms = nn->jvms(); + _pending_offset = pc_offset; + } + } + } + + // Stay out of the way of real safepoints: + void observe_safepoint(JVMState* jvms, int pc_offset) { + if (_pending_jvms != NULL && + !_pending_jvms->same_calls_as(jvms) && _pending_offset < pc_offset) { emit_non_safepoint(); } _pending_jvms = NULL; - if (pc_offset > C->debug_info()->last_pc_offset()) { - // This is the only way _pending_jvms can become non-NULL: - _pending_jvms = nn->jvms(); - _pending_offset = pc_offset; + } + + void flush_at_end() { + if (_pending_jvms != NULL) { + emit_non_safepoint(); } + _pending_jvms = NULL; } - } - - // Stay out of the way of real safepoints: - void observe_safepoint(JVMState* jvms, int pc_offset) { - if (_pending_jvms != NULL && - !_pending_jvms->same_calls_as(jvms) && - _pending_offset < pc_offset) { - emit_non_safepoint(); - } - _pending_jvms = NULL; - } - - void flush_at_end() { - if (_pending_jvms != NULL) { - emit_non_safepoint(); - } - _pending_jvms = NULL; - } }; void NonSafepointEmitter::emit_non_safepoint() { @@ -993,15 +997,11 @@ } //------------------------------init_buffer------------------------------------ -CodeBuffer* Compile::init_buffer(uint* blk_starts) { +void Compile::estimate_buffer_size(int& const_req) { // Set the initially allocated size - int code_req = initial_code_capacity; - int locs_req = initial_locs_capacity; - int stub_req = initial_stub_capacity; - int const_req = initial_const_capacity; - - int pad_req = NativeCall::instruction_size; + const_req = initial_const_capacity; + // The extra spacing after the code is necessary on some platforms. // Sometimes we need to patch in a jump after the last instruction, // if the nmethod has been deoptimized. (See 4932387, 4894843.) @@ -1017,7 +1017,7 @@ // Compute prolog code size _method_size = 0; - _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize; + _frame_slots = OptoReg::reg2stack(_matcher->_old_SP) + _regalloc->_framesize; #if defined(IA64) && !defined(AIX) if (save_argument_registers()) { // 4815101: this is a stub with implicit and unknown precision fp args. @@ -1066,11 +1066,18 @@ // Initialize the space for the BufferBlob used to find and verify // instruction size in MachNode::emit_size() init_scratch_buffer_blob(const_req); - if (failing()) return NULL; // Out of memory - - // Pre-compute the length of blocks and replace - // long branches with short if machine supports it. - shorten_branches(blk_starts, code_req, locs_req, stub_req); +} + +CodeBuffer* Compile::init_buffer(BufferSizingData& buf_sizes) { + + int stub_req = buf_sizes._stub; + int code_req = buf_sizes._code; + int const_req = buf_sizes._const; + + int pad_req = NativeCall::instruction_size; + + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); + stub_req += bs->estimate_stub_size(); // nmethod and CodeBuffer count stubs & constants as part of method's code. // class HandlerImpl is platform-specific and defined in the *.ad files. @@ -1083,18 +1090,18 @@ code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion int total_req = - const_req + - code_req + - pad_req + - stub_req + - exception_handler_req + - deopt_handler_req; // deopt handler + const_req + + code_req + + pad_req + + stub_req + + exception_handler_req + + deopt_handler_req; // deopt handler if (has_method_handle_invokes()) total_req += deopt_handler_req; // deopt MH handler CodeBuffer* cb = code_buffer(); - cb->initialize(total_req, locs_req); + cb->initialize(total_req, buf_sizes._reloc); // Have we run out of code space? if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { @@ -1315,12 +1322,12 @@ Process_OopMap_Node(mach, current_offset); } // End if safepoint - // If this is a null check, then add the start of the previous instruction to the list + // If this is a null check, then add the start of the previous instruction to the list else if( mach->is_MachNullCheck() ) { inct_starts[inct_cnt++] = previous_offset; } - // If this is a branch, then fill in the label with the target BB's label + // If this is a branch, then fill in the label with the target BB's label else if (mach->is_MachBranch()) { // This requires the TRUE branch target be in succs[0] uint block_num = block->non_connector_successor(0)->_pre_order; @@ -1331,8 +1338,8 @@ bool delay_slot_is_used = valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay(); if (!delay_slot_is_used && mach->may_be_short_branch()) { - assert(delay_slot == NULL, "not expecting delay slot node"); - int br_size = n->size(_regalloc); + assert(delay_slot == NULL, "not expecting delay slot node"); + int br_size = n->size(_regalloc); int offset = blk_starts[block_num] - current_offset; if (block_num >= i) { // Current and following block's offset are not @@ -1390,7 +1397,7 @@ } } #ifdef ASSERT - // Check that oop-store precedes the card-mark + // Check that oop-store precedes the card-mark else if (mach->ideal_Opcode() == Op_StoreCM) { uint storeCM_idx = j; int count = 0; @@ -1561,6 +1568,10 @@ } #endif + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); + bs->emit_stubs(*cb); + if (failing()) return; + #ifndef PRODUCT // Information on the size of the method, without the extraneous code Scheduling::increment_method_size(cb->insts_size()); @@ -1735,20 +1746,20 @@ // Initializer for class Scheduling Scheduling::Scheduling(Arena *arena, Compile &compile) - : _arena(arena), - _cfg(compile.cfg()), - _regalloc(compile.regalloc()), - _scheduled(arena), - _available(arena), - _reg_node(arena), - _pinch_free_list(arena), - _next_node(NULL), - _bundle_instr_count(0), - _bundle_cycle_number(0), - _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]) + : _arena(arena), + _cfg(compile.cfg()), + _regalloc(compile.regalloc()), + _scheduled(arena), + _available(arena), + _reg_node(arena), + _pinch_free_list(arena), + _next_node(NULL), + _bundle_instr_count(0), + _bundle_cycle_number(0), + _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]) #ifndef PRODUCT - , _branches(0) - , _unconditional_delays(0) + , _branches(0) + , _unconditional_delays(0) #endif { // Create a MachNopNode @@ -1829,8 +1840,8 @@ _bundle_use.reset(); memcpy(_bundle_use_elements, - Pipeline_Use::elaborated_elements, - sizeof(Pipeline_Use::elaborated_elements)); + Pipeline_Use::elaborated_elements, + sizeof(Pipeline_Use::elaborated_elements)); } // Perform instruction scheduling and bundling over the sequence of @@ -1857,6 +1868,22 @@ // Walk backwards over each basic block, computing the needed alignment // Walk over all the basic blocks scheduling.DoScheduling(); + +#ifndef PRODUCT + if (trace_opto_output()) { + tty->print("\n---- After ScheduleAndBundle ----\n"); + for (uint i = 0; i < _cfg->number_of_blocks(); i++) { + tty->print("\nBB#%03d:\n", i); + Block* block = _cfg->get_block(i); + for (uint j = 0; j < block->number_of_nodes(); j++) { + Node* n = block->get_node(j); + OptoReg::Name reg = _regalloc->get_reg_first(n); + tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : ""); + n->dump(); + } + } + } +#endif } // Compute the latency of all the instructions. This is fairly simple, @@ -1925,7 +1952,7 @@ #ifndef PRODUCT if (_cfg->C->trace_opto_output()) tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n", - n->_idx, _current_latency[n_idx], _bundle_cycle_number); + n->_idx, _current_latency[n_idx], _bundle_cycle_number); #endif return (false); } @@ -1942,7 +1969,7 @@ #ifndef PRODUCT if (_cfg->C->trace_opto_output()) tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n", - n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle); + n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle); #endif return (false); } @@ -2150,12 +2177,12 @@ // Don't allow safepoints in the branch shadow, that will // cause a number of difficulties if ( avail_pipeline->instructionCount() == 1 && - !avail_pipeline->hasMultipleBundles() && - !avail_pipeline->hasBranchDelay() && - Pipeline::instr_has_unit_size() && - d->size(_regalloc) == Pipeline::instr_unit_size() && - NodeFitsInBundle(d) && - !node_bundling(d)->used_in_delay()) { + !avail_pipeline->hasMultipleBundles() && + !avail_pipeline->hasBranchDelay() && + Pipeline::instr_has_unit_size() && + d->size(_regalloc) == Pipeline::instr_unit_size() && + NodeFitsInBundle(d) && + !node_bundling(d)->used_in_delay()) { if (d->is_Mach() && !d->is_MachSafePoint()) { // A node that fits in the delay slot was found, so we need to @@ -2200,13 +2227,13 @@ // step of the bundles if (!NodeFitsInBundle(n)) { #ifndef PRODUCT - if (_cfg->C->trace_opto_output()) - tty->print("# *** STEP(branch won't fit) ***\n"); + if (_cfg->C->trace_opto_output()) + tty->print("# *** STEP(branch won't fit) ***\n"); #endif - // Update the state information - _bundle_instr_count = 0; - _bundle_cycle_number += 1; - _bundle_use.step(1); + // Update the state information + _bundle_instr_count = 0; + _bundle_cycle_number += 1; + _bundle_use.step(1); } } @@ -2252,8 +2279,8 @@ #ifndef PRODUCT if (_cfg->C->trace_opto_output()) tty->print("# *** STEP(%d >= %d instructions) ***\n", - instruction_count + _bundle_instr_count, - Pipeline::_max_instrs_per_cycle); + instruction_count + _bundle_instr_count, + Pipeline::_max_instrs_per_cycle); #endif step(1); } @@ -2459,7 +2486,7 @@ } assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, ""); if( last->is_Catch() || - (last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) { + (last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) { // There might be a prior call. Skip it. while (_bb_start < _bb_end && bb->get_node(--_bb_end)->is_MachProj()); } else if( last->is_MachNullCheck() ) { @@ -2529,7 +2556,7 @@ } #endif #ifdef ASSERT - verify_good_schedule(bb,"after block local scheduling"); + verify_good_schedule(bb,"after block local scheduling"); #endif } @@ -2877,31 +2904,31 @@ // void Scheduling::garbage_collect_pinch_nodes() { #ifndef PRODUCT - if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:"); + if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:"); #endif - int trace_cnt = 0; - for (uint k = 0; k < _reg_node.Size(); k++) { - Node* pinch = _reg_node[k]; - if ((pinch != NULL) && pinch->Opcode() == Op_Node && - // no predecence input edges - (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) { - cleanup_pinch(pinch); - _pinch_free_list.push(pinch); - _reg_node.map(k, NULL); + int trace_cnt = 0; + for (uint k = 0; k < _reg_node.Size(); k++) { + Node* pinch = _reg_node[k]; + if ((pinch != NULL) && pinch->Opcode() == Op_Node && + // no predecence input edges + (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) { + cleanup_pinch(pinch); + _pinch_free_list.push(pinch); + _reg_node.map(k, NULL); #ifndef PRODUCT - if (_cfg->C->trace_opto_output()) { - trace_cnt++; - if (trace_cnt > 40) { - tty->print("\n"); - trace_cnt = 0; - } - tty->print(" %d", pinch->_idx); + if (_cfg->C->trace_opto_output()) { + trace_cnt++; + if (trace_cnt > 40) { + tty->print("\n"); + trace_cnt = 0; } + tty->print(" %d", pinch->_idx); + } #endif - } } + } #ifndef PRODUCT - if (_cfg->C->trace_opto_output()) tty->print("\n"); + if (_cfg->C->trace_opto_output()) tty->print("\n"); #endif } @@ -2938,19 +2965,19 @@ void Scheduling::print_statistics() { // Print the size added by nops for bundling tty->print("Nops added %d bytes to total of %d bytes", - _total_nop_size, _total_method_size); + _total_nop_size, _total_method_size); if (_total_method_size > 0) tty->print(", for %.2f%%", - ((double)_total_nop_size) / ((double) _total_method_size) * 100.0); + ((double)_total_nop_size) / ((double) _total_method_size) * 100.0); tty->print("\n"); // Print the number of branch shadows filled if (Pipeline::_branch_has_delay_slot) { tty->print("Of %d branches, %d had unconditional delay slots filled", - _total_branches, _total_unconditional_delays); + _total_branches, _total_unconditional_delays); if (_total_branches > 0) tty->print(", for %.2f%%", - ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0); + ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0); tty->print("\n"); } @@ -2964,6 +2991,6 @@ if (total_bundles > 0) tty->print("Average ILP (excluding nops) is %.2f\n", - ((double)total_instructions) / ((double)total_bundles)); + ((double)total_instructions) / ((double)total_bundles)); } #endif
--- a/src/hotspot/share/opto/output.hpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/output.hpp Fri Oct 11 10:39:58 2019 +0200 @@ -40,7 +40,6 @@ class PhaseChaitin; class Pipeline_Use_Element; class Pipeline_Use; - #ifndef PRODUCT #define DEBUG_ARG(x) , x #else @@ -49,10 +48,7 @@ // Define the initial sizes for allocation of the resizable code buffer enum { - initial_code_capacity = 16 * 1024, - initial_stub_capacity = 4 * 1024, - initial_const_capacity = 4 * 1024, - initial_locs_capacity = 3 * 1024 + initial_const_capacity = 4 * 1024 }; //------------------------------Scheduling----------------------------------
--- a/src/hotspot/share/opto/phaseX.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/phaseX.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -1670,14 +1670,14 @@ // of the mirror load depends on the type of 'n'. See LoadNode::Value(). // LoadBarrier?(LoadP(LoadP(AddP(foo:Klass, #java_mirror)))) BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); - bool has_load_barriers = bs->has_load_barriers(); + bool has_load_barrier_nodes = bs->has_load_barrier_nodes(); if (use_op == Op_LoadP && use->bottom_type()->isa_rawptr()) { for (DUIterator_Fast i2max, i2 = use->fast_outs(i2max); i2 < i2max; i2++) { Node* u = use->fast_out(i2); const Type* ut = u->bottom_type(); if (u->Opcode() == Op_LoadP && ut->isa_instptr()) { - if (has_load_barriers) { + if (has_load_barrier_nodes) { // Search for load barriers behind the load for (DUIterator_Fast i3max, i3 = u->fast_outs(i3max); i3 < i3max; i3++) { Node* b = u->fast_out(i3); @@ -1848,14 +1848,14 @@ // Loading the java mirror from a Klass requires two loads and the type // of the mirror load depends on the type of 'n'. See LoadNode::Value(). BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); - bool has_load_barriers = bs->has_load_barriers(); + bool has_load_barrier_nodes = bs->has_load_barrier_nodes(); if (m_op == Op_LoadP && m->bottom_type()->isa_rawptr()) { for (DUIterator_Fast i2max, i2 = m->fast_outs(i2max); i2 < i2max; i2++) { Node* u = m->fast_out(i2); const Type* ut = u->bottom_type(); if (u->Opcode() == Op_LoadP && ut->isa_instptr() && ut != type(u)) { - if (has_load_barriers) { + if (has_load_barrier_nodes) { // Search for load barriers behind the load for (DUIterator_Fast i3max, i3 = u->fast_outs(i3max); i3 < i3max; i3++) { Node* b = u->fast_out(i3);
--- a/src/hotspot/share/opto/split_if.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/opto/split_if.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -111,83 +111,90 @@ n->dump(); } #endif - // Clone down any block-local BoolNode uses of this CmpNode - for (DUIterator i = n->outs(); n->has_out(i); i++) { - Node* bol = n->out(i); - assert( bol->is_Bool(), "" ); - if (bol->outcnt() == 1) { - Node* use = bol->unique_out(); - if (use->Opcode() == Op_Opaque4) { - if (use->outcnt() == 1) { - Node* iff = use->unique_out(); - assert(iff->is_If(), "unexpected node type"); - Node *use_c = iff->in(0); + if (!n->is_FastLock()) { + // Clone down any block-local BoolNode uses of this CmpNode + for (DUIterator i = n->outs(); n->has_out(i); i++) { + Node* bol = n->out(i); + assert( bol->is_Bool(), "" ); + if (bol->outcnt() == 1) { + Node* use = bol->unique_out(); + if (use->Opcode() == Op_Opaque4) { + if (use->outcnt() == 1) { + Node* iff = use->unique_out(); + assert(iff->is_If(), "unexpected node type"); + Node *use_c = iff->in(0); + if (use_c == blk1 || use_c == blk2) { + continue; + } + } + } else { + // We might see an Opaque1 from a loop limit check here + assert(use->is_If() || use->is_CMove() || use->Opcode() == Op_Opaque1, "unexpected node type"); + Node *use_c = use->is_If() ? use->in(0) : get_ctrl(use); if (use_c == blk1 || use_c == blk2) { + assert(use->is_CMove(), "unexpected node type"); continue; } } - } else { - // We might see an Opaque1 from a loop limit check here - assert(use->is_If() || use->is_CMove() || use->Opcode() == Op_Opaque1, "unexpected node type"); - Node *use_c = use->is_If() ? use->in(0) : get_ctrl(use); - if (use_c == blk1 || use_c == blk2) { - assert(use->is_CMove(), "unexpected node type"); - continue; + } + if (get_ctrl(bol) == blk1 || get_ctrl(bol) == blk2) { + // Recursively sink any BoolNode +#ifndef PRODUCT + if( PrintOpto && VerifyLoopOptimizations ) { + tty->print("Cloning down: "); + bol->dump(); } +#endif + for (DUIterator j = bol->outs(); bol->has_out(j); j++) { + Node* u = bol->out(j); + // Uses are either IfNodes, CMoves or Opaque4 + if (u->Opcode() == Op_Opaque4) { + assert(u->in(1) == bol, "bad input"); + for (DUIterator_Last kmin, k = u->last_outs(kmin); k >= kmin; --k) { + Node* iff = u->last_out(k); + assert(iff->is_If() || iff->is_CMove(), "unexpected node type"); + assert( iff->in(1) == u, "" ); + // Get control block of either the CMove or the If input + Node *iff_ctrl = iff->is_If() ? iff->in(0) : get_ctrl(iff); + Node *x1 = bol->clone(); + Node *x2 = u->clone(); + register_new_node(x1, iff_ctrl); + register_new_node(x2, iff_ctrl); + _igvn.replace_input_of(x2, 1, x1); + _igvn.replace_input_of(iff, 1, x2); + } + _igvn.remove_dead_node(u); + --j; + } else { + // We might see an Opaque1 from a loop limit check here + assert(u->is_If() || u->is_CMove() || u->Opcode() == Op_Opaque1, "unexpected node type"); + assert(u->in(1) == bol, ""); + // Get control block of either the CMove or the If input + Node *u_ctrl = u->is_If() ? u->in(0) : get_ctrl(u); + assert((u_ctrl != blk1 && u_ctrl != blk2) || u->is_CMove(), "won't converge"); + Node *x = bol->clone(); + register_new_node(x, u_ctrl); + _igvn.replace_input_of(u, 1, x); + --j; + } + } + _igvn.remove_dead_node(bol); + --i; } } - if (get_ctrl(bol) == blk1 || get_ctrl(bol) == blk2) { - // Recursively sink any BoolNode -#ifndef PRODUCT - if( PrintOpto && VerifyLoopOptimizations ) { - tty->print("Cloning down: "); - bol->dump(); - } -#endif - for (DUIterator j = bol->outs(); bol->has_out(j); j++) { - Node* u = bol->out(j); - // Uses are either IfNodes, CMoves or Opaque4 - if (u->Opcode() == Op_Opaque4) { - assert(u->in(1) == bol, "bad input"); - for (DUIterator_Last kmin, k = u->last_outs(kmin); k >= kmin; --k) { - Node* iff = u->last_out(k); - assert(iff->is_If() || iff->is_CMove(), "unexpected node type"); - assert( iff->in(1) == u, "" ); - // Get control block of either the CMove or the If input - Node *iff_ctrl = iff->is_If() ? iff->in(0) : get_ctrl(iff); - Node *x1 = bol->clone(); - Node *x2 = u->clone(); - register_new_node(x1, iff_ctrl); - register_new_node(x2, iff_ctrl); - _igvn.replace_input_of(x2, 1, x1); - _igvn.replace_input_of(iff, 1, x2); - } - _igvn.remove_dead_node(u); - --j; - } else { - // We might see an Opaque1 from a loop limit check here - assert(u->is_If() || u->is_CMove() || u->Opcode() == Op_Opaque1, "unexpected node type"); - assert(u->in(1) == bol, ""); - // Get control block of either the CMove or the If input - Node *u_ctrl = u->is_If() ? u->in(0) : get_ctrl(u); - assert((u_ctrl != blk1 && u_ctrl != blk2) || u->is_CMove(), "won't converge"); - Node *x = bol->clone(); - register_new_node(x, u_ctrl); - _igvn.replace_input_of(u, 1, x); - --j; - } - } - _igvn.remove_dead_node(bol); - --i; - } } // Clone down this CmpNode for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; --j) { - Node* bol = n->last_out(j); - assert( bol->in(1) == n, "" ); + Node* use = n->last_out(j); + uint pos = 1; + if (n->is_FastLock()) { + pos = TypeFunc::Parms + 2; + assert(use->is_Lock(), "FastLock only used by LockNode"); + } + assert(use->in(pos) == n, "" ); Node *x = n->clone(); - register_new_node(x, get_ctrl(bol)); - _igvn.replace_input_of(bol, 1, x); + register_new_node(x, ctrl_or_self(use)); + _igvn.replace_input_of(use, pos, x); } _igvn.remove_dead_node( n );
--- a/src/hotspot/share/prims/jniCheck.cpp Thu Oct 10 14:11:18 2019 +0200 +++ b/src/hotspot/share/prims/jniCheck.cpp Fri Oct 11 10:39:58 2019 +0200 @@ -450,16 +450,16 @@ Method* jniCheck::validate_jmethod_id(JavaThread* thr, jmethodID method_id) { ASSERT_OOPS_ALLOWED; // do the fast jmethodID check first - Method* moop = Method::checked_resolve_jmethod_id(method_id); - if (moop == NULL) { + Method* m = Method::checked_resolve_jmethod_id(method_id); + if (m == NULL) { ReportJNIFatalError(thr, fatal_wrong_class_or_method); } - // jmethodIDs are supposed to be weak handles in the class loader data, + // jmethodIDs are handles in the class loader data, // but that can be expensive so check it last else if (!Method::is_method_id(method_id)) { ReportJNIFatalError(thr, fatal_non_weak_method); } - return moop; + return m; } @@ -520,18 +520,29 @@ } } -void jniCheck::validate_call_object(JavaThread* thr, jobject obj, jmethodID method_id) { - /* validate the object being passed */ +void jniCheck::validate_call(JavaThread* thr, jclass clazz, jmethodID method_id, jobject obj) { ASSERT_OOPS_ALLOWED; - jniCheck::validate_jmethod_id(thr, method_id); - jniCheck::validate_object(thr, obj); -} + Method* m = jniCheck::validate_jmethod_id(thr, method_id); + InstanceKlass* holder = m->method_holder(); -void jniCheck::validate_call_class(JavaThread* thr, jclass clazz, jmethodID method_id) { - /* validate the class being passed */ - ASSERT_OOPS_ALLOWED; - jniCheck::validate_jmethod_id(thr, method_id); - jniCheck::validate_class(thr, clazz, false); + if (clazz != NULL) { + Klass* k = jniCheck::validate_class(thr, clazz, false); + // Check that method is in the class, must be InstanceKlass + if (!InstanceKlass::cast(k)->is_subtype_of(holder)) { + ReportJNIFatalError(thr, fatal_wrong_class_or_method); + } + } + + if (obj != NULL) { + oop recv = jniCheck::validate_object(thr, obj); + assert(recv != NULL, "validate_object checks that"); + Klass* ik = recv->klass(); + + // Check that the object is a subtype of method holder too. + if (!InstanceKlass::cast(ik)->is_subtype_of(holder)) { + ReportJNIFatalError(thr, fatal_wrong_class_or_method); + } + } } @@ -597,8 +608,7 @@ jboolean isStatic)) functionEnter(thr); IN_VM( - jniCheck::validate_class(thr, cls, false); - jniCheck::validate_jmethod_id(thr, methodID); + jniCheck::validate_call(thr, cls, methodID); ) jobject result = UNCHECKED()->ToReflectedMethod(env, cls, methodID, isStatic); @@ -854,8 +864,7 @@ functionEnter(thr); va_list args; IN_VM( - jniCheck::validate_class(thr, clazz, false); - jniCheck::validate_jmethod_id(thr, methodID); + jniCheck::validate_call(thr, clazz, methodID); ) va_start(args, methodID); jobject result = UNCHECKED()->NewObjectV(env,clazz,methodID,args); @@ -871,8 +880,7 @@ va_list args)) functionEnter(thr); IN_VM( - jniCheck::validate_class(thr, clazz, false); - jniCheck::validate_jmethod_id(thr, methodID); + jniCheck::validate_call(thr, clazz, methodID); ) jobject result = UNCHECKED()->NewObjectV(env,clazz,methodID,args); functionExit(thr); @@ -886,8 +894,7 @@ const jvalue *args)) functionEnter(thr); IN_VM( - jniCheck::validate_class(thr, clazz, false); - jniCheck::validate_jmethod_id(thr, methodID); + jniCheck::validate_call(thr, clazz, methodID); ) jobject result = UNCHECKED()->NewObjectA(env,clazz,methodID,args); functionExit(thr); @@ -943,7 +950,7 @@ functionEnter(thr); \ va_list args; \ IN_VM( \ - jniCheck::validate_call_object(thr, obj, methodID); \ + jniCheck::validate_call(thr, NULL, methodID, obj); \ ) \ va_start(args,methodID); \ ResultType result =UNCHECKED()->Call##Result##MethodV(env, obj, methodID, \ @@ -961,7 +968,7 @@ va_list args)) \ functionEnter(thr); \ IN_VM(\ - jniCheck::validate_call_object(thr, obj, methodID); \ + jniCheck::validate_call(thr, NULL, methodID, obj); \ ) \ ResultType result = UNCHECKED()->Call##Result##MethodV(env, obj, methodID,\ args); \ @@ -977,7 +984,7 @@ const jvalue * args)) \ functionEnter(thr); \ IN_VM( \ - jniCheck::validate_call_object(thr, obj, methodID); \ + jniCheck::validate_call(thr, NULL, methodID, obj); \ ) \ ResultType result = UNCHECKED()->Call##Result##MethodA(env, obj, methodID,\ args); \ @@ -1004,7 +1011,7 @@ functionEnter(thr); va_list args; IN_VM( - jniCheck::validate_call_object(thr, obj, methodID); + jniCheck::validate_call(thr, NULL, methodID, obj); ) va_start(args,methodID); UNCHECKED()->CallVoidMethodV(env,obj,methodID,args); @@ -1020,7 +1027,7 @@ va_list args)) functionEnter(thr); IN_VM( - jniCheck::validate_call_object(thr, obj, methodID); + jniCheck::validate_call(thr, NULL, methodID, obj); ) UNCHECKED()->CallVoidMethodV(env,obj,methodID,args); thr->set_pending_jni_exception_check("CallVoidMethodV"); @@ -1034,7 +1041,7 @@ const jvalue * args)) functionEnter(thr); IN_VM( - jniCheck::validate_call_object(thr, obj, methodID); + jniCheck::validate_call(thr, NULL, methodID, obj); ) UNCHECKED()->CallVoidMethodA(env,obj,methodID,args); thr->set_pending_jni_exception_check("CallVoidMethodA"); @@ -1051,8 +1058,7 @@ functionEnter(thr); \ va_list args; \ IN_VM( \ - jniCheck::validate_call_object(thr, obj, methodID); \ - jniCheck::validate_call_class(thr, clazz, methodID); \ + jniCheck::validate_call(thr, clazz, methodID, obj); \ ) \ va_start(args,methodID); \ ResultType result = UNCHECKED()->CallNonvirtual##Result##MethodV(env, \ @@ -1074,8 +1080,7 @@ va_list args)) \ functionEnter(thr); \ IN_VM( \ - jniCheck::validate_call_object(thr, obj, methodID); \ - jniCheck::validate_call_class(thr, clazz, methodID); \ + jniCheck::validate_call(thr, clazz, methodID, obj); \ ) \ ResultType result = UNCHECKED()->CallNonvirtual##Result##MethodV(env, \ obj, \ @@ -1095,8 +1100,7 @@ const jvalue * args)) \ functionEnter(thr); \ IN_VM( \ - jniCheck::validate_call_object(thr, obj, methodID); \ - jniCheck::validate_call_class(th