OpenJDK / amber / amber
changeset 33585:3121a0276778
Merge
author   | bobv
date     | Wed, 21 Oct 2015 16:38:48 -0400
parents  | 6e0c38d7868b c2d95df2c54e
children | df2af0ed648d
files    | hotspot/make/bsd/makefiles/gcc.make hotspot/make/bsd/makefiles/jsig.make hotspot/src/os/bsd/vm/os_bsd.cpp hotspot/src/share/vm/compiler/disassembler.cpp hotspot/src/share/vm/gc/shared/genRemSet.cpp hotspot/src/share/vm/gc/shared/genRemSet.hpp hotspot/src/share/vm/gc/shared/watermark.hpp hotspot/test/compiler/TestMoveStoresOutOfLoopsStoreNoCtrl.java hotspot/test/runtime/6888954/vmerrors.sh
diffstat | 1001 files changed, 68873 insertions(+), 8565 deletions(-)
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/compiler/ImmutableOopMapSet.java Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/compiler/ImmutableOopMapSet.java Wed Oct 21 16:38:48 2015 -0400 @@ -67,9 +67,6 @@ } } - public void visitValueLocation(Address valueAddr) { - } - public void visitNarrowOopLocation(Address narrowOopAddr) { addressVisitor.visitCompOopAddress(narrowOopAddr); } @@ -216,9 +213,9 @@ } } - // We want narow oop, value and oop oop_types - OopMapValue.OopTypes[] values = new OopMapValue.OopTypes[]{ - OopMapValue.OopTypes.OOP_VALUE, OopMapValue.OopTypes.VALUE_VALUE, OopMapValue.OopTypes.NARROWOOP_VALUE + // We want narow oop and oop oop_types + OopMapValue.OopTypes[] values = new OopMapValue.OopTypes[] { + OopMapValue.OopTypes.OOP_VALUE, OopMapValue.OopTypes.NARROWOOP_VALUE }; { @@ -231,8 +228,6 @@ // to detect in the debugging system // assert(Universe::is_heap_or_null(*loc), "found non oop pointer"); visitor.visitOopLocation(loc); - } else if (omv.getType() == OopMapValue.OopTypes.VALUE_VALUE) { - visitor.visitValueLocation(loc); } else if (omv.getType() == OopMapValue.OopTypes.NARROWOOP_VALUE) { visitor.visitNarrowOopLocation(loc); }
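With value locations gone from the SA oop-map code, only plain oops and narrow oops reach the visitor from this loop. A minimal sketch of the remaining dispatch, using only the types and methods visible in the hunk above (the standalone helper class and method name are mine, not part of the patch):

    import sun.jvm.hotspot.compiler.OopMapValue;
    import sun.jvm.hotspot.compiler.OopMapVisitor;
    import sun.jvm.hotspot.debugger.Address;

    // Hypothetical helper mirroring the updated ImmutableOopMapSet logic:
    // VALUE_VALUE is no longer a case, so only oop and narrow-oop entries
    // are forwarded to the visitor.
    class OopMapDispatch {
        static void dispatch(OopMapValue omv, Address loc, OopMapVisitor visitor) {
            if (omv.getType() == OopMapValue.OopTypes.OOP_VALUE) {
                visitor.visitOopLocation(loc);
            } else if (omv.getType() == OopMapValue.OopTypes.NARROWOOP_VALUE) {
                visitor.visitNarrowOopLocation(loc);
            }
        }
    }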
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapValue.java Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapValue.java Wed Oct 21 16:38:48 2015 -0400 @@ -49,7 +49,6 @@ // Types of OopValues static int UNUSED_VALUE; static int OOP_VALUE; - static int VALUE_VALUE; static int NARROWOOP_VALUE; static int CALLEE_SAVED_VALUE; static int DERIVED_OOP_VALUE; @@ -73,7 +72,6 @@ REGISTER_MASK_IN_PLACE = db.lookupIntConstant("OopMapValue::register_mask_in_place").intValue(); UNUSED_VALUE = db.lookupIntConstant("OopMapValue::unused_value").intValue(); OOP_VALUE = db.lookupIntConstant("OopMapValue::oop_value").intValue(); - VALUE_VALUE = db.lookupIntConstant("OopMapValue::value_value").intValue(); NARROWOOP_VALUE = db.lookupIntConstant("OopMapValue::narrowoop_value").intValue(); CALLEE_SAVED_VALUE = db.lookupIntConstant("OopMapValue::callee_saved_value").intValue(); DERIVED_OOP_VALUE = db.lookupIntConstant("OopMapValue::derived_oop_value").intValue(); @@ -82,7 +80,6 @@ public static abstract class OopTypes { public static final OopTypes UNUSED_VALUE = new OopTypes() { int getValue() { return OopMapValue.UNUSED_VALUE; }}; public static final OopTypes OOP_VALUE = new OopTypes() { int getValue() { return OopMapValue.OOP_VALUE; }}; - public static final OopTypes VALUE_VALUE = new OopTypes() { int getValue() { return OopMapValue.VALUE_VALUE; }}; public static final OopTypes NARROWOOP_VALUE = new OopTypes() { int getValue() { return OopMapValue.NARROWOOP_VALUE; }}; public static final OopTypes CALLEE_SAVED_VALUE = new OopTypes() { int getValue() { return OopMapValue.CALLEE_SAVED_VALUE; }}; public static final OopTypes DERIVED_OOP_VALUE = new OopTypes() { int getValue() { return OopMapValue.DERIVED_OOP_VALUE; }}; @@ -105,7 +102,6 @@ // Querying public boolean isOop() { return (getValue() & TYPE_MASK_IN_PLACE) == OOP_VALUE; } - public boolean isValue() { return (getValue() & TYPE_MASK_IN_PLACE) == VALUE_VALUE; } public boolean isNarrowOop() { return (getValue() & TYPE_MASK_IN_PLACE) == NARROWOOP_VALUE; } public boolean isCalleeSaved() { return (getValue() & TYPE_MASK_IN_PLACE) == CALLEE_SAVED_VALUE; } public boolean isDerivedOop() { return (getValue() & TYPE_MASK_IN_PLACE) == DERIVED_OOP_VALUE; } @@ -117,7 +113,6 @@ int which = (getValue() & TYPE_MASK_IN_PLACE); if (which == UNUSED_VALUE) return OopTypes.UNUSED_VALUE; else if (which == OOP_VALUE) return OopTypes.OOP_VALUE; - else if (which == VALUE_VALUE) return OopTypes.VALUE_VALUE; else if (which == NARROWOOP_VALUE) return OopTypes.NARROWOOP_VALUE; else if (which == CALLEE_SAVED_VALUE) return OopTypes.CALLEE_SAVED_VALUE; else if (which == DERIVED_OOP_VALUE) return OopTypes.DERIVED_OOP_VALUE;
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapVisitor.java Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapVisitor.java Wed Oct 21 16:38:48 2015 -0400 @@ -31,6 +31,5 @@ public interface OopMapVisitor { public void visitOopLocation(Address oopAddr); public void visitDerivedOopLocation(Address baseOopAddr, Address derivedOopAddr); - public void visitValueLocation(Address valueAddr); public void visitNarrowOopLocation(Address narrowOopAddr); }
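For reference, a toy implementation of the slimmed-down interface (the class is hypothetical and purely illustrative; it only uses the three methods left in OopMapVisitor after this change):

    import sun.jvm.hotspot.compiler.OopMapVisitor;
    import sun.jvm.hotspot.debugger.Address;

    // Hypothetical visitor that just tallies what it sees; note there is no
    // longer a visitValueLocation method to implement.
    class CountingOopMapVisitor implements OopMapVisitor {
        int oops, narrowOops, derivedOops;
        public void visitOopLocation(Address oopAddr) { oops++; }
        public void visitDerivedOopLocation(Address baseOopAddr, Address derivedOopAddr) { derivedOops++; }
        public void visitNarrowOopLocation(Address narrowOopAddr) { narrowOops++; }
    }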
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java Wed Oct 21 16:38:48 2015 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -52,21 +52,19 @@ } private static synchronized void initialize(TypeDataBase db) throws WrongTypeException { - Type type = db.lookupType("Method"); + type = db.lookupType("Method"); constMethod = type.getAddressField("_constMethod"); methodData = type.getAddressField("_method_data"); methodCounters = type.getAddressField("_method_counters"); - methodSize = new CIntField(type.getCIntegerField("_method_size"), 0); accessFlags = new CIntField(type.getCIntegerField("_access_flags"), 0); code = type.getAddressField("_code"); vtableIndex = new CIntField(type.getCIntegerField("_vtable_index"), 0); - bytecodeOffset = type.getSize(); /* - interpreterEntry = type.getAddressField("_interpreter_entry"); fromCompiledCodeEntryPoint = type.getAddressField("_from_compiled_code_entry_point"); + interpreterEntry = type.getAddressField("_from_interpreted_entry"); + */ - */ objectInitializerName = null; classInitializerName = null; } @@ -77,16 +75,22 @@ public boolean isMethod() { return true; } + // Not a Method field, used to keep type. + private static Type type; + // Fields private static AddressField constMethod; private static AddressField methodData; private static AddressField methodCounters; - private static CIntField methodSize; private static CIntField accessFlags; private static CIntField vtableIndex; - private static long bytecodeOffset; private static AddressField code; + /* + private static AddressCField fromCompiledCodeEntryPoint; + private static AddressField interpreterEntry; + */ + // constant method names - <init>, <clinit> // Initialized lazily to avoid initialization ordering dependencies between Method and SymbolTable @@ -106,11 +110,6 @@ } - /* - private static AddressCField interpreterEntry; - private static AddressCField fromCompiledCodeEntryPoint; - */ - // Accessors for declared fields public ConstMethod getConstMethod() { Address addr = constMethod.getValue(getAddress()); @@ -128,7 +127,6 @@ return (MethodCounters) VMObjectFactory.newObject(MethodCounters.class, addr); } /** WARNING: this is in words, not useful in this system; use getObjectSize() instead */ - public long getMethodSize() { return methodSize.getValue(this); } public long getMaxStack() { return getConstMethod().getMaxStack(); } public long getMaxLocals() { return getConstMethod().getMaxLocals(); } public long getSizeOfParameters() { return getConstMethod().getSizeOfParameters(); } @@ -265,7 +263,7 @@ } public long getSize() { - return getMethodSize(); + return type.getSize() + (isNative() ? 2: 0); } public void printValueOn(PrintStream tty) { @@ -273,7 +271,6 @@ } public void iterateFields(MetadataVisitor visitor) { - visitor.doCInt(methodSize, true); visitor.doCInt(accessFlags, true); }
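A short sketch of how an SA tool might report method sizes now that getMethodSize() is gone: the numbers come from the ConstMethod accessors and the type-based getSize() shown above (the helper class itself is hypothetical):

    import java.io.PrintStream;
    import sun.jvm.hotspot.oops.Method;

    class MethodSizePrinter {
        // Hypothetical helper: size information is derived from ConstMethod
        // accessors plus Method.getSize(), which is now computed from the
        // Method type's size rather than the removed _method_size field.
        static void print(Method m, PrintStream out) {
            out.println("max stack       = " + m.getMaxStack());
            out.println("max locals      = " + m.getMaxLocals());
            out.println("parameter words = " + m.getSizeOfParameters());
            out.println("object size     = " + m.getSize());
        }
    }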
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java Wed Oct 21 16:38:48 2015 -0400 @@ -536,9 +536,6 @@ } } - public void visitValueLocation(Address valueAddr) { - } - public void visitNarrowOopLocation(Address compOopAddr) { addressVisitor.visitCompOopAddress(compOopAddr); }
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java Wed Oct 21 16:38:48 2015 -0400 @@ -1220,9 +1220,6 @@ oms = new OopMapStream(map, OopMapValue.OopTypes.NARROWOOP_VALUE); buf.append(omvIterator.iterate(oms, "NarrowOops:", false)); - oms = new OopMapStream(map, OopMapValue.OopTypes.VALUE_VALUE); - buf.append(omvIterator.iterate(oms, "Values:", false)); - oms = new OopMapStream(map, OopMapValue.OopTypes.CALLEE_SAVED_VALUE); buf.append(omvIterator.iterate(oms, "Callee saved:", true));
--- a/hotspot/make/bsd/makefiles/compiler1.make Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/bsd/makefiles/compiler1.make Wed Oct 21 16:38:48 2015 -0400 @@ -28,4 +28,7 @@ VM_SUBDIR = client +# We don't support the JVMCI in a client VM. +INCLUDE_JVMCI := false + CFLAGS += -DCOMPILER1
--- a/hotspot/make/bsd/makefiles/gcc.make Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/bsd/makefiles/gcc.make Wed Oct 21 16:38:48 2015 -0400 @@ -149,6 +149,7 @@ PCH_FLAG/sharedRuntimeTrig.o = $(PCH_FLAG/NO_PCH) PCH_FLAG/sharedRuntimeTrans.o = $(PCH_FLAG/NO_PCH) PCH_FLAG/unsafe.o = $(PCH_FLAG/NO_PCH) + PCH_FLAG/jvmciCompilerToVM.o = $(PCH_FLAG/NO_PCH) endif else # ($(USE_CLANG), true) @@ -320,10 +321,11 @@ # Work around some compiler bugs. ifeq ($(USE_CLANG), true) - # Clang <= 6.1 + # Clang < 6 | <= 6.1 | <= 7.0 ifeq ($(shell expr \ $(CC_VER_MAJOR) \< 6 \| \ - \( $(CC_VER_MAJOR) = 6 \& $(CC_VER_MINOR) \<= 1 \) \ + \( $(CC_VER_MAJOR) = 6 \& $(CC_VER_MINOR) \<= 1 \) \| \ + \( $(CC_VER_MAJOR) = 7 \& $(CC_VER_MINOR) \<= 0 \) \ ), 1) OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT) OPT_CFLAGS/unsafe.o += -O1
--- a/hotspot/make/bsd/makefiles/jsig.make Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/bsd/makefiles/jsig.make Wed Oct 21 16:38:48 2015 -0400 @@ -63,11 +63,11 @@ @echo $(LOG_INFO) Making signal interposition lib... ifeq ($(STATIC_BUILD),true) $(QUIETLY) $(CC) -c $(SYMFLAG) $(EXTRA_CFLAGS) $(ARCHFLAG) $(PICFLAG) \ - $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) -o $(JSIG).o $< + $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) -o $(JSIG).o $< $(QUIETLY) $(AR) $(ARFLAGS) $@ $(JSIG).o else $(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \ - $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) -o $@ $< + $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) $(EXTRA_CFLAGS) -o $@ $< endif ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifeq ($(OS_VENDOR), Darwin)
--- a/hotspot/make/bsd/makefiles/minimal1.make Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/bsd/makefiles/minimal1.make Wed Oct 21 16:38:48 2015 -0400 @@ -38,6 +38,7 @@ INCLUDE_NMT := false INCLUDE_TRACE := false INCLUDE_CDS := false +INCLUDE_JVMCI := false CXXFLAGS += -DMINIMAL_JVM -DCOMPILER1 -DVMTYPE=\"Minimal\" CFLAGS += -DMINIMAL_JVM -DCOMPILER1 -DVMTYPE=\"Minimal\"
--- a/hotspot/make/excludeSrc.make Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/excludeSrc.make Wed Oct 21 16:38:48 2015 -0400 @@ -106,6 +106,25 @@ memTracker.cpp nmtDCmd.cpp mallocSiteTable.cpp endif +ifneq (,$(findstring $(Platform_arch_model), x86_64, sparc)) + # JVMCI is supported only on x86_64 and SPARC. +else + INCLUDE_JVMCI := false +endif + +ifeq ($(INCLUDE_JVMCI), false) + CXXFLAGS += -DINCLUDE_JVMCI=0 + CFLAGS += -DINCLUDE_JVMCI=0 + + jvmci_dir := $(HS_COMMON_SRC)/share/vm/jvmci + jvmci_dir_alt := $(HS_ALT_SRC)/share/vm/jvmci + jvmci_exclude := $(notdir $(wildcard $(jvmci_dir)/*.cpp)) \ + $(notdir $(wildcard $(jvmci_dir_alt)/*.cpp)) + Src_Files_EXCLUDE += $(jvmci_exclude) \ + jvmciCodeInstaller_aarch64.cpp jvmciCodeInstaller_ppc.cpp jvmciCodeInstaller_sparc.cpp \ + jvmciCodeInstaller_x86.cpp +endif + -include $(HS_ALT_MAKE)/excludeSrc.make .PHONY: $(HS_ALT_MAKE)/excludeSrc.make
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/make/gensrc/Gensrc-jdk.vm.ci.gmk Wed Oct 21 16:38:48 2015 -0400 @@ -0,0 +1,121 @@ +# +# Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + +default: all + +include $(SPEC) +include MakeBase.gmk +include JavaCompilation.gmk +include SetupJavaCompilers.gmk + +GENSRC_DIR := $(SUPPORT_OUTPUTDIR)/gensrc/jdk.vm.ci +SRC_DIR := $(HOTSPOT_TOPDIR)/src/jdk.vm.ci/share/classes + +################################################################################ +# Compile the annotation processor + +$(eval $(call SetupJavaCompilation, BUILD_JVMCI_OPTIONS, \ + SETUP := GENERATE_OLDBYTECODE, \ + SRC := $(SRC_DIR)/jdk.vm.ci.options/src \ + $(SRC_DIR)/jdk.vm.ci.options.processor/src \ + $(SRC_DIR)/jdk.vm.ci.inittimer/src, \ + BIN := $(BUILDTOOLS_OUTPUTDIR)/jvmci_options, \ + JAR := $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.options.jar, \ +)) + +$(eval $(call SetupJavaCompilation, BUILD_JVMCI_SERVICE, \ + SETUP := GENERATE_OLDBYTECODE, \ + SRC := $(SRC_DIR)/jdk.vm.ci.service/src \ + $(SRC_DIR)/jdk.vm.ci.service.processor/src, \ + BIN := $(BUILDTOOLS_OUTPUTDIR)/jvmci_service, \ + JAR := $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.service.jar, \ +)) + +################################################################################ + +PROC_SRC_SUBDIRS := \ + jdk.vm.ci.compiler \ + jdk.vm.ci.hotspot \ + jdk.vm.ci.hotspot.amd64 \ + jdk.vm.ci.hotspot.sparc \ + # + +PROC_SRC_DIRS := $(patsubst %, $(SRC_DIR)/%/src, $(PROC_SRC_SUBDIRS)) + +PROC_SRCS := $(filter %.java, $(call CacheFind, $(PROC_SRC_DIRS))) + +ALL_SRC_DIRS := $(wildcard $(SRC_DIR)/*/src) +SOURCEPATH := $(call PathList, $(ALL_SRC_DIRS)) +PROCESSOR_PATH := $(call PathList, \ + $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.options.jar \ + $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.service.jar) + +$(GENSRC_DIR)/_gensrc_proc_done: $(PROC_SRCS) \ + $(BUILD_JVMCI_OPTIONS) $(BUILD_JVMCI_SERVICE) + $(MKDIR) -p $(@D) + $(eval $(call ListPathsSafely,PROC_SRCS,$(@D)/_gensrc_proc_files)) + $(JAVA_SMALL) $(NEW_JAVAC) \ + -sourcepath $(SOURCEPATH) \ + -implicit:none \ + -proc:only \ + -processorpath $(PROCESSOR_PATH) \ + -d $(GENSRC_DIR) \ + -s $(GENSRC_DIR) \ + @$(@D)/_gensrc_proc_files + $(TOUCH) $@ + +TARGETS += $(GENSRC_DIR)/_gensrc_proc_done + +################################################################################ + +$(GENSRC_DIR)/META-INF/services/jdk.vm.ci.options.OptionDescriptors: \ + 
$(GENSRC_DIR)/_gensrc_proc_done + $(MKDIR) -p $(@D) + ($(CD) $(GENSRC_DIR)/META-INF/jvmci.options && \ + $(RM) -f $@; \ + for i in $$(ls); do \ + echo $${i}_OptionDescriptors >> $@; \ + done) + +TARGETS += $(GENSRC_DIR)/META-INF/services/jdk.vm.ci.options.OptionDescriptors + +################################################################################ + +$(GENSRC_DIR)/_providers_converted: $(GENSRC_DIR)/_gensrc_proc_done + $(MKDIR) -p $(GENSRC_DIR)/META-INF/services + ($(CD) $(GENSRC_DIR)/META-INF/jvmci.providers && \ + for i in $$($(LS)); do \ + c=$$($(CAT) $$i | $(TR) -d '\n\r'); \ + $(ECHO) $$i >> $(GENSRC_DIR)/META-INF/services/$$c; \ + done) + $(TOUCH) $@ + +TARGETS += $(GENSRC_DIR)/_providers_converted + +################################################################################ + +all: $(TARGETS) + +.PHONY: default all
--- a/hotspot/make/linux/makefiles/compiler1.make Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/linux/makefiles/compiler1.make Wed Oct 21 16:38:48 2015 -0400 @@ -28,4 +28,7 @@ VM_SUBDIR = client +# We don't support the JVMCI in a client VM. +INCLUDE_JVMCI := false + CFLAGS += -DCOMPILER1
--- a/hotspot/make/linux/makefiles/gcc.make Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/linux/makefiles/gcc.make Wed Oct 21 16:38:48 2015 -0400 @@ -213,12 +213,16 @@ # Since GCC 4.3, -Wconversion has changed its meanings to warn these implicit # conversions which might affect the values. Only enable it in earlier versions. ifeq "$(shell expr \( $(CC_VER_MAJOR) \> 4 \) \| \( \( $(CC_VER_MAJOR) = 4 \) \& \( $(CC_VER_MINOR) \>= 3 \) \))" "0" + # GCC < 4.3 WARNING_FLAGS += -Wconversion endif ifeq "$(shell expr \( $(CC_VER_MAJOR) \> 4 \) \| \( \( $(CC_VER_MAJOR) = 4 \) \& \( $(CC_VER_MINOR) \>= 8 \) \))" "1" + # GCC >= 4.8 # This flag is only known since GCC 4.3. Gcc 4.8 contains a fix so that with templates no # warnings are issued: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11856 WARNING_FLAGS += -Wtype-limits + # GCC < 4.8 don't accept this flag for C++. + WARNING_FLAGS += -Wno-format-zero-length endif endif
--- a/hotspot/make/linux/makefiles/minimal1.make Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/linux/makefiles/minimal1.make Wed Oct 21 16:38:48 2015 -0400 @@ -38,6 +38,7 @@ INCLUDE_NMT := false INCLUDE_TRACE := false INCLUDE_CDS := false +INCLUDE_JVMCI := false CXXFLAGS += -DMINIMAL_JVM -DCOMPILER1 -DVMTYPE=\"Minimal\" CFLAGS += -DMINIMAL_JVM -DCOMPILER1 -DVMTYPE=\"Minimal\"
--- a/hotspot/make/solaris/makefiles/compiler1.make Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/solaris/makefiles/compiler1.make Wed Oct 21 16:38:48 2015 -0400 @@ -28,4 +28,7 @@ VM_SUBDIR = client +# We don't support the JVMCI in a client VM. +INCLUDE_JVMCI := false + CFLAGS += -DCOMPILER1
--- a/hotspot/make/windows/build_vm_def.sh Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/windows/build_vm_def.sh Wed Oct 21 16:38:48 2015 -0400 @@ -52,6 +52,7 @@ CAT="$MKS_HOME/cat.exe" RM="$MKS_HOME/rm.exe" DUMPBIN="link.exe /dump" +export VS_UNICODE_OUTPUT= if [ "$1" = "-nosa" ]; then echo EXPORTS > vm.def
--- a/hotspot/make/windows/create_obj_files.sh Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/windows/create_obj_files.sh Wed Oct 21 16:38:48 2015 -0400 @@ -111,6 +111,7 @@ COMPILER2_SPECIFIC_FILES="opto libadt bcEscapeAnalyzer.cpp c2_* runtime_*" COMPILER1_SPECIFIC_FILES="c1_*" +JVMCI_SPECIFIC_FILES="*jvmci* *JVMCI*" SHARK_SPECIFIC_FILES="shark" ZERO_SPECIFIC_FILES="zero" @@ -119,11 +120,11 @@ # Exclude per type. case "${TYPE}" in - "compiler1") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER2_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;; + "compiler1") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER2_SPECIFIC_FILES} ${JVMCI_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;; "compiler2") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES}" ;; "tiered") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES}" ;; - "zero") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;; - "shark") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES}" ;; + "zero") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${JVMCI_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;; + "shark") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${JVMCI_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES}" ;; esac # Special handling of arch model.
--- a/hotspot/make/windows/makefiles/compile.make Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/windows/makefiles/compile.make Wed Oct 21 16:38:48 2015 -0400 @@ -1,5 +1,5 @@ # -# Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it @@ -31,6 +31,7 @@ # /nologo Supress copyright message at every cl.exe startup # /W3 Warning level 3 # /Zi Include debugging information +# /d2Zi+ Extended debugging symbols for optimized code (/Zo in VS2013 Update 3 and later) # /WX Treat any warning error as a fatal error # /MD Use dynamic multi-threaded runtime (msvcrt.dll or msvc*NN.dll) # /MTd Use static multi-threaded runtime debug versions @@ -57,7 +58,7 @@ # Let's add debug information when Full Debug Symbols is enabled !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1" -CXX_FLAGS=$(CXX_FLAGS) /Zi +CXX_FLAGS=$(CXX_FLAGS) /Zi /d2Zi+ !endif # Based on BUILDARCH we add some flags and select the default compiler name
--- a/hotspot/make/windows/makefiles/projectcreator.make Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/windows/makefiles/projectcreator.make Wed Oct 21 16:38:48 2015 -0400 @@ -145,6 +145,10 @@ -ignorePath_TARGET tiered \ -ignorePath_TARGET c1_ +ProjectCreatorIDEOptionsIgnoreJVMCI=\ + -ignorePath_TARGET src/share/vm/jvmci \ + -ignorePath_TARGET vm/jvmci + ProjectCreatorIDEOptionsIgnoreCompiler2=\ -ignorePath_TARGET compiler2 \ -ignorePath_TARGET tiered \ @@ -165,6 +169,8 @@ ################################################## ProjectCreatorIDEOptions=$(ProjectCreatorIDEOptions) \ -define_compiler1 COMPILER1 \ + -define_compiler1 INCLUDE_JVMCI=0 \ +$(ProjectCreatorIDEOptionsIgnoreJVMCI:TARGET=compiler1) \ $(ProjectCreatorIDEOptionsIgnoreCompiler2:TARGET=compiler1) ##################################################
--- a/hotspot/make/windows/makefiles/vm.make Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/make/windows/makefiles/vm.make Wed Oct 21 16:38:48 2015 -0400 @@ -40,7 +40,7 @@ !endif !if "$(Variant)" == "compiler1" -CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER1" +CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER1" /D INCLUDE_JVMCI=0 !endif !if "$(Variant)" == "compiler2" @@ -152,6 +152,7 @@ VM_PATH=$(VM_PATH);../generated/jvmtifiles VM_PATH=$(VM_PATH);../generated/tracefiles VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/c1 +VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/jvmci VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/compiler VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/code VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/interpreter @@ -163,6 +164,7 @@ VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc/cms VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc/g1 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/asm +VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/logging VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/memory VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/oops VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/prims @@ -232,6 +234,9 @@ {$(COMMONSRC)\share\vm\classfile}.cpp.obj:: $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $< +{$(COMMONSRC)\share\vm\jvmci}.cpp.obj:: + $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $< + {$(COMMONSRC)\share\vm\gc\parallel}.cpp.obj:: $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $< @@ -250,6 +255,9 @@ {$(COMMONSRC)\share\vm\asm}.cpp.obj:: $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $< +{$(COMMONSRC)\share\vm\logging}.cpp.obj:: + $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $< + {$(COMMONSRC)\share\vm\memory}.cpp.obj:: $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $< @@ -330,6 +338,9 @@ {$(ALTSRC)\share\vm\asm}.cpp.obj:: $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $< +{$(ALTSRC)\share\vm\logging}.cpp.obj:: + $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $< + {$(ALTSRC)\share\vm\memory}.cpp.obj:: $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
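The aarch64.ad changes below teach C2 to recognise inlined CAS operations so it can emit the ldaxr/stlxr forms (compareAndSwapXAcq) and elide the surrounding dmb barriers. A minimal sketch of Java code that exercises that path: AtomicInteger.compareAndSet intrinsifies to a CompareAndSwapI node wrapped in the release/acquire membars described in the comments that follow (the class and method names here are mine):

    import java.util.concurrent.atomic.AtomicInteger;

    public class CasIncrement {
        private static final AtomicInteger counter = new AtomicInteger();

        // Classic CAS retry loop; once C2 compiles this, the new
        // needs_acquiring_load_exclusive predicate lets the matcher pick the
        // ldaxr/stlxr rule instead of a dmb-bracketed ldxr/stlxr sequence.
        static int increment() {
            for (;;) {
                int old = counter.get();
                if (counter.compareAndSet(old, old + 1)) {
                    return old + 1;
                }
            }
        }

        public static void main(String[] args) {
            System.out.println(increment());
        }
    }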
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Wed Oct 21 16:38:48 2015 -0400 @@ -1039,6 +1039,7 @@ bool leading_membar(const MemBarNode *barrier); bool is_card_mark_membar(const MemBarNode *barrier); + bool is_CAS(int opcode); MemBarNode *leading_to_normal(MemBarNode *leading); MemBarNode *normal_to_leading(const MemBarNode *barrier); @@ -1057,6 +1058,9 @@ bool unnecessary_volatile(const Node *barrier); bool needs_releasing_store(const Node *store); + // predicate controlling translation of CompareAndSwapX + bool needs_acquiring_load_exclusive(const Node *load); + // predicate controlling translation of StoreCM bool unnecessary_storestore(const Node *storecm); %} @@ -1088,15 +1092,58 @@ // str<x> // dmb ish // + // We can also use ldaxr and stlxr to implement compare and swap CAS + // sequences. These are normally translated to an instruction + // sequence like the following + // + // dmb ish + // retry: + // ldxr<x> rval raddr + // cmp rval rold + // b.ne done + // stlxr<x> rval, rnew, rold + // cbnz rval retry + // done: + // cset r0, eq + // dmb ishld + // + // Note that the exclusive store is already using an stlxr + // instruction. That is required to ensure visibility to other + // threads of the exclusive write (assuming it succeeds) before that + // of any subsequent writes. + // + // The following instruction sequence is an improvement on the above + // + // retry: + // ldaxr<x> rval raddr + // cmp rval rold + // b.ne done + // stlxr<x> rval, rnew, rold + // cbnz rval retry + // done: + // cset r0, eq + // + // We don't need the leading dmb ish since the stlxr guarantees + // visibility of prior writes in the case that the swap is + // successful. Crucially we don't have to worry about the case where + // the swap is not successful since no valid program should be + // relying on visibility of prior changes by the attempting thread + // in the case where the CAS fails. + // + // Similarly, we don't need the trailing dmb ishld if we substitute + // an ldaxr instruction since that will provide all the guarantees we + // require regarding observation of changes made by other threads + // before any change to the CAS address observed by the load. + // // In order to generate the desired instruction sequence we need to // be able to identify specific 'signature' ideal graph node // sequences which i) occur as a translation of a volatile reads or - // writes and ii) do not occur through any other translation or - // graph transformation. We can then provide alternative aldc - // matching rules which translate these node sequences to the - // desired machine code sequences. Selection of the alternative - // rules can be implemented by predicates which identify the - // relevant node sequences. + // writes or CAS operations and ii) do not occur through any other + // translation or graph transformation. We can then provide + // alternative aldc matching rules which translate these node + // sequences to the desired machine code sequences. Selection of the + // alternative rules can be implemented by predicates which identify + // the relevant node sequences. // // The ideal graph generator translates a volatile read to the node // sequence @@ -1163,6 +1210,15 @@ // get if it is fed and feeds a cpuorder membar and if its feed // membar also feeds an acquiring load. 
// + // Finally an inlined (Unsafe) CAS operation is translated to the + // following ideal graph + // + // MemBarRelease + // MemBarCPUOrder + // CompareAndSwapX {CardMark}-optional + // MemBarCPUOrder + // MemBarAcquire + // // So, where we can identify these volatile read and write // signatures we can choose to plant either of the above two code // sequences. For a volatile read we can simply plant a normal @@ -1177,6 +1233,14 @@ // and MemBarVolatile and instead plant a simple stlr<x> // instruction. // + // when we recognise a CAS signature we can choose to plant a dmb + // ish as a translation for the MemBarRelease, the conventional + // macro-instruction sequence for the CompareAndSwap node (which + // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire. + // Alternatively, we can elide generation of the dmb instructions + // and plant the alternative CompareAndSwap macro-instruction + // sequence (which uses ldaxr<x>). + // // Of course, the above only applies when we see these signature // configurations. We still want to plant dmb instructions in any // other cases where we may see a MemBarAcquire, MemBarRelease or @@ -1194,7 +1258,8 @@ // relevant dmb instructions. // - // graph traversal helpers used for volatile put/get optimization + // graph traversal helpers used for volatile put/get and CAS + // optimization // 1) general purpose helpers @@ -1220,16 +1285,19 @@ return NULL; } - if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) + if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) { return NULL; + } membar = ctl->lookup(0); - if (!membar || !membar->is_MemBar()) + if (!membar || !membar->is_MemBar()) { return NULL; - - if (mem->lookup(0) != membar) + } + + if (mem->lookup(0) != membar) { return NULL; + } return membar->as_MemBar(); } @@ -1259,8 +1327,9 @@ } } - if (child == NULL) + if (child == NULL) { return NULL; + } for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { x = mem->fast_out(i); @@ -1283,15 +1352,18 @@ { int opcode = barrier->Opcode(); // if this is a release membar we are ok - if (opcode == Op_MemBarRelease) + if (opcode == Op_MemBarRelease) { return true; + } // if its a cpuorder membar . . . 
- if (opcode != Op_MemBarCPUOrder) + if (opcode != Op_MemBarCPUOrder) { return false; + } // then the parent has to be a release membar MemBarNode *parent = parent_membar(barrier); - if (!parent) + if (!parent) { return false; + } opcode = parent->Opcode(); return opcode == Op_MemBarRelease; } @@ -1314,11 +1386,13 @@ bool is_card_mark_membar(const MemBarNode *barrier) { - if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) + if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) { return false; - - if (barrier->Opcode() != Op_MemBarVolatile) + } + + if (barrier->Opcode() != Op_MemBarVolatile) { return false; + } ProjNode *mem = barrier->proj_out(TypeFunc::Memory); @@ -1333,8 +1407,8 @@ } - // 3) helper predicates to traverse volatile put graphs which may - // contain GC barrier subgraphs + // 3) helper predicates to traverse volatile put or CAS graphs which + // may contain GC barrier subgraphs // Preamble // -------- @@ -1404,8 +1478,7 @@ // currently being unmarked in which case the volatile put graph // will look slightly different // - // MemBarRelease - // MemBarCPUOrder___________________________________________ + // MemBarRelease____________________________________________ // || \\ Ctl \ Ctl \ \\ Mem \ // || StoreN/P[mo_release] CastP2X If LoadB | // | \ / \ | @@ -1419,7 +1492,7 @@ // memory flow includes the following subgraph: // // MemBarRelease - // MemBarCPUOrder + // {MemBarCPUOrder} // | \ . . . // | StoreX[mo_release] . . . // | / @@ -1431,8 +1504,48 @@ // detected starting from any candidate MemBarRelease, // StoreX[mo_release] or MemBarVolatile. // + // A simple variation on this normal case occurs for an unsafe CAS + // operation. The basic graph for a non-object CAS is + // + // MemBarRelease + // || + // MemBarCPUOrder + // || \\ . . . + // || CompareAndSwapX + // || | + // || SCMemProj + // | \ / + // | MergeMem + // | / + // MemBarCPUOrder + // || + // MemBarAcquire + // + // The same basic variations on this arrangement (mutatis mutandis) + // occur when a card mark is introduced. i.e. we se the same basic + // shape but the StoreP/N is replaced with CompareAndSawpP/N and the + // tail of the graph is a pair comprising a MemBarCPUOrder + + // MemBarAcquire. + // + // So, in the case of a CAS the normal graph has the variant form + // + // MemBarRelease + // MemBarCPUOrder + // | \ . . . + // | CompareAndSwapX . . . + // | | + // | SCMemProj + // | / . . . + // MergeMem + // | + // MemBarCPUOrder + // MemBarAcquire + // + // This graph can also easily be detected starting from any + // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire. + // // the code below uses two helper predicates, leading_to_normal and - // normal_to_leading to identify this configuration, one validating + // normal_to_leading to identify these normal graphs, one validating // the layout starting from the top membar and searching down and // the other validating the layout starting from the lower membar // and searching up. @@ -1450,7 +1563,9 @@ // they are only inserted for object puts. This significantly // complicates the task of identifying whether a MemBarRelease, // StoreX[mo_release] or MemBarVolatile forms part of a volatile put - // when using these GC configurations (see below). + // when using these GC configurations (see below). It adds similar + // complexity to the task of identifying whether a MemBarRelease, + // CompareAndSwapX or MemBarAcquire forms part of a CAS. 
// // In both cases the post-write subtree includes an auxiliary // MemBarVolatile (StoreLoad barrier) separating the object put and @@ -1489,7 +1604,8 @@ // (LoadB) from the card. Ctl and Mem are fed to the If via an // intervening StoreLoad barrier (MemBarVolatile). // - // So, with CMS we may see a node graph which looks like this + // So, with CMS we may see a node graph for a volatile object store + // which looks like this // // MemBarRelease // MemBarCPUOrder_(leading)__________________ @@ -1524,6 +1640,55 @@ // from the StoreCM into the trailing membar (n.b. the latter // proceeds via a Phi associated with the If region). // + // The graph for a CAS varies slightly, the obvious difference being + // that the StoreN/P node is replaced by a CompareAndSwapP/N node + // and the trailing MemBarVolatile by a MemBarCPUOrder + + // MemBarAcquire pair. The other important difference is that the + // CompareAndSwap node's SCMemProj is not merged into the card mark + // membar - it still feeds the trailing MergeMem. This also means + // that the card mark membar receives its Mem feed directly from the + // leading membar rather than via a MergeMem. + // + // MemBarRelease + // MemBarCPUOrder__(leading)_________________________ + // || \\ C \ + // MemBarVolatile (card mark) CompareAndSwapN/P CastP2X + // C | || M | | + // | LoadB | ______/| + // | | | / | + // | Cmp | / SCMemProj + // | / | / | + // If | / / + // | \ | / / + // IfFalse IfTrue | / / + // \ / \ |/ prec / + // \ / StoreCM / + // \ / | / + // Region . . . / + // | \ / + // | . . . \ / Bot + // | MergeMem + // | | + // MemBarCPUOrder + // MemBarAcquire (trailing) + // + // This has a slightly different memory subgraph to the one seen + // previously but the core of it is the same as for the CAS normal + // sungraph + // + // MemBarRelease + // MemBarCPUOrder____ + // || \ . . . + // MemBarVolatile CompareAndSwapX . . . + // | \ | + // . . . SCMemProj + // | / . . . + // MergeMem + // | + // MemBarCPUOrder + // MemBarAcquire + // + // // G1 is quite a lot more complicated. The nodes inserted on behalf // of G1 may comprise: a pre-write graph which adds the old value to // the SATB queue; the releasing store itself; and, finally, a @@ -1575,12 +1740,16 @@ // n.b. the LoadB in this subgraph is not the card read -- it's a // read of the SATB queue active flag. // + // Once again the CAS graph is a minor variant on the above with the + // expected substitutions of CompareAndSawpX for StoreN/P and + // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile. + // // The G1 post-write subtree is also optional, this time when the // new value being written is either null or can be identified as a // newly allocated (young gen) object with no intervening control // flow. The latter cannot happen but the former may, in which case - // the card mark membar is omitted and the memory feeds from the - // leading membar and the StoreN/P are merged direct into the + // the card mark membar is omitted and the memory feeds form the + // leading membar and the SToreN/P are merged direct into the // trailing membar as per the normal subgraph. So, the only special // case which arises is when the post-write subgraph is generated. // @@ -1668,113 +1837,53 @@ // value check has been elided the total number of Phis is 2 // otherwise it is 3. // + // The CAS graph when using G1GC also includes a pre-write subgraph + // and an optional post-write subgraph. Teh sam evarioations are + // introduced as for CMS with conditional card marking i.e. 
the + // StoreP/N is swapped for a CompareAndSwapP/N, the tariling + // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the + // Mem feed from the CompareAndSwapP/N includes a precedence + // dependency feed to the StoreCM and a feed via an SCMemProj to the + // trailing membar. So, as before the configuration includes the + // normal CAS graph as a subgraph of the memory flow. + // // So, the upshot is that in all cases the volatile put graph will // include a *normal* memory subgraph betwen the leading membar and - // its child membar. When that child is not a card mark membar then - // it marks the end of a volatile put subgraph. If the child is a - // card mark membar then the normal subgraph will form part of a - // volatile put subgraph if and only if the child feeds an - // AliasIdxBot Mem feed to a trailing barrier via a MergeMem. That - // feed is either direct (for CMS) or via 2 or 3 Phi nodes merging - // the leading barrier memory flow (for G1). + // its child membar, either a volatile put graph (including a + // releasing StoreX) or a CAS graph (including a CompareAndSwapX). + // When that child is not a card mark membar then it marks the end + // of the volatile put or CAS subgraph. If the child is a card mark + // membar then the normal subgraph will form part of a volatile put + // subgraph if and only if the child feeds an AliasIdxBot Mem feed + // to a trailing barrier via a MergeMem. That feed is either direct + // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier + // memory flow (for G1). // // The predicates controlling generation of instructions for store // and barrier nodes employ a few simple helper functions (described - // below) which identify the presence or absence of these subgraph - // configurations and provide a means of traversing from one node in - // the subgraph to another. + // below) which identify the presence or absence of all these + // subgraph configurations and provide a means of traversing from + // one node in the subgraph to another. + + // is_CAS(int opcode) + // + // return true if opcode is one of the possible CompareAndSwapX + // values otherwise false. + + bool is_CAS(int opcode) + { + return (opcode == Op_CompareAndSwapI || + opcode == Op_CompareAndSwapL || + opcode == Op_CompareAndSwapN || + opcode == Op_CompareAndSwapP); + } // leading_to_normal // - //graph traversal helper which detects the normal case Mem feed - // from a release membar (or, optionally, its cpuorder child) to a - // dependent volatile membar i.e. it ensures that the following Mem - // flow subgraph is present. - // - // MemBarRelease - // MemBarCPUOrder - // | \ . . . - // | StoreN/P[mo_release] . . . - // | / - // MergeMem - // | - // MemBarVolatile - // - // if the correct configuration is present returns the volatile - // membar otherwise NULL. - // - // the input membar is expected to be either a cpuorder membar or a - // release membar. in the latter case it should not have a cpu membar - // child. - // - // the returned membar may be a card mark membar rather than a - // trailing membar. 
- - MemBarNode *leading_to_normal(MemBarNode *leading) - { - assert((leading->Opcode() == Op_MemBarRelease || - leading->Opcode() == Op_MemBarCPUOrder), - "expecting a volatile or cpuroder membar!"); - - // check the mem flow - ProjNode *mem = leading->proj_out(TypeFunc::Memory); - - if (!mem) - return NULL; - - Node *x = NULL; - StoreNode * st = NULL; - MergeMemNode *mm = NULL; - - for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { - x = mem->fast_out(i); - if (x->is_MergeMem()) { - if (mm != NULL) - return NULL; - // two merge mems is one too many - mm = x->as_MergeMem(); - } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) { - // two releasing stores is one too many - if (st != NULL) - return NULL; - st = x->as_Store(); - } - } - - if (!mm || !st) - return NULL; - - bool found = false; - // ensure the store feeds the merge - for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) { - if (st->fast_out(i) == mm) { - found = true; - break; - } - } - - if (!found) - return NULL; - - MemBarNode *mbvol = NULL; - // ensure the merge feeds a volatile membar - for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { - x = mm->fast_out(i); - if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { - mbvol = x->as_MemBar(); - break; - } - } - - return mbvol; - } - - // normal_to_leading - // - // graph traversal helper which detects the normal case Mem feed - // from either a card mark or a trailing membar to a preceding - // release membar (optionally its cpuorder child) i.e. it ensures - // that the following Mem flow subgraph is present. + //graph traversal helper which detects the normal case Mem feed from + // a release membar (or, optionally, its cpuorder child) to a + // dependent volatile membar i.e. it ensures that one or other of + // the following Mem flow subgraph is present. // // MemBarRelease // MemBarCPUOrder {leading} @@ -1783,7 +1892,165 @@ // | / // MergeMem // | - // MemBarVolatile + // MemBarVolatile {trailing or card mark} + // + // MemBarRelease + // MemBarCPUOrder {leading} + // | \ . . . + // | CompareAndSwapX . . . + // | + // . . . SCMemProj + // \ | + // | MergeMem + // | / + // MemBarCPUOrder + // MemBarAcquire {trailing} + // + // if the correct configuration is present returns the trailing + // membar otherwise NULL. + // + // the input membar is expected to be either a cpuorder membar or a + // release membar. in the latter case it should not have a cpu membar + // child. 
+ // + // the returned value may be a card mark or trailing membar + // + + MemBarNode *leading_to_normal(MemBarNode *leading) + { + assert((leading->Opcode() == Op_MemBarRelease || + leading->Opcode() == Op_MemBarCPUOrder), + "expecting a volatile or cpuroder membar!"); + + // check the mem flow + ProjNode *mem = leading->proj_out(TypeFunc::Memory); + + if (!mem) { + return NULL; + } + + Node *x = NULL; + StoreNode * st = NULL; + LoadStoreNode *cas = NULL; + MergeMemNode *mm = NULL; + + for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { + x = mem->fast_out(i); + if (x->is_MergeMem()) { + if (mm != NULL) { + return NULL; + } + // two merge mems is one too many + mm = x->as_MergeMem(); + } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) { + // two releasing stores/CAS nodes is one too many + if (st != NULL || cas != NULL) { + return NULL; + } + st = x->as_Store(); + } else if (is_CAS(x->Opcode())) { + if (st != NULL || cas != NULL) { + return NULL; + } + cas = x->as_LoadStore(); + } + } + + // must have a store or a cas + if (!st && !cas) { + return NULL; + } + + // must have a merge if we also have st + if (st && !mm) { + return NULL; + } + + Node *y = NULL; + if (cas) { + // look for an SCMemProj + for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) { + x = cas->fast_out(i); + if (x->is_Proj()) { + y = x; + break; + } + } + if (y == NULL) { + return NULL; + } + // the proj must feed a MergeMem + for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) { + x = y->fast_out(i); + if (x->is_MergeMem()) { + mm = x->as_MergeMem(); + break; + } + } + if (mm == NULL) + return NULL; + } else { + // ensure the store feeds the existing mergemem; + for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) { + if (st->fast_out(i) == mm) { + y = st; + break; + } + } + if (y == NULL) { + return NULL; + } + } + + MemBarNode *mbar = NULL; + // ensure the merge feeds to the expected type of membar + for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { + x = mm->fast_out(i); + if (x->is_MemBar()) { + int opcode = x->Opcode(); + if (opcode == Op_MemBarVolatile && st) { + mbar = x->as_MemBar(); + } else if (cas && opcode == Op_MemBarCPUOrder) { + MemBarNode *y = x->as_MemBar(); + y = child_membar(y); + if (y != NULL && y->Opcode() == Op_MemBarAcquire) { + mbar = y; + } + } + break; + } + } + + return mbar; + } + + // normal_to_leading + // + // graph traversal helper which detects the normal case Mem feed + // from either a card mark or a trailing membar to a preceding + // release membar (optionally its cpuorder child) i.e. it ensures + // that one or other of the following Mem flow subgraphs is present. + // + // MemBarRelease + // MemBarCPUOrder {leading} + // | \ . . . + // | StoreN/P[mo_release] . . . + // | / + // MergeMem + // | + // MemBarVolatile {card mark or trailing} + // + // MemBarRelease + // MemBarCPUOrder {leading} + // | \ . . . + // | CompareAndSwapX . . . + // | + // . . . SCMemProj + // \ | + // | MergeMem + // | / + // MemBarCPUOrder + // MemBarAcquire {trailing} // // this predicate checks for the same flow as the previous predicate // but starting from the bottom rather than the top. 
@@ -1797,51 +2064,116 @@ MemBarNode *normal_to_leading(const MemBarNode *barrier) { // input must be a volatile membar - assert(barrier->Opcode() == Op_MemBarVolatile, "expecting a volatile membar"); + assert((barrier->Opcode() == Op_MemBarVolatile || + barrier->Opcode() == Op_MemBarAcquire), + "expecting a volatile or an acquire membar"); Node *x; + bool is_cas = barrier->Opcode() == Op_MemBarAcquire; + + // if we have an acquire membar then it must be fed via a CPUOrder + // membar + + if (is_cas) { + // skip to parent barrier which must be a cpuorder + x = parent_membar(barrier); + if (x->Opcode() != Op_MemBarCPUOrder) + return NULL; + } else { + // start from the supplied barrier + x = (Node *)barrier; + } // the Mem feed to the membar should be a merge - x = barrier->in(TypeFunc::Memory); + x = x ->in(TypeFunc::Memory); if (!x->is_MergeMem()) return NULL; MergeMemNode *mm = x->as_MergeMem(); - // the AliasIdxBot slice should be another MemBar projection - x = mm->in(Compile::AliasIdxBot); + if (is_cas) { + // the merge should be fed from the CAS via an SCMemProj node + x = NULL; + for (uint idx = 1; idx < mm->req(); idx++) { + if (mm->in(idx)->Opcode() == Op_SCMemProj) { + x = mm->in(idx); + break; + } + } + if (x == NULL) { + return NULL; + } + // check for a CAS feeding this proj + x = x->in(0); + int opcode = x->Opcode(); + if (!is_CAS(opcode)) { + return NULL; + } + // the CAS should get its mem feed from the leading membar + x = x->in(MemNode::Memory); + } else { + // the merge should get its Bottom mem feed from the leading membar + x = mm->in(Compile::AliasIdxBot); + } + // ensure this is a non control projection - if (!x->is_Proj() || x->is_CFG()) + if (!x->is_Proj() || x->is_CFG()) { return NULL; + } // if it is fed by a membar that's the one we want x = x->in(0); - if (!x->is_MemBar()) + if (!x->is_MemBar()) { return NULL; + } MemBarNode *leading = x->as_MemBar(); // reject invalid candidates - if (!leading_membar(leading)) + if (!leading_membar(leading)) { return NULL; - - // ok, we have a leading ReleaseMembar, now for the sanity clauses - - // the leading membar must feed Mem to a releasing store + } + + // ok, we have a leading membar, now for the sanity clauses + + // the leading membar must feed Mem to a releasing store or CAS ProjNode *mem = leading->proj_out(TypeFunc::Memory); StoreNode *st = NULL; + LoadStoreNode *cas = NULL; for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { x = mem->fast_out(i); if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) { + // two stores or CASes is one too many + if (st != NULL || cas != NULL) { + return NULL; + } st = x->as_Store(); - break; + } else if (is_CAS(x->Opcode())) { + if (st != NULL || cas != NULL) { + return NULL; + } + cas = x->as_LoadStore(); } } - if (st == NULL) + + // we should not have both a store and a cas + if (st == NULL & cas == NULL) { return NULL; - - // the releasing store has to feed the same merge - for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) { - if (st->fast_out(i) == mm) - return leading; + } + + if (st == NULL) { + // nothing more to check + return leading; + } else { + // we should not have a store if we started from an acquire + if (is_cas) { + return NULL; + } + + // the store should feed the merge we used to get here + for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) { + if (st->fast_out(i) == mm) { + return leading; + } + } } return NULL; @@ -1865,8 +2197,8 @@ // Bot | / // MergeMem // | - // MemBarVolatile 
(trailing) - // + // | + // MemBarVolatile {trailing} // // 2) // MemBarRelease/CPUOrder (leading) @@ -1884,7 +2216,8 @@ // Bot | / // MergeMem // | - // MemBarVolatile (trailing) + // MemBarVolatile {trailing} + // // // 3) // MemBarRelease/CPUOrder (leading) @@ -1905,7 +2238,8 @@ // Bot | / // MergeMem // | - // MemBarVolatile (trailing) + // | + // MemBarVolatile {trailing} // // configuration 1 is only valid if UseConcMarkSweepGC && // UseCondCardMark @@ -1955,8 +2289,9 @@ break; } } - if (!phi) + if (!phi) { return NULL; + } // look for another merge below this phi feed = phi; } else { @@ -1969,7 +2304,7 @@ assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge"); MemBarNode *trailing = NULL; - // be sure we have a volatile membar below the merge + // be sure we have a trailing membar the merge for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) { x = mm->fast_out(i); if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) { @@ -1984,24 +2319,32 @@ // trailing_to_card_mark // // graph traversal helper which detects extra, non-normal Mem feed - // from a trailing membar to a preceding card mark volatile membar - // i.e. it identifies whether one of the three possible extra GC - // post-write Mem flow subgraphs is present + // from a trailing volatile membar to a preceding card mark volatile + // membar i.e. it identifies whether one of the three possible extra + // GC post-write Mem flow subgraphs is present // // this predicate checks for the same flow as the previous predicate // but starting from the bottom rather than the top. // - // if the configurationis present returns the card mark membar + // if the configuration is present returns the card mark membar // otherwise NULL + // + // n.b. the supplied membar is expected to be a trailing + // MemBarVolatile i.e. the caller must ensure the input node has the + // correct opcode MemBarNode *trailing_to_card_mark(const MemBarNode *trailing) { - assert(!is_card_mark_membar(trailing), "not expecting a card mark membar"); - + assert(trailing->Opcode() == Op_MemBarVolatile, + "expecting a volatile membar"); + assert(!is_card_mark_membar(trailing), + "not expecting a card mark membar"); + + // the Mem feed to the membar should be a merge Node *x = trailing->in(TypeFunc::Memory); - // the Mem feed to the membar should be a merge - if (!x->is_MergeMem()) + if (!x->is_MergeMem()) { return NULL; + } MergeMemNode *mm = x->as_MergeMem(); @@ -2054,13 +2397,15 @@ } // the proj has to come from the card mark membar x = x->in(0); - if (!x->is_MemBar()) + if (!x->is_MemBar()) { return NULL; + } MemBarNode *card_mark_membar = x->as_MemBar(); - if (!is_card_mark_membar(card_mark_membar)) + if (!is_card_mark_membar(card_mark_membar)) { return NULL; + } return card_mark_membar; } @@ -2068,7 +2413,7 @@ // trailing_to_leading // // graph traversal helper which checks the Mem flow up the graph - // from a (non-card mark) volatile membar attempting to locate and + // from a (non-card mark) trailing membar attempting to locate and // return an associated leading membar. it first looks for a // subgraph in the normal configuration (relying on helper // normal_to_leading). failing that it then looks for one of the @@ -2081,22 +2426,35 @@ // if the configuration is valid returns the cpuorder member for // preference or when absent the release membar otherwise NULL. // - // n.b. the input membar is expected to be a volatile membar but - // must *not* be a card mark membar. + // n.b. 
the input membar is expected to be either a volatile or + // acquire membar but in the former case must *not* be a card mark + // membar. MemBarNode *trailing_to_leading(const MemBarNode *trailing) { - assert(!is_card_mark_membar(trailing), "not expecting a card mark membar"); + assert((trailing->Opcode() == Op_MemBarAcquire || + trailing->Opcode() == Op_MemBarVolatile), + "expecting an acquire or volatile membar"); + assert((trailing->Opcode() != Op_MemBarVolatile || + !is_card_mark_membar(trailing)), + "not expecting a card mark membar"); MemBarNode *leading = normal_to_leading(trailing); - if (leading) + if (leading) { return leading; + } + + // nothing more to do if this is an acquire + if (trailing->Opcode() == Op_MemBarAcquire) { + return NULL; + } MemBarNode *card_mark_membar = trailing_to_card_mark(trailing); - if (!card_mark_membar) + if (!card_mark_membar) { return NULL; + } return normal_to_leading(card_mark_membar); } @@ -2105,10 +2463,12 @@ bool unnecessary_acquire(const Node *barrier) { - // assert barrier->is_MemBar(); - if (UseBarriersForVolatile) + assert(barrier->is_MemBar(), "expecting a membar"); + + if (UseBarriersForVolatile) { // we need to plant a dmb return false; + } // a volatile read derived from bytecode (or also from an inlined // SHA field read via LibraryCallKit::load_field_from_object) @@ -2140,8 +2500,9 @@ // // where * tags node we were passed // and |k means input k - if (x->is_DecodeNarrowPtr()) + if (x->is_DecodeNarrowPtr()) { x = x->in(1); + } return (x->is_Load() && x->as_Load()->is_acquire()); } @@ -2167,8 +2528,9 @@ return false; ctl = parent->proj_out(TypeFunc::Control); mem = parent->proj_out(TypeFunc::Memory); - if (!ctl || !mem) + if (!ctl || !mem) { return false; + } // ensure the proj nodes both feed a LoadX[mo_acquire] LoadNode *ld = NULL; for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) { @@ -2180,38 +2542,46 @@ } } // it must be an acquiring load - if (! ld || ! 
ld->is_acquire()) - return false; - for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { - x = mem->fast_out(i); - // if we see the same load we drop it and stop searching - if (x == ld) { - ld = NULL; - break; - } - } - // we must have dropped the load - if (ld) - return false; - // check for a child cpuorder membar - MemBarNode *child = child_membar(barrier->as_MemBar()); - if (!child || child->Opcode() != Op_MemBarCPUOrder) - return false; - - return true; + if (ld && ld->is_acquire()) { + + for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { + x = mem->fast_out(i); + // if we see the same load we drop it and stop searching + if (x == ld) { + ld = NULL; + break; + } + } + // we must have dropped the load + if (ld == NULL) { + // check for a child cpuorder membar + MemBarNode *child = child_membar(barrier->as_MemBar()); + if (child && child->Opcode() == Op_MemBarCPUOrder) + return true; + } + } + + // final option for unnecessary mebar is that it is a trailing node + // belonging to a CAS + + MemBarNode *leading = trailing_to_leading(barrier->as_MemBar()); + + return leading != NULL; } bool needs_acquiring_load(const Node *n) { - // assert n->is_Load(); - if (UseBarriersForVolatile) + assert(n->is_Load(), "expecting a load"); + if (UseBarriersForVolatile) { // we use a normal load and a dmb return false; + } LoadNode *ld = n->as_Load(); - if (!ld->is_acquire()) + if (!ld->is_acquire()) { return false; + } // check if this load is feeding an acquire membar // @@ -2261,20 +2631,23 @@ membar = parent_membar(ld); - if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) + if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) { return false; + } // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain membar = child_membar(membar); - if (!membar || !membar->Opcode() == Op_MemBarAcquire) + if (!membar || !membar->Opcode() == Op_MemBarAcquire) { return false; + } membar = child_membar(membar); - if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) + if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) { return false; + } return true; } @@ -2285,9 +2658,10 @@ n->Opcode() == Op_MemBarRelease), "expecting a release membar"); - if (UseBarriersForVolatile) + if (UseBarriersForVolatile) { // we need to plant a dmb return false; + } // if there is a dependent CPUOrder barrier then use that as the // leading @@ -2303,12 +2677,14 @@ // must start with a normal feed MemBarNode *child_barrier = leading_to_normal(barrier); - if (!child_barrier) + if (!child_barrier) { return false; - - if (!is_card_mark_membar(child_barrier)) + } + + if (!is_card_mark_membar(child_barrier)) { // this is the trailing membar and we are done return true; + } // must be sure this card mark feeds a trailing membar MemBarNode *trailing = card_mark_to_trailing(child_barrier); @@ -2318,17 +2694,19 @@ bool unnecessary_volatile(const Node *n) { // assert n->is_MemBar(); - if (UseBarriersForVolatile) + if (UseBarriersForVolatile) { // we need to plant a dmb return false; + } MemBarNode *mbvol = n->as_MemBar(); // first we check if this is part of a card mark. if so then we have // to generate a StoreLoad barrier - if (is_card_mark_membar(mbvol)) + if (is_card_mark_membar(mbvol)) { return false; + } // ok, if it's not a card mark then we still need to check if it is // a trailing membar of a volatile put hgraph. 
@@ -2341,29 +2719,33 @@ bool needs_releasing_store(const Node *n) { // assert n->is_Store(); - if (UseBarriersForVolatile) + if (UseBarriersForVolatile) { // we use a normal store and dmb combination return false; + } StoreNode *st = n->as_Store(); // the store must be marked as releasing - if (!st->is_release()) + if (!st->is_release()) { return false; + } // the store must be fed by a membar Node *x = st->lookup(StoreNode::Memory); - if (! x || !x->is_Proj()) + if (! x || !x->is_Proj()) { return false; + } ProjNode *proj = x->as_Proj(); x = proj->lookup(0); - if (!x || !x->is_MemBar()) + if (!x || !x->is_MemBar()) { return false; + } MemBarNode *barrier = x->as_MemBar(); @@ -2372,24 +2754,76 @@ // volatile put graph. // reject invalid candidates - if (!leading_membar(barrier)) + if (!leading_membar(barrier)) { return false; + } // does this lead a normal subgraph? MemBarNode *mbvol = leading_to_normal(barrier); - if (!mbvol) + if (!mbvol) { return false; + } // all done unless this is a card mark - if (!is_card_mark_membar(mbvol)) + if (!is_card_mark_membar(mbvol)) { return true; + } // we found a card mark -- just make sure we have a trailing barrier return (card_mark_to_trailing(mbvol) != NULL); } +// predicate controlling translation of CAS +// +// returns true if CAS needs to use an acquiring load otherwise false + +bool needs_acquiring_load_exclusive(const Node *n) +{ + assert(is_CAS(n->Opcode()), "expecting a compare and swap"); + if (UseBarriersForVolatile) { + return false; + } + + // CAS nodes only ought to turn up in inlined unsafe CAS operations +#ifdef ASSERT + LoadStoreNode *st = n->as_LoadStore(); + + // the store must be fed by a membar + + Node *x = st->lookup(StoreNode::Memory); + + assert (x && x->is_Proj(), "CAS not fed by memory proj!"); + + ProjNode *proj = x->as_Proj(); + + x = proj->lookup(0); + + assert (x && x->is_MemBar(), "CAS not fed by membar!"); + + MemBarNode *barrier = x->as_MemBar(); + + // the barrier must be a cpuorder mmebar fed by a release membar + + assert(barrier->Opcode() == Op_MemBarCPUOrder, + "CAS not fed by cpuorder membar!"); + + MemBarNode *b = parent_membar(barrier); + assert ((b != NULL && b->Opcode() == Op_MemBarRelease), + "CAS not fed by cpuorder+release membar pair!"); + + // does this lead a normal subgraph? 
+ MemBarNode *mbar = leading_to_normal(barrier); + + assert(mbar != NULL, "CAS not embedded in normal graph!"); + + assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire"); +#endif // ASSERT + // so we can just return true here + return true; +} + // predicate controlling translation of StoreCM // // returns true if a StoreStore must precede the card write otherwise @@ -2403,14 +2837,16 @@ // and the associated card mark when we are using CMS without // conditional card marking - if (!UseConcMarkSweepGC || UseCondCardMark) + if (!UseConcMarkSweepGC || UseCondCardMark) { return true; + } // if we are implementing volatile puts using barriers then the // object put as an str so we must insert the dmb ishst - if (UseBarriersForVolatile) + if (UseBarriersForVolatile) { return false; + } // we can omit the dmb ishst if this StoreCM is part of a volatile // put because in thta case the put will be implemented by stlr @@ -2422,19 +2858,22 @@ Node *x = storecm->in(StoreNode::Memory); - if (!x->is_Proj()) + if (!x->is_Proj()) { return false; + } x = x->in(0); - if (!x->is_MemBar()) + if (!x->is_MemBar()) { return false; + } MemBarNode *leading = x->as_MemBar(); // reject invalid candidates - if (!leading_membar(leading)) + if (!leading_membar(leading)) { return false; + } // we can omit the StoreStore if it is the head of a normal subgraph return (leading_to_normal(leading) != NULL); @@ -3024,6 +3463,10 @@ return true; // Per default match rules are supported. } +const int Matcher::float_pressure(int default_pressure_threshold) { + return default_pressure_threshold; +} + int Matcher::regnum_to_fpu_offset(int regnum) { Unimplemented(); @@ -8365,9 +8808,13 @@ // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher // can't match them +// standard CompareAndSwapX when we are using barriers +// these have higher priority than the rules selected by a predicate + instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); @@ -8385,6 +8832,7 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); @@ -8402,6 +8850,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); @@ -8419,6 +8868,7 @@ instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); @@ -8433,6 +8883,84 @@ ins_pipe(pipe_slow); %} +// alternative CompareAndSwapX when we are eliding barriers + +instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n)); + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + ins_cost(VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 
1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n)); + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + ins_cost(VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n)); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + ins_cost(VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + +instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ + + predicate(needs_acquiring_load_exclusive(n)); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + ins_cost(VOLATILE_REF_COST); + + effect(KILL cr); + + format %{ + "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval" + "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} + + ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval), + aarch64_enc_cset_eq(res)); + + ins_pipe(pipe_slow); +%} + instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{ match(Set prev (GetAndSetI mem newv)); @@ -13286,6 +13814,25 @@ ins_pipe(pipe_cmp_branch); %} +instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{ + match(If cmp (CmpP (DecodeN oop) zero)); + predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne + || n->in(1)->as_Bool()->_test._test == BoolTest::eq); + effect(USE labl); + + ins_cost(BRANCH_COST); + format %{ "cb$cmp $oop, $labl" %} + ins_encode %{ + Label* L = $labl$$label; + Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; + if (cond == Assembler::EQ) + __ cbzw($oop$$Register, *L); + else + __ cbnzw($oop$$Register, *L); + %} + ins_pipe(pipe_cmp_branch); +%} + // Conditional Far Branch // Conditional Far Branch Unsigned // TODO: fixme @@ -14662,6 +15209,102 @@ ins_pipe(pipe_class_default); %} +// --------------------------------- SQRT ------------------------------------- + +instruct vsqrt2D(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SqrtVD src)); + format %{ "fsqrt $dst, $src\t# vector (2D)" %} + ins_encode %{ + __ fsqrt(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// --------------------------------- ABS -------------------------------------- + +instruct vabs2F(vecD dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AbsVF src)); + ins_cost(INSN_COST * 3); + format %{ "fabs $dst,$src\t# vector (2S)" %} + ins_encode %{ + __ fabs(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vabs4F(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 4); + 
match(Set dst (AbsVF src)); + ins_cost(INSN_COST * 3); + format %{ "fabs $dst,$src\t# vector (4S)" %} + ins_encode %{ + __ fabs(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vabs2D(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AbsVD src)); + ins_cost(INSN_COST * 3); + format %{ "fabs $dst,$src\t# vector (2D)" %} + ins_encode %{ + __ fabs(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// --------------------------------- NEG -------------------------------------- + +instruct vneg2F(vecD dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (NegVF src)); + ins_cost(INSN_COST * 3); + format %{ "fneg $dst,$src\t# vector (2S)" %} + ins_encode %{ + __ fneg(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vneg4F(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (NegVF src)); + ins_cost(INSN_COST * 3); + format %{ "fneg $dst,$src\t# vector (4S)" %} + ins_encode %{ + __ fneg(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vneg2D(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (NegVD src)); + ins_cost(INSN_COST * 3); + format %{ "fneg $dst,$src\t# vector (2D)" %} + ins_encode %{ + __ fneg(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + // --------------------------------- AND -------------------------------------- instruct vand8B(vecD dst, vecD src1, vecD src2)
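The block of cmpxchg*_acq rules above is selected by the new needs_acquiring_load_exclusive predicate: when an inlined unsafe CAS sits inside the usual release/acquire membar graph, the matcher may fold the ordering into the CAS itself (at VOLATILE_REF_COST) instead of keeping the plain CAS plus separate barriers. The sketch below is only a portable analogy of that trade-off using std::atomic; it is not HotSpot code, is not part of the changeset, and the function names are invented.

// Portable analogy of what the *Acq CAS rules buy: ordering folded into the
// CAS itself versus a plain CAS bracketed by explicit fences.  Illustration
// only; names invented, not HotSpot code.
#include <atomic>
#include <cstdio>

std::atomic<int> guard{0};

bool cas_with_fences(int expected, int desired) {
  // "barrier" flavour: relaxed CAS surrounded by explicit fences
  std::atomic_thread_fence(std::memory_order_release);
  bool ok = guard.compare_exchange_strong(expected, desired,
                                          std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_acquire);
  return ok;
}

bool cas_acq_rel(int expected, int desired) {
  // "acquiring" flavour: release/acquire ordering carried by the CAS
  return guard.compare_exchange_strong(expected, desired,
                                       std::memory_order_acq_rel);
}

int main() {
  bool a = cas_with_fences(0, 1);   // guard: 0 -> 1
  bool b = cas_acq_rel(1, 2);       // guard: 1 -> 2
  printf("%d %d\n", a, b);
  return 0;
}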
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Mon Oct 19 15:48:13 2015 -0400
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Wed Oct 21 16:38:48 2015 -0400
@@ -2311,6 +2311,12 @@
 
 #define MSG "invalid arrangement"
 
+#define ASSERTION (T == T2S || T == T4S || T == T2D)
+  INSN(fsqrt, 1, 0b11111);
+  INSN(fabs, 0, 0b01111);
+  INSN(fneg, 1, 0b01111);
+#undef ASSERTION
+
 #define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
   INSN(rev64, 0, 0b00000);
 #undef ASSERTION
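The fsqrt/fabs/fneg lines above rely on the surrounding INSN macro, which stamps out one assembler method per mnemonic while the ASSERTION macro restricts the vector arrangements each block accepts. Below is a stripped-down, self-contained model of that define/undef pattern; the arrangement type, the compile-time check, and the printf body are all invented stand-ins for the real emitter, not patch content.

// Toy model of the INSN/ASSERTION pattern in assembler_aarch64.hpp: define
// the arrangement check, stamp out a handful of mnemonics, then undefine the
// check before the next block.  Everything here is an invented stand-in.
#include <cstdio>

enum Arrangement { T8B, T16B, T4H, T8H, T2S, T4S, T2D };

#define ASSERTION (T == T2S || T == T4S || T == T2D)
#define INSN(NAME, U, OPC)                                          \
  template <Arrangement T> void NAME(int vd, int vn) {              \
    static_assert(ASSERTION, "invalid arrangement");                \
    printf(#NAME " v%d, v%d (u=%d opcode=%d arrangement=%d)\n",     \
           vd, vn, U, OPC, (int)T);                                 \
  }

INSN(fsqrt, 1, 0b11111)
INSN(fabs_, 0, 0b01111)   // trailing underscore avoids the libm name fabs
INSN(fneg_, 1, 0b01111)

#undef INSN
#undef ASSERTION

int main() {
  fsqrt<T2D>(0, 1);
  fabs_<T4S>(2, 3);
  fneg_<T2S>(4, 5);
  // fsqrt<T8B>(0, 1);   // would be rejected by the static_assert
  return 0;
}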
--- a/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp Mon Oct 19 15:48:13 2015 -0400
+++ b/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp Wed Oct 21 16:38:48 2015 -0400
@@ -68,10 +68,11 @@
 
 // Peephole and CISC spilling both break the graph, and so make the
 // scheduler sick.
-define_pd_global(bool, OptoPeephole, true);
+define_pd_global(bool, OptoPeephole, false);
 define_pd_global(bool, UseCISCSpill, true);
 define_pd_global(bool, OptoScheduling, false);
 define_pd_global(bool, OptoBundling, false);
+define_pd_global(bool, OptoRegScheduling, false);
 
 define_pd_global(intx, ReservedCodeCacheSize, 48*M);
 define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);
--- a/hotspot/src/cpu/aarch64/vm/compiledIC_aarch64.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/compiledIC_aarch64.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -51,13 +51,15 @@ // ---------------------------------------------------------------------------- #define __ _masm. -address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) { +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { // Stub is fixed up when the corresponding call is converted from // calling compiled code to calling interpreted code. // mov rmethod, 0 // jmp -4 # to self - address mark = cbuf.insts_mark(); // Get mark within main instrs section. + if (mark == NULL) { + mark = cbuf.insts_mark(); // Get mark within main instrs section. + } // Note that the code buffer's insts_mark is always relative to insts. // That's why we must use the macroassembler to generate a stub.
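emit_to_interp_stub now takes an optional mark so a caller that already knows the call-site address can pass it in, falling back to the buffer's current insts_mark otherwise (the ppc hunk near the end of this changeset gets the same signature). A minimal model of that default follows; Buffer and emit_stub are invented, this is not the HotSpot CodeBuffer API.

// Minimal model of the "use the supplied mark, else the current position"
// default added to emit_to_interp_stub.
#include <cstdio>
#include <vector>

struct Buffer {
  std::vector<unsigned char> insts;
  unsigned char* insts_mark() { return insts.data() + insts.size(); }
};

unsigned char* emit_stub(Buffer& buf, unsigned char* mark = nullptr) {
  if (mark == nullptr) {
    mark = buf.insts_mark();   // no call site supplied: use "here"
  }
  // ... emit the stub and relocate it against 'mark' ...
  return mark;
}

int main() {
  Buffer b;
  b.insts.resize(16);
  printf("implicit mark at offset %td\n", emit_stub(b) - b.insts.data());
  printf("explicit mark at offset %td\n",
         emit_stub(b, b.insts.data() + 8) - b.insts.data());
  return 0;
}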
--- a/hotspot/src/cpu/aarch64/vm/cppInterpreterGenerator_aarch64.hpp Mon Oct 19 15:48:13 2015 -0400
+++ b/hotspot/src/cpu/aarch64/vm/cppInterpreterGenerator_aarch64.hpp Wed Oct 21 16:38:48 2015 -0400
@@ -30,5 +30,6 @@
 
   void generate_more_monitors();
   void generate_deopt_handling();
+  void lock_method(void);
 
 #endif // CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP
--- a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -55,10 +55,17 @@ define_pd_global(intx, OptoLoopAlignment, 16); define_pd_global(intx, InlineFrequencyCount, 100); -define_pd_global(intx, StackYellowPages, 2); -define_pd_global(intx, StackRedPages, 1); +#define DEFAULT_STACK_YELLOW_PAGES (2) +#define DEFAULT_STACK_RED_PAGES (1) +#define DEFAULT_STACK_SHADOW_PAGES (4 DEBUG_ONLY(+5)) -define_pd_global(intx, StackShadowPages, 4 DEBUG_ONLY(+5)); +#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES +#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES +#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES + +define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); +define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); +define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); define_pd_global(bool, RewriteBytecodes, true); define_pd_global(bool, RewriteFrequentPairs, true);
--- a/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -42,6 +42,11 @@ // Implementation of InterpreterMacroAssembler +void InterpreterMacroAssembler::jump_to_entry(address entry) { + assert(entry, "Entry must have been generated by now"); + b(entry); +} + #ifndef CC_INTERP void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { @@ -1542,14 +1547,14 @@ if (MethodData::profile_arguments()) { Label done; int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); - add(mdp, mdp, off_to_args); for (int i = 0; i < TypeProfileArgsLimit; i++) { if (i > 0 || MethodData::profile_return()) { // If return value type is profiled we may have no argument to profile - ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args)); + ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); sub(tmp, tmp, i*TypeStackSlotEntries::per_arg_count()); cmp(tmp, TypeStackSlotEntries::per_arg_count()); + add(rscratch1, mdp, off_to_args); br(Assembler::LT, done); } ldr(tmp, Address(callee, Method::const_offset())); @@ -1557,26 +1562,27 @@ // stack offset o (zero based) from the start of the argument // list, for n arguments translates into offset n - o - 1 from // the end of the argument list - ldr(rscratch1, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args)); + ldr(rscratch1, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i)))); sub(tmp, tmp, rscratch1); sub(tmp, tmp, 1); Address arg_addr = argument_address(tmp); ldr(tmp, arg_addr); - Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))); profile_obj_type(tmp, mdo_arg_addr); int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); - add(mdp, mdp, to_add); off_to_args += to_add; } if (MethodData::profile_return()) { - ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args)); + ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); sub(tmp, tmp, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); } + add(rscratch1, mdp, off_to_args); bind(done); + mov(mdp, rscratch1); if (MethodData::profile_return()) { // We're right after the type profile for the last
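The profile_arguments rewrite above stops bumping mdp inside the argument loop: each cell is addressed with an absolute offset that the code generator tracks in off_to_args, and the final pointer bump is committed once (via rscratch1) at the join point. The following stand-alone sketch shows that "track the offset, advance once" shape in plain C++; it is not the real InterpreterMacroAssembler and the cell layout is invented.

// Stand-alone sketch of the profile_arguments change: keep the base pointer
// fixed while the per-argument cells are filled in, track the running offset
// on the side, and advance the pointer exactly once at the end.
#include <cstdio>

int main() {
  int cells[8] = {0};
  int* mdp = cells;          // stands in for the method-data pointer
  const int per_arg = 2;     // cells per profiled argument (invented)
  int off_to_args = 0;       // running offset, known at code-generation time

  for (int i = 0; i < 3; i++) {
    mdp[off_to_args + 0] = 10 + i;   // stack-slot cell for argument i
    mdp[off_to_args + 1] = 20 + i;   // type cell for argument i
    off_to_args += per_arg;          // advance the offset, not the pointer
  }
  mdp += off_to_args;                // single bump at the join point

  printf("consumed %d cells, mdp now at index %td\n",
         off_to_args, mdp - cells);
  return 0;
}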
--- a/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -66,6 +66,8 @@ void load_earlyret_value(TosState state); + void jump_to_entry(address entry); + #ifdef CC_INTERP void save_bcp() { /* not needed in c++ interpreter and harmless */ } void restore_bcp() { /* not needed in c++ interpreter and harmless */ }
--- a/hotspot/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -41,14 +41,13 @@ address generate_native_entry(bool synchronized); address generate_abstract_entry(void); address generate_math_entry(AbstractInterpreter::MethodKind kind); - address generate_jump_to_normal_entry(void); - address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); } - address generate_empty_entry(void) { return generate_jump_to_normal_entry(); } + address generate_accessor_entry(void) { return NULL; } + address generate_empty_entry(void) { return NULL; } void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); address generate_Reference_get_entry(); address generate_CRC32_update_entry(); address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); - void lock_method(void); + address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } void generate_stack_overflow_check(void); void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
--- a/hotspot/src/cpu/aarch64/vm/interpreter_aarch64.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/interpreter_aarch64.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -236,17 +236,6 @@ __ blrt(rscratch1, gpargs, fpargs, rtype); } -// Jump into normal path for accessor and empty entry to jump to normal entry -// The "fast" optimization don't update compilation count therefore can disable inlining -// for these functions that should be inlined. -address InterpreterGenerator::generate_jump_to_normal_entry(void) { - address entry_point = __ pc(); - - assert(Interpreter::entry_for_kind(Interpreter::zerolocals) != NULL, "should already be generated"); - __ b(Interpreter::entry_for_kind(Interpreter::zerolocals)); - return entry_point; -} - // Abstract method entry // Attempt to execute abstract method. Throw exception address InterpreterGenerator::generate_abstract_entry(void) {
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "jvmci/jvmciCodeInstaller.hpp" +#include "jvmci/jvmciRuntime.hpp" +#include "jvmci/jvmciCompilerToVM.hpp" +#include "jvmci/jvmciJavaClasses.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_aarch64.inline.hpp" + +jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) { + Unimplemented(); + return 0; +} + +void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) { + Unimplemented(); +} + +void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_poll(address pc, jint mark) { + Unimplemented(); +} + +// convert JVMCI register indices (as used in oop maps) to HotSpot registers +VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) { + return NULL; +} + +bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { + return false; +}
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -1709,6 +1709,20 @@ return idivq_offset; } +void MacroAssembler::membar(Membar_mask_bits order_constraint) { + address prev = pc() - NativeMembar::instruction_size; + if (prev == code()->last_membar()) { + NativeMembar *bar = NativeMembar_at(prev); + // We are merging two memory barrier instructions. On AArch64 we + // can do this simply by ORing them together. + bar->set_kind(bar->get_kind() | order_constraint); + BLOCK_COMMENT("merged membar"); + } else { + code()->set_last_membar(pc()); + dmb(Assembler::barrier(order_constraint)); + } +} + // MacroAssembler routines found actually to be needed void MacroAssembler::push(Register src) @@ -2238,7 +2252,7 @@ ttyLocker ttyl; ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); - assert(false, err_msg("DEBUG MESSAGE: %s", msg)); + assert(false, "DEBUG MESSAGE: %s", msg); } } @@ -2286,18 +2300,30 @@ } #endif -void MacroAssembler::push_CPU_state() { - push(0x3fffffff, sp); // integer registers except lr & sp - +void MacroAssembler::push_CPU_state(bool save_vectors) { + push(0x3fffffff, sp); // integer registers except lr & sp + + if (!save_vectors) { for (int i = 30; i >= 0; i -= 2) stpd(as_FloatRegister(i), as_FloatRegister(i+1), Address(pre(sp, -2 * wordSize))); + } else { + for (int i = 30; i >= 0; i -= 2) + stpq(as_FloatRegister(i), as_FloatRegister(i+1), + Address(pre(sp, -4 * wordSize))); + } } -void MacroAssembler::pop_CPU_state() { - for (int i = 0; i < 32; i += 2) - ldpd(as_FloatRegister(i), as_FloatRegister(i+1), - Address(post(sp, 2 * wordSize))); +void MacroAssembler::pop_CPU_state(bool restore_vectors) { + if (!restore_vectors) { + for (int i = 0; i < 32; i += 2) + ldpd(as_FloatRegister(i), as_FloatRegister(i+1), + Address(post(sp, 2 * wordSize))); + } else { + for (int i = 0; i < 32; i += 2) + ldpq(as_FloatRegister(i), as_FloatRegister(i+1), + Address(post(sp, 4 * wordSize))); + } pop(0x3fffffff, sp); // integer registers except lr & sp } @@ -3027,6 +3053,24 @@ _masm->bind(_label); } +void MacroAssembler::addptr(const Address &dst, int32_t src) { + Address adr; + switch(dst.getMode()) { + case Address::base_plus_offset: + // This is the expected mode, although we allow all the other + // forms below. + adr = form_address(rscratch2, dst.base(), dst.offset(), LogBytesPerWord); + break; + default: + lea(rscratch2, dst); + adr = Address(rscratch2); + break; + } + ldr(rscratch1, adr); + add(rscratch1, rscratch1, src); + str(rscratch1, adr); +} + void MacroAssembler::cmpptr(Register src1, Address src2) { unsigned long offset; adrp(rscratch1, src2, offset); @@ -3063,11 +3107,15 @@ if (UseCondCardMark) { Label L_already_dirty; + membar(StoreLoad); ldrb(rscratch2, Address(obj, rscratch1)); cbz(rscratch2, L_already_dirty); strb(zr, Address(obj, rscratch1)); bind(L_already_dirty); } else { + if (UseConcMarkSweepGC && CMSPrecleaningEnabled) { + membar(StoreStore); + } strb(zr, Address(obj, rscratch1)); } }
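The new MacroAssembler::membar records where the last barrier was emitted and, when another request lands immediately after it, widens that barrier by ORing in the extra ordering bits instead of emitting a second dmb; the companion bind() override in the header clears the marker so barriers are never merged across a label. Below is a self-contained model of that bookkeeping; the bit values and the pseudo-encoding are invented, not the real Membar_mask_bits, and nothing here is part of the patch.

// Toy model of the membar-merging logic: if the previously emitted
// instruction is already a barrier, OR the new ordering constraint into it;
// otherwise emit a fresh barrier.  A label resets the tracking because it may
// be a control-flow join.
#include <cstdio>
#include <vector>

enum Mask { LoadLoad = 1, LoadStore = 2, StoreLoad = 4, StoreStore = 8 };

struct Code {
  std::vector<int> insns;   // each element models one emitted instruction
  int last_membar = -1;     // index of the most recent barrier, -1 if none

  void emit(int insn) { insns.push_back(insn); }        // ordinary instruction

  void membar(int order) {
    if (last_membar >= 0 && last_membar == (int)insns.size() - 1) {
      insns.back() |= order;              // merge: widen the existing barrier
    } else {
      last_membar = (int)insns.size();
      insns.push_back(0x1000 | order);    // fresh barrier (tag bit invented)
    }
  }

  void bind_label() { last_membar = -1; } // never merge across a label
};

int main() {
  Code c;
  c.membar(StoreStore);
  c.membar(LoadStore);    // immediately follows a barrier: merged into it
  c.emit(0x42);           // some ordinary store
  c.membar(StoreLoad);    // not adjacent to a barrier: new dmb
  c.bind_label();
  c.membar(StoreLoad);    // after a label: also a new dmb
  for (int k : c.insns) printf("insn 0x%x\n", k);
  return 0;
}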
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -152,6 +152,13 @@ strw(scratch, a); } + void bind(Label& L) { + Assembler::bind(L); + code()->clear_last_membar(); + } + + void membar(Membar_mask_bits order_constraint); + // Frame creation and destruction shared between JITs. void build_frame(int framesize); void remove_frame(int framesize); @@ -777,8 +784,8 @@ DEBUG_ONLY(void verify_heapbase(const char* msg);) - void push_CPU_state(); - void pop_CPU_state() ; + void push_CPU_state(bool save_vectors = false); + void pop_CPU_state(bool restore_vectors = false) ; // Round up to a power of two void round_to(Register reg, int modulus); @@ -908,13 +915,7 @@ // Arithmetics - void addptr(Address dst, int32_t src) { - lea(rscratch2, dst); - ldr(rscratch1, Address(rscratch2)); - add(rscratch1, rscratch1, src); - str(rscratch1, Address(rscratch2)); - } - + void addptr(const Address &dst, int32_t src); void cmpptr(Register src1, Address src2); // Various forms of CAS
--- a/hotspot/src/cpu/aarch64/vm/methodHandles_aarch64.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/methodHandles_aarch64.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -50,7 +50,7 @@ #ifdef ASSERT static int check_nonzero(const char* xname, int x) { - assert(x != 0, err_msg("%s should be nonzero", xname)); + assert(x != 0, "%s should be nonzero", xname); return x; } #define NONZERO(x) check_nonzero(#x, x) @@ -407,7 +407,7 @@ } default: - fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); break; }
--- a/hotspot/src/cpu/aarch64/vm/nativeInst_aarch64.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/nativeInst_aarch64.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -101,6 +101,12 @@ static bool maybe_cpool_ref(address instr) { return is_adrp_at(instr) || is_ldr_literal_at(instr); } + + bool is_Membar() { + unsigned int insn = uint_at(0); + return Instruction_aarch64::extract(insn, 31, 12) == 0b11010101000000110011 && + Instruction_aarch64::extract(insn, 7, 0) == 0b10111111; + } }; inline NativeInstruction* nativeInstruction_at(address address) { @@ -487,4 +493,15 @@ return (NativeCallTrampolineStub*)addr; } +class NativeMembar : public NativeInstruction { +public: + unsigned int get_kind() { return Instruction_aarch64::extract(uint_at(0), 11, 8); } + void set_kind(int order_kind) { Instruction_aarch64::patch(addr_at(0), 11, 8, order_kind); } +}; + +inline NativeMembar *NativeMembar_at(address addr) { + assert(nativeInstruction_at(addr)->is_Membar(), "no membar found"); + return (NativeMembar*)addr; +} + #endif // CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP
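NativeMembar recognizes a dmb by the fixed bits checked in is_Membar above and exposes the four-bit ordering field in bits 11..8 so the merging code in macroAssembler can rewrite a barrier in place. The sketch below redoes that mask-and-shift arithmetic on a plain 32-bit word; the fixed bits are taken from the hunk, while the kind value and the helpers are simplified stand-ins for the Instruction_aarch64 extract/patch routines.

// Stand-alone sketch of NativeMembar::get_kind/set_kind: the ordering kind
// occupies bits 11..8 of the instruction word, so reading and patching it is
// plain mask-and-shift arithmetic.
#include <cstdint>
#include <cstdio>

uint32_t extract(uint32_t insn, int msb, int lsb) {
  uint32_t mask = (1u << (msb - lsb + 1)) - 1;
  return (insn >> lsb) & mask;
}

void patch(uint32_t* insn, int msb, int lsb, uint32_t value) {
  uint32_t mask = ((1u << (msb - lsb + 1)) - 1) << lsb;
  *insn = (*insn & ~mask) | ((value << lsb) & mask);
}

int main() {
  // Fixed bits as checked by is_Membar (bits 31..12 and 7..0), with an
  // illustrative kind in bits 11..8.
  uint32_t insn = 0xd50330bfu | (0x9u << 8);
  printf("kind = 0x%x\n", extract(insn, 11, 8));     // read the field
  patch(&insn, 11, 8, extract(insn, 11, 8) | 0x2u);  // widen it in place
  printf("kind = 0x%x\n", extract(insn, 11, 8));
  return 0;
}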
--- a/hotspot/src/cpu/aarch64/vm/relocInfo_aarch64.cpp Mon Oct 19 15:48:13 2015 -0400
+++ b/hotspot/src/cpu/aarch64/vm/relocInfo_aarch64.cpp Wed Oct 21 16:38:48 2015 -0400
@@ -102,12 +102,5 @@
   }
 }
 
-void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
-  if (NativeInstruction::maybe_cpool_ref(addr())) {
-    address old_addr = old_addr_for(addr(), src, dest);
-    MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr));
-  }
-}
-
 void metadata_Relocation::pd_fix_value(address x) {
 }
--- a/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -75,8 +75,8 @@ // FIXME -- this is used by C1 class RegisterSaver { public: - static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); - static void restore_live_registers(MacroAssembler* masm); + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false); + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); // Offsets into the register save area // Used by deoptimization when it is managing result register @@ -108,7 +108,17 @@ }; -OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { +#ifdef COMPILER2 + if (save_vectors) { + // Save upper half of vector registers + int vect_words = 32 * 8 / wordSize; + additional_frame_words += vect_words; + } +#else + assert(!save_vectors, "vectors are generated only by C2"); +#endif + int frame_size_in_bytes = round_to(additional_frame_words*wordSize + reg_save_size*BytesPerInt, 16); // OopMap frame size is in compiler stack slots (jint's) not bytes or words @@ -122,7 +132,7 @@ // Save registers, fpu state, and flags. __ enter(); - __ push_CPU_state(); + __ push_CPU_state(save_vectors); // Set an oopmap for the call site. This oopmap will map all // oop-registers and debug-info registers as callee-saved. This @@ -139,14 +149,14 @@ // register slots are 8 bytes // wide, 32 floating-point // registers - oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), r->as_VMReg()); } } for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { FloatRegister r = as_FloatRegister(i); - int sp_offset = 2 * i; + int sp_offset = save_vectors ? (4 * i) : (2 * i); oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); } @@ -154,8 +164,11 @@ return oop_map; } -void RegisterSaver::restore_live_registers(MacroAssembler* masm) { - __ pop_CPU_state(); +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { +#ifndef COMPILER2 + assert(!restore_vectors, "vectors are generated only by C2"); +#endif + __ pop_CPU_state(restore_vectors); __ leave(); } @@ -177,9 +190,9 @@ } // Is vector's size (in bytes) bigger than a size saved by default? -// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. +// 8 bytes vector registers are saved by default on AArch64. bool SharedRuntime::is_wide_vector(int size) { - return size > 16; + return size > 8; } // The java_calling_convention describes stack locations as ideal slots on // a frame with no abi restrictions. Since we must observe abi restrictions @@ -460,11 +473,11 @@ } -static void gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs) { +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { // Note: r13 contains the senderSP on entry. 
We must preserve it since // we may do a i2c -> c2i transition if we lose a race where compiled @@ -1146,7 +1159,7 @@ assert((unsigned)gpargs < 256, "eek!"); assert((unsigned)fpargs < 32, "eek!"); __ lea(rscratch1, RuntimeAddress(dest)); - __ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type); + if (UseBuiltinSim) __ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type); __ blrt(rscratch1, rscratch2); __ maybe_isb(); } @@ -1194,7 +1207,7 @@ } else if (iid == vmIntrinsics::_invokeBasic) { has_receiver = true; } else { - fatal(err_msg_res("unexpected intrinsic id %d", iid)); + fatal("unexpected intrinsic id %d", iid); } if (member_reg != noreg) { @@ -1521,14 +1534,13 @@ int vep_offset = ((intptr_t)__ pc()) - start; - // Generate stack overflow check - // If we have to make this method not-entrant we'll overwrite its // first instruction with a jump. For this action to be legal we // must ensure that this first instruction is a B, BL, NOP, BKPT, // SVC, HVC, or SMC. Make it a NOP. __ nop(); + // Generate stack overflow check if (UseStackBanging) { __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); } else { @@ -1709,23 +1721,20 @@ // need to spill before we call out int c_arg = total_c_args - total_in_args; - // Pre-load a static method's oop into r20. Used both by locking code and - // the normal JNI call code. + // Pre-load a static method's oop into c_rarg1. if (method->is_static() && !is_critical_native) { // load oop into a register - __ movoop(oop_handle_reg, + __ movoop(c_rarg1, JNIHandles::make_local(method->method_holder()->java_mirror()), /*immediate*/true); // Now handlize the static class mirror it's known not-null. - __ str(oop_handle_reg, Address(sp, klass_offset)); + __ str(c_rarg1, Address(sp, klass_offset)); map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); // Now get the handle - __ lea(oop_handle_reg, Address(sp, klass_offset)); - // store the klass handle as second argument - __ mov(c_rarg1, oop_handle_reg); + __ lea(c_rarg1, Address(sp, klass_offset)); // and protect the arg if we must spill c_arg--; } @@ -1740,19 +1749,13 @@ __ set_last_Java_frame(sp, noreg, (address)the_pc, rscratch1); - - // We have all of the arguments setup at this point. We must not touch any register - // argument registers at this point (what if we save/restore them there are no oop? - + Label dtrace_method_entry, dtrace_method_entry_done; { - SkipIfEqual skip(masm, &DTraceMethodProbes, false); - // protect the args we've loaded - save_args(masm, total_c_args, c_arg, out_regs); - __ mov_metadata(c_rarg1, method()); - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), - rthread, c_rarg1); - restore_args(masm, total_c_args, c_arg, out_regs); + unsigned long offset; + __ adrp(rscratch1, ExternalAddress((address)&DTraceMethodProbes), offset); + __ ldrb(rscratch1, Address(rscratch1, offset)); + __ cbnzw(rscratch1, dtrace_method_entry); + __ bind(dtrace_method_entry_done); } // RedefineClasses() tracing support for obsolete method entry @@ -1782,7 +1785,6 @@ if (method->is_synchronized()) { assert(!is_critical_native, "unhandled"); - const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); // Get the handle (the 2nd argument) @@ -1838,7 +1840,6 @@ // Finally just about ready to make the JNI call - // get JNIEnv* which is first argument to native if (!is_critical_native) { __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset()))); @@ -1879,9 +1880,9 @@ // Unpack native results. 
switch (ret_type) { - case T_BOOLEAN: __ ubfx(r0, r0, 0, 8); break; + case T_BOOLEAN: __ ubfx(r0, r0, 0, 8); break; case T_CHAR : __ ubfx(r0, r0, 0, 16); break; - case T_BYTE : __ sbfx(r0, r0, 0, 8); break; + case T_BYTE : __ sbfx(r0, r0, 0, 8); break; case T_SHORT : __ sbfx(r0, r0, 0, 16); break; case T_INT : __ sbfx(r0, r0, 0, 32); break; case T_DOUBLE : @@ -1904,14 +1905,17 @@ // Thread A is resumed to finish this native method, but doesn't block here since it // didn't see any synchronization is progress, and escapes. __ mov(rscratch1, _thread_in_native_trans); - __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); - __ stlrw(rscratch1, rscratch2); if(os::is_MP()) { if (UseMembar) { + __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset())); + // Force this write out before the read below __ dmb(Assembler::SY); } else { + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); + // Write serialization page so VM thread can do a pseudo remote membar. // We use the current thread pointer to calculate a thread specific // offset to write to within the page. This minimizes bus traffic @@ -1920,25 +1924,220 @@ } } + // check for safepoint operation in progress and/or pending suspend requests + Label safepoint_in_progress, safepoint_in_progress_done; + { + assert(SafepointSynchronize::_not_synchronized == 0, "fix this code"); + unsigned long offset; + __ adrp(rscratch1, + ExternalAddress((address)SafepointSynchronize::address_of_state()), + offset); + __ ldrw(rscratch1, Address(rscratch1, offset)); + __ cbnzw(rscratch1, safepoint_in_progress); + __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset())); + __ cbnzw(rscratch1, safepoint_in_progress); + __ bind(safepoint_in_progress_done); + } + + // change thread state Label after_transition; - - // check for safepoint operation in progress and/or pending suspend requests + __ mov(rscratch1, _thread_in_Java); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); + __ bind(after_transition); + + Label reguard; + Label reguard_done; + __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset())); + __ cmpw(rscratch1, JavaThread::stack_guard_yellow_disabled); + __ br(Assembler::EQ, reguard); + __ bind(reguard_done); + + // native result if any is live + + // Unlock + Label unlock_done; + Label slow_path_unlock; + if (method->is_synchronized()) { + + // Get locked oop from the handle we passed to jni + __ ldr(obj_reg, Address(oop_handle_reg, 0)); + + Label done; + + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, old_hdr, done); + } + + // Simple recursive lock? 
+ + __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ cbz(rscratch1, done); + + // Must save r0 if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + + + // get address of the stack lock + __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + // get old displaced header + __ ldr(old_hdr, Address(r0, 0)); + + // Atomic swap old header if oop still contains the stack lock + Label succeed; + __ cmpxchgptr(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock); + __ bind(succeed); + + // slow path re-enters here + __ bind(unlock_done); + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + restore_native_result(masm, ret_type, stack_slots); + } + + __ bind(done); + } + + Label dtrace_method_exit, dtrace_method_exit_done; { - Label Continue; - - { unsigned long offset; - __ adrp(rscratch1, - ExternalAddress((address)SafepointSynchronize::address_of_state()), - offset); - __ ldrw(rscratch1, Address(rscratch1, offset)); + unsigned long offset; + __ adrp(rscratch1, ExternalAddress((address)&DTraceMethodProbes), offset); + __ ldrb(rscratch1, Address(rscratch1, offset)); + __ cbnzw(rscratch1, dtrace_method_exit); + __ bind(dtrace_method_exit_done); + } + + __ reset_last_Java_frame(false, true); + + // Unpack oop result + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + Label L; + __ cbz(r0, L); + __ ldr(r0, Address(r0, 0)); + __ bind(L); + __ verify_oop(r0); + } + + if (!is_critical_native) { + // reset handle block + __ ldr(r2, Address(rthread, JavaThread::active_handles_offset())); + __ str(zr, Address(r2, JNIHandleBlock::top_offset_in_bytes())); + } + + __ leave(); + + if (!is_critical_native) { + // Any exception pending? + __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cbnz(rscratch1, exception_pending); + } + + // record exit from native wrapper code + if (NotifySimulator) { + __ notify(Assembler::method_reentry); + } + + // We're done + __ ret(lr); + + // Unexpected paths are out of line and go here + + if (!is_critical_native) { + // forward the exception + __ bind(exception_pending); + + // and forward the exception + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + } + + // Slow path locking & unlocking + if (method->is_synchronized()) { + + __ block_comment("Slow path lock {"); + __ bind(slow_path_lock); + + // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + + __ mov(c_rarg0, obj_reg); + __ mov(c_rarg1, lock_reg); + __ mov(c_rarg2, rthread); + + // Not a leaf but we have last_Java_frame setup as we want + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cbz(rscratch1, L); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); } - __ cmpw(rscratch1, SafepointSynchronize::_not_synchronized); - - Label L; - __ br(Assembler::NE, L); - __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset())); - __ cbz(rscratch1, Continue); - __ bind(L); +#endif + __ b(lock_done); + + __ block_comment("} Slow path lock"); + + __ block_comment("Slow path unlock {"); + __ bind(slow_path_unlock); + + // If we haven't already saved the native result we must save it now as xmm registers + // are still exposed. + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + save_native_result(masm, ret_type, stack_slots); + } + + __ mov(c_rarg2, rthread); + __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ mov(c_rarg0, obj_reg); + + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + // NOTE that obj_reg == r19 currently + __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 3, 0, 1); + +#ifdef ASSERT + { + Label L; + __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cbz(rscratch1, L); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } +#endif /* ASSERT */ + + __ str(r19, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + restore_native_result(masm, ret_type, stack_slots); + } + __ b(unlock_done); + + __ block_comment("} Slow path unlock"); + + } // synchronized + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 0, 0, 0); + restore_native_result(masm, ret_type, stack_slots); + // and continue + __ b(reguard_done); + + // SLOW PATH safepoint + { + __ block_comment("safepoint {"); + __ bind(safepoint_in_progress); // Don't use call_VM as it will see a possible pending exception and forward it // and never return here preventing us from clearing _last_native_pc down below. @@ -1960,209 +2159,45 @@ if (is_critical_native) { // The call above performed the transition to thread_in_Java so - // skip the transition logic below. + // skip the transition logic above. 
__ b(after_transition); } - __ bind(Continue); + __ b(safepoint_in_progress_done); + __ block_comment("} safepoint"); } - // change thread state - __ mov(rscratch1, _thread_in_Java); - __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); - __ stlrw(rscratch1, rscratch2); - __ bind(after_transition); - - Label reguard; - Label reguard_done; - __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset())); - __ cmpw(rscratch1, JavaThread::stack_guard_yellow_disabled); - __ br(Assembler::EQ, reguard); - __ bind(reguard_done); - - // native result if any is live - - // Unlock - Label unlock_done; - Label slow_path_unlock; - if (method->is_synchronized()) { - - // Get locked oop from the handle we passed to jni - __ ldr(obj_reg, Address(oop_handle_reg, 0)); - - Label done; - - if (UseBiasedLocking) { - __ biased_locking_exit(obj_reg, old_hdr, done); - } - - // Simple recursive lock? - - __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); - __ cbz(rscratch1, done); - - // Must save r0 if if it is live now because cmpxchg must use it - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - save_native_result(masm, ret_type, stack_slots); - } - - - // get address of the stack lock - __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); - // get old displaced header - __ ldr(old_hdr, Address(r0, 0)); - - // Atomic swap old header if oop still contains the stack lock - Label succeed; - __ cmpxchgptr(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock); - __ bind(succeed); - - // slow path re-enters here - __ bind(unlock_done); - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - restore_native_result(masm, ret_type, stack_slots); - } - - __ bind(done); - + // SLOW PATH dtrace support + { + __ block_comment("dtrace entry {"); + __ bind(dtrace_method_entry); + + // We have all of the arguments setup at this point. We must not touch any register + // argument registers at this point (what if we save/restore them there are no oop? + + save_args(masm, total_c_args, c_arg, out_regs); + __ mov_metadata(c_rarg1, method()); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + rthread, c_rarg1); + restore_args(masm, total_c_args, c_arg, out_regs); + __ b(dtrace_method_entry_done); + __ block_comment("} dtrace entry"); } + { - SkipIfEqual skip(masm, &DTraceMethodProbes, false); + __ block_comment("dtrace exit {"); + __ bind(dtrace_method_exit); save_native_result(masm, ret_type, stack_slots); __ mov_metadata(c_rarg1, method()); __ call_VM_leaf( CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), rthread, c_rarg1); restore_native_result(masm, ret_type, stack_slots); + __ b(dtrace_method_exit_done); + __ block_comment("} dtrace exit"); } - __ reset_last_Java_frame(false, true); - - // Unpack oop result - if (ret_type == T_OBJECT || ret_type == T_ARRAY) { - Label L; - __ cbz(r0, L); - __ ldr(r0, Address(r0, 0)); - __ bind(L); - __ verify_oop(r0); - } - - if (!is_critical_native) { - // reset handle block - __ ldr(r2, Address(rthread, JavaThread::active_handles_offset())); - __ str(zr, Address(r2, JNIHandleBlock::top_offset_in_bytes())); - } - - __ leave(); - - if (!is_critical_native) { - // Any exception pending? 
- __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); - __ cbnz(rscratch1, exception_pending); - } - - // record exit from native wrapper code - if (NotifySimulator) { - __ notify(Assembler::method_reentry); - } - - // We're done - __ ret(lr); - - // Unexpected paths are out of line and go here - - if (!is_critical_native) { - // forward the exception - __ bind(exception_pending); - - // and forward the exception - __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - } - - // Slow path locking & unlocking - if (method->is_synchronized()) { - - // BEGIN Slow path lock - __ bind(slow_path_lock); - - // has last_Java_frame setup. No exceptions so do vanilla call not call_VM - // args are (oop obj, BasicLock* lock, JavaThread* thread) - - // protect the args we've loaded - save_args(masm, total_c_args, c_arg, out_regs); - - __ mov(c_rarg0, obj_reg); - __ mov(c_rarg1, lock_reg); - __ mov(c_rarg2, rthread); - - // Not a leaf but we have last_Java_frame setup as we want - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); - restore_args(masm, total_c_args, c_arg, out_regs); - -#ifdef ASSERT - { Label L; - __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); - __ cbz(rscratch1, L); - __ stop("no pending exception allowed on exit from monitorenter"); - __ bind(L); - } -#endif - __ b(lock_done); - - // END Slow path lock - - // BEGIN Slow path unlock - __ bind(slow_path_unlock); - - // If we haven't already saved the native result we must save it now as xmm registers - // are still exposed. - - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { - save_native_result(masm, ret_type, stack_slots); - } - - __ mov(c_rarg2, rthread); - __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); - __ mov(c_rarg0, obj_reg); - - // Save pending exception around call to VM (which contains an EXCEPTION_MARK) - // NOTE that obj_reg == r19 currently - __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset()))); - __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset()))); - - rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 3, 0, 1); - -#ifdef ASSERT - { - Label L; - __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); - __ cbz(rscratch1, L); - __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); - __ bind(L); - } -#endif /* ASSERT */ - - __ str(r19, Address(rthread, in_bytes(Thread::pending_exception_offset()))); - - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { - restore_native_result(masm, ret_type, stack_slots); - } - __ b(unlock_done); - - // END Slow path unlock - - } // synchronized - - // SLOW PATH Reguard the stack if needed - - __ bind(reguard); - save_native_result(masm, ret_type, stack_slots); - rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 0, 0, 0); - restore_native_result(masm, ret_type, stack_slots); - // and continue - __ b(reguard_done); - - __ flush(); @@ -2742,7 +2777,7 @@ bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); // Save registers, fpu state, and flags - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors); // The following is basically a call_VM. However, we need the precise // address of the call in order to generate an oopmap. 
Hence, we do all the @@ -2793,7 +2828,7 @@ __ bind(noException); // Normal exit, restore registers and exit. - RegisterSaver::restore_live_registers(masm); + RegisterSaver::restore_live_registers(masm, save_vectors); __ ret(lr);
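Besides hoisting the dtrace, safepoint, reguard and monitor slow paths out of the hot path (forward branches to blocks placed after the normal return), the rewritten native wrapper picks the thread-state publication by flag: with UseMembar a plain strw followed by a full dmb SY, otherwise an stlrw store-release backed by the serialization page. Roughly, in portable terms; the constant and the atomics below are stand-ins for illustration, not the VM's actual protocol or types.

// Rough portable rendering of the two ways the wrapper publishes
// _thread_in_native_trans before re-checking the safepoint state.
#include <atomic>
#include <cstdio>

std::atomic<int> thread_state{0};
const int thread_in_native_trans = 4;      // value is illustrative only

void publish_with_full_fence() {           // UseMembar flavour: strw + dmb SY
  thread_state.store(thread_in_native_trans, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);
}

void publish_with_store_release() {        // stlrw flavour; the VM also writes
                                           // the memory-serialization page
  thread_state.store(thread_in_native_trans, std::memory_order_release);
}

int main() {
  publish_with_full_fence();
  publish_with_store_release();
  printf("state = %d\n", thread_state.load());
  return 0;
}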
--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -746,6 +746,9 @@ const Register count = end; // 'end' register contains bytes count now __ mov(scratch, (address)ct->byte_map_base); __ add(start, start, scratch); + if (UseConcMarkSweepGC) { + __ membar(__ StoreStore); + } __ BIND(L_loop); __ strb(zr, Address(start, count)); __ subs(count, count, 1); @@ -2395,6 +2398,274 @@ return start; } + /*** + * Arguments: + * + * Inputs: + * c_rarg0 - int adler + * c_rarg1 - byte* buff + * c_rarg2 - int len + * + * Output: + * c_rarg0 - int adler result + */ + address generate_updateBytesAdler32() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32"); + address start = __ pc(); + + Label L_simple_by1_loop, L_nmax, L_nmax_loop, L_by16, L_by16_loop, L_by1_loop, L_do_mod, L_combine, L_by1; + + // Aliases + Register adler = c_rarg0; + Register s1 = c_rarg0; + Register s2 = c_rarg3; + Register buff = c_rarg1; + Register len = c_rarg2; + Register nmax = r4; + Register base = r5; + Register count = r6; + Register temp0 = rscratch1; + Register temp1 = rscratch2; + Register temp2 = r7; + + // Max number of bytes we can process before having to take the mod + // 0x15B0 is 5552 in decimal, the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 + unsigned long BASE = 0xfff1; + unsigned long NMAX = 0x15B0; + + __ mov(base, BASE); + __ mov(nmax, NMAX); + + // s1 is initialized to the lower 16 bits of adler + // s2 is initialized to the upper 16 bits of adler + __ ubfx(s2, adler, 16, 16); // s2 = ((adler >> 16) & 0xffff) + __ uxth(s1, adler); // s1 = (adler & 0xffff) + + // The pipelined loop needs at least 16 elements for 1 iteration + // It does check this, but it is more effective to skip to the cleanup loop + __ cmp(len, 16); + __ br(Assembler::HS, L_nmax); + __ cbz(len, L_combine); + + __ bind(L_simple_by1_loop); + __ ldrb(temp0, Address(__ post(buff, 1))); + __ add(s1, s1, temp0); + __ add(s2, s2, s1); + __ subs(len, len, 1); + __ br(Assembler::HI, L_simple_by1_loop); + + // s1 = s1 % BASE + __ subs(temp0, s1, base); + __ csel(s1, temp0, s1, Assembler::HS); + + // s2 = s2 % BASE + __ lsr(temp0, s2, 16); + __ lsl(temp1, temp0, 4); + __ sub(temp1, temp1, temp0); + __ add(s2, temp1, s2, ext::uxth); + + __ subs(temp0, s2, base); + __ csel(s2, temp0, s2, Assembler::HS); + + __ b(L_combine); + + __ bind(L_nmax); + __ subs(len, len, nmax); + __ sub(count, nmax, 16); + __ br(Assembler::LO, L_by16); + + __ bind(L_nmax_loop); + + __ ldp(temp0, temp1, Address(__ post(buff, 16))); + + __ add(s1, s1, temp0, ext::uxtb); + __ ubfx(temp2, temp0, 8, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp0, 16, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp0, 24, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp0, 32, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp0, 40, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp0, 48, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ add(s2, s2, s1); + __ add(s1, s1, temp0, Assembler::LSR, 56); + __ add(s2, s2, s1); + + __ add(s1, s1, temp1, ext::uxtb); + __ ubfx(temp2, temp1, 8, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp1, 16, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp1, 24, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + 
__ ubfx(temp2, temp1, 32, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp1, 40, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp1, 48, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ add(s2, s2, s1); + __ add(s1, s1, temp1, Assembler::LSR, 56); + __ add(s2, s2, s1); + + __ subs(count, count, 16); + __ br(Assembler::HS, L_nmax_loop); + + // s1 = s1 % BASE + __ lsr(temp0, s1, 16); + __ lsl(temp1, temp0, 4); + __ sub(temp1, temp1, temp0); + __ add(temp1, temp1, s1, ext::uxth); + + __ lsr(temp0, temp1, 16); + __ lsl(s1, temp0, 4); + __ sub(s1, s1, temp0); + __ add(s1, s1, temp1, ext:: uxth); + + __ subs(temp0, s1, base); + __ csel(s1, temp0, s1, Assembler::HS); + + // s2 = s2 % BASE + __ lsr(temp0, s2, 16); + __ lsl(temp1, temp0, 4); + __ sub(temp1, temp1, temp0); + __ add(temp1, temp1, s2, ext::uxth); + + __ lsr(temp0, temp1, 16); + __ lsl(s2, temp0, 4); + __ sub(s2, s2, temp0); + __ add(s2, s2, temp1, ext:: uxth); + + __ subs(temp0, s2, base); + __ csel(s2, temp0, s2, Assembler::HS); + + __ subs(len, len, nmax); + __ sub(count, nmax, 16); + __ br(Assembler::HS, L_nmax_loop); + + __ bind(L_by16); + __ adds(len, len, count); + __ br(Assembler::LO, L_by1); + + __ bind(L_by16_loop); + + __ ldp(temp0, temp1, Address(__ post(buff, 16))); + + __ add(s1, s1, temp0, ext::uxtb); + __ ubfx(temp2, temp0, 8, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp0, 16, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp0, 24, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp0, 32, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp0, 40, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp0, 48, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ add(s2, s2, s1); + __ add(s1, s1, temp0, Assembler::LSR, 56); + __ add(s2, s2, s1); + + __ add(s1, s1, temp1, ext::uxtb); + __ ubfx(temp2, temp1, 8, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp1, 16, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp1, 24, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp1, 32, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp1, 40, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ ubfx(temp2, temp1, 48, 8); + __ add(s2, s2, s1); + __ add(s1, s1, temp2); + __ add(s2, s2, s1); + __ add(s1, s1, temp1, Assembler::LSR, 56); + __ add(s2, s2, s1); + + __ subs(len, len, 16); + __ br(Assembler::HS, L_by16_loop); + + __ bind(L_by1); + __ adds(len, len, 15); + __ br(Assembler::LO, L_do_mod); + + __ bind(L_by1_loop); + __ ldrb(temp0, Address(__ post(buff, 1))); + __ add(s1, temp0, s1); + __ add(s2, s2, s1); + __ subs(len, len, 1); + __ br(Assembler::HS, L_by1_loop); + + __ bind(L_do_mod); + // s1 = s1 % BASE + __ lsr(temp0, s1, 16); + __ lsl(temp1, temp0, 4); + __ sub(temp1, temp1, temp0); + __ add(temp1, temp1, s1, ext::uxth); + + __ lsr(temp0, temp1, 16); + __ lsl(s1, temp0, 4); + __ sub(s1, s1, temp0); + __ add(s1, s1, temp1, ext:: uxth); + + __ subs(temp0, s1, base); + __ csel(s1, temp0, s1, Assembler::HS); + + // s2 = s2 % BASE + __ lsr(temp0, s2, 16); + __ lsl(temp1, temp0, 4); + __ sub(temp1, temp1, temp0); + __ add(temp1, temp1, s2, ext::uxth); + + __ lsr(temp0, temp1, 16); + __ lsl(s2, temp0, 4); + __ sub(s2, s2, temp0); + __ add(s2, s2, temp1, ext:: uxth); + + __ subs(temp0, s2, base); + __ csel(s2, temp0, s2, Assembler::HS); + + // Combine lower bits and higher bits + 
__ bind(L_combine); + __ orr(s1, s1, s2, Assembler::LSL, 16); // adler = s1 | (s2 << 16) + + __ ret(lr); + + return start; + } + /** * Arguments: * @@ -3613,6 +3884,11 @@ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); } + // generate Adler32 intrinsics code + if (UseAdler32Intrinsics) { + StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32(); + } + // Safefetch stubs. generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc,
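The stub above reduces s1 and s2 modulo the Adler-32 base (65521) without a divide: since 2^16 = 65536 is congruent to 15 (mod 65521), each lsr/lsl/sub/add group rewrites a sum x as (x >> 16) * 15 + (x & 0xffff) (the lsl-by-4-then-sub computes the * 15), and two such rounds leave a value below 2 * 65521, so the final subs/csel pair finishes the reduction with one conditional subtract. A minimal standalone C++ sketch of the same arithmetic, not part of the changeset, useful only for checking the identity:

    #include <cassert>
    #include <cstdint>

    // Reduce x modulo the Adler-32 base without a divide, mirroring the stub's
    // lsr/lsl/sub/add/csel sequence: x = (x >> 16) * 65536 + (x & 0xffff), and
    // 65536 is congruent to 15 mod 65521, so the high half folds in as * 15.
    static uint64_t reduce_mod_base(uint64_t x) {
      const uint64_t BASE = 65521;
      x = (x >> 16) * 15 + (x & 0xffff);  // first round: below 2^20 for 32-bit input
      x = (x >> 16) * 15 + (x & 0xffff);  // second round: below 2 * BASE
      if (x >= BASE) {                    // the subs/csel pair in the stub
        x -= BASE;
      }
      return x;
    }

    int main() {
      // The loop above accumulates at most NMAX bytes before reducing, so the
      // running sums always fit in 32 bits, the range this identity covers.
      for (uint64_t x = 0; x < (1ull << 32); x += 98765) {
        assert(reduce_mod_base(x) == x % 65521);
      }
      return 0;
    }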
--- a/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -535,7 +535,7 @@ // r0 // c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) // rscratch1, rscratch2 (scratch regs) -void InterpreterGenerator::lock_method(void) { +void TemplateInterpreterGenerator::lock_method() { // synchronize method const Address access_flags(rmethod, Method::access_flags_offset()); const Address monitor_block_top( @@ -721,8 +721,7 @@ // generate a vanilla interpreter entry as the slow path __ bind(slow_path); - (void) generate_normal_entry(false); - + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); return entry; } #endif // INCLUDE_ALL_GCS @@ -779,12 +778,10 @@ // generate a vanilla native entry as the slow path __ bind(slow_path); - - (void) generate_native_entry(false); - + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); return entry; } - return generate_native_entry(false); + return NULL; } /** @@ -841,12 +838,10 @@ // generate a vanilla native entry as the slow path __ bind(slow_path); - - (void) generate_native_entry(false); - + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); return entry; } - return generate_native_entry(false); + return NULL; } void InterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -178,9 +178,8 @@ warning("UseCRC32 specified, but not supported on this CPU"); } - if (UseAdler32Intrinsics) { - warning("Adler32Intrinsics not available on this CPU."); - FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); + if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { + FLAG_SET_DEFAULT(UseAdler32Intrinsics, true); } if (auxv & HWCAP_AES) {
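With the stub in place, the aarch64 feature code now turns UseAdler32Intrinsics on ergonomically, but only when the flag is still at its default, so an explicit -XX:-UseAdler32Intrinsics on the command line is still honored. A standalone sketch of that guard, with simplified stand-ins for the HotSpot flag macros rather than the real Flag implementation:

    #include <iostream>

    // Simplified stand-ins for the HotSpot flag machinery, only to show the
    // "change the default, respect an explicit setting" pattern used above.
    struct BoolFlag {
      bool value;
      bool set_on_command_line;
    };

    static BoolFlag UseAdler32Intrinsics = { false, false };

    #define FLAG_IS_DEFAULT(name)      (!(name).set_on_command_line)
    #define FLAG_SET_DEFAULT(name, v)  ((name).value = (v))

    int main() {
      if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
        FLAG_SET_DEFAULT(UseAdler32Intrinsics, true);  // ergonomic default on this CPU
      }
      std::cout << "UseAdler32Intrinsics = " << UseAdler32Intrinsics.value << std::endl;
      return 0;
    }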
--- a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -60,6 +60,7 @@ define_pd_global(bool, OptoPeephole, false); define_pd_global(bool, UseCISCSpill, false); define_pd_global(bool, OptoBundling, false); +define_pd_global(bool, OptoRegScheduling, false); // GL: // Detected a problem with unscaled compressed oops and // narrow_oop_use_complex_address() == false.
--- a/hotspot/src/cpu/ppc/vm/compiledIC_ppc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/compiledIC_ppc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -94,10 +94,12 @@ const int IC_pos_in_java_to_interp_stub = 8; #define __ _masm. -address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) { +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark/* = NULL*/) { #ifdef COMPILER2 - // Get the mark within main instrs section which is set to the address of the call. - address call_addr = cbuf.insts_mark(); + if (mark == NULL) { + // Get the mark within main instrs section which is set to the address of the call. + mark = cbuf.insts_mark(); + } // Note that the code buffer's insts_mark is always relative to insts. // That's why we must use the macroassembler to generate a stub. @@ -117,7 +119,7 @@ // Create a static stub relocation which relates this stub // with the call instruction at insts_call_instruction_offset in the // instructions code-section. - __ relocate(static_stub_Relocation::spec(call_addr)); + __ relocate(static_stub_Relocation::spec(mark)); const int stub_start_offset = __ offset(); // Now, create the stub's code:
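The new mark parameter (defaulting to NULL) lets a caller hand emit_to_interp_stub an explicit call-site address instead of relying on the code buffer's most recent insts_mark, which matters when the stub is emitted at a different time than the call it belongs to. A standalone sketch of the default-argument pattern, with a hypothetical stand-in for CodeBuffer (not the HotSpot class):

    #include <cstdio>

    // Hypothetical stand-in for CodeBuffer: only the part of the interface the
    // pattern needs, i.e. "remember the address of the most recently emitted call".
    struct FakeCodeBuffer {
      const char* last_mark;
      const char* insts_mark() const { return last_mark; }
    };

    // If the caller does not supply a mark, fall back to the buffer's current one;
    // otherwise relocate the stub against the caller-supplied call site.
    static void emit_stub(FakeCodeBuffer& cbuf, const char* mark = nullptr) {
      if (mark == nullptr) {
        mark = cbuf.insts_mark();
      }
      std::printf("stub relocated against call site: %s\n", mark);
    }

    int main() {
      FakeCodeBuffer cbuf = { "call@0x40" };
      emit_stub(cbuf);               // previous behaviour: use the buffer's mark
      emit_stub(cbuf, "call@0x80");  // new option: explicit call site
      return 0;
    }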
--- a/hotspot/src/cpu/ppc/vm/globals_ppc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/globals_ppc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -41,6 +41,18 @@ define_pd_global(bool, TrapBasedNullChecks, true); define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast. +#define DEFAULT_STACK_YELLOW_PAGES (6) +#define DEFAULT_STACK_RED_PAGES (1) +#define DEFAULT_STACK_SHADOW_PAGES (6 DEBUG_ONLY(+2)) + +#define MIN_STACK_YELLOW_PAGES (1) +#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES +#define MIN_STACK_SHADOW_PAGES (1) + +define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); +define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); +define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); + // Use large code-entry alignment. define_pd_global(intx, CodeEntryAlignment, 128); define_pd_global(intx, OptoLoopAlignment, 16);
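The PPC port now defines its yellow/red/shadow stack-zone sizes through DEFAULT_/MIN_ macros rather than bare define_pd_global calls; DEBUG_ONLY(+2) grows the shadow zone only in debug builds, where the extra checking code needs more stack. A standalone sketch of how that expression expands, using a simplified DEBUG_ONLY stand-in in the spirit of HotSpot's utilities/macros.hpp:

    #include <iostream>

    // Simplified stand-in for HotSpot's DEBUG_ONLY: expands to its argument in
    // debug (ASSERT) builds and to nothing in product builds.
    #ifdef ASSERT
    #define DEBUG_ONLY(code) code
    #else
    #define DEBUG_ONLY(code)
    #endif

    #define DEFAULT_STACK_SHADOW_PAGES (6 DEBUG_ONLY(+2))

    int main() {
      // Product build: (6); debug build (compile with -DASSERT): (6 +2) == 8.
      std::cout << "shadow pages: " << DEFAULT_STACK_SHADOW_PAGES << std::endl;
      return 0;
    }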
--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -46,7 +46,7 @@ MacroAssembler::null_check_throw(a, offset, temp_reg, exception_entry); } -void InterpreterMacroAssembler::branch_to_entry(address entry, Register Rscratch) { +void InterpreterMacroAssembler::jump_to_entry(address entry, Register Rscratch) { assert(entry, "Entry must have been generated by now"); if (is_within_range_of_b(entry, pc())) { b(entry);
--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -39,7 +39,7 @@ void null_check_throw(Register a, int offset, Register temp_reg); - void branch_to_entry(address entry, Register Rscratch); + void jump_to_entry(address entry, Register Rscratch); // Handy address generation macros. #define thread_(field_name) in_bytes(JavaThread::field_name ## _offset()), R16_thread
--- a/hotspot/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -31,12 +31,12 @@ private: address generate_abstract_entry(void); - address generate_jump_to_normal_entry(void); - address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); } - address generate_empty_entry(void) { return generate_jump_to_normal_entry(); } + address generate_accessor_entry(void) { return NULL; } + address generate_empty_entry(void) { return NULL; } address generate_Reference_get_entry(void); address generate_CRC32_update_entry(); address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); + address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } #endif // CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP
--- a/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -427,18 +427,6 @@ return entry; } -// Call an accessor method (assuming it is resolved, otherwise drop into -// vanilla (slow path) entry. -address InterpreterGenerator::generate_jump_to_normal_entry(void) { - address entry = __ pc(); - address normal_entry = Interpreter::entry_for_kind(Interpreter::zerolocals); - assert(normal_entry != NULL, "should already be generated."); - __ branch_to_entry(normal_entry, R11_scratch1); - __ flush(); - - return entry; -} - // Abstract method entry. // address InterpreterGenerator::generate_abstract_entry(void) { @@ -529,13 +517,13 @@ // regular method entry code to generate the NPE. // - address entry = __ pc(); + if (UseG1GC) { + address entry = __ pc(); - const int referent_offset = java_lang_ref_Reference::referent_offset; - guarantee(referent_offset > 0, "referent offset not initialized"); + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); - if (UseG1GC) { - Label slow_path; + Label slow_path; // Debugging not possible, so can't use __ skip_if_jvmti_mode(slow_path, GR31_SCRATCH); @@ -577,13 +565,11 @@ // Generate regular method entry. __ bind(slow_path); - __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R11_scratch1); - __ flush(); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R11_scratch1); + return entry; + } - return entry; - } else { - return generate_jump_to_normal_entry(); - } + return NULL; } void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
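Throughout these interpreter changes the pattern is the same: a specialized entry generator either emits real code whose slow path ends in jump_to_entry(...), or it returns NULL, in which case the caller falls back to the normal (zerolocals or native) entry instead of emitting a one-instruction trampoline per method kind. A standalone sketch of that selection logic, with hypothetical names rather than the actual AbstractInterpreter code:

    #include <cstdio>

    typedef const char* address;  // stand-in for HotSpot's address type

    // Hypothetical entry generators: a specialized one that may decline
    // (return NULL) and the vanilla entry that is always available.
    static address generate_Reference_get_entry(bool use_g1) {
      return use_g1 ? "Reference.get fast path" : nullptr;
    }
    static address normal_entry = "zerolocals entry";

    // The caller keeps the specialized entry only if one was generated;
    // otherwise it reuses the normal entry rather than a jump trampoline.
    static address entry_for(bool use_g1) {
      address e = generate_Reference_get_entry(use_g1);
      return e != nullptr ? e : normal_entry;
    }

    int main() {
      std::printf("with G1:    %s\n", entry_for(true));
      std::printf("without G1: %s\n", entry_for(false));
      return 0;
    }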
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "jvmci/jvmciCodeInstaller.hpp" +#include "jvmci/jvmciRuntime.hpp" +#include "jvmci/jvmciCompilerToVM.hpp" +#include "jvmci/jvmciJavaClasses.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_ppc.inline.hpp" + +jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) { + Unimplemented(); + return 0; +} + +void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) { + Unimplemented(); +} + +void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_poll(address pc, jint mark) { + Unimplemented(); +} + +// convert JVMCI register indices (as used in oop maps) to HotSpot registers +VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) { + return NULL; +} + +bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { + return false; +}
--- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -594,13 +594,6 @@ "can't identify emitted call"); } else { // variant 1: -#if defined(ABI_ELFv2) - nop(); - calculate_address_from_global_toc(R12, dest, true, true, false); - mtctr(R12); - nop(); - nop(); -#else mr(R0, R11); // spill R11 -> R0. // Load the destination address into CTR, @@ -610,7 +603,6 @@ mtctr(R11); mr(R11, R0); // spill R11 <- R0. nop(); -#endif // do the call/jump if (link) { @@ -4292,7 +4284,7 @@ static void stop_on_request(int tp, const char* msg) { tty->print("PPC assembly code requires stop: (%s) %s\n", stop_types[tp%/*stop_end*/4], msg); - guarantee(false, err_msg("PPC assembly code requires stop: %s", msg)); + guarantee(false, "PPC assembly code requires stop: %s", msg); } // Call a C-function that prints output.
--- a/hotspot/src/cpu/ppc/vm/methodHandles_ppc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/methodHandles_ppc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -60,7 +60,7 @@ #ifdef ASSERT static int check_nonzero(const char* xname, int x) { - assert(x != 0, err_msg("%s should be nonzero", xname)); + assert(x != 0, "%s should be nonzero", xname); return x; } #define NONZERO(x) check_nonzero(#x, x) @@ -434,7 +434,7 @@ } default: - fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); break; }
--- a/hotspot/src/cpu/ppc/vm/nativeInst_ppc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/nativeInst_ppc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -149,7 +149,7 @@ if (!NativeCall::is_call_at(addr)) { tty->print_cr("not a NativeCall at " PTR_FORMAT, p2i(addr)); // TODO: PPC port: Disassembler::decode(addr - 20, addr + 20, tty); - fatal(err_msg("not a NativeCall at " PTR_FORMAT, p2i(addr))); + fatal("not a NativeCall at " PTR_FORMAT, p2i(addr)); } } #endif // ASSERT @@ -162,7 +162,7 @@ if (!NativeFarCall::is_far_call_at(addr)) { tty->print_cr("not a NativeFarCall at " PTR_FORMAT, p2i(addr)); // TODO: PPC port: Disassembler::decode(addr, 20, 20, tty); - fatal(err_msg("not a NativeFarCall at " PTR_FORMAT, p2i(addr))); + fatal("not a NativeFarCall at " PTR_FORMAT, p2i(addr)); } } #endif // ASSERT @@ -308,7 +308,7 @@ ! MacroAssembler::is_bl(*((int*) addr))) { tty->print_cr("not a NativeMovConstReg at " PTR_FORMAT, p2i(addr)); // TODO: PPC port: Disassembler::decode(addr, 20, 20, tty); - fatal(err_msg("not a NativeMovConstReg at " PTR_FORMAT, p2i(addr))); + fatal("not a NativeMovConstReg at " PTR_FORMAT, p2i(addr)); } } } @@ -346,7 +346,7 @@ if (!NativeJump::is_jump_at(addr)) { tty->print_cr("not a NativeJump at " PTR_FORMAT, p2i(addr)); // TODO: PPC port: Disassembler::decode(addr, 20, 20, tty); - fatal(err_msg("not a NativeJump at " PTR_FORMAT, p2i(addr))); + fatal("not a NativeJump at " PTR_FORMAT, p2i(addr)); } } #endif // ASSERT
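The repeated removal of err_msg/err_msg_res in this changeset reflects assert, guarantee, and fatal now accepting printf-style format strings and arguments directly, so the intermediate formatting call (and its resource allocation) is no longer needed. A standalone sketch of a variadic check macro in that style, with simplified stand-ins rather than HotSpot's debug.hpp implementation:

    #include <cstdarg>
    #include <cstdio>
    #include <cstdlib>

    // Simplified stand-in for the reporting path behind guarantee/fatal:
    // the message is formatted only when the check actually fails.
    static void report_vm_error(const char* file, int line, const char* fmt, ...) {
      va_list ap;
      va_start(ap, fmt);
      std::fprintf(stderr, "# Error at %s:%d: ", file, line);
      std::vfprintf(stderr, fmt, ap);
      std::fprintf(stderr, "\n");
      va_end(ap);
      std::abort();
    }

    #define guarantee(cond, ...)                                          \
      do {                                                                \
        if (!(cond)) report_vm_error(__FILE__, __LINE__, __VA_ARGS__);    \
      } while (0)

    int main() {
      const char* msg = "example";
      // Previously this needed guarantee(cond, err_msg("...: %s", msg));
      // now the format arguments go straight through.
      guarantee(msg != nullptr, "PPC assembly code requires stop: %s", msg);
      std::printf("check passed\n");
      return 0;
    }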
--- a/hotspot/src/cpu/ppc/vm/ppc.ad Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/ppc.ad Wed Oct 21 16:38:48 2015 -0400 @@ -2064,6 +2064,10 @@ return true; // Per default match rules are supported. } +const int Matcher::float_pressure(int default_pressure_threshold) { + return default_pressure_threshold; +} + int Matcher::regnum_to_fpu_offset(int regnum) { // No user for this method? Unimplemented();
--- a/hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -125,8 +125,5 @@ void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { } -void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { -} - void metadata_Relocation::pd_fix_value(address x) { }
--- a/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -475,9 +475,8 @@ // Is vector's size (in bytes) bigger than a size saved by default? bool SharedRuntime::is_wide_vector(int size) { - ResourceMark rm; // Note, MaxVectorSize == 8 on PPC64. - assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size)); + assert(size <= 8, "%d bytes vectors are not supported", size); return size > 8; } #ifdef COMPILER2 @@ -957,11 +956,11 @@ return c2i_entrypoint; } -static void gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs) { +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { // Load method's entry-point from method. __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method); @@ -1631,7 +1630,7 @@ } else if (iid == vmIntrinsics::_invokeBasic) { has_receiver = true; } else { - fatal(err_msg_res("unexpected intrinsic id %d", iid)); + fatal("unexpected intrinsic id %d", iid); } if (member_reg != noreg) {
--- a/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -841,7 +841,7 @@ // Only called by MacroAssembler::verify_oop static void verify_oop_helper(const char* message, oop o) { if (!o->is_oop_or_null()) { - fatal(message); + fatal("%s", message); } ++ StubRoutines::_verify_oop_count; }
--- a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -620,7 +620,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { if (!math_entry_available(kind)) { NOT_PRODUCT(__ should_not_reach_here();) - return Interpreter::entry_for_kind(Interpreter::zerolocals); + return NULL; } address entry = __ pc(); @@ -1126,14 +1126,6 @@ generate_fixed_frame(false, Rsize_of_parameters, Rsize_of_locals); -#ifdef FAST_DISPATCH - __ unimplemented("Fast dispatch in generate_normal_entry"); -#if 0 - __ set((intptr_t)Interpreter::dispatch_table(), IdispatchTables); - // Set bytecode dispatch table base. -#endif -#endif - // -------------------------------------------------------------------------- // Zero out non-parameter locals. // Note: *Always* zero out non-parameter locals as Sparc does. It's not @@ -1266,9 +1258,8 @@ * int java.util.zip.CRC32.update(int crc, int b) */ address InterpreterGenerator::generate_CRC32_update_entry() { - address start = __ pc(); // Remember stub start address (is rtn value). - if (UseCRC32Intrinsics) { + address start = __ pc(); // Remember stub start address (is rtn value). Label slow_path; // Safepoint check @@ -1313,11 +1304,11 @@ // Generate a vanilla native entry as the slow path. BLOCK_COMMENT("} CRC32_update"); BIND(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1); + return start; } - (void) generate_native_entry(false); - - return start; + return NULL; } // CRC32 Intrinsics. @@ -1327,9 +1318,8 @@ * int java.util.zip.CRC32.updateByteBuffer(int crc, long* buf, int off, int len) */ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { - address start = __ pc(); // Remember stub start address (is rtn value). - if (UseCRC32Intrinsics) { + address start = __ pc(); // Remember stub start address (is rtn value). Label slow_path; // Safepoint check @@ -1406,11 +1396,11 @@ // Generate a vanilla native entry as the slow path. BLOCK_COMMENT("} CRC32_updateBytes(Buffer)"); BIND(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1); + return start; } - (void) generate_native_entry(false); - - return start; + return NULL; } // These should never be compiled since the interpreter will prefer
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -389,7 +389,7 @@ static void assert_signed_range(intptr_t x, int nbits) { assert(nbits == 32 || (-(1 << nbits-1) <= x && x < ( 1 << nbits-1)), - err_msg("value out of range: x=" INTPTR_FORMAT ", nbits=%d", x, nbits)); + "value out of range: x=" INTPTR_FORMAT ", nbits=%d", x, nbits); } static void assert_signed_word_disp_range(intptr_t x, int nbits) {
--- a/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -64,6 +64,7 @@ define_pd_global(bool, UseCISCSpill, false); define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoScheduling, true); +define_pd_global(bool, OptoRegScheduling, false); #ifdef _LP64 // We need to make sure that all generated code is within
--- a/hotspot/src/cpu/sparc/vm/compiledIC_sparc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/compiledIC_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -53,14 +53,15 @@ // ---------------------------------------------------------------------------- #define __ _masm. -address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) { -#ifdef COMPILER2 +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { // Stub is fixed up when the corresponding call is converted from calling // compiled code to calling interpreted code. // set (empty), G5 // jmp -1 - address mark = cbuf.insts_mark(); // Get mark within main instrs section. + if (mark == NULL) { + mark = cbuf.insts_mark(); // Get mark within main instrs section. + } MacroAssembler _masm(&cbuf); @@ -80,12 +81,11 @@ __ delayed()->nop(); + assert(__ pc() - base <= to_interp_stub_size(), "wrong stub size"); + // Update current stubs pointer and restore code_end. __ end_a_stub(); return base; -#else - ShouldNotReachHere(); -#endif } #undef __
--- a/hotspot/src/cpu/sparc/vm/cppInterpreterGenerator_sparc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/cppInterpreterGenerator_sparc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -31,6 +31,7 @@ void generate_more_monitors(); void generate_deopt_handling(); + void lock_method(void); void adjust_callers_stack(Register args); void generate_compute_interpreter_state(const Register state, const Register prev_state,
--- a/hotspot/src/cpu/sparc/vm/cppInterpreter_sparc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/cppInterpreter_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -468,7 +468,7 @@ // If G1 is not enabled then attempt to go through the accessor entry point // Reference.get is an accessor - return generate_jump_to_normal_entry(); + return NULL; } // @@ -1164,7 +1164,7 @@ } // Find preallocated monitor and lock method (C++ interpreter) // -void InterpreterGenerator::lock_method(void) { +void CppInterpreterGenerator::lock_method() { // Lock the current method. // Destroys registers L2_scratch, L3_scratch, O0 //
--- a/hotspot/src/cpu/sparc/vm/globals_sparc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/globals_sparc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -52,19 +52,27 @@ define_pd_global(intx, InlineFrequencyCount, 50); // we can use more inlining on the SPARC define_pd_global(intx, InlineSmallCode, 1500); +#define DEFAULT_STACK_YELLOW_PAGES (2) +#define DEFAULT_STACK_RED_PAGES (1) + #ifdef _LP64 // Stack slots are 2X larger in LP64 than in the 32 bit VM. define_pd_global(intx, ThreadStackSize, 1024); define_pd_global(intx, VMThreadStackSize, 1024); -define_pd_global(intx, StackShadowPages, 10 DEBUG_ONLY(+1)); +#define DEFAULT_STACK_SHADOW_PAGES (10 DEBUG_ONLY(+1)) #else define_pd_global(intx, ThreadStackSize, 512); define_pd_global(intx, VMThreadStackSize, 512); -define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1)); -#endif +#define DEFAULT_STACK_SHADOW_PAGES (3 DEBUG_ONLY(+1)) +#endif // _LP64 -define_pd_global(intx, StackYellowPages, 2); -define_pd_global(intx, StackRedPages, 1); +#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES +#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES +#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES + +define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); +define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); +define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); define_pd_global(bool, RewriteBytecodes, true); define_pd_global(bool, RewriteFrequentPairs, true); @@ -82,6 +90,7 @@ \ product(intx, UseVIS, 99, \ "Highest supported VIS instructions set on Sparc") \ + range(0, 99) \ \ product(bool, UseCBCond, false, \ "Use compare and branch instruction on SPARC") \ @@ -91,12 +100,14 @@ \ product(intx, BlockZeroingLowLimit, 2048, \ "Minimum size in bytes when block zeroing will be used") \ + range(1, max_jint) \ \ product(bool, UseBlockCopy, false, \ "Use special cpu instructions for block copy") \ \ product(intx, BlockCopyLowLimit, 2048, \ "Minimum size in bytes when block copy will be used") \ + range(1, max_jint) \ \ develop(bool, UseV8InstrsOnly, false, \ "Use SPARC-V8 Compliant instruction subset") \ @@ -108,9 +119,11 @@ "Do not use swap instructions, but only CAS (in a loop) on SPARC")\ \ product(uintx, ArraycopySrcPrefetchDistance, 0, \ - "Distance to prefetch source array in arracopy") \ + "Distance to prefetch source array in arraycopy") \ + constraint(ArraycopySrcPrefetchDistanceConstraintFunc, AfterErgo) \ \ product(uintx, ArraycopyDstPrefetchDistance, 0, \ - "Distance to prefetch destination array in arracopy") \ + "Distance to prefetch destination array in arraycopy") \ + constraint(ArraycopyDstPrefetchDistanceConstraintFunc, AfterErgo) \ #endif // CPU_SPARC_VM_GLOBALS_SPARC_HPP
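The SPARC flag table now attaches range(...) and constraint(...) clauses to its product flags as part of the command-line flag validation work: range gives a closed interval checked whenever the flag is set, and constraint names a function run at the stated phase (AfterErgo here) for checks that depend on other VM state. A standalone sketch of range-style validation, with a hypothetical helper rather than the real flag range machinery:

    #include <cstdint>
    #include <cstdio>

    // Hypothetical range check in the spirit of the range(min, max) clause above:
    // reject a value outside the closed interval before the VM starts using it.
    static bool check_intx_range(const char* name, int64_t value,
                                 int64_t min, int64_t max) {
      if (value < min || value > max) {
        std::fprintf(stderr,
                     "intx %s=%lld is outside the allowed range [ %lld ... %lld ]\n",
                     name, (long long)value, (long long)min, (long long)max);
        return false;
      }
      return true;
    }

    int main() {
      // BlockZeroingLowLimit carries range(1, max_jint) in the table above.
      check_intx_range("BlockZeroingLowLimit", 2048, 1, INT32_MAX);  // accepted
      check_intx_range("BlockZeroingLowLimit", 0, 1, INT32_MAX);     // rejected
      return 0;
    }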
--- a/hotspot/src/cpu/sparc/vm/interp_masm_sparc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/interp_masm_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -59,6 +59,13 @@ #endif // CC_INTERP +void InterpreterMacroAssembler::jump_to_entry(address entry) { + assert(entry, "Entry must have been generated by now"); + AddressLiteral al(entry); + jump_to(al, G3_scratch); + delayed()->nop(); +} + void InterpreterMacroAssembler::compute_extra_locals_size_in_bytes(Register args_size, Register locals_size, Register delta) { // Note: this algorithm is also used by C1's OSR entry sequence. // Any changes should also be applied to CodeEmitter::emit_osr_entry(). @@ -1643,26 +1650,73 @@ bind(skip_receiver_profile); // The method data pointer needs to be updated to reflect the new target. +#if INCLUDE_JVMCI + if (MethodProfileWidth == 0) { + update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size())); + } +#else update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size())); - bind (profile_continue); +#endif + bind(profile_continue); } } -void InterpreterMacroAssembler::record_klass_in_profile_helper( - Register receiver, Register scratch, - int start_row, Label& done, bool is_virtual_call) { +#if INCLUDE_JVMCI +void InterpreterMacroAssembler::profile_called_method(Register method, Register scratch) { + assert_different_registers(method, scratch); + if (ProfileInterpreter && MethodProfileWidth > 0) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(profile_continue); + + Label done; + record_item_in_profile_helper(method, scratch, 0, done, MethodProfileWidth, + &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); + bind(done); + + update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size())); + bind(profile_continue); + } +} +#endif // INCLUDE_JVMCI + +void InterpreterMacroAssembler::record_klass_in_profile_helper(Register receiver, Register scratch, + Label& done, bool is_virtual_call) { if (TypeProfileWidth == 0) { if (is_virtual_call) { increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch); } - return; +#if INCLUDE_JVMCI + else if (EnableJVMCI) { + increment_mdp_data_at(in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()), scratch); + } +#endif + } else { + int non_profiled_offset = -1; + if (is_virtual_call) { + non_profiled_offset = in_bytes(CounterData::count_offset()); + } +#if INCLUDE_JVMCI + else if (EnableJVMCI) { + non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); + } +#endif + + record_item_in_profile_helper(receiver, scratch, 0, done, TypeProfileWidth, + &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); } - - int last_row = VirtualCallData::row_limit() - 1; +} + +void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, + Register scratch, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset) { + int last_row = total_rows - 1; assert(start_row <= last_row, "must be work left to do"); - // Test this row for both the receiver and for null. + // Test this row for both the item and for null. // Take any of three different outcomes: - // 1. found receiver => increment count and goto done + // 1. found item => increment count and goto done // 2. 
found null => keep looking for case 1, maybe allocate this cell // 3. found something else => keep looking for cases 1 and 2 // Case 3 is handled by a recursive call. @@ -1670,28 +1724,28 @@ Label next_test; bool test_for_null_also = (row == start_row); - // See if the receiver is receiver[n]. - int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); - test_mdp_data_at(recvr_offset, receiver, next_test, scratch); + // See if the item is item[n]. + int item_offset = in_bytes(item_offset_fn(row)); + test_mdp_data_at(item_offset, item, next_test, scratch); // delayed()->tst(scratch); - // The receiver is receiver[n]. Increment count[n]. - int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); + // The receiver is item[n]. Increment count[n]. + int count_offset = in_bytes(item_count_offset_fn(row)); increment_mdp_data_at(count_offset, scratch); ba_short(done); bind(next_test); if (test_for_null_also) { Label found_null; - // Failed the equality check on receiver[n]... Test for null. + // Failed the equality check on item[n]... Test for null. if (start_row == last_row) { // The only thing left to do is handle the null case. - if (is_virtual_call) { + if (non_profiled_offset >= 0) { brx(Assembler::zero, false, Assembler::pn, found_null); delayed()->nop(); - // Receiver did not match any saved receiver and there is no empty row for it. + // Item did not match any saved item and there is no empty row for it. // Increment total counter to indicate polymorphic case. - increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch); + increment_mdp_data_at(non_profiled_offset, scratch); ba_short(done); bind(found_null); } else { @@ -1705,21 +1759,22 @@ delayed()->nop(); // Put all the "Case 3" tests here. - record_klass_in_profile_helper(receiver, scratch, start_row + 1, done, is_virtual_call); - - // Found a null. Keep searching for a matching receiver, + record_item_in_profile_helper(item, scratch, start_row + 1, done, total_rows, + item_offset_fn, item_count_offset_fn, non_profiled_offset); + + // Found a null. Keep searching for a matching item, // but remember that this is an empty (unused) slot. bind(found_null); } } - // In the fall-through case, we found no matching receiver, but we - // observed the receiver[start_row] is NULL. - - // Fill in the receiver field and increment the count. - int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); - set_mdp_data_at(recvr_offset, receiver); - int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); + // In the fall-through case, we found no matching item, but we + // observed the item[start_row] is NULL. + + // Fill in the item field and increment the count. + int item_offset = in_bytes(item_offset_fn(start_row)); + set_mdp_data_at(item_offset, item); + int count_offset = in_bytes(item_count_offset_fn(start_row)); mov(DataLayout::counter_increment, scratch); set_mdp_data_at(count_offset, scratch); if (start_row > 0) { @@ -1732,7 +1787,7 @@ assert(ProfileInterpreter, "must be profiling"); Label done; - record_klass_in_profile_helper(receiver, scratch, 0, done, is_virtual_call); + record_klass_in_profile_helper(receiver, scratch, done, is_virtual_call); bind (done); } @@ -1788,7 +1843,7 @@ // The method data pointer needs to be updated. 
int mdp_delta = in_bytes(BitData::bit_data_size()); if (TypeProfileCasts) { - mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + mdp_delta = in_bytes(ReceiverTypeData::receiver_type_data_size()); } update_mdp_by_constant(mdp_delta); @@ -1806,7 +1861,7 @@ int mdp_delta = in_bytes(BitData::bit_data_size()); if (TypeProfileCasts) { - mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + mdp_delta = in_bytes(ReceiverTypeData::receiver_type_data_size()); // Record the object type. record_klass_in_profile(klass, scratch, false); @@ -1828,7 +1883,7 @@ int count_offset = in_bytes(CounterData::count_offset()); // Back up the address, since we have already bumped the mdp. - count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + count_offset -= in_bytes(ReceiverTypeData::receiver_type_data_size()); // *Decrement* the counter. We expect to see zero or small negatives. increment_mdp_data_at(count_offset, scratch, true);
--- a/hotspot/src/cpu/sparc/vm/interp_masm_sparc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/interp_masm_sparc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -30,6 +30,8 @@ // This file specializes the assember with interpreter-specific macros +typedef ByteSize (*OffsetFunction)(uint); + REGISTER_DECLARATION( Register, Otos_i , O0); // tos for ints, etc REGISTER_DECLARATION( Register, Otos_l , O0); // for longs REGISTER_DECLARATION( Register, Otos_l1, O0); // for 1st part of longs @@ -80,6 +82,8 @@ InterpreterMacroAssembler(CodeBuffer* c) : MacroAssembler(c) {} + void jump_to_entry(address entry); + #ifndef CC_INTERP virtual void load_earlyret_value(TosState state); @@ -299,7 +303,11 @@ void record_klass_in_profile(Register receiver, Register scratch, bool is_virtual_call); void record_klass_in_profile_helper(Register receiver, Register scratch, - int start_row, Label& done, bool is_virtual_call); + Label& done, bool is_virtual_call); + void record_item_in_profile_helper(Register item, + Register scratch, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset); void update_mdp_by_offset(int offset_of_disp, Register scratch); void update_mdp_by_offset(Register reg, int offset_of_disp, @@ -312,6 +320,7 @@ void profile_call(Register scratch); void profile_final_call(Register scratch); void profile_virtual_call(Register receiver, Register scratch, bool receiver_can_be_null = false); + void profile_called_method(Register method, Register scratch) NOT_JVMCI_RETURN; void profile_ret(TosState state, Register return_bci, Register scratch); void profile_null_seen(Register scratch); void profile_typecheck(Register klass, Register scratch);
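record_klass_in_profile_helper is refactored into record_item_in_profile_helper so the same row-scanning code can profile either receiver klasses (TypeProfileWidth rows via VirtualCallData::receiver_offset / receiver_count_offset) or, with JVMCI, callee methods (MethodProfileWidth rows via method_offset / method_count_offset); the OffsetFunction typedef lets the per-row offsets be passed in as function pointers. A standalone C++ sketch of that parameterization, with hypothetical row layouts rather than the real MethodData classes:

    #include <cstdio>

    typedef int (*OffsetFunction)(unsigned row);  // same shape as the typedef above

    // Hypothetical per-row offsets for two different profile layouts.
    static int receiver_offset(unsigned row)       { return 16 + 16 * (int)row; }
    static int receiver_count_offset(unsigned row) { return 24 + 16 * (int)row; }
    static int method_offset(unsigned row)         { return 48 + 16 * (int)row; }
    static int method_count_offset(unsigned row)   { return 56 + 16 * (int)row; }

    // One generic walker serves both layouts: the caller chooses what an "item"
    // is purely by the offset functions and row count it passes in.
    static void record_item(const char* what, int total_rows,
                            OffsetFunction item_off, OffsetFunction count_off) {
      for (int row = 0; row < total_rows; row++) {
        std::printf("%s row %d: item@%d count@%d\n",
                    what, row, item_off((unsigned)row), count_off((unsigned)row));
      }
    }

    int main() {
      record_item("receiver", 2, receiver_offset, receiver_count_offset);
      record_item("method",   1, method_offset,   method_count_offset);
      return 0;
    }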
--- a/hotspot/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -34,11 +34,9 @@ address generate_abstract_entry(void); // there are no math intrinsics on sparc address generate_math_entry(AbstractInterpreter::MethodKind kind) { return NULL; } - address generate_jump_to_normal_entry(void); - address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); } - address generate_empty_entry(void) { return generate_jump_to_normal_entry(); } + address generate_accessor_entry(void) { return NULL; } + address generate_empty_entry(void) { return NULL; } address generate_Reference_get_entry(void); - void lock_method(void); void save_native_result(void); void restore_native_result(void); @@ -48,4 +46,5 @@ // Not supported address generate_CRC32_update_entry() { return NULL; } address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } + address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } #endif // CPU_SPARC_VM_INTERPRETERGENERATOR_SPARC_HPP
--- a/hotspot/src/cpu/sparc/vm/interpreter_sparc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/interpreter_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -241,15 +241,6 @@ // Various method entries -address InterpreterGenerator::generate_jump_to_normal_entry(void) { - address entry = __ pc(); - assert(Interpreter::entry_for_kind(Interpreter::zerolocals) != NULL, "should already be generated"); - AddressLiteral al(Interpreter::entry_for_kind(Interpreter::zerolocals)); - __ jump_to(al, G3_scratch); - __ delayed()->nop(); - return entry; -} - // Abstract method entry // Attempt to execute abstract method. Throw exception //
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "jvmci/jvmciCodeInstaller.hpp" +#include "jvmci/jvmciRuntime.hpp" +#include "jvmci/jvmciCompilerToVM.hpp" +#include "jvmci/jvmciJavaClasses.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_sparc.inline.hpp" + +jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) { + if (inst->is_call() || inst->is_jump()) { + return pc_offset + NativeCall::instruction_size; + } else if (inst->is_call_reg()) { + return pc_offset + NativeCallReg::instruction_size; + } else if (inst->is_sethi()) { + return pc_offset + NativeFarCall::instruction_size; + } else { + fatal("unsupported type of instruction for call site"); + return 0; + } +} + +void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) { + address pc = _instructions->start() + pc_offset; + Handle obj = HotSpotObjectConstantImpl::object(constant); + jobject value = JNIHandles::make_local(obj()); + if (HotSpotObjectConstantImpl::compressed(constant)) { +#ifdef _LP64 + int oop_index = _oop_recorder->find_index(value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + _instructions->relocate(pc, rspec, 1); +#else + fatal("compressed oop on 32bit"); +#endif + } else { + NativeMovConstReg* move = nativeMovConstReg_at(pc); + move->set_data((intptr_t) value); + + // We need two relocations: one on the sethi and one on the add. 
+ int oop_index = _oop_recorder->find_index(value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + _instructions->relocate(pc + NativeMovConstReg::sethi_offset, rspec); + _instructions->relocate(pc + NativeMovConstReg::add_offset, rspec); + } +} + +void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) { + address pc = _instructions->start() + pc_offset; + NativeInstruction* inst = nativeInstruction_at(pc); + NativeInstruction* inst1 = nativeInstruction_at(pc + 4); + if(inst->is_sethi() && inst1->is_nop()) { + address const_start = _constants->start(); + address dest = _constants->start() + data_offset; + if(_constants_size > 0) { + _instructions->relocate(pc + NativeMovConstReg::sethi_offset, internal_word_Relocation::spec((address) dest)); + _instructions->relocate(pc + NativeMovConstReg::add_offset, internal_word_Relocation::spec((address) dest)); + } + TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); + }else { + int const_size = align_size_up(_constants->end()-_constants->start(), CodeEntryAlignment); + NativeMovRegMem* load = nativeMovRegMem_at(pc); + // This offset must match with SPARCLoadConstantTableBaseOp.emitCode + load->set_offset(- (const_size - data_offset + Assembler::min_simm13())); + TRACE_jvmci_3("relocating ld at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); + } +} + +void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) { + fatal("CodeInstaller::pd_relocate_CodeBlob - sparc unimp"); +} + +void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) { + address pc = (address) inst; + if (inst->is_call()) { + NativeCall* call = nativeCall_at(pc); + call->set_destination((address) foreign_call_destination); + _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); + } else if (inst->is_sethi()) { + NativeJump* jump = nativeJump_at(pc); + jump->set_jump_destination((address) foreign_call_destination); + _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec()); + } else { + fatal(err_msg("unknown call or jump instruction at " PTR_FORMAT, p2i(pc))); + } + TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); +} + +void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) { +#ifdef ASSERT + Method* method = NULL; + // we need to check, this might also be an unresolved method + if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) { + method = getMethodFromHotSpotMethod(hotspot_method); + } +#endif + switch (_next_call_type) { + case INLINE_INVOKE: + break; + case INVOKEVIRTUAL: + case INVOKEINTERFACE: { + assert(method == NULL || !method->is_static(), "cannot call static method with invokeinterface"); + NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); + call->set_destination(SharedRuntime::get_resolve_virtual_call_stub()); + _instructions->relocate(call->instruction_address(), virtual_call_Relocation::spec(_invoke_mark_pc)); + break; + } + case INVOKESTATIC: { + assert(method == NULL || method->is_static(), "cannot call non-static method with invokestatic"); + NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); + call->set_destination(SharedRuntime::get_resolve_static_call_stub()); + _instructions->relocate(call->instruction_address(), relocInfo::static_call_type); + break; + } + case INVOKESPECIAL: { + assert(method == NULL || 
!method->is_static(), "cannot call static method with invokespecial"); + NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); + call->set_destination(SharedRuntime::get_resolve_opt_virtual_call_stub()); + _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type); + break; + } + default: + fatal("invalid _next_call_type value"); + break; + } +} + +void CodeInstaller::pd_relocate_poll(address pc, jint mark) { + switch (mark) { + case POLL_NEAR: + fatal("unimplemented"); + break; + case POLL_FAR: + _instructions->relocate(pc, relocInfo::poll_type); + break; + case POLL_RETURN_NEAR: + fatal("unimplemented"); + break; + case POLL_RETURN_FAR: + _instructions->relocate(pc, relocInfo::poll_return_type); + break; + default: + fatal("invalid mark value"); + break; + } +} + +// convert JVMCI register indices (as used in oop maps) to HotSpot registers +VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) { + if (jvmci_reg < RegisterImpl::number_of_registers) { + return as_Register(jvmci_reg)->as_VMReg(); + } else { + jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers; + floatRegisterNumber += MAX2(0, floatRegisterNumber-32); // Beginning with f32, only every second register is going to be addressed + if (floatRegisterNumber < FloatRegisterImpl::number_of_registers) { + return as_FloatRegister(floatRegisterNumber)->as_VMReg(); + } + ShouldNotReachHere(); + return NULL; + } +} + +bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { + return !hotspotRegister->is_FloatRegister(); +}
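get_hotspot_reg maps a flat JVMCI register index onto SPARC's register file: indices below the integer register count name integer registers, and the remainder names float registers, except that from f32 upward only even-numbered (double-precision) registers exist, hence the MAX2(0, floatRegisterNumber - 32) adjustment that doubles the step. A standalone sketch of just that index arithmetic; the register counts below are illustrative stand-ins, not taken from the real RegisterImpl:

    #include <algorithm>
    #include <cstdio>

    // Stand-in register counts, for illustration only.
    static const int kIntRegisters   = 32;
    static const int kFloatRegisters = 64;

    // Map a flat index to either an integer or a float register number, skipping
    // odd float registers from f32 upward as the installer code above does.
    static void map_register(int jvmci_reg) {
      if (jvmci_reg < kIntRegisters) {
        std::printf("index %2d -> integer register r%d\n", jvmci_reg, jvmci_reg);
        return;
      }
      int f = jvmci_reg - kIntRegisters;
      f += std::max(0, f - 32);  // beyond f31 only every second register exists
      if (f < kFloatRegisters) {
        std::printf("index %2d -> float register f%d\n", jvmci_reg, f);
      } else {
        std::printf("index %2d -> out of range\n", jvmci_reg);
      }
    }

    int main() {
      map_register(5);   // integer register
      map_register(40);  // f8, below the doubling threshold
      map_register(66);  // beyond f31: lands on an even-numbered register (f36)
      return 0;
    }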
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -1596,7 +1596,7 @@ else { ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); } - assert(false, err_msg("DEBUG MESSAGE: %s", msg)); + assert(false, "DEBUG MESSAGE: %s", msg); }
--- a/hotspot/src/cpu/sparc/vm/memset_with_concurrent_readers_sparc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/memset_with_concurrent_readers_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -61,8 +61,8 @@ " sub %[offset], %[end], %[offset]\n\t" // offset := start - end " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4 " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size - " rd %pc, %[pc]\n\t" // dispatch on scaled offset - " jmpl %[pc]+%[offset], %g0\n\t" + " rd %%pc, %[pc]\n\t" // dispatch on scaled offset + " jmpl %[pc]+%[offset], %%g0\n\t" " nop\n\t" // DISPATCH: no direct reference, but without it the store block may be elided. "1:\n\t" @@ -108,7 +108,7 @@ // Unroll loop x8. " sub %[aend], %[ato], %[temp]\n\t" " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words - " ba %xcc, 2f\n\t" // goto TEST always + " ba %%xcc, 2f\n\t" // goto TEST always " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words // LOOP: "1:\n\t" // unrolled x8 store loop top @@ -123,7 +123,7 @@ " stx %[xvalue], [%[ato]-8]\n\t" // TEST: "2:\n\t" - " bgu,a %xcc, 1b\n\t" // goto LOOP if more than 7 words remaining + " bgu,a %%xcc, 1b\n\t" // goto LOOP if more than 7 words remaining " add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration // Fill remaining < 8 full words. // Dispatch on (aligned_end - aligned_to). @@ -132,8 +132,8 @@ " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end " srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4 " add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size - " rd %pc, %[temp]\n\t" // dispatch on scaled offset - " jmpl %[temp]+%[ato], %g0\n\t" + " rd %%pc, %[temp]\n\t" // dispatch on scaled offset + " jmpl %[temp]+%[ato], %%g0\n\t" " nop\n\t" // DISPATCH: no direct reference, but without it the store block may be elided. "3:\n\t"
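The memset_with_concurrent_readers fix is purely about operand syntax: once a GCC extended asm statement has any operands, a single % in the template introduces an operand reference (%0, %1, %[name]), so a literal register name such as %pc or %g0 must be written %%pc / %%g0. A standalone x86-64 example of the same rule, chosen only because it compiles on common GCC/Clang hosts; the escaping requirement is identical for the SPARC code above:

    #include <cstdio>

    int main() {
      unsigned long value = 42;
      unsigned long copy;
      // With operands present, "%" starts an operand reference, so the literal
      // register name must be doubled: "%%rax", not "%rax".
      asm volatile("movq %1, %%rax\n\t"
                   "movq %%rax, %0"
                   : "=r"(copy)
                   : "r"(value)
                   : "rax");
      std::printf("%lu\n", copy);
      return 0;
    }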
--- a/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -56,7 +56,7 @@ #ifdef ASSERT static int check_nonzero(const char* xname, int x) { - assert(x != 0, err_msg("%s should be nonzero", xname)); + assert(x != 0, "%s should be nonzero", xname); return x; } #define NONZERO(x) check_nonzero(#x, x) @@ -453,7 +453,7 @@ } default: - fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); break; }
--- a/hotspot/src/cpu/sparc/vm/nativeInst_sparc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/nativeInst_sparc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -53,6 +53,7 @@ bool is_nop() { return long_at(0) == nop_instruction(); } bool is_call() { return is_op(long_at(0), Assembler::call_op); } + bool is_call_reg() { return is_op(long_at(0), Assembler::arith_op); } bool is_sethi() { return (is_op2(long_at(0), Assembler::sethi_op2) && inv_rd(long_at(0)) != G0); } @@ -415,6 +416,19 @@ return call; } +class NativeCallReg: public NativeInstruction { + public: + enum Sparc_specific_constants { + instruction_size = 8, + return_address_offset = 8, + instruction_offset = 0 + }; + + address next_instruction_address() const { + return addr_at(instruction_size); + } +}; + // The NativeFarCall is an abstraction for accessing/manipulating native call-anywhere // instructions in the sparcv9 vm. Used to call native methods which may be loaded // anywhere in the address space, possibly out of reach of a call instruction.
--- a/hotspot/src/cpu/sparc/vm/relocInfo_sparc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/relocInfo_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -197,8 +197,5 @@ void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { } -void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { -} - void metadata_Relocation::pd_fix_value(address x) { }
--- a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -43,6 +43,9 @@ #include "compiler/compileBroker.hpp" #include "shark/sharkCompiler.hpp" #endif +#if INCLUDE_JVMCI +#include "jvmci/jvmciJavaClasses.hpp" +#endif #define __ masm-> @@ -316,7 +319,7 @@ // 8 bytes FP registers are saved by default on SPARC. bool SharedRuntime::is_wide_vector(int size) { // Note, MaxVectorSize == 8 on SPARC. - assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size)); + assert(size <= 8, "%d bytes vectors are not supported", size); return size > 8; } @@ -464,7 +467,7 @@ break; default: - fatal(err_msg_res("unknown basic type %d", sig_bt[i])); + fatal("unknown basic type %d", sig_bt[i]); break; } } @@ -513,10 +516,10 @@ const VMRegPair *regs, Label& skip_fixup); void gen_i2c_adapter(int total_args_passed, - // VMReg max_arg, - int comp_args_on_stack, // VMRegStackSlots - const BasicType *sig_bt, - const VMRegPair *regs); + // VMReg max_arg, + int comp_args_on_stack, // VMRegStackSlots + const BasicType *sig_bt, + const VMRegPair *regs); AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {} }; @@ -760,13 +763,11 @@ __ bind(L_fail); } -void AdapterGenerator::gen_i2c_adapter( - int total_args_passed, - // VMReg max_arg, - int comp_args_on_stack, // VMRegStackSlots - const BasicType *sig_bt, - const VMRegPair *regs) { - +void AdapterGenerator::gen_i2c_adapter(int total_args_passed, + // VMReg max_arg, + int comp_args_on_stack, // VMRegStackSlots + const BasicType *sig_bt, + const VMRegPair *regs) { // Generate an I2C adapter: adjust the I-frame to make space for the C-frame // layout. Lesp was saved by the calling I-frame and will be restored on // return. Meanwhile, outgoing arg space is all owned by the callee @@ -990,6 +991,21 @@ // Jump to the compiled code just as if compiled code was doing it. __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3); +#if INCLUDE_JVMCI + if (EnableJVMCI) { + // check if this call should be routed towards a specific entry point + __ ld(Address(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), G1); + __ cmp(G0, G1); + Label no_alternative_target; + __ br(Assembler::equal, false, Assembler::pn, no_alternative_target); + __ delayed()->nop(); + + __ ld_ptr(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()), G3); + __ st_ptr(G0, Address(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); + + __ bind(no_alternative_target); + } +#endif // INCLUDE_JVMCI // 6243940 We might end up in handle_wrong_method if // the callee is deoptimized as we race thru here. 
If that @@ -1006,6 +1022,15 @@ __ delayed()->nop(); } +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + AdapterGenerator agen(masm); + agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs); +} + // --------------------------------------------------------------- AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, int total_args_passed, @@ -1016,9 +1041,7 @@ AdapterFingerPrint* fingerprint) { address i2c_entry = __ pc(); - AdapterGenerator agen(masm); - - agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs); + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); // ------------------------------------------------------------------------- @@ -1063,7 +1086,7 @@ } address c2i_entry = __ pc(); - + AdapterGenerator agen(masm); agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, L_skip_fixup); __ flush(); @@ -1859,7 +1882,7 @@ } else if (iid == vmIntrinsics::_invokeBasic) { has_receiver = true; } else { - fatal(err_msg_res("unexpected intrinsic id %d", iid)); + fatal("unexpected intrinsic id %d", iid); } if (member_reg != noreg) { @@ -2916,6 +2939,11 @@ pad += StackShadowPages*16 + 32; } #endif +#if INCLUDE_JVMCI + if (EnableJVMCI) { + pad += 1000; // Increase the buffer size when compiling for JVMCI + } +#endif #ifdef _LP64 CodeBuffer buffer("deopt_blob", 2100+pad, 512); #else @@ -2982,6 +3010,45 @@ __ ba(cont); __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode); + +#if INCLUDE_JVMCI + Label after_fetch_unroll_info_call; + int implicit_exception_uncommon_trap_offset = 0; + int uncommon_trap_offset = 0; + + if (EnableJVMCI) { + masm->block_comment("BEGIN implicit_exception_uncommon_trap"); + implicit_exception_uncommon_trap_offset = __ offset() - start; + + __ ld_ptr(G2_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()), O7); + __ st_ptr(G0, Address(G2_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); + __ add(O7, -8, O7); + + uncommon_trap_offset = __ offset() - start; + + // Save everything in sight. 
+ (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words); + __ set_last_Java_frame(SP, NULL); + + __ ld(G2_thread, in_bytes(JavaThread::pending_deoptimization_offset()), O1); + __ sub(G0, 1, L1); + __ st(L1, G2_thread, in_bytes(JavaThread::pending_deoptimization_offset())); + + __ mov((int32_t)Deoptimization::Unpack_reexecute, L0deopt_mode); + __ mov(G2_thread, O0); + __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)); + __ delayed()->nop(); + oop_maps->add_gc_map( __ offset()-start, map->deep_copy()); + __ get_thread(); + __ add(O7, 8, O7); + __ reset_last_Java_frame(); + + __ ba(after_fetch_unroll_info_call); + __ delayed()->nop(); // Delay slot + masm->block_comment("END implicit_exception_uncommon_trap"); + } // EnableJVMCI +#endif // INCLUDE_JVMCI + int exception_offset = __ offset() - start; // restore G2, the trampoline destroyed it @@ -3004,6 +3071,7 @@ int exception_in_tls_offset = __ offset() - start; // No need to update oop_map as each call to save_live_registers will produce identical oopmap + // Opens a new stack frame (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words); // Restore G2_thread @@ -3035,7 +3103,12 @@ // Reexecute entry, similar to c2 uncommon trap // int reexecute_offset = __ offset() - start; - +#if INCLUDE_JVMCI && !defined(COMPILER1) + if (EnableJVMCI && UseJVMCICompiler) { + // JVMCI does not use this kind of deoptimization + __ should_not_reach_here(); + } +#endif // No need to update oop_map as each call to save_live_registers will produce identical oopmap (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words); @@ -3059,6 +3132,11 @@ __ reset_last_Java_frame(); +#if INCLUDE_JVMCI + if (EnableJVMCI) { + __ bind(after_fetch_unroll_info_call); + } +#endif // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers // so this move will survive @@ -3124,6 +3202,12 @@ masm->flush(); _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words); _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); +#if INCLUDE_JVMCI + if (EnableJVMCI) { + _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); + _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); + } +#endif } #ifdef COMPILER2
--- a/hotspot/src/cpu/sparc/vm/sparc.ad Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/sparc.ad Wed Oct 21 16:38:48 2015 -0400 @@ -1098,7 +1098,7 @@ Register r = as_Register(ra_->get_encode(this)); CodeSection* consts_section = __ code()->consts(); int consts_size = consts_section->align_at_start(consts_section->size()); - assert(constant_table.size() == consts_size, err_msg("must be: %d == %d", constant_table.size(), consts_size)); + assert(constant_table.size() == consts_size, "must be: %d == %d", constant_table.size(), consts_size); if (UseRDPCForConstantTableBase) { // For the following RDPC logic to work correctly the consts @@ -1860,6 +1860,10 @@ return true; // Per default match rules are supported. } +const int Matcher::float_pressure(int default_pressure_threshold) { + return default_pressure_threshold; +} + int Matcher::regnum_to_fpu_offset(int regnum) { return regnum - 32; // The FP registers are in the second chunk }
--- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -204,6 +204,20 @@ address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) { address entry = __ pc(); __ get_constant_pool_cache(LcpoolCache); // load LcpoolCache +#if INCLUDE_JVMCI + // Check if we need to take lock at entry of synchronized method. + if (UseJVMCICompiler) { + Label L; + Address pending_monitor_enter_addr(G2_thread, JavaThread::pending_monitorenter_offset()); + __ ldbool(pending_monitor_enter_addr, Gtemp); // Load if pending monitor enter + __ cmp_and_br_short(Gtemp, G0, Assembler::equal, Assembler::pn, L); + // Clear flag. + __ stbool(G0, pending_monitor_enter_addr); + // Take lock. + lock_method(); + __ bind(L); + } +#endif { Label L; Address exception_addr(G2_thread, Thread::pending_exception_offset()); __ ld_ptr(exception_addr, Gtemp); // Load pending exception. @@ -349,7 +363,7 @@ // Allocate monitor and lock method (asm interpreter) // ebx - Method* // -void InterpreterGenerator::lock_method(void) { +void TemplateInterpreterGenerator::lock_method() { __ ld(Lmethod, in_bytes(Method::access_flags_offset()), O0); // Load access flags. #ifdef ASSERT @@ -779,14 +793,14 @@ // Generate regular method entry __ bind(slow_path); - (void) generate_normal_entry(false); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); return entry; } #endif // INCLUDE_ALL_GCS // If G1 is not enabled then attempt to go through the accessor entry point // Reference.get is an accessor - return generate_jump_to_normal_entry(); + return NULL; } //
--- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -37,9 +37,9 @@ #ifdef _LP64 // The sethi() instruction generates lots more instructions when shell // stack limit is unlimited, so that's why this is much bigger. - const static int InterpreterCodeSize = 210 * K; + const static int InterpreterCodeSize = 260 * K; #else - const static int InterpreterCodeSize = 180 * K; + const static int InterpreterCodeSize = 230 * K; #endif #endif // CPU_SPARC_VM_TEMPLATEINTERPRETER_SPARC_HPP
--- a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -2949,12 +2949,14 @@ void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) { + Register Rtemp = G4_scratch; Register Rcall = Rindex; assert_different_registers(Rcall, G5_method, Gargs, Rret); // get target Method* & entry point __ lookup_virtual_method(Rrecv, Rindex, G5_method); __ profile_arguments_type(G5_method, Rcall, Gargs, true); + __ profile_called_method(G5_method, Rtemp); __ call_from_interpreter(Rcall, Gargs, Rret); } @@ -3211,6 +3213,7 @@ assert_different_registers(Rcall, G5_method, Gargs, Rret); __ profile_arguments_type(G5_method, Rcall, Gargs, true); + __ profile_called_method(G5_method, Rscratch); __ call_from_interpreter(Rcall, Gargs, Rret); } @@ -3486,7 +3489,8 @@ Register RspecifiedKlass = O4; // Check for casting a NULL - __ br_null_short(Otos_i, Assembler::pn, is_null); + __ br_null(Otos_i, false, Assembler::pn, is_null); + __ delayed()->nop(); // Get value klass in RobjKlass __ load_klass(Otos_i, RobjKlass); // get value klass @@ -3542,7 +3546,8 @@ Register RspecifiedKlass = O4; // Check for casting a NULL - __ br_null_short(Otos_i, Assembler::pt, is_null); + __ br_null(Otos_i, false, Assembler::pt, is_null); + __ delayed()->nop(); // Get value klass in RobjKlass __ load_klass(Otos_i, RobjKlass); // get value klass
--- a/hotspot/src/cpu/sparc/vm/vmStructs_sparc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/vmStructs_sparc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -37,10 +37,11 @@ /******************************/ \ /* JavaFrameAnchor */ \ /******************************/ \ - volatile_nonstatic_field(JavaFrameAnchor, _flags, int) + volatile_nonstatic_field(JavaFrameAnchor, _flags, int) \ + static_field(VM_Version, _features, int) -#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) - +#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + declare_toplevel_type(VM_Version) #define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ /******************************/ \ @@ -78,7 +79,11 @@ declare_c2_constant(R_G4_num) \ declare_c2_constant(R_G5_num) \ declare_c2_constant(R_G6_num) \ - declare_c2_constant(R_G7_num) + declare_c2_constant(R_G7_num) \ + declare_constant(VM_Version::vis1_instructions_m) \ + declare_constant(VM_Version::vis2_instructions_m) \ + declare_constant(VM_Version::vis3_instructions_m) \ + declare_constant(VM_Version::cbcond_instructions_m) #define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -40,10 +40,6 @@ PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); PrefetchFieldsAhead = prefetch_fields_ahead(); - assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 1, "invalid value"); - if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; - if( AllocatePrefetchInstr > 1 ) AllocatePrefetchInstr = 0; - // Allocation prefetch settings intx cache_line_size = prefetch_data_size(); if( cache_line_size > AllocatePrefetchStepSize ) @@ -59,13 +55,6 @@ AllocatePrefetchDistance = allocate_prefetch_distance(); AllocatePrefetchStyle = allocate_prefetch_style(); - assert((AllocatePrefetchDistance % AllocatePrefetchStepSize) == 0 && - (AllocatePrefetchDistance > 0), "invalid value"); - if ((AllocatePrefetchDistance % AllocatePrefetchStepSize) != 0 || - (AllocatePrefetchDistance <= 0)) { - AllocatePrefetchDistance = AllocatePrefetchStepSize; - } - if (AllocatePrefetchStyle == 3 && !has_blk_init()) { warning("BIS instructions are not available on this CPU"); FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); @@ -73,13 +62,6 @@ guarantee(VM_Version::has_v9(), "only SPARC v9 is supported"); - assert(ArraycopySrcPrefetchDistance < 4096, "invalid value"); - if (ArraycopySrcPrefetchDistance >= 4096) - ArraycopySrcPrefetchDistance = 4064; - assert(ArraycopyDstPrefetchDistance < 4096, "invalid value"); - if (ArraycopyDstPrefetchDistance >= 4096) - ArraycopyDstPrefetchDistance = 4064; - UseSSE = 0; // Only on x86 and x64 _supports_cx8 = has_v9();
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -29,6 +29,7 @@ #include "runtime/vm_version.hpp" class VM_Version: public Abstract_VM_Version { + friend class VMStructs; protected: enum Feature_Flag { v8_instructions = 0,
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -733,11 +733,11 @@ // these asserts are somewhat nonsensical #ifndef _LP64 assert(which == imm_operand || which == disp32_operand, - err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip))); + "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)); #else assert((which == call32_operand || which == imm_operand) && is_64bit || which == narrow_oop_operand && !is_64bit, - err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip))); + "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)); #endif // _LP64 return ip; @@ -770,6 +770,7 @@ case 0x55: // andnps case 0x56: // orps case 0x57: // xorps + case 0x59: //mulpd case 0x6E: // movd case 0x7E: // movd case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush @@ -877,21 +878,35 @@ // Check second byte NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions")); + int vex_opcode; // First byte if ((0xFF & *inst) == VEX_3bytes) { + vex_opcode = VEX_OPCODE_MASK & *ip; ip++; // third byte is_64bit = ((VEX_W & *ip) == VEX_W); + } else { + vex_opcode = VEX_OPCODE_0F; } ip++; // opcode // To find the end of instruction (which == end_pc_operand). - switch (0xFF & *ip) { - case 0x61: // pcmpestri r, r/a, #8 - case 0x70: // pshufd r, r/a, #8 - case 0x73: // psrldq r, #8 - tail_size = 1; // the imm8 - break; - default: - break; + switch (vex_opcode) { + case VEX_OPCODE_0F: + switch (0xFF & *ip) { + case 0x70: // pshufd r, r/a, #8 + case 0x71: // ps[rl|ra|ll]w r, #8 + case 0x72: // ps[rl|ra|ll]d r, #8 + case 0x73: // ps[rl|ra|ll]q r, #8 + case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8 + case 0xC4: // pinsrw r, r, r/a, #8 + case 0xC5: // pextrw r/a, r, #8 + case 0xC6: // shufp[s|d] r, r, r/a, #8 + tail_size = 1; // the imm8 + break; + } + break; + case VEX_OPCODE_0F_3A: + tail_size = 1; + break; } ip++; // skip opcode debug_only(has_disp32 = true); // has both kinds of operands! @@ -1604,6 +1619,85 @@ emit_int8((unsigned char)0xA2); } +// Opcode / Instruction Op / En 64 - Bit Mode Compat / Leg Mode Description Implemented +// F2 0F 38 F0 / r CRC32 r32, r / m8 RM Valid Valid Accumulate CRC32 on r / m8. v +// F2 REX 0F 38 F0 / r CRC32 r32, r / m8* RM Valid N.E. Accumulate CRC32 on r / m8. - +// F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8 RM Valid N.E. Accumulate CRC32 on r / m8. - +// +// F2 0F 38 F1 / r CRC32 r32, r / m16 RM Valid Valid Accumulate CRC32 on r / m16. v +// +// F2 0F 38 F1 / r CRC32 r32, r / m32 RM Valid Valid Accumulate CRC32 on r / m32. v +// +// F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64 RM Valid N.E. Accumulate CRC32 on r / m64. v +void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) { + assert(VM_Version::supports_sse4_2(), ""); + int8_t w = 0x01; + Prefix p = Prefix_EMPTY; + + emit_int8((int8_t)0xF2); + switch (sizeInBytes) { + case 1: + w = 0; + break; + case 2: + case 4: + break; + LP64_ONLY(case 8:) + // This instruction is not valid in 32 bits + // Note: + // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + // + // Page B - 72 Vol. 2C says + // qwreg2 to qwreg 1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2 + // mem64 to qwreg 1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m + // F0!!! + // while 3 - 208 Vol. 
2A + // F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64 RM Valid N.E.Accumulate CRC32 on r / m64. + // + // the 0 on a last bit is reserved for a different flavor of this instruction : + // F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8 RM Valid N.E.Accumulate CRC32 on r / m8. + p = REX_W; + break; + default: + assert(0, "Unsupported value for a sizeInBytes argument"); + break; + } + LP64_ONLY(prefix(crc, v, p);) + emit_int8((int8_t)0x0F); + emit_int8(0x38); + emit_int8((int8_t)(0xF0 | w)); + emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7)); +} + +void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) { + assert(VM_Version::supports_sse4_2(), ""); + InstructionMark im(this); + int8_t w = 0x01; + Prefix p = Prefix_EMPTY; + + emit_int8((int8_t)0xF2); + switch (sizeInBytes) { + case 1: + w = 0; + break; + case 2: + case 4: + break; + LP64_ONLY(case 8:) + // This instruction is not valid in 32 bits + p = REX_W; + break; + default: + assert(0, "Unsupported value for a sizeInBytes argument"); + break; + } + LP64_ONLY(prefix(crc, adr, p);) + emit_int8((int8_t)0x0F); + emit_int8(0x38); + emit_int8((int8_t)(0xF0 | w)); + emit_operand(crc, adr); +} + void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true); @@ -2399,7 +2493,7 @@ void Assembler::movsbl(Register dst, Register src) { // movsxb NOT_LP64(assert(src->has_byte_register(), "must have byte register")); - int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); + int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true); emit_int8(0x0F); emit_int8((unsigned char)0xBE); emit_int8((unsigned char)(0xC0 | encode)); @@ -2516,7 +2610,7 @@ void Assembler::movzbl(Register dst, Register src) { // movzxb NOT_LP64(assert(src->has_byte_register(), "must have byte register")); - int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); + int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true); emit_int8(0x0F); emit_int8((unsigned char)0xB6); emit_int8(0xC0 | encode); @@ -2951,6 +3045,15 @@ emit_int8(imm8); } +void Assembler::pextrw(Register dst, XMMRegister src, int imm8) { + assert(VM_Version::supports_sse2(), ""); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true, + VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + emit_int8((unsigned char)0xC5); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(imm8); +} + void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true, @@ -2969,6 +3072,15 @@ emit_int8(imm8); } +void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) { + assert(VM_Version::supports_sse2(), ""); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true, + VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); + emit_int8((unsigned char)0xC4); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(imm8); +} + void Assembler::pmovzxbw(XMMRegister dst, Address src) { assert(VM_Version::supports_sse4_1(), ""); if (VM_Version::supports_evex()) { @@ -3984,6 +4096,16 @@ } } +void Assembler::mulpd(XMMRegister dst, Address src) { + 
_instruction_uses_vl = true; + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0x59, dst, src, VEX_SIMD_66); + } +} + void Assembler::mulps(XMMRegister dst, XMMRegister src) { _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); @@ -4172,6 +4294,26 @@ emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); } +void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) { + _instruction_uses_vl = true; + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0x15, dst, src, VEX_SIMD_66); + } +} + +void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) { + _instruction_uses_vl = true; + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66); + } else { + emit_simd_arith(0x14, dst, src, VEX_SIMD_66); + } +} + void Assembler::xorpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); if (VM_Version::supports_avx512dq()) { @@ -4792,8 +4934,9 @@ } -// AND packed integers +// logical operations packed integers void Assembler::pand(XMMRegister dst, XMMRegister src) { + _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_simd_arith(0xDB, dst, src, VEX_SIMD_66); } @@ -4814,6 +4957,17 @@ emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len); } +void Assembler::pandn(XMMRegister dst, XMMRegister src) { + _instruction_uses_vl = true; + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + if (VM_Version::supports_evex()) { + emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66); + } + else { + emit_simd_arith(0xDF, dst, src, VEX_SIMD_66); + } +} + void Assembler::por(XMMRegister dst, XMMRegister src) { _instruction_uses_vl = true; NOT_LP64(assert(VM_Version::supports_sse2(), "")); @@ -6223,6 +6377,14 @@ emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding())); } +// 0F A4 / r ib +void Assembler::shldl(Register dst, Register src, int8_t imm8) { + emit_int8(0x0F); + emit_int8((unsigned char)0xA4); + emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding())); + emit_int8(imm8); +} + void Assembler::shrdl(Register dst, Register src) { emit_int8(0x0F); emit_int8((unsigned char)0xAD); @@ -6362,12 +6524,12 @@ return reg_enc; } -int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { +int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) { if (dst_enc < 8) { if (src_enc >= 8) { prefix(REX_B); src_enc -= 8; - } else if (byteinst && src_enc >= 4) { + } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) { prefix(REX); } } else { @@ -6408,6 +6570,40 @@ } } +void Assembler::prefix(Register dst, Register src, Prefix p) { + if (src->encoding() >= 8) { + p = (Prefix)(p | REX_B); + } + if (dst->encoding() >= 8) { + p = (Prefix)( p | REX_R); + } + if (p != Prefix_EMPTY) { + // do not generate an empty prefix + prefix(p); + } +} + +void Assembler::prefix(Register dst, Address adr, Prefix p) { + if (adr.base_needs_rex()) { + if (adr.index_needs_rex()) { + assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X"); + } else { + prefix(REX_B); + } + } else { + if (adr.index_needs_rex()) { + assert(false, 
"prefix(Register dst, Address adr, Prefix p) does not support handling of an X"); + } + } + if (dst->encoding() >= 8) { + p = (Prefix)(p | REX_R); + } + if (p != Prefix_EMPTY) { + // do not generate an empty prefix + prefix(p); + } +} + void Assembler::prefix(Address adr) { if (adr.base_needs_rex()) { if (adr.index_needs_rex()) {
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -506,7 +506,8 @@ VEX_3bytes = 0xC4, VEX_2bytes = 0xC5, - EVEX_4bytes = 0x62 + EVEX_4bytes = 0x62, + Prefix_EMPTY = 0x0 }; enum VexPrefix { @@ -535,7 +536,8 @@ VEX_OPCODE_NONE = 0x0, VEX_OPCODE_0F = 0x1, VEX_OPCODE_0F_38 = 0x2, - VEX_OPCODE_0F_3A = 0x3 + VEX_OPCODE_0F_3A = 0x3, + VEX_OPCODE_MASK = 0x1F }; enum AvxVectorLen { @@ -611,10 +613,15 @@ int prefix_and_encode(int reg_enc, bool byteinst = false); int prefixq_and_encode(int reg_enc); - int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false); + int prefix_and_encode(int dst_enc, int src_enc) { + return prefix_and_encode(dst_enc, false, src_enc, false); + } + int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte); int prefixq_and_encode(int dst_enc, int src_enc); void prefix(Register reg); + void prefix(Register dst, Register src, Prefix p); + void prefix(Register dst, Address adr, Prefix p); void prefix(Address adr); void prefixq(Address adr); @@ -1177,6 +1184,10 @@ // Identify processor type and features void cpuid(); + // CRC32C + void crc32(Register crc, Register v, int8_t sizeInBytes); + void crc32(Register crc, Address adr, int8_t sizeInBytes); + // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value void cvtsd2ss(XMMRegister dst, XMMRegister src); void cvtsd2ss(XMMRegister dst, Address src); @@ -1672,10 +1683,14 @@ // SSE 4.1 extract void pextrd(Register dst, XMMRegister src, int imm8); void pextrq(Register dst, XMMRegister src, int imm8); + // SSE 2 extract + void pextrw(Register dst, XMMRegister src, int imm8); // SSE 4.1 insert void pinsrd(XMMRegister dst, Register src, int imm8); void pinsrq(XMMRegister dst, Register src, int imm8); + // SSE 2 insert + void pinsrw(XMMRegister dst, Register src, int imm8); // SSE4.1 packed move void pmovzxbw(XMMRegister dst, XMMRegister src); @@ -1783,6 +1798,7 @@ void setb(Condition cc, Register dst); void shldl(Register dst, Register src); + void shldl(Register dst, Register src, int8_t imm8); void shll(Register dst, int imm8); void shll(Register dst); @@ -1925,6 +1941,7 @@ // Multiply Packed Floating-Point Values void mulpd(XMMRegister dst, XMMRegister src); + void mulpd(XMMRegister dst, Address src); void mulps(XMMRegister dst, XMMRegister src); void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); @@ -1951,6 +1968,9 @@ void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void unpckhpd(XMMRegister dst, XMMRegister src); + void unpcklpd(XMMRegister dst, XMMRegister src); + // Bitwise Logical XOR of Packed Floating-Point Values void xorpd(XMMRegister dst, XMMRegister src); void xorps(XMMRegister dst, XMMRegister src); @@ -2046,6 +2066,9 @@ void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + // Andn packed integers + void pandn(XMMRegister dst, XMMRegister src); + // Or packed integers void por(XMMRegister dst, XMMRegister src); void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
--- a/hotspot/src/cpu/x86/vm/assembler_x86.inline.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.inline.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -33,10 +33,12 @@ inline int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { return reg_enc; } inline int Assembler::prefixq_and_encode(int reg_enc) { return reg_enc; } -inline int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { return dst_enc << 3 | src_enc; } +inline int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) { return dst_enc << 3 | src_enc; } inline int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { return dst_enc << 3 | src_enc; } inline void Assembler::prefix(Register reg) {} +inline void Assembler::prefix(Register dst, Register src, Prefix p) {} +inline void Assembler::prefix(Register dst, Address adr, Prefix p) {} inline void Assembler::prefix(Address adr) {} inline void Assembler::prefixq(Address adr) {}
--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -2457,9 +2457,6 @@ // Should consider not saving rbx, if not necessary __ trigfunc('t', op->as_Op2()->fpu_stack_size()); break; - case lir_exp : - __ exp_with_fallback(op->as_Op2()->fpu_stack_size()); - break; case lir_pow : __ pow_with_fallback(op->as_Op2()->fpu_stack_size()); break; @@ -2684,7 +2681,7 @@ #endif // _LP64 } } else { - fatal(err_msg("unexpected type: %s", basictype_to_str(c->type()))); + fatal("unexpected type: %s", basictype_to_str(c->type())); } // cpu register - address } else if (opr2->is_address()) {
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -808,6 +808,12 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type"); + + if (x->id() == vmIntrinsics::_dexp) { + do_ExpIntrinsic(x); + return; + } + LIRItem value(x->argument_at(0), this); bool use_fpu = false; @@ -818,7 +824,6 @@ case vmIntrinsics::_dtan: case vmIntrinsics::_dlog: case vmIntrinsics::_dlog10: - case vmIntrinsics::_dexp: case vmIntrinsics::_dpow: use_fpu = true; } @@ -870,7 +875,6 @@ case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break; case vmIntrinsics::_dlog: __ log (calc_input, calc_result, tmp1); break; case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break; - case vmIntrinsics::_dexp: __ exp (calc_input, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break; case vmIntrinsics::_dpow: __ pow (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break; default: ShouldNotReachHere(); } @@ -880,6 +884,32 @@ } } +void LIRGenerator::do_ExpIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); + + LIR_Opr calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); + + BasicTypeList signature(1); + signature.append(T_DOUBLE); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + value.load_item_force(cc->at(0)); + +#ifndef _LP64 + LIR_Opr tmp = FrameMap::fpu0_double_opr; + result_reg = tmp; + if (VM_Version::supports_sse2()) { + __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); + } else { + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); + } +#else + __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); +#endif + __ move(result_reg, calc_result); +} void LIRGenerator::do_ArrayCopy(Intrinsic* x) { assert(x->number_of_arguments() == 5, "wrong type");
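do_ExpIntrinsic routes Math.exp through a one-argument C calling convention: 64-bit always calls the StubRoutines::dexp() stub, while 32-bit falls back to SharedRuntime::dexp when SSE2 is not available. Stripped of the LIR plumbing, the selection is a feature-gated choice between two double(double) entry points; a minimal sketch of that dispatch (the functions and the feature probe below are placeholders):

    #include <cmath>
    #include <cstdio>

    static double dexp_stub(double x)     { return std::exp(x); }  // pretend optimized stub
    static double dexp_fallback(double x) { return std::exp(x); }  // pretend portable fallback
    static bool   supports_sse2()         { return true; }         // placeholder feature probe

    int main() {
      double (*dexp)(double) = supports_sse2() ? dexp_stub : dexp_fallback;
      std::printf("exp(1.0) = %f\n", dexp(1.0));
    }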
--- a/hotspot/src/cpu/x86/vm/c1_LinearScan_x86.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/c1_LinearScan_x86.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -814,8 +814,7 @@ case lir_tan: case lir_sin: - case lir_cos: - case lir_exp: { + case lir_cos: { // sin, cos and exp need two temporary fpu stack slots, so there are two temporary // registers (stored in right and temp of the operation). // the stack allocator must guarantee that the stack slots are really free,
--- a/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -48,11 +48,11 @@ define_pd_global(intx, OnStackReplacePercentage, 140); define_pd_global(intx, ConditionalMoveLimit, 3); -define_pd_global(intx, FLOATPRESSURE, 6); define_pd_global(intx, FreqInlineSize, 325); define_pd_global(intx, MinJumpTableSize, 10); #ifdef AMD64 define_pd_global(intx, INTPRESSURE, 13); +define_pd_global(intx, FLOATPRESSURE, 14); define_pd_global(intx, InteriorEntryAlignment, 16); define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); define_pd_global(intx, LoopUnrollLimit, 60); @@ -64,6 +64,7 @@ define_pd_global(uint64_t, MaxRAM, 128ULL*G); #else define_pd_global(intx, INTPRESSURE, 6); +define_pd_global(intx, FLOATPRESSURE, 6); define_pd_global(intx, InteriorEntryAlignment, 4); define_pd_global(size_t, NewSizeThreadIncrease, 4*K); define_pd_global(intx, LoopUnrollLimit, 50); // Design center runs on 1.3.1 @@ -82,6 +83,7 @@ define_pd_global(bool, UseCISCSpill, true); define_pd_global(bool, OptoScheduling, false); define_pd_global(bool, OptoBundling, false); +define_pd_global(bool, OptoRegScheduling, true); define_pd_global(intx, ReservedCodeCacheSize, 48*M); define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);
--- a/hotspot/src/cpu/x86/vm/compiledIC_x86.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/compiledIC_x86.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -50,13 +50,15 @@ // ---------------------------------------------------------------------------- #define __ _masm. -address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) { +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { // Stub is fixed up when the corresponding call is converted from // calling compiled code to calling interpreted code. // movq rbx, 0 // jmp -5 # to self - address mark = cbuf.insts_mark(); // Get mark within main instrs section. + if (mark == NULL) { + mark = cbuf.insts_mark(); // Get mark within main instrs section. + } // Note that the code buffer's insts_mark is always relative to insts. // That's why we must use the macroassembler to generate a stub. @@ -73,6 +75,8 @@ // This is recognized as unresolved by relocs/nativeinst/ic code. __ jump(RuntimeAddress(__ pc())); + assert(__ pc() - base <= to_interp_stub_size(), "wrong stub size"); + // Update current stubs pointer and restore insts_end. __ end_a_stub(); return base; @@ -104,10 +108,15 @@ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); - assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), +#ifdef ASSERT + // read the value once + intptr_t data = method_holder->data(); + address destination = jump->jump_destination(); + assert(data == 0 || data == (intptr_t)callee(), "a) MT-unsafe modification of inline cache"); - assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, + assert(destination == (address)-1 || destination == entry, "b) MT-unsafe modification of inline cache"); +#endif // Update stub. method_holder->set_data((intptr_t)callee()); @@ -124,11 +133,12 @@ assert(stub != NULL, "stub not found"); // Creation also verifies the object. NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); - NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); method_holder->set_data(0); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); jump->set_jump_destination((address)-1); } + //----------------------------------------------------------------------------- // Non-product mode code #ifndef PRODUCT @@ -150,5 +160,4 @@ // Verify state. assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); } - #endif // !PRODUCT
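The reshuffled ASSERT block now loads method_holder->data() and jump->jump_destination() into locals before checking them, so each field that another thread may be patching is read exactly once; re-reading inside the assert could observe two different values. A tiny sketch of the same snapshot-then-check idiom, with an atomic standing in for the patchable stub word:

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    std::atomic<intptr_t> stub_data{0};   // stand-in for the patchable method-holder slot

    void verify_stub(intptr_t expected_callee) {
      // Read the value once; loading stub_data twice inside the assert could see
      // two different values if another thread patches the stub in between.
      intptr_t data = stub_data.load();
      assert(data == 0 || data == expected_callee);
      (void)data;  // keep the snapshot used even when asserts are compiled out
    }

    int main() {
      verify_stub(0x1234);
      return 0;
    }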
--- a/hotspot/src/cpu/x86/vm/cppInterpreterGenerator_x86.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/cppInterpreterGenerator_x86.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -29,6 +29,7 @@ void generate_more_monitors(); void generate_deopt_handling(); + void lock_method(void); address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only void generate_compute_interpreter_state(const Register state, const Register prev_state,
--- a/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -741,7 +741,7 @@ // Find preallocated monitor and lock method (C++ interpreter) // rbx - Method* // -void InterpreterGenerator::lock_method(void) { +void CppInterpreterGenerator::lock_method() { // assumes state == rsi/r13 == pointer to current interpreterState // minimally destroys rax, rdx|c_rarg1, rdi // @@ -807,7 +807,7 @@ // If G1 is not enabled then attempt to go through the accessor entry point // Reference.get is an accessor - return generate_jump_to_normal_entry(); + return NULL; } //
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/x86/vm/crc32c.h Wed Oct 21 16:38:48 2015 -0400
@@ -0,0 +1,66 @@
+/*
+* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+enum {
+  // S. Gueron / Information Processing Letters 112 (2012) 184
+  // shows that anything above 6K and below 32K is a good choice
+  // 32K does not deliver any further performance gains
+  // 6K=8*256 (*3 as we compute 3 blocks together)
+  //
+  // Thus selecting the smallest value so it could apply to the largest number
+  // of buffer sizes.
+  CRC32C_HIGH = 8 * 256,
+
+  // empirical
+  // based on ubench study using methodology described in
+  // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 8
+  //
+  // arbitrary value between 27 and 256
+  CRC32C_MIDDLE = 8 * 86,
+
+  // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 9
+  // shows that 240 and 1024 are equally good choices as 216==8*27
+  //
+  // Selecting the smallest value which resulted in a significant performance improvement over
+  // the sequential version
+  CRC32C_LOW = 8 * 27,
+
+  CRC32C_NUM_ChunkSizeInBytes = 3,
+
+  // We need to compute powers of 64N and 128N for each "chunk" size
+  CRC32C_NUM_PRECOMPUTED_CONSTANTS = ( 2 * CRC32C_NUM_ChunkSizeInBytes )
+};
+// Notes:
+// 1. Why do we need a "chunk" approach?
+// The overhead of computing the powers for an arbitrary buffer of size N is significant
+// (the implementation approaches plain library performance).
+// 2. Why only 3 "chunks"?
+// Performance experiments showed that HIGH+LOW alone did not deliver a stable speedup
+// curve.
+//
+// Disclaimer:
+// If you ever decide to increase/decrease the number of "chunks", be sure to modify
+// a) constants table generation (hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp)
+// b) constant fetch from that table (macroAssembler_x86.cpp)
+// c) unrolled for loop (macroAssembler_x86.cpp)
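The constants above form a byte-size hierarchy: CRC32C_HIGH = 8*256 = 2048 bytes per block, which with the 3 blocks processed together covers the 6K sweet spot from the cited paper; CRC32C_MIDDLE = 8*86 = 688; CRC32C_LOW = 8*27 = 216; and 2*3 = 6 precomputed constants (powers of 64N and 128N per chunk size). A compile-time restatement of that arithmetic, mirroring the enum rather than including the header:

    #include <cstdio>

    int main() {
      constexpr int kHigh   = 8 * 256;  // 2048 bytes per block
      constexpr int kMiddle = 8 * 86;   // 688 bytes per block
      constexpr int kLow    = 8 * 27;   // 216 bytes per block
      constexpr int kChunks = 3;        // number of chunk sizes
      static_assert(kHigh * 3 == 6 * 1024, "three HIGH blocks together span 6K");
      static_assert(2 * kChunks == 6,      "powers of 64N and 128N per chunk size");
      std::printf("HIGH=%d MIDDLE=%d LOW=%d constants=%d\n", kHigh, kMiddle, kLow, 2 * kChunks);
    }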
--- a/hotspot/src/cpu/x86/vm/frame_x86.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/frame_x86.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -48,8 +48,6 @@ } #endif -PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC - // Profiling/safepoint support bool frame::safe_for_sender(JavaThread *thread) { @@ -280,7 +278,7 @@ address* pc_addr = &(((address*) sp())[-1]); if (TracePcPatching) { tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", - pc_addr, *pc_addr, pc); + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); } // Either the return address is the original one or we are going to // patch in the same address that's already there. @@ -458,11 +456,11 @@ // This is the sp before any possible extension (adapter/locals). intptr_t* unextended_sp = interpreter_frame_sender_sp(); -#ifdef COMPILER2 +#if defined(COMPILER2) || INCLUDE_JVMCI if (map->update_map()) { update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); } -#endif // COMPILER2 +#endif // COMPILER2 || INCLUDE_JVMCI return frame(sender_sp, unextended_sp, link(), sender_pc()); } @@ -683,10 +681,19 @@ DESCRIBE_FP_OFFSET(interpreter_frame_locals); DESCRIBE_FP_OFFSET(interpreter_frame_bcp); DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); +#ifdef AMD64 + } else if (is_entry_frame()) { + // This could be more descriptive if we use the enum in + // stubGenerator to map to real names but it's most important to + // claim these frame slots so the error checking works. + for (int i = 0; i < entry_frame_after_call_words; i++) { + values.describe(frame_no, fp() - i, err_msg("call_stub word fp - %d", i)); + } +#endif // AMD64 + } #endif - } } -#endif +#endif // !PRODUCT intptr_t *frame::initial_deoptimization_info() { // used to reset the saved FP
--- a/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -78,7 +78,11 @@ assert(((nmethod*)_cb)->insts_contains(_pc), "original PC must be in nmethod"); _deopt_state = is_deoptimized; } else { - _deopt_state = not_deoptimized; + if (_cb->is_deoptimization_stub()) { + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } } }
--- a/hotspot/src/cpu/x86/vm/globals_x86.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/globals_x86.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -46,7 +46,7 @@ // the the vep is aligned at CodeEntryAlignment whereas c2 only aligns // the uep and the vep doesn't get real alignment but just slops on by // only assured that the entry instruction meets the 5 byte size requirement. -#ifdef COMPILER2 +#if defined(COMPILER2) || INCLUDE_JVMCI define_pd_global(intx, CodeEntryAlignment, 32); #else define_pd_global(intx, CodeEntryAlignment, 16); @@ -55,16 +55,28 @@ define_pd_global(intx, InlineFrequencyCount, 100); define_pd_global(intx, InlineSmallCode, 1000); -define_pd_global(intx, StackYellowPages, NOT_WINDOWS(2) WINDOWS_ONLY(3)); -define_pd_global(intx, StackRedPages, 1); +#define DEFAULT_STACK_YELLOW_PAGES (NOT_WINDOWS(2) WINDOWS_ONLY(3)) +#define DEFAULT_STACK_RED_PAGES (1) + +#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES +#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES + #ifdef AMD64 // Very large C++ stack frames using solaris-amd64 optimized builds // due to lack of optimization caused by C++ compiler bugs -define_pd_global(intx, StackShadowPages, NOT_WIN64(20) WIN64_ONLY(6) DEBUG_ONLY(+2)); +#define DEFAULT_STACK_SHADOW_PAGES (NOT_WIN64(20) WIN64_ONLY(6) DEBUG_ONLY(+2)) +// For those clients that do not use write socket, we allow +// the min range value to be below that of the default +#define MIN_STACK_SHADOW_PAGES (NOT_WIN64(10) WIN64_ONLY(6) DEBUG_ONLY(+2)) #else -define_pd_global(intx, StackShadowPages, 4 DEBUG_ONLY(+5)); +#define DEFAULT_STACK_SHADOW_PAGES (4 DEBUG_ONLY(+5)) +#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES #endif // AMD64 +define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); +define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); +define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); + define_pd_global(bool, RewriteBytecodes, true); define_pd_global(bool, RewriteFrequentPairs, true); @@ -91,6 +103,7 @@ \ product(intx, UseAVX, 99, \ "Highest supported AVX instructions set on x86/x64") \ + range(0, 99) \ \ product(bool, UseCLMUL, false, \ "Control whether CLMUL instructions can be used on x86/x64") \ @@ -134,6 +147,7 @@ \ product(uintx, RTMRetryCount, 5, \ "Number of RTM retries on lock abort or busy") \ + range(0, max_uintx) \ \ experimental(intx, RTMSpinLoopCount, 100, \ "Spin count for lock to become free before RTM retry") \
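The new DEFAULT_/MIN_ stack-page macros keep the platform arithmetic in one place. Assuming the usual meaning of NOT_WIN64/WIN64_ONLY/DEBUG_ONLY (each expands to its argument on the matching configuration and to nothing otherwise), DEFAULT_STACK_SHADOW_PAGES works out as in this small sketch, where the configuration values are spelled out by hand:

    #include <cstdio>

    int main() {
      // NOT_WIN64(20) WIN64_ONLY(6) DEBUG_ONLY(+2) concatenates into these sums:
      const int linux_x64_product = 20;      // NOT_WIN64(20), DEBUG_ONLY adds nothing
      const int linux_x64_debug   = 20 + 2;  // NOT_WIN64(20) DEBUG_ONLY(+2)
      const int win_x64_debug     = 6 + 2;   // WIN64_ONLY(6) DEBUG_ONLY(+2)
      std::printf("shadow pages: %d / %d / %d\n",
                  linux_x64_product, linux_x64_debug, win_x64_debug);
    }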
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -40,6 +40,11 @@ // Implementation of InterpreterMacroAssembler +void InterpreterMacroAssembler::jump_to_entry(address entry) { + assert(entry, "Entry must have been generated by now"); + jump(RuntimeAddress(entry)); +} + #ifndef CC_INTERP void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { Label update, next, none; @@ -1497,13 +1502,39 @@ bind(skip_receiver_profile); // The method data pointer needs to be updated to reflect the new target. +#if INCLUDE_JVMCI + if (MethodProfileWidth == 0) { + update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); + } +#else // INCLUDE_JVMCI update_mdp_by_constant(mdp, in_bytes(VirtualCallData:: virtual_call_data_size())); +#endif // INCLUDE_JVMCI bind(profile_continue); } } +#if INCLUDE_JVMCI +void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { + assert_different_registers(method, mdp, reg2); + if (ProfileInterpreter && MethodProfileWidth > 0) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label done; + record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, + &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); + bind(done); + + update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); + bind(profile_continue); + } +} +#endif // INCLUDE_JVMCI + // This routine creates a state machine for updating the multi-row // type profile at a virtual call site (or other type-sensitive bytecode). // The machine visits each row (of receiver/count) until the receiver type @@ -1523,14 +1554,36 @@ if (is_virtual_call) { increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); } - return; +#if INCLUDE_JVMCI + else if (EnableJVMCI) { + increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); + } +#endif // INCLUDE_JVMCI + } else { + int non_profiled_offset = -1; + if (is_virtual_call) { + non_profiled_offset = in_bytes(CounterData::count_offset()); + } +#if INCLUDE_JVMCI + else if (EnableJVMCI) { + non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); + } +#endif // INCLUDE_JVMCI + + record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, + &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); } +} - int last_row = VirtualCallData::row_limit() - 1; +void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, + Register reg2, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset) { + int last_row = total_rows - 1; assert(start_row <= last_row, "must be work left to do"); - // Test this row for both the receiver and for null. + // Test this row for both the item and for null. // Take any of three different outcomes: - // 1. found receiver => increment count and goto done + // 1. found item => increment count and goto done // 2. found null => keep looking for case 1, maybe allocate this cell // 3. found something else => keep looking for cases 1 and 2 // Case 3 is handled by a recursive call. 
@@ -1538,30 +1591,30 @@ Label next_test; bool test_for_null_also = (row == start_row); - // See if the receiver is receiver[n]. - int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); - test_mdp_data_at(mdp, recvr_offset, receiver, + // See if the item is item[n]. + int item_offset = in_bytes(item_offset_fn(row)); + test_mdp_data_at(mdp, item_offset, item, (test_for_null_also ? reg2 : noreg), next_test); - // (Reg2 now contains the receiver from the CallData.) + // (Reg2 now contains the item from the CallData.) - // The receiver is receiver[n]. Increment count[n]. - int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); + // The item is item[n]. Increment count[n]. + int count_offset = in_bytes(item_count_offset_fn(row)); increment_mdp_data_at(mdp, count_offset); jmp(done); bind(next_test); if (test_for_null_also) { Label found_null; - // Failed the equality check on receiver[n]... Test for null. + // Failed the equality check on item[n]... Test for null. testptr(reg2, reg2); if (start_row == last_row) { // The only thing left to do is handle the null case. - if (is_virtual_call) { + if (non_profiled_offset >= 0) { jccb(Assembler::zero, found_null); - // Receiver did not match any saved receiver and there is no empty row for it. + // Item did not match any saved item and there is no empty row for it. // Increment total counter to indicate polymorphic case. - increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + increment_mdp_data_at(mdp, non_profiled_offset); jmp(done); bind(found_null); } else { @@ -1573,21 +1626,22 @@ jcc(Assembler::zero, found_null); // Put all the "Case 3" tests here. - record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); + record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, + item_offset_fn, item_count_offset_fn, non_profiled_offset); - // Found a null. Keep searching for a matching receiver, + // Found a null. Keep searching for a matching item, // but remember that this is an empty (unused) slot. bind(found_null); } } - // In the fall-through case, we found no matching receiver, but we - // observed the receiver[start_row] is NULL. + // In the fall-through case, we found no matching item, but we + // observed the item[start_row] is NULL. - // Fill in the receiver field and increment the count. - int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); - set_mdp_data_at(mdp, recvr_offset, receiver); - int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); + // Fill in the item field and increment the count. + int item_offset = in_bytes(item_offset_fn(start_row)); + set_mdp_data_at(mdp, item_offset, item); + int count_offset = in_bytes(item_count_offset_fn(start_row)); movl(reg2, DataLayout::counter_increment); set_mdp_data_at(mdp, count_offset, reg2); if (start_row > 0) {
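record_item_in_profile_helper generalizes the old receiver-only rows into (item, count) pairs parameterized by offset functions, so the same machinery can profile receiver types (TypeProfileWidth) or, with JVMCI, callee methods (MethodProfileWidth). The updated comments describe the per-row outcomes: a matching item bumps its count, a null row is remembered as a candidate slot, and when nothing matches and no row is free the counter at non_profiled_offset records the polymorphic case. The same logic over a plain array, as a sketch (the row layout is illustrative, not the MethodData layout):

    #include <cstdint>
    #include <cstdio>

    struct ProfileRowsSketch {
      static const int kRows = 2;            // cf. TypeProfileWidth / MethodProfileWidth
      intptr_t item[kRows]  = {0, 0};        // 0 plays the role of the "null" (unused) row
      uint32_t count[kRows] = {0, 0};
      uint32_t polymorphic  = 0;             // cf. the counter at non_profiled_offset

      void record(intptr_t it) {
        int free_row = -1;
        for (int row = 0; row < kRows; row++) {
          if (item[row] == it) { count[row]++; return; }       // 1. found item
          if (item[row] == 0 && free_row < 0) free_row = row;  // 2. found null: remember slot
          // 3. found something else: keep looking
        }
        if (free_row >= 0) { item[free_row] = it; count[free_row] = 1; return; }
        polymorphic++;   // no match and no empty row left
      }
    };

    int main() {
      ProfileRowsSketch p;
      p.record(0xA); p.record(0xA); p.record(0xB); p.record(0xC);
      std::printf("row0=%llx:%u row1=%llx:%u poly=%u\n",
                  (unsigned long long)p.item[0], p.count[0],
                  (unsigned long long)p.item[1], p.count[1], p.polymorphic);
    }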
--- a/hotspot/src/cpu/x86/vm/interp_masm_x86.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -32,6 +32,7 @@ // This file specializes the assember with interpreter-specific macros +typedef ByteSize (*OffsetFunction)(uint); class InterpreterMacroAssembler: public MacroAssembler { @@ -60,6 +61,8 @@ _locals_register(LP64_ONLY(r14) NOT_LP64(rdi)), _bcp_register(LP64_ONLY(r13) NOT_LP64(rsi)) {} + void jump_to_entry(address entry); + void load_earlyret_value(TosState state); #ifdef CC_INTERP @@ -249,6 +252,10 @@ void record_klass_in_profile_helper(Register receiver, Register mdp, Register reg2, int start_row, Label& done, bool is_virtual_call); + void record_item_in_profile_helper(Register item, Register mdp, + Register reg2, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset); void update_mdp_by_offset(Register mdp_in, int offset_of_offset); void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); @@ -262,6 +269,7 @@ void profile_virtual_call(Register receiver, Register mdp, Register scratch2, bool receiver_can_be_null = false); + void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; void profile_ret(Register return_bci, Register mdp); void profile_null_seen(Register mdp); void profile_typecheck(Register mdp, Register klass, Register scratch);
--- a/hotspot/src/cpu/x86/vm/interpreterGenerator_x86.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/interpreterGenerator_x86.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -31,17 +31,6 @@ #define __ _masm-> -// Jump into normal path for accessor and empty entry to jump to normal entry -// The "fast" optimization don't update compilation count therefore can disable inlining -// for these functions that should be inlined. -address InterpreterGenerator::generate_jump_to_normal_entry(void) { - address entry_point = __ pc(); - - assert(Interpreter::entry_for_kind(Interpreter::zerolocals) != NULL, "should already be generated"); - __ jump(RuntimeAddress(Interpreter::entry_for_kind(Interpreter::zerolocals))); - return entry_point; -} - // Abstract method entry // Attempt to execute abstract method. Throw exception address InterpreterGenerator::generate_abstract_entry(void) {
--- a/hotspot/src/cpu/x86/vm/interpreterGenerator_x86.hpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/interpreterGenerator_x86.hpp Wed Oct 21 16:38:48 2015 -0400 @@ -36,19 +36,18 @@ address generate_native_entry(bool synchronized); address generate_abstract_entry(void); address generate_math_entry(AbstractInterpreter::MethodKind kind); - address generate_jump_to_normal_entry(void); - address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); } - address generate_empty_entry(void) { return generate_jump_to_normal_entry(); } + address generate_accessor_entry(void) { return NULL; } + address generate_empty_entry(void) { return NULL; } address generate_Reference_get_entry(); address generate_CRC32_update_entry(); address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); + address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind); #ifndef _LP64 address generate_Float_intBitsToFloat_entry(); address generate_Float_floatToRawIntBits_entry(); address generate_Double_longBitsToDouble_entry(); address generate_Double_doubleToRawLongBits_entry(); #endif - void lock_method(void); void generate_stack_overflow_check(void); void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
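With generate_accessor_entry/generate_empty_entry returning NULL instead of a jump-to-normal-entry stub, a missing specialized entry simply falls back to the regular zerolocals entry; the SPARC and CPP-interpreter hunks above make the same change for Reference.get. A minimal sketch of that "specialized entry or fall back" lookup (the entry kinds and table are placeholders, not the AbstractInterpreter tables):

    #include <cstdio>

    typedef void (*entry_t)();

    static void normal_entry() { std::puts("normal (zerolocals) entry"); }

    // A generator that declines to produce a special entry returns nullptr,
    // mirroring the switch from generate_jump_to_normal_entry() stubs to NULL.
    static entry_t generate_accessor_entry() { return nullptr; }

    int main() {
      entry_t accessor = generate_accessor_entry();
      entry_t entry = (accessor != nullptr) ? accessor : normal_entry;
      entry();
    }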
--- a/hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -151,11 +151,15 @@ __ pop_fTOS(); break; case Interpreter::java_lang_math_exp: - __ exp_with_fallback(0); - // Store to stack to convert 80bit precision back to 64bits - __ push_fTOS(); - __ pop_fTOS(); - break; + __ subptr(rsp, 2*wordSize); + __ fstp_d(Address(rsp, 0)); + if (VM_Version::supports_sse2()) { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); + } else { + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp))); + } + __ addptr(rsp, 2*wordSize); + break; default : ShouldNotReachHere(); }
--- a/hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp Mon Oct 19 15:48:13 2015 -0400 +++ b/hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -52,8 +52,6 @@ #define __ _masm-> -PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC - #ifdef _WIN64 address AbstractInterpreterGenerator::generate_slow_signature_handler() { address entry = __ pc(); @@ -252,6 +250,9 @@ if (kind == Interpreter::java_lang_math_sqrt) { __ sqrtsd(xmm0, Address(rsp, wordSize)); + } else if (kind == Interpreter::java_lang_math_exp) { + __ movdbl(xmm0, Address(rsp, wordSize)); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); } else { __ fld_d(Address(rsp, wordSize)); switch (kind) { @@ -278,9 +279,6 @@ // empty stack slot) __ pow_with_fallback(0); break; - case Interpreter::java_lang_math_exp: - __ exp_with_fallback(0); - break; default : ShouldNotReachHere(); }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp Wed Oct 21 16:38:48 2015 -0400 @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "compiler/disassembler.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/sharedRuntime.hpp" +#include "jvmci/jvmciEnv.hpp" +#include "jvmci/jvmciCodeInstaller.hpp" +#include "jvmci/jvmciJavaClasses.hpp" +#include "jvmci/jvmciCompilerToVM.hpp" +#include "jvmci/jvmciRuntime.hpp" +#include "asm/register.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/vmreg.hpp" +#include "vmreg_x86.inline.hpp" + +jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) { + if (inst->is_call() || inst->is_jump()) { + assert(NativeCall::instruction_size == (int)NativeJump::instruction_size, "unexpected size"); + return (pc_offset + NativeCall::instruction_size); + } else if (inst->is_mov_literal64()) { + // mov+call instruction pair + jint offset = pc_offset + NativeMovConstReg::instruction_size; + u_char* call = (u_char*) (_instructions->start() + offset); + if (call[0] == Assembler::REX_B) { + offset += 1; /* prefix byte for extended register R8-R15 */ + call++; + } + assert(call[0] == 0xFF, "expected call"); + offset += 2; /* opcode byte + modrm byte */ + return (offset); + } else if (inst->is_call_reg()) { + // the inlined vtable stub contains a "call register" instruction + assert(method != NULL, "only valid for virtual calls"); + return (pc_offset + ((NativeCallReg *) inst)->next_instruction_offset()); + } else if (inst->is_cond_jump()) { + address pc = (address) (inst); + return pc_offset + (jint) (Assembler::locate_next_instruction(pc) - pc); + } else { + fatal("unsupported type of instruction for call site"); + return 0; + } +} + +void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) { + address pc = _instructions->start() + pc_offset; + Handle obj = HotSpotObjectConstantImpl::object(constant); + jobject value = JNIHandles::make_local(obj()); + if (HotSpotObjectConstantImpl::compressed(constant)) { +#ifdef _LP64 + address operand = Assembler::locate_operand(pc, Assembler::narrow_oop_operand); + int oop_index = _oop_recorder->find_index(value); + _instructions->relocate(pc, oop_Relocation::spec(oop_index), Assembler::narrow_oop_operand); + TRACE_jvmci_3("relocating (narrow oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand)); +#else + 
fatal("compressed oop on 32bit"); +#endif + } else { + address operand = Assembler::locate_operand(pc, Assembler::imm_operand); + *((jobject*) operand) = value; + _instructions->relocate(pc, oop_Relocation::spec_for_immediate(), Assembler::imm_operand); + TRACE_jvmci_3("relocating (oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand)); + } +} + +void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) { + address pc = _instructions->start() + pc_offset; + + address operand = Assembler::locate_operand(pc, Assembler::disp32_operand); + address next_instruction = Assembler::locate_next_instruction(pc); + address dest = _constants->start() + data_offset; + + long disp = dest - next_instruction; + assert(disp == (jint) disp, "disp doesn't fit in 32 bits"); + *((jint*) operand) = (jint) disp; + + _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS), Assembler::disp32_operand); + TRACE_jvmci_3("relocating at " PTR_FORMAT "/" PTR_FORMAT " with destination at " PTR_FORMAT " (%d)", p2i(pc), p2i(operand), p2i(dest), data_offset); +} + +void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) { + if (cb->is_nmethod()) { + nmethod* nm = (nmethod*) cb; + nativeJump_at((address)inst)->set_jump_destination(nm->verified_entry_point()); + } else { + nativeJump_at((address)inst)->set_jump_destination(cb->code_begin()); + } + _instructions->relocate((address)inst, runtime_call_Relocation::spec(), Assembler::call32_operand); +} + +void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) { + address pc = (address) inst; + if (inst->is_call()) { + // NOTE: for call without a mov, the offset must fit a 32-bit immediate + // see also CompilerToVM.getMaxCallTargetOffset() + NativeCall* call = nativeCall_at(pc); + call->set_destination((address) foreign_call_destination); + _instructions->relocate(call->instruction_addr