OpenJDK / bsd-port / jdk9 / hotspot
changeset 8630:d30647171e49
Merge
author | aph |
---|---|
date | Thu, 02 Jul 2015 11:12:59 +0100 |
parents | 9fcbb6768a78 f7b19ca15ef8 |
children | 9ad1e00a9f13 |
files | src/cpu/x86/vm/sharedRuntime_x86_64.cpp src/cpu/x86/vm/stubGenerator_x86_64.cpp src/cpu/x86/vm/vm_version_x86.cpp src/share/vm/classfile/vmSymbols.hpp src/share/vm/opto/c2_globals.hpp src/share/vm/opto/escape.cpp src/share/vm/opto/library_call.cpp src/share/vm/opto/runtime.cpp src/share/vm/opto/runtime.hpp src/share/vm/runtime/stubRoutines.cpp src/share/vm/runtime/stubRoutines.hpp test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForSupportedSparcCPU.java test/compiler/intrinsics/sha/cli/testcases/UseSHAIntrinsicsSpecificTestCaseForUnsupportedSparcCPU.java test/compiler/intrinsics/sha/cli/testcases/UseSHASpecificTestCaseForSupportedSparcCPU.java test/compiler/intrinsics/sha/cli/testcases/UseSHASpecificTestCaseForUnsupportedSparcCPU.java |
diffstat | 349 files changed, 14993 insertions(+), 4022 deletions(-) [+] |
line wrap: on
line diff
--- a/.hgtags Tue Jun 16 17:31:53 2015 +0100 +++ b/.hgtags Thu Jul 02 11:12:59 2015 +0100 @@ -469,3 +469,6 @@ bf92b8db249cdfa5651ef954b6c0743a7e0ea4cd jdk9-b64 e7ae94c4f35e940ea423fc1dd260435df34a77c0 jdk9-b65 197e94e0dacddd16816f101d24fc0442ab518326 jdk9-b66 +d47dfabd16d48eb96a451edd1b61194a39ee0eb5 jdk9-b67 +11af3990d56c97b40318bc1f20608e86f051a3f7 jdk9-b68 +ff0929a59ced0e144201aa05819ae2e47d6f2c61 jdk9-b69
--- a/agent/make/Makefile Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/make/Makefile Thu Jul 02 11:12:59 2015 +0100 @@ -58,6 +58,7 @@ sun.jvm.hotspot.debugger.dummy \ sun.jvm.hotspot.debugger.linux \ sun.jvm.hotspot.debugger.linux.amd64 \ +sun.jvm.hotspot.debugger.linux.aarch64 \ sun.jvm.hotspot.debugger.linux.ppc64 \ sun.jvm.hotspot.debugger.linux.x86 \ sun.jvm.hotspot.debugger.posix \ @@ -65,6 +66,7 @@ sun.jvm.hotspot.debugger.ppc64 \ sun.jvm.hotspot.debugger.proc \ sun.jvm.hotspot.debugger.proc.amd64 \ +sun.jvm.hotspot.debugger.proc.aarch64 \ sun.jvm.hotspot.debugger.proc.ppc64 \ sun.jvm.hotspot.debugger.proc.sparc \ sun.jvm.hotspot.debugger.proc.x86 \ @@ -91,11 +93,13 @@ sun.jvm.hotspot.prims \ sun.jvm.hotspot.runtime \ sun.jvm.hotspot.runtime.amd64 \ +sun.jvm.hotspot.runtime.aarch64 \ sun.jvm.hotspot.runtime.bsd \ sun.jvm.hotspot.runtime.bsd_amd64 \ sun.jvm.hotspot.runtime.bsd_x86 \ sun.jvm.hotspot.runtime.linux \ sun.jvm.hotspot.runtime.linux_amd64 \ +sun.jvm.hotspot.runtime.linux_aarch64 \ sun.jvm.hotspot.runtime.linux_ppc64 \ sun.jvm.hotspot.runtime.linux_sparc \ sun.jvm.hotspot.runtime.linux_x86 \ @@ -149,16 +153,19 @@ sun/jvm/hotspot/debugger/linux/*.java \ sun/jvm/hotspot/debugger/linux/ppc64/*.java \ sun/jvm/hotspot/debugger/linux/x86/*.java \ +sun/jvm/hotspot/debugger/linux/aarch64/*.java \ sun/jvm/hotspot/debugger/posix/*.java \ sun/jvm/hotspot/debugger/posix/elf/*.java \ sun/jvm/hotspot/debugger/ppc64/*.java \ sun/jvm/hotspot/debugger/proc/*.java \ sun/jvm/hotspot/debugger/proc/amd64/*.java \ +sun/jvm/hotspot/debugger/proc/aarch64/*.java \ sun/jvm/hotspot/debugger/proc/ppc64/*.java \ sun/jvm/hotspot/debugger/proc/sparc/*.java \ sun/jvm/hotspot/debugger/proc/x86/*.java \ sun/jvm/hotspot/debugger/remote/*.java \ sun/jvm/hotspot/debugger/remote/amd64/*.java \ +sun/jvm/hotspot/debugger/remote/aarch64/*.java \ sun/jvm/hotspot/debugger/remote/ppc64/*.java \ sun/jvm/hotspot/debugger/remote/sparc/*.java \ sun/jvm/hotspot/debugger/remote/x86/*.java \ @@ -178,11 +185,13 @@ sun/jvm/hotspot/prims/*.java \ sun/jvm/hotspot/runtime/*.java \ sun/jvm/hotspot/runtime/amd64/*.java \ +sun/jvm/hotspot/runtime/aarch64/*.java \ sun/jvm/hotspot/runtime/bsd/*.java \ sun/jvm/hotspot/runtime/bsd_amd64/*.java \ sun/jvm/hotspot/runtime/bsd_x86/*.java \ sun/jvm/hotspot/runtime/linux/*.java \ sun/jvm/hotspot/runtime/linux_amd64/*.java \ +sun/jvm/hotspot/runtime/linux_aarch64/*.java \ sun/jvm/hotspot/runtime/linux_ppc64/*.java \ sun/jvm/hotspot/runtime/linux_sparc/*.java \ sun/jvm/hotspot/runtime/linux_x86/*.java \
--- a/agent/src/os/linux/LinuxDebuggerLocal.c Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/os/linux/LinuxDebuggerLocal.c Thu Jul 02 11:12:59 2015 +0100 @@ -53,6 +53,10 @@ #include "sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext.h" #endif +#ifdef aarch64 +#include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" +#endif + static jfieldID p_ps_prochandle_ID = 0; static jfieldID threadList_ID = 0; static jfieldID loadObjectList_ID = 0; @@ -368,7 +372,7 @@ #define NPRGREG sun_jvm_hotspot_debugger_amd64_AMD64ThreadContext_NPRGREG #endif #ifdef aarch64 -#define NPRGREG 32 +#define NPRGREG sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_NPRGREG #endif #if defined(sparc) || defined(sparcv9) #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG @@ -473,6 +477,13 @@ #define REG_INDEX(reg) sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_##reg + { + int i; + for (i = 0; i < 31; i++) + regs[i] = gregs.regs[i]; + regs[REG_INDEX(SP)] = gregs.sp; + regs[REG_INDEX(PC)] = gregs.pc; + } #endif /* aarch64 */ #ifdef ppc64
--- a/agent/src/os/linux/Makefile Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/os/linux/Makefile Thu Jul 02 11:12:59 2015 +0100 @@ -53,14 +53,15 @@ $(JAVAH) -jni -classpath ../../../build/classes -d $(ARCH) \ sun.jvm.hotspot.debugger.x86.X86ThreadContext \ sun.jvm.hotspot.debugger.sparc.SPARCThreadContext \ - sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext + sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext \ + sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext $(GCC) $(CFLAGS) $< -o $@ $(ARCH)/sadis.o: ../../share/native/sadis.c $(JAVAH) -jni -classpath ../../../build/classes -d $(ARCH) \ sun.jvm.hotspot.asm.Disassembler $(GCC) $(CFLAGS) $< -o $@ - + $(ARCH)/%.o: %.c $(GCC) $(CFLAGS) $< -o $@
--- a/agent/src/os/linux/libproc.h Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/os/linux/libproc.h Thu Jul 02 11:12:59 2015 +0100 @@ -72,6 +72,7 @@ #define user_regs_struct pt_regs #endif #if defined(aarch64) +#include <asm/ptrace.h> #define user_regs_struct user_pt_regs #endif
--- a/agent/src/os/linux/proc_service.h Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/os/linux/proc_service.h Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,7 +30,7 @@ // Linux does not have the proc service library, though it does provide the // thread_db library which can be used to manipulate threads without having -// to know the details of LinuxThreads or NPTL +// to know the details of NPTL // copied from Solaris "proc_service.h" typedef enum {
--- a/agent/src/share/classes/sun/jvm/hotspot/HSDB.java Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/HSDB.java Thu Jul 02 11:12:59 2015 +0100 @@ -983,19 +983,15 @@ curFrame.getFP(), anno)); } else { - if (VM.getVM().getCPU().equals("x86") || VM.getVM().getCPU().equals("amd64")) { - // For C2, which has null frame pointers on x86/amd64 - CodeBlob cb = VM.getVM().getCodeCache().findBlob(curFrame.getPC()); - Address sp = curFrame.getSP(); - if (Assert.ASSERTS_ENABLED) { - Assert.that(cb.getFrameSize() > 0, "CodeBlob must have non-zero frame size"); - } - annoPanel.addAnnotation(new Annotation(sp, - sp.addOffsetTo(cb.getFrameSize()), - anno)); - } else { - Assert.that(VM.getVM().getCPU().equals("ia64"), "only ia64 should reach here"); + // For C2, which has null frame pointers on x86/amd64/aarch64 + CodeBlob cb = VM.getVM().getCodeCache().findBlob(curFrame.getPC()); + Address sp = curFrame.getSP(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() > 0, "CodeBlob must have non-zero frame size"); } + annoPanel.addAnnotation(new Annotation(sp, + sp.addOffsetTo(cb.getFrameSize()), + anno)); } // Add interpreter frame annotations
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/aarch64/AARCH64ThreadContext.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.aarch64; + +import java.lang.annotation.Native; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on aarch64 platforms; only a sub-portion + * of the context is guaranteed to be present on all operating + * systems. */ + +public abstract class AARCH64ThreadContext implements ThreadContext { + // Taken from /usr/include/asm/sigcontext.h on Linux/AARCH64. + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work) + + // One instance of the Native annotation is enough to trigger header generation + // for this file. + @Native + public static final int R0 = 0; + public static final int R1 = 1; + public static final int R2 = 2; + public static final int R3 = 3; + public static final int R4 = 4; + public static final int R5 = 5; + public static final int R6 = 6; + public static final int R7 = 7; + public static final int R8 = 8; + public static final int R9 = 9; + public static final int R10 = 10; + public static final int R11 = 11; + public static final int R12 = 12; + public static final int R13 = 13; + public static final int R14 = 14; + public static final int R15 = 15; + public static final int R16 = 16; + public static final int R17 = 17; + public static final int R18 = 18; + public static final int R19 = 19; + public static final int R20 = 20; + public static final int R21 = 21; + public static final int R22 = 22; + public static final int R23 = 23; + public static final int R24 = 24; + public static final int R25 = 25; + public static final int R26 = 26; + public static final int R27 = 27; + public static final int R28 = 28; + public static final int FP = 29; + public static final int LR = 30; + public static final int SP = 31; + public static final int PC = 32; + + public static final int NPRGREG = 33; + + private long[] data; + + public AARCH64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + switch (index) { + case LR: return "lr"; + case SP: return "sp"; + case PC: return "pc"; + default: + return "r" + index; + } + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + * tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +}
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,6 @@ /* * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,12 +32,14 @@ import sun.jvm.hotspot.debugger.cdbg.*; import sun.jvm.hotspot.debugger.x86.*; import sun.jvm.hotspot.debugger.amd64.*; +import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.sparc.*; import sun.jvm.hotspot.debugger.ppc64.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.amd64.*; import sun.jvm.hotspot.debugger.linux.sparc.*; import sun.jvm.hotspot.debugger.linux.ppc64.*; +import sun.jvm.hotspot.debugger.linux.aarch64.*; import sun.jvm.hotspot.utilities.*; class LinuxCDebugger implements CDebugger { @@ -106,6 +109,13 @@ Address pc = context.getRegisterAsAddress(PPC64ThreadContext.PC); if (pc == null) return null; return new LinuxPPC64CFrame(dbg, sp, pc, LinuxDebuggerLocal.getAddressSize()); + } else if (cpu.equals("aarch64")) { + AARCH64ThreadContext context = (AARCH64ThreadContext) thread.getContext(); + Address fp = context.getRegisterAsAddress(AARCH64ThreadContext.FP); + if (fp == null) return null; + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxAARCH64CFrame(dbg, fp, pc); } else { // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu ThreadContext context = (ThreadContext) thread.getContext();
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/aarch64/LinuxAARCH64CFrame.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; + +final public class LinuxAARCH64CFrame extends BasicCFrame { + public LinuxAARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { + super(dbg.getCDebugger()); + this.fp = fp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. + return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return fp; + } + + public CFrame sender(ThreadProxy thread) { + AARCH64ThreadContext context = (AARCH64ThreadContext) thread.getContext(); + Address rsp = context.getRegisterAsAddress(AARCH64ThreadContext.SP); + + if ((fp == null) || fp.lessThan(rsp)) { + return null; + } + + // Check alignment of fp + if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { + return null; + } + + Address nextFP = fp.getAddressAt(0 * ADDRESS_SIZE); + if (nextFP == null || nextFP.lessThanOrEqual(fp)) { + return null; + } + Address nextPC = fp.getAddressAt(1 * ADDRESS_SIZE); + if (nextPC == null) { + return null; + } + return new LinuxAARCH64CFrame(dbg, nextFP, nextPC); + } + + // package/class internals only + private static final int ADDRESS_SIZE = 8; + private Address pc; + private Address sp; + private Address fp; + private LinuxDebugger dbg; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/aarch64/LinuxAARCH64ThreadContext.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxAARCH64ThreadContext extends AARCH64ThreadContext { + private LinuxDebugger debugger; + + public LinuxAARCH64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +}
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java Thu Jul 02 11:12:59 2015 +0100 @@ -31,11 +31,13 @@ import sun.jvm.hotspot.debugger.*; import sun.jvm.hotspot.debugger.cdbg.*; import sun.jvm.hotspot.debugger.proc.amd64.*; +import sun.jvm.hotspot.debugger.proc.aarch64.*; import sun.jvm.hotspot.debugger.proc.sparc.*; import sun.jvm.hotspot.debugger.proc.ppc64.*; import sun.jvm.hotspot.debugger.proc.x86.*; import sun.jvm.hotspot.debugger.ppc64.*; import sun.jvm.hotspot.debugger.amd64.*; +import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.sparc.*; import sun.jvm.hotspot.debugger.x86.*; import sun.jvm.hotspot.utilities.*; @@ -88,6 +90,10 @@ threadFactory = new ProcAMD64ThreadFactory(this); pcRegIndex = AMD64ThreadContext.RIP; fpRegIndex = AMD64ThreadContext.RBP; + } else if (cpu.equals("aarch64")) { + threadFactory = new ProcAARCH64ThreadFactory(this); + pcRegIndex = AARCH64ThreadContext.PC; + fpRegIndex = AARCH64ThreadContext.FP; } else if (cpu.equals("ppc64")) { threadFactory = new ProcPPC64ThreadFactory(this); pcRegIndex = PPC64ThreadContext.PC;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/aarch64/ProcAARCH64Thread.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcAARCH64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcAARCH64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. + this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcAARCH64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcAARCH64ThreadContext context = new ProcAARCH64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == AARCH64ThreadContext.NPRGREG, "size mismatch"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcAARCH64Thread)) { + return false; + } + + return (((ProcAARCH64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/aarch64/ProcAARCH64ThreadContext.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcAARCH64ThreadContext extends AARCH64ThreadContext { + private ProcDebugger debugger; + + public ProcAARCH64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/aarch64/ProcAARCH64ThreadFactory.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcAARCH64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; + + public ProcAARCH64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcAARCH64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new ProcAARCH64Thread(debugger, id); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/aarch64/RemoteAARCH64Thread.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteAARCH64Thread extends RemoteThread { + public RemoteAARCH64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteAARCH64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteAARCH64ThreadContext context = new RemoteAARCH64ThreadContext(debugger); + long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == AARCH64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/aarch64/RemoteAARCH64ThreadContext.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteAARCH64ThreadContext extends AARCH64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteAARCH64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/aarch64/RemoteAARCH64ThreadFactory.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteAARCH64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteAARCH64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteAARCH64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new RemoteAARCH64Thread(debugger, id); + } +}
--- a/agent/src/share/classes/sun/jvm/hotspot/gc/shared/Generation.java Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/gc/shared/Generation.java Thu Jul 02 11:12:59 2015 +0100 @@ -49,7 +49,6 @@ public abstract class Generation extends VMObject { private static long reservedFieldOffset; private static long virtualSpaceFieldOffset; - private static CIntegerField levelField; protected static final int K = 1024; // Fields for class StatRecord private static Field statRecordField; @@ -75,7 +74,6 @@ reservedFieldOffset = type.getField("_reserved").getOffset(); virtualSpaceFieldOffset = type.getField("_virtual_space").getOffset(); - levelField = type.getCIntegerField("_level"); // StatRecord statRecordField = type.getField("_stat_record"); type = db.lookupType("Generation::StatRecord"); @@ -130,14 +128,6 @@ } } - public GenerationSpec spec() { - return ((GenCollectedHeap) VM.getVM().getUniverse().heap()).spec(level()); - } - - public int level() { - return (int) levelField.getValue(addr); - } - public int invocations() { return getStatRecord().getInvocations(); }
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -357,12 +357,6 @@ // FIXME: avoiding implementing this for now if possible // public void interpreter_frame_set_monitor_end(BasicObjectLock* value); // public void interpreter_frame_verify_monitor(BasicObjectLock* value) const; - // - // Tells whether the current interpreter_frame frame pointer - // corresponds to the old compiled/deoptimized fp - // The receiver used to be a top level frame - // public boolean interpreter_frame_equals_unpacked_fp(intptr_t* fp); - //-------------------------------------------------------------------------------- // Method and constant pool cache: //
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java Thu Jul 02 11:12:59 2015 +0100 @@ -35,6 +35,7 @@ import sun.jvm.hotspot.runtime.win32_x86.Win32X86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; @@ -91,6 +92,8 @@ access = new LinuxSPARCJavaThreadPDAccess(); } else if (cpu.equals("ppc64")) { access = new LinuxPPC64JavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); } else { try { access = (JavaThreadPDAccess)
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java Thu Jul 02 11:12:59 2015 +0100 @@ -121,6 +121,8 @@ private Flag[] commandLineFlags; private Map flagsMap; + private static Type intType; + private static Type uintType; private static Type intxType; private static Type uintxType; private static Type sizetType; @@ -170,6 +172,28 @@ return addr.getCIntegerAt(0, boolType.getSize(), boolType.isUnsigned()) != 0; } + public boolean isInt() { + return type.equals("int"); + } + + public long getInt() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInt(), "not an int flag!"); + } + return addr.getCIntegerAt(0, intType.getSize(), false); + } + + public boolean isUInt() { + return type.equals("uint"); + } + + public long getUInt() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isUInt(), "not a uint flag!"); + } + return addr.getCIntegerAt(0, uintType.getSize(), false); + } + public boolean isIntx() { return type.equals("intx"); } @@ -206,6 +230,10 @@ public String getValue() { if (isBool()) { return new Boolean(getBool()).toString(); + } else if (isInt()) { + return new Long(getInt()).toString(); + } else if (isUInt()) { + return new Long(getUInt()).toString(); } else if (isIntx()) { return new Long(getIntx()).toString(); } else if (isUIntx()) { @@ -334,6 +362,8 @@ heapWordSize = db.lookupIntConstant("HeapWordSize").intValue(); oopSize = db.lookupIntConstant("oopSize").intValue(); + intType = db.lookupType("int"); + uintType = db.lookupType("uint"); intxType = db.lookupType("intx"); uintxType = db.lookupType("uintx"); sizetType = db.lookupType("size_t");
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64CurrentFrameGuess.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.aarch64.*; + +/** <P> Should be able to be used on all aarch64 platforms we support + (Linux/aarch64) to implement JavaThread's "currentFrameGuess()" + functionality. Input is an AARCH64ThreadContext; output is SP, FP, + and PC for an AARCH64Frame. Instantiation of the AARCH64Frame is + left to the caller, since we may need to subclass AARCH64Frame to + support signal handler frames on Unix platforms. </P> + + <P> Algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated FP. + We repeat this until we either find a complete frame or run out of + stack to look at. </P> */ + +public class AARCH64CurrentFrameGuess { + private AARCH64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.aarch64.AARCH64Frame.DEBUG") + != null; + + public AARCH64CurrentFrameGuess(AARCH64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(AARCH64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(AARCH64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame either + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable FP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from SP. + + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, FP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new AARCH64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. + if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. Try another. + } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. + return false; + + /* + // Original algorithm which does not work because SP was + // pointing beyond where it should have: + + // For the server compiler, FP is not guaranteed to be valid + // for compiled code. We see whether the PC is in the + // interpreter and take care of that, otherwise we run code + // (unfortunately) duplicated from AARCH64Frame.senderForCompiledFrame. + + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + + // See if we can derive a frame pointer from SP and PC + // NOTE: This is the code duplicated from AARCH64Frame + Address saved_fp = null; + int llink_offset = cb.getLinkOffset(); + if (llink_offset >= 0) { + // Restore base-pointer, since next frame might be an interpreter frame. + Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); + saved_fp = fp_addr.getAddressAt(0); + } + + setValues(sp, saved_fp, pc); + return true; + } + */ + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved SP and + // FP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. + + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + + // The runtime has a nasty habit of not saving fp in the frame + // anchor, leaving us to grovel about in the stack to find a + // plausible address. Fortunately, this only happens in + // compiled code; there we always have a valid PC, and we always + // push LR and FP onto the stack as a pair, with FP at the lower + // address. + pc = thread.getLastJavaPC(); + fp = thread.getLastJavaFP(); + sp = thread.getLastJavaSP(); + + if (fp == null) { + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + if (DEBUG) { + System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); + } + // See if we can derive a frame pointer from SP and PC + long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); + if (link_offset >= 0) { + fp = sp.addOffsetTo(link_offset); + } + } + } + + setValues(sp, fp, null); + + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct AARCH64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64Frame.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,555 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.aarch64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +/** Specialization of and implementation of abstract methods of the + Frame class for the aarch64 family of CPUs. */ + +public class AARCH64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.aarch64.AARCH64Frame.DEBUG") != null; + } + + // All frames + private static final int LINK_OFFSET = 0; + private static final int RETURN_ADDR_OFFSET = 1; + private static final int SENDER_SP_OFFSET = 2; + + // Interpreter frames + private static final int INTERPRETER_FRAME_MIRROR_OFFSET = 2; // for native calls only + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; + private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only + private static int INTERPRETER_FRAME_CACHE_OFFSET; + private static int INTERPRETER_FRAME_LOCALS_OFFSET; + private static int INTERPRETER_FRAME_BCX_OFFSET; + private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; + + // Entry frames + private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -8; + + // Native frames + private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; + + private static VMReg fp = new VMReg(29); + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; + INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; + INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; + INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; + INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; + INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + } + + + // an additional field beyond sp and pc: + Address raw_fp; // frame pointer + private Address raw_unextendedSP; + + private AARCH64Frame() { + } + + private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if (pc.equals(nm.deoptHandlerBegin())) { + if (Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized. + pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + public AARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("AARCH64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + public AARCH64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("AARCH64Frame(sp, fp): " + this); + dumpStack(); + } + } + + public AARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("AARCH64Frame(sp, unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + public Object clone() { + AARCH64Frame frame = new AARCH64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof AARCH64Frame)) { + return false; + } + + AARCH64Frame other = (AARCH64Frame) arg; + + return (AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? "null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large. + return false; + } + + return true; + } + + // FIXME: not applicable in current system + // void patch_pc(Thread* thread, address pc); + + public Frame sender(RegisterMap regMap, CodeBlob cb) { + AARCH64RegisterMap map = (AARCH64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return new AARCH64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + private Frame senderForEntryFrame(AARCH64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + AARCH64JavaCallWrapper jcw = (AARCH64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + AARCH64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new AARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new AARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + + //------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // If we are returning to a compiled MethodHandle call site, the + // saved_fp will in fact be a saved value of the unextended SP. The + // simplest way to tell whether we are returning to such a call site + // is as follows: + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original + // PC. For MethodHandle call site the unextended_sp is stored in + // saved_fp. + if (senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP())); + raw_unextendedSP = getFP(); + } + else if (senderNm.isDeoptEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp)); + } + else if (senderNm.isMethodHandleReturn(getPC())) { + raw_unextendedSP = getFP(); + } + } + } + + private Frame senderForInterpreterFrame(AARCH64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = addressOfStackSlot(SENDER_SP_OFFSET); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find. + + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); + + return new AARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(fp, savedFPAddr); + } + + private Frame senderForCompiledFrame(AARCH64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated AARCH64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // The return_address is always the word on the stack + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of FP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame. + Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of FP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + updateMapWithSavedLink(map, savedFPAddr); + } + + return new AARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + return true; + } + + public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + public Address getLink() { + try { + if (DEBUG) { + System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) + + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); + } + return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); + } catch (Exception e) { + if (DEBUG) + System.out.println("Returning null"); + return null; + } + } + + // FIXME: not implementable yet + //inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + // return address of param, zero origin index. + public Address getNativeParamAddr(int idx) { + return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + + public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCX() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. Need to + // figure out how to express this. + Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDX() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // FIXME + //inline int frame::interpreter_frame_monitor_size() { + // return BasicObjectLock::size(); + //} + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + return BasicObjectLock.size(); + } + + // Method + public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new AARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 for compiler2 and 3 for compiler1 + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + private void dumpStack() { + for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + AddressOps.lt(addr, getSP()); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + System.out.println("-----------------------"); + for (Address addr = getSP(); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64JavaCallWrapper.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.aarch64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; + +public class AARCH64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public AARCH64JavaCallWrapper(Address addr) { + super(addr); + } + + public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64RegisterMap.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.aarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +public class AARCH64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public AARCH64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + protected AARCH64RegisterMap(RegisterMap map) { + super(map); + } + + public Object clone() { + AARCH64RegisterMap retval = new AARCH64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_aarch64/LinuxAARCH64JavaThreadPDAccess.java Thu Jul 02 11:12:59 2015 +0100 @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.linux_aarch64; + +import java.io.*; +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.aarch64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.aarch64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +public class LinuxAARCH64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type anchorType = db.lookupType("JavaFrameAnchor"); + lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + public Address getLastJavaFP(Address addr) { + return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); + } + + public Address getLastJavaPC(Address addr) { + return null; + } + + public Address getBaseOfStackPointer(Address addr) { + return null; + } + + public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new AARCH64Frame(thread.getLastJavaSP(), fp); + } + + public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { + return new AARCH64RegisterMap(thread, updateMap); + } + + public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + AARCH64ThreadContext context = (AARCH64ThreadContext) t.getContext(); + AARCH64CurrentFrameGuess guesser = new AARCH64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new AARCH64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new AARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); +// tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); + } + + public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + AARCH64ThreadContext context = (AARCH64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(AARCH64ThreadContext.SP); + } + + public ThreadProxy getThreadProxy(Address addr) { + // Addr is the address of the JavaThread. + // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +}
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/AltPlatformInfo.java Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/AltPlatformInfo.java Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,10 @@ package sun.jvm.hotspot.utilities; public interface AltPlatformInfo { + // Additional cpu types can be tested via this interface + public boolean knownCPU(String cpu); - public boolean knownCPU(String cpu); -} \ No newline at end of file + // Mangle a cpu name if necessary + public String getCPU(String cpu); +}
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -52,27 +52,54 @@ } } - /* Returns "sparc" for SPARC based platforms and "x86" for x86 based - platforms. Otherwise returns the value of os.arch. If the value - is not recognized as supported, an exception is thrown instead. */ + public static boolean knownCPU(String cpu) { + final String[] KNOWN = + new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "aarch64"}; + + for(String s : KNOWN) { + if(s.equals(cpu)) + return true; + } + + return false; + } + + /* Returns "sparc" for SPARC based platforms "x86" for x86 based + platforms and x86_64 for 64bit x86 based platform. Otherwise + returns the value of os.arch. If the value is not recognized as supported, + an exception is thrown instead. */ + public static String getCPU() throws UnsupportedPlatformException { String cpu = System.getProperty("os.arch"); - if (cpu.equals("i386") || cpu.equals("x86")) { + + // Let any additional CPU mangling fire first + try { + Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed"); + AltPlatformInfo api = (AltPlatformInfo) pic.newInstance(); + if (api.knownCPU(cpu)) { + return api.getCPU(cpu); + } + } catch (Exception e) { + // Ignored + } + + // Check that CPU is supported + if (!knownCPU(cpu)) { + throw new UnsupportedPlatformException("CPU type " + cpu + " not yet supported"); + } + + // Tweeks + if (cpu.equals("i386")) return "x86"; - } else if (cpu.equals("sparc") || cpu.equals("sparcv9")) { + + if (cpu.equals("sparcv9")) return "sparc"; - } else if (cpu.equals("ia64") || cpu.equals("amd64") || cpu.equals("x86_64") || cpu.equals("ppc64") || cpu.equals("aarch64")) { - return cpu; - } else { - try { - Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed"); - AltPlatformInfo api = (AltPlatformInfo)pic.newInstance(); - if (api.knownCPU(cpu)) { - return cpu; - } - } catch (Exception e) {} - throw new UnsupportedPlatformException("CPU type " + cpu + " not yet supported"); - } + + if (cpu.equals("x86_64")) + return "amd64"; + + return cpu; + } // this main is invoked from Makefile to make platform specific agent Makefile(s).
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/PointerLocation.java Tue Jun 16 17:31:53 2015 +0100 +++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/PointerLocation.java Thu Jul 02 11:12:59 2015 +0100 @@ -84,11 +84,11 @@ } public boolean isInNewGen() { - return ((gen != null) && (gen.level() == 0)); + return ((gen != null) && (gen == ((GenCollectedHeap)heap).getGen(0))); } public boolean isInOldGen() { - return ((gen != null) && (gen.level() == 1)); + return ((gen != null) && (gen == ((GenCollectedHeap)heap).getGen(1))); } public boolean inOtherGen() { @@ -207,8 +207,6 @@ tty.print("In new generation "); } else if (isInOldGen()) { tty.print("In old generation "); - } else if (gen != null) { - tty.print("In Generation " + getGeneration().level()); } else { tty.print("In unknown section of Java heap"); }
--- a/make/bsd/makefiles/dtrace.make Tue Jun 16 17:31:53 2015 +0100 +++ b/make/bsd/makefiles/dtrace.make Thu Jul 02 11:12:59 2015 +0100 @@ -263,14 +263,19 @@ $(DtraceOutDir): mkdir $(DtraceOutDir) +# When building using a devkit, dtrace cannot find the correct preprocessor so +# we run it explicitly before runing dtrace. $(DtraceOutDir)/hotspot.h: $(DTRACE_COMMON_SRCDIR)/hotspot.d | $(DtraceOutDir) - $(QUIETLY) $(DTRACE_PROG) $(DTRACE_OPTS) -C -I. -h -o $@ -s $(DTRACE_COMMON_SRCDIR)/hotspot.d + $(QUIETLY) $(CC) -E $(DTRACE_OPTS) -I. -x c $(DTRACE_COMMON_SRCDIR)/hotspot.d > $(DtraceOutDir)/hotspot.d + $(QUIETLY) $(DTRACE_PROG) -h -o $@ -s $(DtraceOutDir)/hotspot.d $(DtraceOutDir)/hotspot_jni.h: $(DTRACE_COMMON_SRCDIR)/hotspot_jni.d | $(DtraceOutDir) - $(QUIETLY) $(DTRACE_PROG) $(DTRACE_OPTS) -C -I. -h -o $@ -s $(DTRACE_COMMON_SRCDIR)/hotspot_jni.d + $(QUIETLY) $(CC) -E $(DTRACE_OPTS) -I. -x c $(DTRACE_COMMON_SRCDIR)/hotspot_jni.d > $(DtraceOutDir)/hotspot_jni.d + $(QUIETLY) $(DTRACE_PROG) -h -o $@ -s $(DtraceOutDir)/hotspot_jni.d $(DtraceOutDir)/hs_private.h: $(DTRACE_COMMON_SRCDIR)/hs_private.d | $(DtraceOutDir) - $(QUIETLY) $(DTRACE_PROG) $(DTRACE_OPTS) -C -I. -h -o $@ -s $(DTRACE_COMMON_SRCDIR)/hs_private.d + $(QUIETLY) $(CC) -E $(DTRACE_OPTS) -I. -x c $(DTRACE_COMMON_SRCDIR)/hs_private.d > $(DtraceOutDir)/hs_private.d + $(QUIETLY) $(DTRACE_PROG) -h -o $@ -s $(DtraceOutDir)/hs_private.d dtrace_gen_headers: $(DtraceOutDir)/hotspot.h $(DtraceOutDir)/hotspot_jni.h $(DtraceOutDir)/hs_private.h
--- a/make/bsd/makefiles/universal.gmk Tue Jun 16 17:31:53 2015 +0100 +++ b/make/bsd/makefiles/universal.gmk Thu Jul 02 11:12:59 2015 +0100 @@ -56,13 +56,14 @@ universalize: $(UNIVERSAL_LIPO_LIST) $(UNIVERSAL_COPY_LIST) $(RM) -r $(EXPORT_PATH)/lib/{i386,amd64} +LIPO ?= lipo # Package built libraries in a universal binary $(UNIVERSAL_LIPO_LIST): BUILT_LIPO_FILES="`find $(EXPORT_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_LIB_DIR)/,,$@) 2>/dev/null`" || test $$? = "1"; \ if [ -n "$${BUILT_LIPO_FILES}" ]; then \ $(MKDIR) -p $(shell dirname $@); \ - lipo -create -output $@ $${BUILT_LIPO_FILES}; \ + $(LIPO) -create -output $@ $${BUILT_LIPO_FILES}; \ fi
--- a/make/sa.files Tue Jun 16 17:31:53 2015 +0100 +++ b/make/sa.files Thu Jul 02 11:12:59 2015 +0100 @@ -44,6 +44,7 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/compiler/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/bsd/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/bsd/amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/bsd/x86/*.java \ @@ -55,6 +56,7 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/ia64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/ppc64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/*.java \ @@ -63,6 +65,7 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/ppc64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/*.java \ @@ -70,6 +73,7 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/ppc64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/x86/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/win32/coff/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/windbg/*.java \ @@ -92,11 +96,13 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/prims/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd_amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd_x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_ppc64/*.java \
--- a/src/cpu/aarch64/vm/aarch64.ad Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/aarch64/vm/aarch64.ad Thu Jul 02 11:12:59 2015 +0100 @@ -865,6 +865,42 @@ V31, V31_H ); +// Class for all 64bit vector registers +reg_class vectord_reg( + V0, V0_H, + V1, V1_H, + V2, V2_H, + V3, V3_H, + V4, V4_H, + V5, V5_H, + V6, V6_H, + V7, V7_H, + V8, V8_H, + V9, V9_H, + V10, V10_H, + V11, V11_H, + V12, V12_H, + V13, V13_H, + V14, V14_H, + V15, V15_H, + V16, V16_H, + V17, V17_H, + V18, V18_H, + V19, V19_H, + V20, V20_H, + V21, V21_H, + V22, V22_H, + V23, V23_H, + V24, V24_H, + V25, V25_H, + V26, V26_H, + V27, V27_H, + V28, V28_H, + V29, V29_H, + V30, V30_H, + V31, V31_H +); + // Class for all 128bit vector registers reg_class vectorx_reg( V0, V0_H, V0_J, V0_K, @@ -2133,40 +2169,48 @@ if (bottom_type()->isa_vect() != NULL) { uint len = 4; + uint ireg = ideal_reg(); + assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); if (cbuf) { MacroAssembler _masm(cbuf); - uint ireg = ideal_reg(); assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity"); - assert(ireg == Op_VecX, "sanity"); if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { // stack->stack int src_offset = ra_->reg2offset(src_lo); int dst_offset = ra_->reg2offset(dst_lo); assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset"); len = 8; - if (src_offset < 512) { - __ ldp(rscratch1, rscratch2, Address(sp, src_offset)); + if (ireg == Op_VecD) { + __ ldr(rscratch1, Address(sp, src_offset)); + __ str(rscratch1, Address(sp, dst_offset)); } else { - __ ldr(rscratch1, Address(sp, src_offset)); - __ ldr(rscratch2, Address(sp, src_offset+4)); - len += 4; - } - if (dst_offset < 512) { - __ stp(rscratch1, rscratch2, Address(sp, dst_offset)); - } else { - __ str(rscratch1, Address(sp, dst_offset)); - __ str(rscratch2, Address(sp, dst_offset+4)); - len += 4; + if (src_offset < 512) { + __ ldp(rscratch1, rscratch2, Address(sp, src_offset)); + } else { + __ ldr(rscratch1, Address(sp, src_offset)); + __ ldr(rscratch2, Address(sp, src_offset+4)); + len += 4; + } + if (dst_offset < 512) { + __ stp(rscratch1, rscratch2, Address(sp, dst_offset)); + } else { + __ str(rscratch1, Address(sp, dst_offset)); + __ str(rscratch2, Address(sp, dst_offset+4)); + len += 4; + } } } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { - __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]), __ T16B, + __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]), + ireg == Op_VecD ? __ T8B : __ T16B, as_FloatRegister(Matcher::_regEncode[src_lo]), as_FloatRegister(Matcher::_regEncode[src_lo])); } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { - __ str(as_FloatRegister(Matcher::_regEncode[src_lo]), __ Q, + __ str(as_FloatRegister(Matcher::_regEncode[src_lo]), + ireg == Op_VecD ? __ D : __ Q, Address(sp, ra_->reg2offset(dst_lo))); } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { - __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]), __ Q, + __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]), + ireg == Op_VecD ? __ D : __ Q, Address(sp, ra_->reg2offset(src_lo))); } else { ShouldNotReachHere(); @@ -2176,17 +2220,22 @@ // stack->stack int src_offset = ra_->reg2offset(src_lo); int dst_offset = ra_->reg2offset(dst_lo); - if (src_offset < 512) { - st->print("ldp rscratch1, rscratch2, [sp, #%d]", src_offset); + if (ireg == Op_VecD) { + st->print("ldr rscratch1, [sp, #%d]", src_offset); + st->print("str rscratch1, [sp, #%d]", dst_offset); } else { - st->print("ldr rscratch1, [sp, #%d]", src_offset); - st->print("\nldr rscratch2, [sp, #%d]", src_offset+4); - } - if (dst_offset < 512) { - st->print("\nstp rscratch1, rscratch2, [sp, #%d]", dst_offset); - } else { - st->print("\nstr rscratch1, [sp, #%d]", dst_offset); - st->print("\nstr rscratch2, [sp, #%d]", dst_offset+4); + if (src_offset < 512) { + st->print("ldp rscratch1, rscratch2, [sp, #%d]", src_offset); + } else { + st->print("ldr rscratch1, [sp, #%d]", src_offset); + st->print("\nldr rscratch2, [sp, #%d]", src_offset+4); + } + if (dst_offset < 512) { + st->print("\nstp rscratch1, rscratch2, [sp, #%d]", dst_offset); + } else { + st->print("\nstr rscratch1, [sp, #%d]", dst_offset); + st->print("\nstr rscratch2, [sp, #%d]", dst_offset+4); + } } st->print("\t# vector spill, stack to stack"); } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { @@ -2638,17 +2687,22 @@ return vector_width_in_bytes(bt)/type2aelembytes(bt); } const int Matcher::min_vector_size(const BasicType bt) { - //return (type2aelembytes(bt) == 1) ? 4 : 2; - // For the moment, only support 1 vector size, 128 bits - return max_vector_size(bt); +// For the moment limit the vector size to 8 bytes + int size = 8 / type2aelembytes(bt); + if (size < 2) size = 2; + return size; } // Vector ideal reg. const int Matcher::vector_ideal_reg(int len) { - return Op_VecX; + switch(len) { + case 8: return Op_VecD; + case 16: return Op_VecX; + } + ShouldNotReachHere(); + return 0; } -// Only lowest bits of xmm reg are used for vector shift count. const int Matcher::vector_shift_count_ideal_reg(int size) { return Op_VecX; } @@ -2660,9 +2714,7 @@ // x86 supports misaligned vectors store/load. const bool Matcher::misaligned_vectors_ok() { - // TODO fixme - // return !AlignVector; // can be changed by flag - return false; + return !AlignVector; // can be changed by flag } // false => size gets scaled to BytesPerLong, ok. @@ -3073,13 +3125,13 @@ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} - enc_class aarch64_enc_ldrvS(vecX dst, memory mem) %{ + enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} - enc_class aarch64_enc_ldrvD(vecX dst, memory mem) %{ + enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); @@ -3159,13 +3211,13 @@ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} - enc_class aarch64_enc_strvS(vecX src, memory mem) %{ + enc_class aarch64_enc_strvS(vecD src, memory mem) %{ FloatRegister src_reg = as_FloatRegister($src$$reg); loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} - enc_class aarch64_enc_strvD(vecX src, memory mem) %{ + enc_class aarch64_enc_strvD(vecD src, memory mem) %{ FloatRegister src_reg = as_FloatRegister($src$$reg); loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); @@ -5187,6 +5239,16 @@ interface(REG_INTER); %} +operand vecD() +%{ + constraint(ALLOC_IN_RC(vectord_reg)); + match(VecD); + + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + operand vecX() %{ constraint(ALLOC_IN_RC(vectorx_reg)); @@ -7402,6 +7464,96 @@ ins_pipe(ialu_reg); %} +//---------- Population Count Instructions ------------------------------------- +// + +instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "movw $src, $src\n\t" + "mov $tmp, $src\t# vector (1D)\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0 + __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI (LoadI mem))); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "ldrs $tmp, $mem\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + FloatRegister tmp_reg = as_FloatRegister($tmp$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +// Note: Long.bitCount(long) returns an int. +instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "mov $tmp, $src\t# vector (1D)\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL (LoadL mem))); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "ldrd $tmp, $mem\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + FloatRegister tmp_reg = as_FloatRegister($tmp$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + // ============================================================================ // MemBar Instruction @@ -13194,7 +13346,7 @@ // ====================VECTOR INSTRUCTIONS===================================== // Load vector (32 bits) -instruct loadV4(vecX dst, vmem mem) +instruct loadV4(vecD dst, vmem mem) %{ predicate(n->as_LoadVector()->memory_size() == 4); match(Set dst (LoadVector mem)); @@ -13205,7 +13357,7 @@ %} // Load vector (64 bits) -instruct loadV8(vecX dst, vmem mem) +instruct loadV8(vecD dst, vmem mem) %{ predicate(n->as_LoadVector()->memory_size() == 8); match(Set dst (LoadVector mem)); @@ -13227,7 +13379,7 @@ %} // Store Vector (32 bits) -instruct storeV4(vecX src, vmem mem) +instruct storeV4(vecD src, vmem mem) %{ predicate(n->as_StoreVector()->memory_size() == 4); match(Set mem (StoreVector mem src)); @@ -13238,7 +13390,7 @@ %} // Store Vector (64 bits) -instruct storeV8(vecX src, vmem mem) +instruct storeV8(vecD src, vmem mem) %{ predicate(n->as_StoreVector()->memory_size() == 8); match(Set mem (StoreVector mem src)); @@ -13259,8 +13411,22 @@ ins_pipe(pipe_class_memory); %} +instruct replicate8B(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (8B)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct replicate16B(vecX dst, iRegIorL2I src) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateB src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (16B)" %} @@ -13270,19 +13436,47 @@ ins_pipe(pipe_class_default); %} +instruct replicate8B_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(8B)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff); + %} + ins_pipe(pipe_class_default); +%} + instruct replicate16B_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateB con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(16B)" %} ins_encode %{ - __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant); + __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate4S(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4S)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg)); %} ins_pipe(pipe_class_default); %} instruct replicate8S(vecX dst, iRegIorL2I src) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateS src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (8S)" %} @@ -13292,19 +13486,46 @@ ins_pipe(pipe_class_default); %} +instruct replicate4S_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(4H)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff); + %} + ins_pipe(pipe_class_default); +%} + instruct replicate8S_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateS con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(8H)" %} ins_encode %{ - __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant); + __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff); + %} + ins_pipe(pipe_class_default); +%} + +instruct replicate2I(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2I)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg)); %} ins_pipe(pipe_class_default); %} instruct replicate4I(vecX dst, iRegIorL2I src) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateI src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (4I)" %} @@ -13314,8 +13535,21 @@ ins_pipe(pipe_class_default); %} +instruct replicate2I_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(2I)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant); + %} + ins_pipe(pipe_class_default); +%} + instruct replicate4I_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateI con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(4I)" %} @@ -13327,6 +13561,7 @@ instruct replicate2L(vecX dst, iRegL src) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateL src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (2L)" %} @@ -13338,6 +13573,7 @@ instruct replicate2L_zero(vecX dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateI zero)); ins_cost(INSN_COST); format %{ "movi $dst, $zero\t# vector(4I)" %} @@ -13349,8 +13585,22 @@ ins_pipe(pipe_class_default); %} +instruct replicate2F(vecD dst, vRegF src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2F)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct replicate4F(vecX dst, vRegF src) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateF src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (4F)" %} @@ -13363,6 +13613,7 @@ instruct replicate2D(vecX dst, vRegD src) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateD src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (2D)" %} @@ -13375,6 +13626,25 @@ // ====================REDUCTION ARITHMETIC==================================== +instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2) +%{ + match(Set dst (AddReductionVI src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP tmp2); + format %{ "umov $tmp, $src2, S, 0\n\t" + "umov $tmp2, $src2, S, 1\n\t" + "addw $dst, $src1, $tmp\n\t" + "addw $dst, $dst, $tmp2\t add reduction2i" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0); + __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1); + __ addw($dst$$Register, $src1$$Register, $tmp$$Register); + __ addw($dst$$Register, $dst$$Register, $tmp2$$Register); + %} + ins_pipe(pipe_class_default); +%} + instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2) %{ match(Set dst (AddReductionVI src1 src2)); @@ -13393,6 +13663,25 @@ ins_pipe(pipe_class_default); %} +instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp) +%{ + match(Set dst (MulReductionVI src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "umov $tmp, $src2, S, 0\n\t" + "mul $dst, $tmp, $src1\n\t" + "umov $tmp, $src2, S, 1\n\t" + "mul $dst, $tmp, $dst\t mul reduction2i\n\t" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0); + __ mul($dst$$Register, $tmp$$Register, $src1$$Register); + __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1); + __ mul($dst$$Register, $tmp$$Register, $dst$$Register); + %} + ins_pipe(pipe_class_default); +%} + instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2) %{ match(Set dst (MulReductionVI src1 src2)); @@ -13418,6 +13707,26 @@ ins_pipe(pipe_class_default); %} +instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp) +%{ + match(Set dst (AddReductionVF src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "fadds $dst, $src1, $src2\n\t" + "ins $tmp, S, $src2, 0, 1\n\t" + "fadds $dst, $dst, $tmp\t add reduction2f" + %} + ins_encode %{ + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($src2$$reg), 0, 1); + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp) %{ match(Set dst (AddReductionVF src1 src2)); @@ -13450,6 +13759,26 @@ ins_pipe(pipe_class_default); %} +instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp) +%{ + match(Set dst (MulReductionVF src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "fmuls $dst, $src1, $src2\n\t" + "ins $tmp, S, $src2, 0, 1\n\t" + "fmuls $dst, $dst, $tmp\t add reduction4f" + %} + ins_encode %{ + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($src2$$reg), 0, 1); + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp) %{ match(Set dst (MulReductionVF src1 src2)); @@ -13526,8 +13855,24 @@ // --------------------------------- ADD -------------------------------------- +instruct vadd8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (AddVB src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vadd16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (AddVB src1 src2)); ins_cost(INSN_COST); format %{ "addv $dst,$src1,$src2\t# vector (16B)" %} @@ -13539,8 +13884,24 @@ ins_pipe(pipe_class_default); %} +instruct vadd4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (AddVS src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vadd8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (AddVS src1 src2)); ins_cost(INSN_COST); format %{ "addv $dst,$src1,$src2\t# vector (8H)" %} @@ -13552,8 +13913,23 @@ ins_pipe(pipe_class_default); %} +instruct vadd2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vadd4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (AddVI src1 src2)); ins_cost(INSN_COST); format %{ "addv $dst,$src1,$src2\t# vector (4S)" %} @@ -13567,6 +13943,7 @@ instruct vadd2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (AddVL src1 src2)); ins_cost(INSN_COST); format %{ "addv $dst,$src1,$src2\t# vector (2L)" %} @@ -13578,8 +13955,23 @@ ins_pipe(pipe_class_default); %} +instruct vadd2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vadd4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (AddVF src1 src2)); ins_cost(INSN_COST); format %{ "fadd $dst,$src1,$src2\t# vector (4S)" %} @@ -13606,8 +13998,24 @@ // --------------------------------- SUB -------------------------------------- +instruct vsub8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (SubVB src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsub16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (SubVB src1 src2)); ins_cost(INSN_COST); format %{ "subv $dst,$src1,$src2\t# vector (16B)" %} @@ -13619,8 +14027,24 @@ ins_pipe(pipe_class_default); %} +instruct vsub4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (SubVS src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsub8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (SubVS src1 src2)); ins_cost(INSN_COST); format %{ "subv $dst,$src1,$src2\t# vector (8H)" %} @@ -13632,8 +14056,23 @@ ins_pipe(pipe_class_default); %} +instruct vsub2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVI src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsub4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (SubVI src1 src2)); ins_cost(INSN_COST); format %{ "subv $dst,$src1,$src2\t# vector (4S)" %} @@ -13647,6 +14086,7 @@ instruct vsub2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (SubVL src1 src2)); ins_cost(INSN_COST); format %{ "subv $dst,$src1,$src2\t# vector (2L)" %} @@ -13658,8 +14098,23 @@ ins_pipe(pipe_class_default); %} +instruct vsub2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fsub $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fsub(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsub4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (SubVF src1 src2)); ins_cost(INSN_COST); format %{ "fsub $dst,$src1,$src2\t# vector (4S)" %} @@ -13673,6 +14128,7 @@ instruct vsub2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (SubVD src1 src2)); ins_cost(INSN_COST); format %{ "fsub $dst,$src1,$src2\t# vector (2D)" %} @@ -13686,8 +14142,24 @@ // --------------------------------- MUL -------------------------------------- +instruct vmul4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (MulVS src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vmul8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (MulVS src1 src2)); ins_cost(INSN_COST); format %{ "mulv $dst,$src1,$src2\t# vector (8H)" %} @@ -13699,8 +14171,23 @@ ins_pipe(pipe_class_default); %} +instruct vmul2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVI src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vmul4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (MulVI src1 src2)); ins_cost(INSN_COST); format %{ "mulv $dst,$src1,$src2\t# vector (4S)" %} @@ -13712,8 +14199,23 @@ ins_pipe(pipe_class_default); %} +instruct vmul2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fmul $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fmul(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vmul4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (MulVF src1 src2)); ins_cost(INSN_COST); format %{ "fmul $dst,$src1,$src2\t# vector (4S)" %} @@ -13727,6 +14229,7 @@ instruct vmul2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (MulVD src1 src2)); ins_cost(INSN_COST); format %{ "fmul $dst,$src1,$src2\t# vector (2D)" %} @@ -13740,8 +14243,23 @@ // --------------------------------- DIV -------------------------------------- +instruct vdiv2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fdiv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fdiv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vdiv4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (DivVF src1 src2)); ins_cost(INSN_COST); format %{ "fdiv $dst,$src1,$src2\t# vector (4S)" %} @@ -13755,6 +14273,7 @@ instruct vdiv2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (DivVD src1 src2)); ins_cost(INSN_COST); format %{ "fdiv $dst,$src1,$src2\t# vector (2D)" %} @@ -13768,8 +14287,24 @@ // --------------------------------- AND -------------------------------------- +instruct vand8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8); + match(Set dst (AndV src1 src2)); + ins_cost(INSN_COST); + format %{ "and $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ andr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vand16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); match(Set dst (AndV src1 src2)); ins_cost(INSN_COST); format %{ "and $dst,$src1,$src2\t# vector (16B)" %} @@ -13783,8 +14318,24 @@ // --------------------------------- OR --------------------------------------- +instruct vor8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8); + match(Set dst (OrV src1 src2)); + ins_cost(INSN_COST); + format %{ "and $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ orr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vor16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); match(Set dst (OrV src1 src2)); ins_cost(INSN_COST); format %{ "orr $dst,$src1,$src2\t# vector (16B)" %} @@ -13798,8 +14349,24 @@ // --------------------------------- XOR -------------------------------------- +instruct vxor8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8); + match(Set dst (XorV src1 src2)); + ins_cost(INSN_COST); + format %{ "xor $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vxor16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); match(Set dst (XorV src1 src2)); ins_cost(INSN_COST); format %{ "xor $dst,$src1,$src2\t# vector (16B)" %} @@ -13833,7 +14400,23 @@ ins_pipe(pipe_class_default); %} +instruct vsll8B(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (LShiftVB src shift)); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (8B)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsll16B(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (LShiftVB src shift)); match(Set dst (RShiftVB src shift)); ins_cost(INSN_COST); @@ -13846,7 +14429,22 @@ ins_pipe(pipe_class_default); %} +instruct vsrl8B(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (8B)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsrl16B(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (URShiftVB src shift)); ins_cost(INSN_COST); format %{ "ushl $dst,$src,$shift\t# vector (16B)" %} @@ -13858,7 +14456,28 @@ ins_pipe(pipe_class_default); %} +instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (LShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(pipe_class_default); +%} + instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (LShiftVB src shift)); ins_cost(INSN_COST); format %{ "shl $dst, $src, $shift\t# vector (16B)" %} @@ -13876,7 +14495,24 @@ ins_pipe(pipe_class_default); %} +instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) sh = 7; + sh = -sh & 7; + __ sshr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(pipe_class_default); +%} + instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (RShiftVB src shift)); ins_cost(INSN_COST); format %{ "sshr $dst, $src, $shift\t# vector (16B)" %} @@ -13890,7 +14526,28 @@ ins_pipe(pipe_class_default); %} +instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), -sh & 7); + } + %} + ins_pipe(pipe_class_default); +%} + instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (URShiftVB src shift)); ins_cost(INSN_COST); format %{ "ushr $dst, $src, $shift\t# vector (16B)" %} @@ -13908,7 +14565,23 @@ ins_pipe(pipe_class_default); %} +instruct vsll4S(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (4H)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsll8S(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); match(Set dst (RShiftVS src shift)); ins_cost(INSN_COST); @@ -13921,7 +14594,22 @@ ins_pipe(pipe_class_default); %} +instruct vsrl4S(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (4H)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsrl8S(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); ins_cost(INSN_COST); format %{ "ushl $dst,$src,$shift\t# vector (8H)" %} @@ -13933,7 +14621,28 @@ ins_pipe(pipe_class_default); %} +instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(pipe_class_default); +%} + instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); ins_cost(INSN_COST); format %{ "shl $dst, $src, $shift\t# vector (8H)" %} @@ -13951,7 +14660,24 @@ ins_pipe(pipe_class_default); %} +instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) sh = 15; + sh = -sh & 15; + __ sshr(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(pipe_class_default); +%} + instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); ins_cost(INSN_COST); format %{ "sshr $dst, $src, $shift\t# vector (8H)" %} @@ -13965,7 +14691,28 @@ ins_pipe(pipe_class_default); %} +instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), -sh & 15); + } + %} + ins_pipe(pipe_class_default); +%} + instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); ins_cost(INSN_COST); format %{ "ushr $dst, $src, $shift\t# vector (8H)" %} @@ -13983,7 +14730,22 @@ ins_pipe(pipe_class_default); %} +instruct vsll2I(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVI src shift)); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (2S)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsll4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (LShiftVI src shift)); match(Set dst (RShiftVI src shift)); ins_cost(INSN_COST); @@ -13996,7 +14758,21 @@ ins_pipe(pipe_class_default); %} +instruct vsrl2I(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (2S)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsrl4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (URShiftVI src shift)); ins_cost(INSN_COST); format %{ "ushl $dst,$src,$shift\t# vector (4S)" %} @@ -14008,7 +14784,21 @@ ins_pipe(pipe_class_default); %} +instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ shl(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + (int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (LShiftVI src shift)); ins_cost(INSN_COST); format %{ "shl $dst, $src, $shift\t# vector (4S)" %} @@ -14020,7 +14810,21 @@ ins_pipe(pipe_class_default); %} +instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ sshr(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (RShiftVI src shift)); ins_cost(INSN_COST); format %{ "sshr $dst, $src, $shift\t# vector (4S)" %} @@ -14032,7 +14836,21 @@ ins_pipe(pipe_class_default); %} +instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ ushr(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (URShiftVI src shift)); ins_cost(INSN_COST); format %{ "ushr $dst, $src, $shift\t# vector (4S)" %} @@ -14045,6 +14863,7 @@ %} instruct vsll2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (LShiftVL src shift)); match(Set dst (RShiftVL src shift)); ins_cost(INSN_COST); @@ -14058,6 +14877,7 @@ %} instruct vsrl2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (URShiftVL src shift)); ins_cost(INSN_COST); format %{ "ushl $dst,$src,$shift\t# vector (2D)" %} @@ -14070,6 +14890,7 @@ %} instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (LShiftVL src shift)); ins_cost(INSN_COST); format %{ "shl $dst, $src, $shift\t# vector (2D)" %} @@ -14082,6 +14903,7 @@ %} instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (RShiftVL src shift)); ins_cost(INSN_COST); format %{ "sshr $dst, $src, $shift\t# vector (2D)" %} @@ -14094,6 +14916,7 @@ %} instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (URShiftVL src shift)); ins_cost(INSN_COST); format %{ "ushr $dst, $src, $shift\t# vector (2D)" %}
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -491,6 +491,11 @@ i->rf(_index, 16); i->f(_ext.option(), 15, 13); unsigned size = i->get(31, 30); + if (i->get(26, 26) && i->get(23, 23)) { + // SIMD Q Type - Size = 128 bits + assert(size == 0, "bad size"); + size = 0b100; + } if (size == 0) // It's a byte i->f(_ext.shift() >= 0, 12); else { @@ -2050,6 +2055,9 @@ INSN(negr, 1, 0b100000101110); INSN(notr, 1, 0b100000010110); INSN(addv, 0, 0b110001101110); + INSN(cls, 0, 0b100000010010); + INSN(clz, 1, 0b100000010010); + INSN(cnt, 0, 0b100000010110); #undef INSN
--- a/src/cpu/aarch64/vm/frame_aarch64.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/aarch64/vm/frame_aarch64.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -526,16 +526,6 @@ return frame(sender_sp(), link(), sender_pc()); } -bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) { - assert(is_interpreted_frame(), "must be interpreter frame"); - Method* method = interpreter_frame_method(); - // When unpacking an optimized frame the frame pointer is - // adjusted with: - int diff = (method->max_locals() - method->size_of_parameters()) * - Interpreter::stackElementWords; - return _fp == (fp - diff); -} - bool frame::is_interpreted_frame_valid(JavaThread* thread) const { // QQQ #ifdef CC_INTERP
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/aarch64/vm/globals_aarch64.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -84,7 +84,7 @@ #ifdef BUILTIN_SIM #define UseBuiltinSim true -#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \ \ product(bool, NotifySimulator, UseBuiltinSim, \ "tell the AArch64 sim where we are in method code") \ @@ -112,7 +112,7 @@ #define NotifySimulator false #define UseSimulatorCache false #define DisableBCCheck true -#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \ \ product(bool, NearCpool, true, \ "constant pool is close to instructions") \
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -1408,6 +1408,52 @@ movk(r, imm64 & 0xffff, 32); } +// Macro to mov replicated immediate to vector register. +// Vd will get the following values for different arrangements in T +// imm32 == hex 000000gh T8B: Vd = ghghghghghghghgh +// imm32 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh +// imm32 == hex 0000efgh T4H: Vd = efghefghefghefgh +// imm32 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh +// imm32 == hex abcdefgh T2S: Vd = abcdefghabcdefgh +// imm32 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh +// T1D/T2D: invalid +void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) { + assert(T != T1D && T != T2D, "invalid arrangement"); + if (T == T8B || T == T16B) { + assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)"); + movi(Vd, T, imm32 & 0xff, 0); + return; + } + u_int32_t nimm32 = ~imm32; + if (T == T4H || T == T8H) { + assert((imm32 & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)"); + imm32 &= 0xffff; + nimm32 &= 0xffff; + } + u_int32_t x = imm32; + int movi_cnt = 0; + int movn_cnt = 0; + while (x) { if (x & 0xff) movi_cnt++; x >>= 8; } + x = nimm32; + while (x) { if (x & 0xff) movn_cnt++; x >>= 8; } + if (movn_cnt < movi_cnt) imm32 = nimm32; + unsigned lsl = 0; + while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; } + if (movn_cnt < movi_cnt) + mvni(Vd, T, imm32 & 0xff, lsl); + else + movi(Vd, T, imm32 & 0xff, lsl); + imm32 >>= 8; lsl += 8; + while (imm32) { + while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; } + if (movn_cnt < movi_cnt) + bici(Vd, T, imm32 & 0xff, lsl); + else + orri(Vd, T, imm32 & 0xff, lsl); + lsl += 8; imm32 >>= 8; + } +} + void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64) { #ifndef PRODUCT @@ -2888,41 +2934,40 @@ cmp(src1, rscratch1); } +void MacroAssembler::store_check(Register obj, Address dst) { + store_check(obj); +} + void MacroAssembler::store_check(Register obj) { // Does a store check for the oop in register obj. The content of // register obj is destroyed afterwards. - store_check_part_1(obj); - store_check_part_2(obj); -} - -void MacroAssembler::store_check(Register obj, Address dst) { - store_check(obj); -} - - -// split the store check operation so that other instructions can be scheduled inbetween -void MacroAssembler::store_check_part_1(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + + CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs); + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + lsr(obj, obj, CardTableModRefBS::card_shift); -} - -void MacroAssembler::store_check_part_2(Register obj) { - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - CardTableModRefBS* ct = (CardTableModRefBS*)bs; - assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); - - // The calculation for byte_map_base is as follows: - // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); - // So this essentially converts an address to a displacement and - // it will never need to be relocated. - - // FIXME: It's not likely that disp will fit into an offset so we - // don't bother to check, but it could save an instruction. - intptr_t disp = (intptr_t) ct->byte_map_base; - mov(rscratch1, disp); - strb(zr, Address(obj, rscratch1)); + + assert(CardTableModRefBS::dirty_card_val() == 0, "must be"); + + { + ExternalAddress cardtable((address) ct->byte_map_base); + unsigned long offset; + adrp(rscratch1, cardtable, offset); + assert(offset == 0, "byte_map_base is misaligned"); + } + + if (UseCondCardMark) { + Label L_already_dirty; + ldrb(rscratch2, Address(obj, rscratch1)); + cbz(rscratch2, L_already_dirty); + strb(zr, Address(obj, rscratch1)); + bind(L_already_dirty); + } else { + strb(zr, Address(obj, rscratch1)); + } } void MacroAssembler::load_klass(Register dst, Register src) {
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -36,6 +36,7 @@ class MacroAssembler: public Assembler { friend class LIR_Assembler; + public: using Assembler::mov; using Assembler::movi; @@ -465,44 +466,7 @@ void movptr(Register r, uintptr_t imm64); - // Macro to mov replicated immediate to vector register. - // Where imm32 == hex abcdefgh, Vd will get the following values - // for different arrangements in T - // T8B: Vd = ghghghghghghghgh - // T16B: Vd = ghghghghghghghghghghghghghghghgh - // T4H: Vd = efghefghefghefgh - // T8H: Vd = efghefghefghefghefghefghefghefgh - // T2S: Vd = abcdefghabcdefgh - // T4S: Vd = abcdefghabcdefghabcdefghabcdefgh - // T1D/T2D: invalid - void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) { - assert(T != T1D && T != T2D, "invalid arrangement"); - u_int32_t nimm32 = ~imm32; - if (T == T8B || T == T16B) { imm32 &= 0xff; nimm32 &= 0xff; } - if (T == T4H || T == T8H) { imm32 &= 0xffff; nimm32 &= 0xffff; } - u_int32_t x = imm32; - int movi_cnt = 0; - int movn_cnt = 0; - while (x) { if (x & 0xff) movi_cnt++; x >>= 8; } - x = nimm32; - while (x) { if (x & 0xff) movn_cnt++; x >>= 8; } - if (movn_cnt < movi_cnt) imm32 = nimm32; - unsigned lsl = 0; - while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; } - if (movn_cnt < movi_cnt) - mvni(Vd, T, imm32 & 0xff, lsl); - else - movi(Vd, T, imm32 & 0xff, lsl); - imm32 >>= 8; lsl += 8; - while (imm32) { - while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; } - if (movn_cnt < movi_cnt) - bici(Vd, T, imm32 & 0xff, lsl); - else - orri(Vd, T, imm32 & 0xff, lsl); - lsl += 8; imm32 >>= 8; - } - } + void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32); // macro instructions for accessing and updating floating point // status register @@ -756,10 +720,6 @@ #endif // INCLUDE_ALL_GCS - // split store_check(Register obj) to enhance instruction interleaving - void store_check_part_1(Register obj); - void store_check_part_2(Register obj); - // oop manipulations void load_klass(Register dst, Register src); void store_klass(Register dst, Register src);
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,6 +1,6 @@ /* - * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2120,6 +2120,7 @@ save_native_result(masm, ret_type, stack_slots); } + __ mov(c_rarg2, rthread); __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); __ mov(c_rarg0, obj_reg); @@ -2128,7 +2129,7 @@ __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset()))); __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset()))); - rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 2, 0, 1); + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 3, 0, 1); #ifdef ASSERT {
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -190,10 +190,21 @@ } } + if (UseGHASHIntrinsics) { + warning("GHASH intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); + } + if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { UseCRC32Intrinsics = true; } + if (UseCRC32CIntrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) + warning("CRC32C intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + if (auxv & (HWCAP_SHA1 | HWCAP_SHA2)) { if (FLAG_IS_DEFAULT(UseSHA)) { FLAG_SET_DEFAULT(UseSHA, true); @@ -228,6 +239,9 @@ warning("SHA512 instruction (for SHA-384 and SHA-512) is not available on this CPU."); FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA, false); + } } // This machine allows unaligned memory accesses @@ -243,6 +257,10 @@ UseBarriersForVolatile = (_cpuFeatures & CPU_DMB_ATOMICS) != 0; } + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + UsePopCountInstruction = true; + } + #ifdef COMPILER2 if (FLAG_IS_DEFAULT(OptoScheduling)) { OptoScheduling = true;
--- a/src/cpu/ppc/vm/globals_ppc.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/ppc/vm/globals_ppc.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -63,7 +63,7 @@ define_pd_global(uintx, TypeProfileLevel, 111); // Platform dependent flag handling: flags only defined on this platform. -#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \ \ /* Load poll address from thread. This is used to implement per-thread */ \ /* safepoints on platforms != IA64. */ \
--- a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -2475,7 +2475,8 @@ // Slow case of monitor enter. // Inline a special case of call_VM that disallows any pending_exception. - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box); + // Arguments are (oop obj, BasicLock* lock, JavaThread* thread). + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box, R16_thread); __ asm_assert_mem8_is_zero(thread_(pending_exception), "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C", 0);
--- a/src/cpu/ppc/vm/vm_version_ppc.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -176,6 +176,11 @@ FLAG_SET_DEFAULT(UseAESIntrinsics, false); } + if (UseGHASHIntrinsics) { + warning("GHASH intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); + } + if (UseSHA) { warning("SHA instructions are not available on this CPU"); FLAG_SET_DEFAULT(UseSHA, false); @@ -186,6 +191,13 @@ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + + if (UseCRC32CIntrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) + warning("CRC32C intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + // Adjust RTM (Restricted Transactional Memory) flags. if (!has_tcheck() && UseRTMLocking) { // Can't continue because UseRTMLocking affects UseBiasedLocking flag @@ -505,7 +517,8 @@ void VM_Version::determine_features() { #if defined(ABI_ELFv2) - const int code_size = (num_features+1+2*7)*BytesPerInstWord; // TODO(asmundak): calculation is incorrect. + // 1 InstWord per call for the blr instruction. + const int code_size = (num_features+1+2*1)*BytesPerInstWord; #else // 7 InstWords for each call (function descriptor + blr instruction). const int code_size = (num_features+1+2*7)*BytesPerInstWord; @@ -540,7 +553,8 @@ a->popcntw(R7, R5); // code[6] -> popcntw a->fcfids(F3, F4); // code[7] -> fcfids a->vand(VR0, VR0, VR0); // code[8] -> vand - a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[9] -> lqarx_m + // arg0 of lqarx must be an even register, (arg1 + arg2) must be a multiple of 16 + a->lqarx_unchecked(R6, R3_ARG1, R4_ARG2, 1); // code[9] -> lqarx_m a->vcipher(VR0, VR1, VR2); // code[10] -> vcipher a->vpmsumb(VR0, VR1, VR2); // code[11] -> vpmsumb a->tcheck(0); // code[12] -> tcheck @@ -572,7 +586,8 @@ // Execute code. Illegal instructions will be replaced by 0 in the signal handler. VM_Version::_is_determine_features_test_running = true; - (*test)((address)mid_of_test_area, (uint64_t)0); + // We must align the first argument to 16 bytes because of the lqarx check. + (*test)((address)align_size_up((intptr_t)mid_of_test_area, 16), (uint64_t)0); VM_Version::_is_determine_features_test_running = false; // determine which instructions are legal. @@ -614,12 +629,12 @@ MacroAssembler* a = new MacroAssembler(&cb); // Emit code. - uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->emit_fd(); + uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->function_entry(); uint32_t *code = (uint32_t *)a->pc(); a->mfdscr(R3); a->blr(); - void (*set_dscr)(long) = (void(*)(long))(void *)a->emit_fd(); + void (*set_dscr)(long) = (void(*)(long))(void *)a->function_entry(); a->mtdscr(R3); a->blr();
--- a/src/cpu/sparc/vm/assembler_sparc.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -128,7 +128,11 @@ faligndata_op3 = 0x36, flog3_op3 = 0x36, edge_op3 = 0x36, + fzero_op3 = 0x36, fsrc_op3 = 0x36, + fnot_op3 = 0x36, + xmulx_op3 = 0x36, + crc32c_op3 = 0x36, impdep2_op3 = 0x37, stpartialf_op3 = 0x37, jmpl_op3 = 0x38, @@ -220,6 +224,8 @@ mdtox_opf = 0x110, mstouw_opf = 0x111, mstosw_opf = 0x113, + xmulx_opf = 0x115, + xmulxhi_opf = 0x116, mxtod_opf = 0x118, mwtos_opf = 0x119, @@ -228,7 +234,9 @@ sha1_opf = 0x141, sha256_opf = 0x142, - sha512_opf = 0x143 + sha512_opf = 0x143, + + crc32c_opf = 0x147 }; enum op5s { @@ -597,6 +605,11 @@ return x & ((1 << 10) - 1); } + // create a low12 __value__ (not a field) for a given a 32-bit constant + static int low12( int x ) { + return x & ((1 << 12) - 1); + } + // AES crypto instructions supported only on certain processors static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); } @@ -605,6 +618,9 @@ static void sha256_only() { assert( VM_Version::has_sha256(), "This instruction only works on SPARC with SHA256"); } static void sha512_only() { assert( VM_Version::has_sha512(), "This instruction only works on SPARC with SHA512"); } + // CRC32C instruction supported only on certain processors + static void crc32c_only() { assert( VM_Version::has_crc32c(), "This instruction only works on SPARC with CRC32C"); } + // instruction only in VIS1 static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); } @@ -1019,6 +1035,7 @@ void nop() { emit_int32( op(branch_op) | op2(sethi_op2) ); } + void sw_count() { emit_int32( op(branch_op) | op2(sethi_op2) | 0x3f0 ); } // pp 202 @@ -1195,8 +1212,14 @@ void faligndata( FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(faligndata_op3) | fs1(s1, FloatRegisterImpl::D) | opf(faligndata_opf) | fs2(s2, FloatRegisterImpl::D)); } + void fzero( FloatRegisterImpl::Width w, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fzero_op3) | opf(0x62 - w)); } + void fsrc2( FloatRegisterImpl::Width w, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fsrc_op3) | opf(0x7A - w) | fs2(s2, w)); } + void fnot1( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fnot_op3) | fs1(s1, w) | opf(0x6C - w)); } + + void fpmerge( FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(0x36) | fs1(s1, FloatRegisterImpl::S) | opf(0x4b) | fs2(s2, FloatRegisterImpl::S)); } + void stpartialf( Register s1, Register s2, FloatRegister d, int ia = -1 ) { vis1_only(); emit_int32( op(ldst_op) | fd(d, FloatRegisterImpl::D) | op3(stpartialf_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); } // VIS2 instructions @@ -1212,12 +1235,19 @@ void movwtos( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); } void movxtod( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); } + void xmulx(Register s1, Register s2, Register d) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulx_opf) | rs2(s2)); } + void xmulxhi(Register s1, Register s2, Register d) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2)); } + // Crypto SHA instructions void sha1() { sha1_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); } void sha256() { sha256_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha256_opf)); } void sha512() { sha512_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha512_opf)); } + // CRC32C instruction + + void crc32c( FloatRegister s1, FloatRegister s2, FloatRegister d ) { crc32c_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(crc32c_op3) | fs1(s1, FloatRegisterImpl::D) | opf(crc32c_opf) | fs2(s2, FloatRegisterImpl::D)); } + // Creation Assembler(CodeBuffer* code) : AbstractAssembler(code) { #ifdef CHECK_DELAY
--- a/src/cpu/sparc/vm/frame_sparc.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/sparc/vm/frame_sparc.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -599,12 +599,6 @@ return next_younger_sp_or_null(valid_sp, sp) != NULL; } - -bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) { - assert(is_interpreted_frame(), "must be interpreter frame"); - return this->fp() == fp; -} - bool frame::is_interpreted_frame_valid(JavaThread* thread) const { #ifdef CC_INTERP // Is there anything to do?
--- a/src/cpu/sparc/vm/globals_sparc.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/sparc/vm/globals_sparc.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -81,7 +81,7 @@ define_pd_global(uintx, TypeProfileLevel, 111); -#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \ \ product(intx, UseVIS, 99, \ "Highest supported VIS instructions set on Sparc") \
--- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -956,6 +956,7 @@ int hi = (int)(value >> 32); int lo = (int)(value & ~0); + int bits_33to2 = (int)((value >> 2) & ~0); // (Matcher::isSimpleConstant64 knows about the following optimizations.) if (Assembler::is_simm13(lo) && value == lo) { or3(G0, lo, d); @@ -964,6 +965,12 @@ if (low10(lo) != 0) or3(d, low10(lo), d); } + else if ((hi >> 2) == 0) { + Assembler::sethi(bits_33to2, d); // hardware version zero-extends to upper 32 + sllx(d, 2, d); + if (low12(lo) != 0) + or3(d, low12(lo), d); + } else if (hi == -1) { Assembler::sethi(~lo, d); // hardware version zero-extends to upper 32 xor3(d, low10(lo) ^ ~low10(~0), d); @@ -4351,3 +4358,52 @@ cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop); nop(); // Separate short branches } + +/** + * Update CRC-32[C] with a byte value according to constants in table + * + * @param [in,out]crc Register containing the crc. + * @param [in]val Register containing the byte to fold into the CRC. + * @param [in]table Register containing the table of crc constants. + * + * uint32_t crc; + * val = crc_table[(val ^ crc) & 0xFF]; + * crc = val ^ (crc >> 8); + */ +void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { + xor3(val, crc, val); + and3(val, 0xFF, val); + sllx(val, 2, val); + lduw(table, val, val); + srlx(crc, 8, crc); + xor3(val, crc, crc); +} + +// Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros +void MacroAssembler::reverse_bytes_32(Register src, Register dst, Register tmp) { + srlx(src, 24, dst); + + sllx(src, 32+8, tmp); + srlx(tmp, 32+24, tmp); + sllx(tmp, 8, tmp); + or3(dst, tmp, dst); + + sllx(src, 32+16, tmp); + srlx(tmp, 32+24, tmp); + sllx(tmp, 16, tmp); + or3(dst, tmp, dst); + + sllx(src, 32+24, tmp); + srlx(tmp, 32, tmp); + or3(dst, tmp, dst); +} + +void MacroAssembler::movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2) { + reverse_bytes_32(src, tmp1, tmp2); + movxtod(tmp1, dst); +} + +void MacroAssembler::movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2) { + movdtox(src, tmp1); + reverse_bytes_32(tmp1, dst, tmp2); +}
--- a/src/cpu/sparc/vm/macroAssembler_sparc.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/sparc/vm/macroAssembler_sparc.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -903,6 +903,10 @@ inline void ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d); inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0); + // little-endian + inline void ldxl(Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); } + inline void ldfl(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { ldfa(w, s1, s2, ASI_PRIMARY_LITTLE, d); } + // membar psuedo instruction. takes into account target memory model. inline void membar( Assembler::Membar_mask_bits const7a ); @@ -1436,6 +1440,14 @@ // Use BIS for zeroing void bis_zeroing(Register to, Register count, Register temp, Label& Ldone); + // Update CRC-32[C] with a byte value according to constants in table + void update_byte_crc32(Register crc, Register val, Register table); + + // Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros + void reverse_bytes_32(Register src, Register dst, Register tmp); + void movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2); + void movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2); + #undef VIRTUAL };
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -4786,6 +4786,330 @@ return start; } + /* Single and multi-block ghash operations */ + address generate_ghash_processBlocks() { + __ align(CodeEntryAlignment); + Label L_ghash_loop, L_aligned, L_main; + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); + address start = __ pc(); + + Register state = I0; + Register subkeyH = I1; + Register data = I2; + Register len = I3; + + __ save_frame(0); + + __ ldx(state, 0, O0); + __ ldx(state, 8, O1); + + // Loop label for multiblock operations + __ BIND(L_ghash_loop); + + // Check if 'data' is unaligned + __ andcc(data, 7, G1); + __ br(Assembler::zero, false, Assembler::pt, L_aligned); + __ delayed()->nop(); + + Register left_shift = L1; + Register right_shift = L2; + Register data_ptr = L3; + + // Get left and right shift values in bits + __ sll(G1, LogBitsPerByte, left_shift); + __ mov(64, right_shift); + __ sub(right_shift, left_shift, right_shift); + + // Align to read 'data' + __ sub(data, G1, data_ptr); + + // Load first 8 bytes of 'data' + __ ldx(data_ptr, 0, O4); + __ sllx(O4, left_shift, O4); + __ ldx(data_ptr, 8, O5); + __ srlx(O5, right_shift, G4); + __ bset(G4, O4); + + // Load second 8 bytes of 'data' + __ sllx(O5, left_shift, O5); + __ ldx(data_ptr, 16, G4); + __ srlx(G4, right_shift, G4); + __ ba(L_main); + __ delayed()->bset(G4, O5); + + // If 'data' is aligned, load normally + __ BIND(L_aligned); + __ ldx(data, 0, O4); + __ ldx(data, 8, O5); + + __ BIND(L_main); + __ ldx(subkeyH, 0, O2); + __ ldx(subkeyH, 8, O3); + + __ xor3(O0, O4, O0); + __ xor3(O1, O5, O1); + + __ xmulxhi(O0, O3, G3); + __ xmulx(O0, O2, O5); + __ xmulxhi(O1, O2, G4); + __ xmulxhi(O1, O3, G5); + __ xmulx(O0, O3, G1); + __ xmulx(O1, O3, G2); + __ xmulx(O1, O2, O3); + __ xmulxhi(O0, O2, O4); + + __ mov(0xE1, O0); + __ sllx(O0, 56, O0); + + __ xor3(O5, G3, O5); + __ xor3(O5, G4, O5); + __ xor3(G5, G1, G1); + __ xor3(G1, O3, G1); + __ srlx(G2, 63, O1); + __ srlx(G1, 63, G3); + __ sllx(G2, 63, O3); + __ sllx(G2, 58, O2); + __ xor3(O3, O2, O2); + + __ sllx(G1, 1, G1); + __ or3(G1, O1, G1); + + __ xor3(G1, O2, G1); + + __ sllx(G2, 1, G2); + + __ xmulxhi(G1, O0, O1); + __ xmulx(G1, O0, O2); + __ xmulxhi(G2, O0, O3); + __ xmulx(G2, O0, G1); + + __ xor3(O4, O1, O4); + __ xor3(O5, O2, O5); + __ xor3(O5, O3, O5); + + __ sllx(O4, 1, O2); + __ srlx(O5, 63, O3); + + __ or3(O2, O3, O0); + + __ sllx(O5, 1, O1); + __ srlx(G1, 63, O2); + __ or3(O1, O2, O1); + __ xor3(O1, G3, O1); + + __ deccc(len); + __ br(Assembler::notZero, true, Assembler::pt, L_ghash_loop); + __ delayed()->add(data, 16, data); + + __ stx(O0, I0, 0); + __ stx(O1, I0, 8); + + __ ret(); + __ delayed()->restore(); + + return start; + } + +#define CHUNK_LEN 128 /* 128 x 8B = 1KB */ +#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */ +#define CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */ +#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */ + + /** + * Arguments: + * + * Inputs: + * O0 - int crc + * O1 - byte* buf + * O2 - int len + * O3 - int* table + * + * Output: + * O0 - int crc result + */ + address generate_updateBytesCRC32C() { + assert(UseCRC32CIntrinsics, "need CRC32C instruction"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); + address start = __ pc(); + + const Register crc = O0; // crc + const Register buf = O1; // source java byte array address + const Register len = O2; // number of bytes + const Register table = O3; // byteTable + + Label L_crc32c_head, L_crc32c_aligned; + Label L_crc32c_parallel, L_crc32c_parallel_loop; + Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop; + Label L_crc32c_done, L_crc32c_tail, L_crc32c_return; + + __ cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return); + + // clear upper 32 bits of crc + __ clruwu(crc); + + __ and3(buf, 7, G4); + __ cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned); + + __ mov(8, G1); + __ sub(G1, G4, G4); + + // ------ process the misaligned head (7 bytes or less) ------ + __ BIND(L_crc32c_head); + + // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF]; + __ ldub(buf, 0, G1); + __ update_byte_crc32(crc, G1, table); + + __ inc(buf); + __ dec(len); + __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return); + __ dec(G4); + __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head); + + // ------ process the 8-byte-aligned body ------ + __ BIND(L_crc32c_aligned); + __ nop(); + __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail); + + // reverse the byte order of lower 32 bits to big endian, and move to FP side + __ movitof_revbytes(crc, F0, G1, G3); + + __ set(CHUNK_LEN*8*4, G4); + __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial); + + // ------ process four 1KB chunks in parallel ------ + __ BIND(L_crc32c_parallel); + + __ fzero(FloatRegisterImpl::D, F2); + __ fzero(FloatRegisterImpl::D, F4); + __ fzero(FloatRegisterImpl::D, F6); + + __ mov(CHUNK_LEN - 1, G4); + __ BIND(L_crc32c_parallel_loop); + // schedule ldf's ahead of crc32c's to hide the load-use latency + __ ldf(FloatRegisterImpl::D, buf, 0, F8); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14); + __ crc32c(F0, F8, F0); + __ crc32c(F2, F10, F2); + __ crc32c(F4, F12, F4); + __ crc32c(F6, F14, F6); + __ inc(buf, 8); + __ dec(G4); + __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop); + + __ ldf(FloatRegisterImpl::D, buf, 0, F8); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12); + __ crc32c(F0, F8, F0); + __ crc32c(F2, F10, F2); + __ crc32c(F4, F12, F4); + + __ inc(buf, CHUNK_LEN*24); + __ ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian + __ inc(buf, 8); + + __ prefetch(buf, 0, Assembler::severalReads); + __ prefetch(buf, CHUNK_LEN*8, Assembler::severalReads); + __ prefetch(buf, CHUNK_LEN*16, Assembler::severalReads); + __ prefetch(buf, CHUNK_LEN*24, Assembler::severalReads); + + // move to INT side, and reverse the byte order of lower 32 bits to little endian + __ movftoi_revbytes(F0, O4, G1, G4); + __ movftoi_revbytes(F2, O5, G1, G4); + __ movftoi_revbytes(F4, G5, G1, G4); + + // combine the results of 4 chunks + __ set64(CHUNK_K1, G3, G1); + __ xmulx(O4, G3, O4); + __ set64(CHUNK_K2, G3, G1); + __ xmulx(O5, G3, O5); + __ set64(CHUNK_K3, G3, G1); + __ xmulx(G5, G3, G5); + + __ movdtox(F14, G4); + __ xor3(O4, O5, O5); + __ xor3(G5, O5, O5); + __ xor3(G4, O5, O5); + + // reverse the byte order to big endian, via stack, and move to FP side + __ add(SP, -8, G1); + __ srlx(G1, 3, G1); + __ sllx(G1, 3, G1); + __ stx(O5, G1, G0); + __ ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian + + __ crc32c(F6, F2, F0); + + __ set(CHUNK_LEN*8*4, G4); + __ sub(len, G4, len); + __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel); + __ nop(); + __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done); + + __ BIND(L_crc32c_serial); + + __ mov(32, G4); + __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8); + + // ------ process 32B chunks ------ + __ BIND(L_crc32c_x32_loop); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ dec(len, 32); + __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop); + + __ BIND(L_crc32c_x8); + __ nop(); + __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done); + + // ------ process 8B chunks ------ + __ BIND(L_crc32c_x8_loop); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ dec(len, 8); + __ cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop); + + __ BIND(L_crc32c_done); + + // move to INT side, and reverse the byte order of lower 32 bits to little endian + __ movftoi_revbytes(F0, crc, G1, G3); + + __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return); + + // ------ process the misaligned tail (7 bytes or less) ------ + __ BIND(L_crc32c_tail); + + // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF]; + __ ldub(buf, 0, G1); + __ update_byte_crc32(crc, G1, table); + + __ inc(buf); + __ dec(len); + __ cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail); + + __ BIND(L_crc32c_return); + __ nop(); + __ retl(); + __ delayed()->nop(); + + return start; + } + void generate_initial() { // Generates all stubs and initializes the entry points @@ -4859,6 +5183,10 @@ StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); } + // generate GHASH intrinsics code + if (UseGHASHIntrinsics) { + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); + } // generate SHA1/SHA256/SHA512 intrinsics code if (UseSHA1Intrinsics) { @@ -4873,6 +5201,11 @@ StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress"); StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB"); } + + // generate CRC32C intrinsic code + if (UseCRC32CIntrinsics) { + StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); + } }
--- a/src/cpu/sparc/vm/stubRoutines_sparc.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/sparc/vm/stubRoutines_sparc.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,7 +41,7 @@ enum /* platform_dependent_constants */ { // %%%%%%%% May be able to shrink this a lot code_size1 = 20000, // simply increase if too small (assembler will crash if too small) - code_size2 = 23000 // simply increase if too small (assembler will crash if too small) + code_size2 = 24000 // simply increase if too small (assembler will crash if too small) }; class Sparc {
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -230,7 +230,7 @@ assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); char buf[512]; - jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")), (has_hardware_popc() ? ", popc" : ""), (has_vis1() ? ", vis1" : ""), @@ -242,6 +242,7 @@ (has_sha1() ? ", sha1" : ""), (has_sha256() ? ", sha256" : ""), (has_sha512() ? ", sha512" : ""), + (has_crc32c() ? ", crc32c" : ""), (is_ultra3() ? ", ultra3" : ""), (is_sun4v() ? ", sun4v" : ""), (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")), @@ -300,6 +301,17 @@ } } + // GHASH/GCM intrinsics + if (has_vis3() && (UseVIS > 2)) { + if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { + UseGHASHIntrinsics = true; + } + } else if (UseGHASHIntrinsics) { + if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) + warning("GHASH intrinsics require VIS3 insructions support. Intriniscs will be disabled"); + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); + } + // SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times if (has_sha1() || has_sha256() || has_sha512()) { if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions @@ -352,6 +364,23 @@ } } + // SPARC T4 and above should have support for CRC32C instruction + if (has_crc32c()) { + if (UseVIS > 2) { // CRC32C intrinsics use VIS3 instructions + if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true); + } + } else { + if (UseCRC32CIntrinsics) { + warning("SPARC CRC32C intrinsics require VIS3 instruction support. Intrinsics will be disabled."); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + } + } else if (UseCRC32CIntrinsics) { + warning("CRC32C instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) ContendedPaddingWidth = cache_line_size;
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -53,7 +53,8 @@ aes_instructions = 19, sha1_instruction = 20, sha256_instruction = 21, - sha512_instruction = 22 + sha512_instruction = 22, + crc32c_instruction = 23 }; enum Feature_Flag_Set { @@ -83,6 +84,7 @@ sha1_instruction_m = 1 << sha1_instruction, sha256_instruction_m = 1 << sha256_instruction, sha512_instruction_m = 1 << sha512_instruction, + crc32c_instruction_m = 1 << crc32c_instruction, generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m, generic_v9_m = generic_v8_m | v9_instructions_m, @@ -141,6 +143,7 @@ static bool has_sha1() { return (_features & sha1_instruction_m) != 0; } static bool has_sha256() { return (_features & sha256_instruction_m) != 0; } static bool has_sha512() { return (_features & sha512_instruction_m) != 0; } + static bool has_crc32c() { return (_features & crc32c_instruction_m) != 0; } static bool supports_compare_and_exchange() { return has_v9(); }
--- a/src/cpu/x86/vm/assembler_x86.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/assembler_x86.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -1347,7 +1347,7 @@ void Assembler::andnl(Register dst, Register src1, Register src2) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode(dst, src1, src2, false); + int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2, false); emit_int8((unsigned char)0xF2); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1355,7 +1355,7 @@ void Assembler::andnl(Register dst, Register src1, Address src2) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38(dst, src1, src2, false); + vex_prefix_0F38_legacy(dst, src1, src2, false); emit_int8((unsigned char)0xF2); emit_operand(dst, src2); } @@ -1382,7 +1382,7 @@ void Assembler::blsil(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode(rbx, dst, src, false); + int encode = vex_prefix_0F38_and_encode_legacy(rbx, dst, src, false); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1390,14 +1390,14 @@ void Assembler::blsil(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38(rbx, dst, src, false); + vex_prefix_0F38_legacy(rbx, dst, src, false); emit_int8((unsigned char)0xF3); emit_operand(rbx, src); } void Assembler::blsmskl(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode(rdx, dst, src, false); + int encode = vex_prefix_0F38_and_encode_legacy(rdx, dst, src, false); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1412,7 +1412,7 @@ void Assembler::blsrl(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode(rcx, dst, src, false); + int encode = vex_prefix_0F38_and_encode_legacy(rcx, dst, src, false); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -1420,7 +1420,7 @@ void Assembler::blsrl(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38(rcx, dst, src, false); + vex_prefix_0F38_legacy(rcx, dst, src, false); emit_int8((unsigned char)0xF3); emit_operand(rcx, src); } @@ -3095,26 +3095,35 @@ void Assembler::psrldq(XMMRegister dst, int shift) { // Shift 128 bit value in xmm register by number of bytes. NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, - false, AVX_128bit, (VM_Version::supports_avx512bw() == false)); + int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false, AVX_128bit, (VM_Version::supports_avx512bw() == false)); emit_int8(0x73); emit_int8((unsigned char)(0xC0 | encode)); emit_int8(shift); } +void Assembler::pslldq(XMMRegister dst, int shift) { + // Shift left 128 bit value in xmm register by number of bytes. + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false, AVX_128bit, (VM_Version::supports_avx512bw() == false)); + emit_int8(0x73); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8(shift); +} + void Assembler::ptest(XMMRegister dst, Address src) { assert(VM_Version::supports_sse4_1(), ""); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); - simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38); + simd_prefix(dst, xnoreg, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_38, false, AVX_128bit, true); emit_int8(0x17); emit_operand(dst, src); } void Assembler::ptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, - false, VEX_OPCODE_0F_38); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, + VEX_OPCODE_0F_38, false, AVX_128bit, true); emit_int8(0x17); emit_int8((unsigned char)(0xC0 | encode)); } @@ -3126,7 +3135,7 @@ assert(dst != xnoreg, "sanity"); int dst_enc = dst->encoding(); // swap src<->dst for encoding - vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len); + vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len, true, false); emit_int8(0x17); emit_operand(dst, src); } @@ -3135,7 +3144,7 @@ assert(VM_Version::supports_avx(), ""); int vector_len = AVX_256bit; int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, - vector_len, VEX_OPCODE_0F_38); + vector_len, VEX_OPCODE_0F_38, true, false); emit_int8(0x17); emit_int8((unsigned char)(0xC0 | encode)); } @@ -3146,12 +3155,12 @@ if (VM_Version::supports_evex()) { tuple_type = EVEX_FVM; } - emit_simd_arith(0x60, dst, src, VEX_SIMD_66); + emit_simd_arith(0x60, dst, src, VEX_SIMD_66, false, (VM_Version::supports_avx512vlbw() == false)); } void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_simd_arith(0x60, dst, src, VEX_SIMD_66); + emit_simd_arith(0x60, dst, src, VEX_SIMD_66, false, (VM_Version::supports_avx512vlbw() == false)); } void Assembler::punpckldq(XMMRegister dst, Address src) { @@ -4979,7 +4988,51 @@ emit_int8((unsigned char)(0xC0 | encode)); } -// duplicate 4-bytes integer data from src into 8 locations in dest +// duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL +void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, + vector_len, VEX_OPCODE_0F_38, false); + emit_int8(0x78); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_8bit; + InstructionMark im(this); + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding + vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len); + emit_int8(0x78); + emit_operand(dst, src); +} + +// duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL +void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, + vector_len, VEX_OPCODE_0F_38, false); + emit_int8(0x79); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_16bit; + InstructionMark im(this); + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding + vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len); + emit_int8(0x79); + emit_operand(dst, src); +} + +// duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) { assert(VM_Version::supports_evex(), ""); int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, @@ -4988,6 +5041,121 @@ emit_int8((unsigned char)(0xC0 | encode)); } +void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + InstructionMark im(this); + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding + vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len); + emit_int8(0x58); + emit_operand(dst, src); +} + +// duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL +void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, true, vector_len, false, false); + emit_int8(0x59); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + InstructionMark im(this); + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding + vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len); + emit_int8(0x59); + emit_operand(dst, src); +} + +// duplicate single precision fp from src into 4|8|16 locations in dest : requires AVX512VL +void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, false, vector_len, false, false); + emit_int8(0x18); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_32bit; + InstructionMark im(this); + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding + vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len); + emit_int8(0x18); + emit_operand(dst, src); +} + +// duplicate double precision fp from src into 2|4|8 locations in dest : requires AVX512VL +void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, true, vector_len, false, false); + emit_int8(0x19); + emit_int8((unsigned char)(0xC0 | encode)); +} + +void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + tuple_type = EVEX_T1S; + input_size_in_bits = EVEX_64bit; + InstructionMark im(this); + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding + vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len); + emit_int8(0x19); + emit_operand(dst, src); +} + +// duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL +void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, false, vector_len, false, false); + emit_int8(0x7A); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL +void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, false, vector_len, false, false); + emit_int8(0x7B); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL +void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, false, vector_len, false, false); + emit_int8(0x7C); + emit_int8((unsigned char)(0xC0 | encode)); +} + +// duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL +void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, + VEX_OPCODE_0F_38, true, vector_len, false, false); + emit_int8(0x7C); + emit_int8((unsigned char)(0xC0 | encode)); +} + // Carry-Less Multiplication Quadword void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) { assert(VM_Version::supports_clmul(), ""); @@ -5598,7 +5766,7 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) { - bool vex_r = (xreg_enc >= 8); + bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0; bool vex_b = adr.base_needs_rex(); bool vex_x = adr.index_needs_rex(); avx_vector_len = vector_len; @@ -5626,8 +5794,8 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) { - bool vex_r = (dst_enc >= 8); - bool vex_b = (src_enc >= 8); + bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0; + bool vex_b = ((src_enc & 8) == 8) ? 1 : 0; bool vex_x = false; avx_vector_len = vector_len; @@ -6272,19 +6440,15 @@ void Assembler::andnq(Register dst, Register src1, Register src2) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2); + int encode = vex_prefix_0F38_and_encode_q_legacy(dst, src1, src2); emit_int8((unsigned char)0xF2); emit_int8((unsigned char)(0xC0 | encode)); } void Assembler::andnq(Register dst, Register src1, Address src2) { - if (VM_Version::supports_evex()) { - tuple_type = EVEX_T1S; - input_size_in_bits = EVEX_64bit; - } InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q(dst, src1, src2); + vex_prefix_0F38_q_legacy(dst, src1, src2); emit_int8((unsigned char)0xF2); emit_operand(dst, src2); } @@ -6311,7 +6475,7 @@ void Assembler::blsiq(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src); + int encode = vex_prefix_0F38_and_encode_q_legacy(rbx, dst, src); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -6319,14 +6483,14 @@ void Assembler::blsiq(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q(rbx, dst, src); + vex_prefix_0F38_q_legacy(rbx, dst, src); emit_int8((unsigned char)0xF3); emit_operand(rbx, src); } void Assembler::blsmskq(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src); + int encode = vex_prefix_0F38_and_encode_q_legacy(rdx, dst, src); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -6334,14 +6498,14 @@ void Assembler::blsmskq(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q(rdx, dst, src); + vex_prefix_0F38_q_legacy(rdx, dst, src); emit_int8((unsigned char)0xF3); emit_operand(rdx, src); } void Assembler::blsrq(Register dst, Register src) { assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src); + int encode = vex_prefix_0F38_and_encode_q_legacy(rcx, dst, src); emit_int8((unsigned char)0xF3); emit_int8((unsigned char)(0xC0 | encode)); } @@ -6349,7 +6513,7 @@ void Assembler::blsrq(Register dst, Address src) { InstructionMark im(this); assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); - vex_prefix_0F38_q(rcx, dst, src); + vex_prefix_0F38_q_legacy(rcx, dst, src); emit_int8((unsigned char)0xF3); emit_operand(rcx, src); }
--- a/src/cpu/x86/vm/assembler_x86.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/assembler_x86.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -661,6 +661,14 @@ vector_len, no_mask_reg); } + void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) { + bool vex_w = false; + int vector_len = AVX_128bit; + vex_prefix(src, nds->encoding(), dst->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, + vector_len, true, no_mask_reg); + } + void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) { bool vex_w = true; int vector_len = AVX_128bit; @@ -668,6 +676,15 @@ VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, no_mask_reg); } + + void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) { + bool vex_w = true; + int vector_len = AVX_128bit; + vex_prefix(src, nds->encoding(), dst->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, + vector_len, true, no_mask_reg); + } + int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, int vector_len, @@ -680,6 +697,15 @@ VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, false, no_mask_reg); } + + int vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) { + bool vex_w = false; + int vector_len = AVX_128bit; + return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, + true, no_mask_reg); + } + int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) { bool vex_w = true; int vector_len = AVX_128bit; @@ -687,6 +713,15 @@ VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, false, no_mask_reg); } + + int vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) { + bool vex_w = true; + int vector_len = AVX_128bit; + return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), + VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len, + true, no_mask_reg); + } + int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, int vector_len = AVX_128bit, VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false, @@ -1666,6 +1701,8 @@ // Shift Right by bytes Logical DoubleQuadword Immediate void psrldq(XMMRegister dst, int shift); + // Shift Left by bytes Logical DoubleQuadword Immediate + void pslldq(XMMRegister dst, int shift); // Logical Compare 128bit void ptest(XMMRegister dst, XMMRegister src); @@ -2024,8 +2061,25 @@ // duplicate 4-bytes integer data from src into 8 locations in dest void vpbroadcastd(XMMRegister dst, XMMRegister src); - // duplicate 4-bytes integer data from src into vector_len locations in dest + // duplicate n-bytes integer data from src into vector_len locations in dest + void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len); + void evpbroadcastb(XMMRegister dst, Address src, int vector_len); + void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len); + void evpbroadcastw(XMMRegister dst, Address src, int vector_len); void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len); + void evpbroadcastd(XMMRegister dst, Address src, int vector_len); + void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len); + void evpbroadcastq(XMMRegister dst, Address src, int vector_len); + + void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len); + void evpbroadcastss(XMMRegister dst, Address src, int vector_len); + void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len); + void evpbroadcastsd(XMMRegister dst, Address src, int vector_len); + + void evpbroadcastb(XMMRegister dst, Register src, int vector_len); + void evpbroadcastw(XMMRegister dst, Register src, int vector_len); + void evpbroadcastd(XMMRegister dst, Register src, int vector_len); + void evpbroadcastq(XMMRegister dst, Register src, int vector_len); // Carry-Less Multiplication Quadword void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
--- a/src/cpu/x86/vm/c2_init_x86.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/c2_init_x86.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -58,4 +58,6 @@ OptoReg::invalidate(i); } } + + SuperWordLoopUnrollAnalysis = true; }
--- a/src/cpu/x86/vm/frame_x86.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/frame_x86.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -524,17 +524,6 @@ return frame(sender_sp(), link(), sender_pc()); } - -bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) { - assert(is_interpreted_frame(), "must be interpreter frame"); - Method* method = interpreter_frame_method(); - // When unpacking an optimized frame the frame pointer is - // adjusted with: - int diff = (method->max_locals() - method->size_of_parameters()) * - Interpreter::stackElementWords; - return _fp == (fp - diff); -} - bool frame::is_interpreted_frame_valid(JavaThread* thread) const { // QQQ #ifdef CC_INTERP
--- a/src/cpu/x86/vm/globals_x86.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/globals_x86.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -84,7 +84,7 @@ define_pd_global(bool, PreserveFramePointer, false); -#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \ \ develop(bool, IEEEPrecision, true, \ "Enables IEEE precision (for INTEL only)") \
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -4260,31 +4260,24 @@ ////////////////////////////////////////////////////////////////////////////////// +void MacroAssembler::store_check(Register obj, Address dst) { + store_check(obj); +} + void MacroAssembler::store_check(Register obj) { // Does a store check for the oop in register obj. The content of // register obj is destroyed afterwards. - store_check_part_1(obj); - store_check_part_2(obj); -} - -void MacroAssembler::store_check(Register obj, Address dst) { - store_check(obj); -} - - -// split the store check operation so that other instructions can be scheduled inbetween -void MacroAssembler::store_check_part_1(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - shrptr(obj, CardTableModRefBS::card_shift); -} - -void MacroAssembler::store_check_part_2(Register obj) { - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs); assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + shrptr(obj, CardTableModRefBS::card_shift); + + Address card_addr; + // The calculation for byte_map_base is as follows: // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); // So this essentially converts an address to a displacement and it will @@ -4292,8 +4285,7 @@ // large for a 32bit displacement. intptr_t disp = (intptr_t) ct->byte_map_base; if (is_simm32(disp)) { - Address cardtable(noreg, obj, Address::times_1, disp); - movb(cardtable, 0); + card_addr = Address(noreg, obj, Address::times_1, disp); } else { // By doing it as an ExternalAddress 'disp' could be converted to a rip-relative // displacement and done in a single instruction given favorable mapping and a @@ -4301,7 +4293,21 @@ // entry and that entry is not properly handled by the relocation code. AddressLiteral cardtable((address)ct->byte_map_base, relocInfo::none); Address index(noreg, obj, Address::times_1); - movb(as_Address(ArrayAddress(cardtable, index)), 0); + card_addr = as_Address(ArrayAddress(cardtable, index)); + } + + int dirty = CardTableModRefBS::dirty_card_val(); + if (UseCondCardMark) { + Label L_already_dirty; + if (UseConcMarkSweepGC) { + membar(Assembler::StoreLoad); + } + cmpb(card_addr, dirty); + jcc(Assembler::equal, L_already_dirty); + movb(card_addr, dirty); + bind(L_already_dirty); + } else { + movb(card_addr, dirty); } }
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/macroAssembler_x86.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -315,10 +315,6 @@ #endif // INCLUDE_ALL_GCS - // split store_check(Register obj) to enhance instruction interleaving - void store_check_part_1(Register obj); - void store_check_part_2(Register obj); - // C 'boolean' to Java boolean: x == 0 ? 0 : 1 void c2bool(Register x);
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -368,22 +368,22 @@ map->set_callee_saved(STACK_OFFSET(xmm14H_off), xmm14->as_VMReg()->next()); map->set_callee_saved(STACK_OFFSET(xmm15H_off), xmm15->as_VMReg()->next()); if (UseAVX > 2) { - map->set_callee_saved(STACK_OFFSET(xmm16H_off), xmm16->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm17H_off), xmm17->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm18H_off), xmm18->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm19H_off), xmm19->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm20H_off), xmm20->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm21H_off), xmm21->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm22H_off), xmm22->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm23H_off), xmm23->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm24H_off), xmm24->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm25H_off), xmm25->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm26H_off), xmm26->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm27H_off), xmm27->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm28H_off), xmm28->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm29H_off), xmm29->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm30H_off), xmm30->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(xmm31H_off), xmm31->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm16H_off), xmm16->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm17H_off), xmm17->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm18H_off), xmm18->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm19H_off), xmm19->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm20H_off), xmm20->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm21H_off), xmm21->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm22H_off), xmm22->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm23H_off), xmm23->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm24H_off), xmm24->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm25H_off), xmm25->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm26H_off), xmm26->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm27H_off), xmm27->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm28H_off), xmm28->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm29H_off), xmm29->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm30H_off), xmm30->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm31H_off), xmm31->as_VMReg()->next()); } } @@ -469,7 +469,7 @@ __ vinsertf64x4h(xmm29, Address(rsp, 928)); __ vinsertf64x4h(xmm30, Address(rsp, 960)); __ vinsertf64x4h(xmm31, Address(rsp, 992)); - __ subptr(rsp, 1024); + __ addptr(rsp, 1024); } } #else
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -2727,6 +2727,167 @@ return start; } + // byte swap x86 long + address generate_ghash_long_swap_mask() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); + address start = __ pc(); + __ emit_data(0x0b0a0908, relocInfo::none, 0); + __ emit_data(0x0f0e0d0c, relocInfo::none, 0); + __ emit_data(0x03020100, relocInfo::none, 0); + __ emit_data(0x07060504, relocInfo::none, 0); + + return start; + } + + // byte swap x86 byte array + address generate_ghash_byte_swap_mask() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); + address start = __ pc(); + __ emit_data(0x0c0d0e0f, relocInfo::none, 0); + __ emit_data(0x08090a0b, relocInfo::none, 0); + __ emit_data(0x04050607, relocInfo::none, 0); + __ emit_data(0x00010203, relocInfo::none, 0); + return start; + } + + /* Single and multi-block ghash operations */ + address generate_ghash_processBlocks() { + assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support"); + __ align(CodeEntryAlignment); + Label L_ghash_loop, L_exit; + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); + address start = __ pc(); + + const Register state = rdi; + const Register subkeyH = rsi; + const Register data = rdx; + const Register blocks = rcx; + + const Address state_param(rbp, 8+0); + const Address subkeyH_param(rbp, 8+4); + const Address data_param(rbp, 8+8); + const Address blocks_param(rbp, 8+12); + + const XMMRegister xmm_temp0 = xmm0; + const XMMRegister xmm_temp1 = xmm1; + const XMMRegister xmm_temp2 = xmm2; + const XMMRegister xmm_temp3 = xmm3; + const XMMRegister xmm_temp4 = xmm4; + const XMMRegister xmm_temp5 = xmm5; + const XMMRegister xmm_temp6 = xmm6; + const XMMRegister xmm_temp7 = xmm7; + + __ enter(); + + __ movptr(state, state_param); + __ movptr(subkeyH, subkeyH_param); + __ movptr(data, data_param); + __ movptr(blocks, blocks_param); + + __ movdqu(xmm_temp0, Address(state, 0)); + __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + + __ movdqu(xmm_temp1, Address(subkeyH, 0)); + __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + + __ BIND(L_ghash_loop); + __ movdqu(xmm_temp2, Address(data, 0)); + __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); + + __ pxor(xmm_temp0, xmm_temp2); + + // + // Multiply with the hash key + // + __ movdqu(xmm_temp3, xmm_temp0); + __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 + __ movdqu(xmm_temp4, xmm_temp0); + __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 + + __ movdqu(xmm_temp5, xmm_temp0); + __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 + __ movdqu(xmm_temp6, xmm_temp0); + __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 + + __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 + + __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 + __ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right + __ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left + __ pxor(xmm_temp3, xmm_temp5); + __ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result + // of the carry-less multiplication of + // xmm0 by xmm1. + + // We shift the result of the multiplication by one bit position + // to the left to cope for the fact that the bits are reversed. + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp4, xmm_temp6); + __ pslld (xmm_temp3, 1); + __ pslld(xmm_temp6, 1); + __ psrld(xmm_temp7, 31); + __ psrld(xmm_temp4, 31); + __ movdqu(xmm_temp5, xmm_temp7); + __ pslldq(xmm_temp4, 4); + __ pslldq(xmm_temp7, 4); + __ psrldq(xmm_temp5, 12); + __ por(xmm_temp3, xmm_temp7); + __ por(xmm_temp6, xmm_temp4); + __ por(xmm_temp6, xmm_temp5); + + // + // First phase of the reduction + // + // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts + // independently. + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp4, xmm_temp3); + __ movdqu(xmm_temp5, xmm_temp3); + __ pslld(xmm_temp7, 31); // packed right shift shifting << 31 + __ pslld(xmm_temp4, 30); // packed right shift shifting << 30 + __ pslld(xmm_temp5, 25); // packed right shift shifting << 25 + __ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions + __ pxor(xmm_temp7, xmm_temp5); + __ movdqu(xmm_temp4, xmm_temp7); + __ pslldq(xmm_temp7, 12); + __ psrldq(xmm_temp4, 4); + __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete + + // + // Second phase of the reduction + // + // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these + // shift operations. + __ movdqu(xmm_temp2, xmm_temp3); + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp5, xmm_temp3); + __ psrld(xmm_temp2, 1); // packed left shifting >> 1 + __ psrld(xmm_temp7, 2); // packed left shifting >> 2 + __ psrld(xmm_temp5, 7); // packed left shifting >> 7 + __ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions + __ pxor(xmm_temp2, xmm_temp5); + __ pxor(xmm_temp2, xmm_temp4); + __ pxor(xmm_temp3, xmm_temp2); + __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 + + __ decrement(blocks); + __ jcc(Assembler::zero, L_exit); + __ movdqu(xmm_temp0, xmm_temp6); + __ addptr(data, 16); + __ jmp(L_ghash_loop); + + __ BIND(L_exit); + // Byte swap 16-byte result + __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + __ movdqu(Address(state, 0), xmm_temp6); // store the result + + __ leave(); + __ ret(0); + return start; + } + /** * Arguments: * @@ -3026,6 +3187,13 @@ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(); } + // Generate GHASH intrinsics code + if (UseGHASHIntrinsics) { + StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); + StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); + } + // Safefetch stubs. generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc,
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -382,7 +382,12 @@ // restore regs belonging to calling function #ifdef _WIN64 - for (int i = 15; i >= 6; i--) { + int xmm_ub = 15; + if (UseAVX > 2) { + xmm_ub = 31; + } + // emit the restores for xmm regs + for (int i = 6; i <= xmm_ub; i++) { __ movdqu(as_XMMRegister(i), xmm_save(i)); } #endif @@ -3681,6 +3686,175 @@ return start; } + + // byte swap x86 long + address generate_ghash_long_swap_mask() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); + address start = __ pc(); + __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none ); + __ emit_data64(0x0706050403020100, relocInfo::none ); + return start; + } + + // byte swap x86 byte array + address generate_ghash_byte_swap_mask() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); + address start = __ pc(); + __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none ); + __ emit_data64(0x0001020304050607, relocInfo::none ); + return start; + } + + /* Single and multi-block ghash operations */ + address generate_ghash_processBlocks() { + __ align(CodeEntryAlignment); + Label L_ghash_loop, L_exit; + StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); + address start = __ pc(); + + const Register state = c_rarg0; + const Register subkeyH = c_rarg1; + const Register data = c_rarg2; + const Register blocks = c_rarg3; + +#ifdef _WIN64 + const int XMM_REG_LAST = 10; +#endif + + const XMMRegister xmm_temp0 = xmm0; + const XMMRegister xmm_temp1 = xmm1; + const XMMRegister xmm_temp2 = xmm2; + const XMMRegister xmm_temp3 = xmm3; + const XMMRegister xmm_temp4 = xmm4; + const XMMRegister xmm_temp5 = xmm5; + const XMMRegister xmm_temp6 = xmm6; + const XMMRegister xmm_temp7 = xmm7; + const XMMRegister xmm_temp8 = xmm8; + const XMMRegister xmm_temp9 = xmm9; + const XMMRegister xmm_temp10 = xmm10; + + __ enter(); + +#ifdef _WIN64 + // save the xmm registers which must be preserved 6-10 + __ subptr(rsp, -rsp_after_call_off * wordSize); + for (int i = 6; i <= XMM_REG_LAST; i++) { + __ movdqu(xmm_save(i), as_XMMRegister(i)); + } +#endif + + __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); + + __ movdqu(xmm_temp0, Address(state, 0)); + __ pshufb(xmm_temp0, xmm_temp10); + + + __ BIND(L_ghash_loop); + __ movdqu(xmm_temp2, Address(data, 0)); + __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); + + __ movdqu(xmm_temp1, Address(subkeyH, 0)); + __ pshufb(xmm_temp1, xmm_temp10); + + __ pxor(xmm_temp0, xmm_temp2); + + // + // Multiply with the hash key + // + __ movdqu(xmm_temp3, xmm_temp0); + __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 + __ movdqu(xmm_temp4, xmm_temp0); + __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 + + __ movdqu(xmm_temp5, xmm_temp0); + __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 + __ movdqu(xmm_temp6, xmm_temp0); + __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 + + __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 + + __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 + __ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right + __ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left + __ pxor(xmm_temp3, xmm_temp5); + __ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result + // of the carry-less multiplication of + // xmm0 by xmm1. + + // We shift the result of the multiplication by one bit position + // to the left to cope for the fact that the bits are reversed. + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp8, xmm_temp6); + __ pslld(xmm_temp3, 1); + __ pslld(xmm_temp6, 1); + __ psrld(xmm_temp7, 31); + __ psrld(xmm_temp8, 31); + __ movdqu(xmm_temp9, xmm_temp7); + __ pslldq(xmm_temp8, 4); + __ pslldq(xmm_temp7, 4); + __ psrldq(xmm_temp9, 12); + __ por(xmm_temp3, xmm_temp7); + __ por(xmm_temp6, xmm_temp8); + __ por(xmm_temp6, xmm_temp9); + + // + // First phase of the reduction + // + // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts + // independently. + __ movdqu(xmm_temp7, xmm_temp3); + __ movdqu(xmm_temp8, xmm_temp3); + __ movdqu(xmm_temp9, xmm_temp3); + __ pslld(xmm_temp7, 31); // packed right shift shifting << 31 + __ pslld(xmm_temp8, 30); // packed right shift shifting << 30 + __ pslld(xmm_temp9, 25); // packed right shift shifting << 25 + __ pxor(xmm_temp7, xmm_temp8); // xor the shifted versions + __ pxor(xmm_temp7, xmm_temp9); + __ movdqu(xmm_temp8, xmm_temp7); + __ pslldq(xmm_temp7, 12); + __ psrldq(xmm_temp8, 4); + __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete + + // + // Second phase of the reduction + // + // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these + // shift operations. + __ movdqu(xmm_temp2, xmm_temp3); + __ movdqu(xmm_temp4, xmm_temp3); + __ movdqu(xmm_temp5, xmm_temp3); + __ psrld(xmm_temp2, 1); // packed left shifting >> 1 + __ psrld(xmm_temp4, 2); // packed left shifting >> 2 + __ psrld(xmm_temp5, 7); // packed left shifting >> 7 + __ pxor(xmm_temp2, xmm_temp4); // xor the shifted versions + __ pxor(xmm_temp2, xmm_temp5); + __ pxor(xmm_temp2, xmm_temp8); + __ pxor(xmm_temp3, xmm_temp2); + __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 + + __ decrement(blocks); + __ jcc(Assembler::zero, L_exit); + __ movdqu(xmm_temp0, xmm_temp6); + __ addptr(data, 16); + __ jmp(L_ghash_loop); + + __ BIND(L_exit); + __ pshufb(xmm_temp6, xmm_temp10); // Byte swap 16-byte result + __ movdqu(Address(state, 0), xmm_temp6); // store the result + +#ifdef _WIN64 + // restore xmm regs belonging to calling function + for (int i = 6; i <= XMM_REG_LAST; i++) { + __ movdqu(as_XMMRegister(i), xmm_save(i)); + } +#endif + __ leave(); + __ ret(0); + return start; + } + /** * Arguments: * @@ -4120,6 +4294,13 @@ StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); } + // Generate GHASH intrinsics code + if (UseGHASHIntrinsics) { + StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); + StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); + StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); + } + // Safefetch stubs. generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc,
--- a/src/cpu/x86/vm/stubRoutines_x86.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/stubRoutines_x86.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,6 +33,8 @@ address StubRoutines::x86::_verify_mxcsr_entry = NULL; address StubRoutines::x86::_key_shuffle_mask_addr = NULL; +address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL; +address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL; uint64_t StubRoutines::x86::_crc_by128_masks[] = {
--- a/src/cpu/x86/vm/stubRoutines_x86.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/stubRoutines_x86.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,10 +36,15 @@ // masks and table for CRC32 static uint64_t _crc_by128_masks[]; static juint _crc_table[]; + // swap mask for ghash + static address _ghash_long_swap_mask_addr; + static address _ghash_byte_swap_mask_addr; public: static address verify_mxcsr_entry() { return _verify_mxcsr_entry; } static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; } static address crc_by128_masks_addr() { return (address)_crc_by128_masks; } + static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; } + static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; } #endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
--- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -33,7 +33,7 @@ enum platform_dependent_constants { code_size1 = 19000, // simply increase if too small (assembler will crash if too small) - code_size2 = 23000 // simply increase if too small (assembler will crash if too small) + code_size2 = 24000 // simply increase if too small (assembler will crash if too small) }; class x86 {
--- a/src/cpu/x86/vm/vm_version_x86.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -677,6 +677,17 @@ FLAG_SET_DEFAULT(UseAESIntrinsics, false); } + // GHASH/GCM intrinsics + if (UseCLMUL && (UseSSE > 2)) { + if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { + UseGHASHIntrinsics = true; + } + } else if (UseGHASHIntrinsics) { + if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) + warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU"); + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); + } + if (UseSHA) { warning("SHA instructions are not available on this CPU"); FLAG_SET_DEFAULT(UseSHA, false); @@ -688,6 +699,12 @@ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + if (UseCRC32CIntrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) + warning("CRC32C intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + // Adjust RTM (Restricted Transactional Memory) flags if (!supports_rtm() && UseRTMLocking) { // Can't continue because UseRTMLocking affects UseBiasedLocking flag
--- a/src/cpu/x86/vm/vm_version_x86.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -702,6 +702,7 @@ static bool supports_avx512cd() { return (_cpuFeatures & CPU_AVX512CD) != 0; } static bool supports_avx512bw() { return (_cpuFeatures & CPU_AVX512BW) != 0; } static bool supports_avx512vl() { return (_cpuFeatures & CPU_AVX512VL) != 0; } + static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); } // Intel features static bool is_intel_family_core() { return is_intel() && extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
--- a/src/cpu/x86/vm/x86.ad Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/x86/vm/x86.ad Thu Jul 02 11:12:59 2015 +0100 @@ -2894,6 +2894,457 @@ ins_pipe( pipe_slow ); %} +// ====================LEGACY REPLICATE======================================= + +instruct Repl4B_mem(vecS dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB (LoadB mem))); + format %{ "punpcklbw $dst,$mem\n\t" + "pshuflw $dst,$dst,0x00\t! replicate4B" %} + ins_encode %{ + __ punpcklbw($dst$$XMMRegister, $mem$$Address); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8B_mem(vecD dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB (LoadB mem))); + format %{ "punpcklbw $dst,$mem\n\t" + "pshuflw $dst,$dst,0x00\t! replicate8B" %} + ins_encode %{ + __ punpcklbw($dst$$XMMRegister, $mem$$Address); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16B(vecX dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB src)); + format %{ "movd $dst,$src\n\t" + "punpcklbw $dst,$dst\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "punpcklqdq $dst,$dst\t! replicate16B" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16B_mem(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB (LoadB mem))); + format %{ "punpcklbw $dst,$mem\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "punpcklqdq $dst,$dst\t! replicate16B" %} + ins_encode %{ + __ punpcklbw($dst$$XMMRegister, $mem$$Address); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32B(vecY dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB src)); + format %{ "movd $dst,$src\n\t" + "punpcklbw $dst,$dst\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! replicate32B" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32B_mem(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB (LoadB mem))); + format %{ "punpcklbw $dst,$mem\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! replicate32B" %} + ins_encode %{ + __ punpcklbw($dst$$XMMRegister, $mem$$Address); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16B_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "punpcklqdq $dst,$dst\t! replicate16B($con)" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32B_imm(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4S(vecD dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS src)); + format %{ "movd $dst,$src\n\t" + "pshuflw $dst,$dst,0x00\t! replicate4S" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4S_mem(vecD dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS (LoadS mem))); + format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} + ins_encode %{ + __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8S(vecX dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS src)); + format %{ "movd $dst,$src\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "punpcklqdq $dst,$dst\t! replicate8S" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8S_mem(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS (LoadS mem))); + format %{ "pshuflw $dst,$mem,0x00\n\t" + "punpcklqdq $dst,$dst\t! replicate8S" %} + ins_encode %{ + __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8S_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "punpcklqdq $dst,$dst\t! replicate8S($con)" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16S(vecY dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS src)); + format %{ "movd $dst,$src\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! replicate16S" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16S_mem(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS (LoadS mem))); + format %{ "pshuflw $dst,$mem,0x00\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! replicate16S" %} + ins_encode %{ + __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16S_imm(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4I(vecX dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateI src)); + format %{ "movd $dst,$src\n\t" + "pshufd $dst,$dst,0x00\t! replicate4I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4I_mem(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateI (LoadI mem))); + format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8I(vecY dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateI src)); + format %{ "movd $dst,$src\n\t" + "pshufd $dst,$dst,0x00\n\t" + "vinserti128h $dst,$dst,$dst\t! replicate8I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8I_mem(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateI (LoadI mem))); + format %{ "pshufd $dst,$mem,0x00\n\t" + "vinserti128h $dst,$dst,$dst\t! replicate8I" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4I_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateI con)); + format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" + "punpcklqdq $dst,$dst" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8I_imm(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateI con)); + format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Long could be loaded into xmm register directly from memory. +instruct Repl2L_mem(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateL (LoadL mem))); + format %{ "movq $dst,$mem\n\t" + "punpcklqdq $dst,$dst\t! replicate2L" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate long (8 byte) scalar to be vector +#ifdef _LP64 +instruct Repl4L(vecY dst, rRegL src) %{ + predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateL src)); + format %{ "movdq $dst,$src\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! replicate4L" %} + ins_encode %{ + __ movdq($dst$$XMMRegister, $src$$Register); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} +#else // _LP64 +instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ + predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateL src)); + effect(TEMP dst, USE src, TEMP tmp); + format %{ "movdl $dst,$src.lo\n\t" + "movdl $tmp,$src.hi\n\t" + "punpckldq $dst,$tmp\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! replicate4L" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); + __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} +#endif // _LP64 + +instruct Repl4L_imm(vecY dst, immL con) %{ + predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateL con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $constantaddress($con)); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4L_mem(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateL (LoadL mem))); + format %{ "movq $dst,$mem\n\t" + "punpcklqdq $dst,$dst\n\t" + "vinserti128h $dst,$dst,$dst\t! replicate4L" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); + __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl2F_mem(vecD dst, memory mem) %{ + predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateF (LoadF mem))); + format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4F_mem(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateF (LoadF mem))); + format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8F(vecY dst, regF src) %{ + predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateF src)); + format %{ "pshufd $dst,$src,0x00\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8F_mem(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateF (LoadF mem))); + format %{ "pshufd $dst,$mem,0x00\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl2D_mem(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateD (LoadD mem))); + format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4D(vecY dst, regD src) %{ + predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateD src)); + format %{ "pshufd $dst,$src,0x44\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4D_mem(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); + match(Set dst (ReplicateD (LoadD mem))); + format %{ "pshufd $dst,$mem,0x44\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// ====================GENERIC REPLICATE========================================== + // Replicate byte scalar to be vector instruct Repl4B(vecS dst, rRegI src) %{ predicate(n->as_Vector()->length() == 4); @@ -2923,60 +3374,6 @@ ins_pipe( pipe_slow ); %} -instruct Repl16B(vecX dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (ReplicateB src)); - format %{ "movd $dst,$src\n\t" - "punpcklbw $dst,$dst\n\t" - "pshuflw $dst,$dst,0x00\n\t" - "punpcklqdq $dst,$dst\t! replicate16B" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl32B(vecY dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 32); - match(Set dst (ReplicateB src)); - format %{ "movd $dst,$src\n\t" - "punpcklbw $dst,$dst\n\t" - "pshuflw $dst,$dst,0x00\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate32B" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl64B(vecZ dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 64); - match(Set dst (ReplicateB src)); - format %{ "movd $dst,$src\n\t" - "punpcklbw $dst,$dst\n\t" - "pshuflw $dst,$dst,0x00\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate32B\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate632B" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Replicate byte scalar immediate to be vector by loading from const table. instruct Repl4B_imm(vecS dst, immI con) %{ predicate(n->as_Vector()->length() == 4); @@ -2998,48 +3395,6 @@ ins_pipe( pipe_slow ); %} -instruct Repl16B_imm(vecX dst, immI con) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (ReplicateB con)); - format %{ "movq $dst,[$constantaddress]\n\t" - "punpcklqdq $dst,$dst\t! replicate16B($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl32B_imm(vecY dst, immI con) %{ - predicate(n->as_Vector()->length() == 32); - match(Set dst (ReplicateB con)); - format %{ "movq $dst,[$constantaddress]\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl64B_imm(vecZ dst, immI con) %{ - predicate(n->as_Vector()->length() == 64); - match(Set dst (ReplicateB con)); - format %{ "movq $dst,[$constantaddress]\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate32B($con)\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Replicate byte scalar zero to be vector instruct Repl4B_zero(vecS dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 4); @@ -3083,18 +3438,6 @@ ins_pipe( fpu_reg_reg ); %} -instruct Repl64B_zero(vecZ dst, immI0 zero) %{ - predicate(n->as_Vector()->length() == 64); - match(Set dst (ReplicateB zero)); - format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} - ins_encode %{ - // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). - int vector_len = 2; - __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); - %} - ins_pipe( fpu_reg_reg ); -%} - // Replicate char/short (2 byte) scalar to be vector instruct Repl2S(vecS dst, rRegI src) %{ predicate(n->as_Vector()->length() == 2); @@ -3108,66 +3451,6 @@ ins_pipe( fpu_reg_reg ); %} -instruct Repl4S(vecD dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateS src)); - format %{ "movd $dst,$src\n\t" - "pshuflw $dst,$dst,0x00\t! replicate4S" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -instruct Repl8S(vecX dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateS src)); - format %{ "movd $dst,$src\n\t" - "pshuflw $dst,$dst,0x00\n\t" - "punpcklqdq $dst,$dst\t! replicate8S" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl16S(vecY dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (ReplicateS src)); - format %{ "movd $dst,$src\n\t" - "pshuflw $dst,$dst,0x00\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate16S" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl32S(vecZ dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 32); - match(Set dst (ReplicateS src)); - format %{ "movd $dst,$src\n\t" - "pshuflw $dst,$dst,0x00\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate16S\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. instruct Repl2S_imm(vecS dst, immI con) %{ predicate(n->as_Vector()->length() == 2); @@ -3189,48 +3472,6 @@ ins_pipe( fpu_reg_reg ); %} -instruct Repl8S_imm(vecX dst, immI con) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateS con)); - format %{ "movq $dst,[$constantaddress]\n\t" - "punpcklqdq $dst,$dst\t! replicate8S($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl16S_imm(vecY dst, immI con) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (ReplicateS con)); - format %{ "movq $dst,[$constantaddress]\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl32S_imm(vecZ dst, immI con) %{ - predicate(n->as_Vector()->length() == 32); - match(Set dst (ReplicateS con)); - format %{ "movq $dst,[$constantaddress]\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate16S($con)\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Replicate char/short (2 byte) scalar zero to be vector instruct Repl2S_zero(vecS dst, immI0 zero) %{ predicate(n->as_Vector()->length() == 2); @@ -3274,18 +3515,6 @@ ins_pipe( fpu_reg_reg ); %} -instruct Repl32S_zero(vecZ dst, immI0 zero) %{ - predicate(n->as_Vector()->length() == 32); - match(Set dst (ReplicateS zero)); - format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} - ins_encode %{ - // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). - int vector_len = 2; - __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); - %} - ins_pipe( fpu_reg_reg ); -%} - // Replicate integer (4 byte) scalar to be vector instruct Repl2I(vecD dst, rRegI src) %{ predicate(n->as_Vector()->length() == 2); @@ -3299,101 +3528,6 @@ ins_pipe( fpu_reg_reg ); %} -instruct Repl4I(vecX dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateI src)); - format %{ "movd $dst,$src\n\t" - "pshufd $dst,$dst,0x00\t! replicate4I" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl8I(vecY dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateI src)); - format %{ "movd $dst,$src\n\t" - "pshufd $dst,$dst,0x00\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate8I" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl16I(vecZ dst, rRegI src) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (ReplicateI src)); - format %{ "movd $dst,$src\n\t" - "pshufd $dst,$dst,0x00\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. -instruct Repl2I_imm(vecD dst, immI con) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (ReplicateI con)); - format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); - %} - ins_pipe( fpu_reg_reg ); -%} - -instruct Repl4I_imm(vecX dst, immI con) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateI con)); - format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" - "punpcklqdq $dst,$dst" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl8I_imm(vecY dst, immI con) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateI con)); - format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl16I_imm(vecZ dst, immI con) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (ReplicateI con)); - format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\n\t" - "vinserti64x4h $dst k0,$dst,$dst" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Integer could be loaded into xmm register directly from memory. instruct Repl2I_mem(vecD dst, memory mem) %{ predicate(n->as_Vector()->length() == 2); @@ -3407,46 +3541,15 @@ ins_pipe( fpu_reg_reg ); %} -instruct Repl4I_mem(vecX dst, memory mem) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateI (LoadI mem))); - format %{ "movd $dst,$mem\n\t" - "pshufd $dst,$dst,0x00\t! replicate4I" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $mem$$Address); - __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl8I_mem(vecY dst, memory mem) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateI (LoadI mem))); - format %{ "movd $dst,$mem\n\t" - "pshufd $dst,$dst,0x00\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate8I" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $mem$$Address); - __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl16I_mem(vecZ dst, memory mem) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (ReplicateI (LoadI mem))); - format %{ "movd $dst,$mem\n\t" - "pshufd $dst,$dst,0x00\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $mem$$Address); - __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); +// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. +instruct Repl2I_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + %} + ins_pipe( fpu_reg_reg ); %} // Replicate integer (4 byte) scalar zero to be vector @@ -3482,18 +3585,6 @@ ins_pipe( fpu_reg_reg ); %} -instruct Repl16I_zero(vecZ dst, immI0 zero) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (ReplicateI zero)); - format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} - ins_encode %{ - // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). - int vector_len = 2; - __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); - %} - ins_pipe( fpu_reg_reg ); -%} - // Replicate long (8 byte) scalar to be vector #ifdef _LP64 instruct Repl2L(vecX dst, rRegL src) %{ @@ -3507,36 +3598,6 @@ %} ins_pipe( pipe_slow ); %} - -instruct Repl4L(vecY dst, rRegL src) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateL src)); - format %{ "movdq $dst,$src\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate4L" %} - ins_encode %{ - __ movdq($dst$$XMMRegister, $src$$Register); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl8L(vecZ dst, rRegL src) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateL src)); - format %{ "movdq $dst,$src\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} - ins_encode %{ - __ movdq($dst$$XMMRegister, $src$$Register); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} #else // _LP64 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ predicate(n->as_Vector()->length() == 2); @@ -3554,45 +3615,6 @@ %} ins_pipe( pipe_slow ); %} - -instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateL src)); - effect(TEMP dst, USE src, TEMP tmp); - format %{ "movdl $dst,$src.lo\n\t" - "movdl $tmp,$src.hi\n\t" - "punpckldq $dst,$tmp\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate4L" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); - __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl8L(vecZ dst, eRegL src, regD tmp) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateL src)); - effect(TEMP dst, USE src, TEMP tmp); - format %{ "movdl $dst,$src.lo\n\t" - "movdl $tmp,$src.hi\n\t" - "punpckldq $dst,$tmp\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); - __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} #endif // _LP64 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. @@ -3608,79 +3630,6 @@ ins_pipe( pipe_slow ); %} -instruct Repl4L_imm(vecY dst, immL con) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateL con)); - format %{ "movq $dst,[$constantaddress]\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress($con)); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl8L_imm(vecZ dst, immL con) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateL con)); - format %{ "movq $dst,[$constantaddress]\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate4L($con)\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L($con)" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $constantaddress($con)); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Long could be loaded into xmm register directly from memory. -instruct Repl2L_mem(vecX dst, memory mem) %{ - predicate(n->as_Vector()->length() == 2); - match(Set dst (ReplicateL (LoadL mem))); - format %{ "movq $dst,$mem\n\t" - "punpcklqdq $dst,$dst\t! replicate2L" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl4L_mem(vecY dst, memory mem) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateL (LoadL mem))); - format %{ "movq $dst,$mem\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! replicate4L" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl8L_mem(vecZ dst, memory mem) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateL (LoadL mem))); - format %{ "movq $dst,$mem\n\t" - "punpcklqdq $dst,$dst\n\t" - "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t" - "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Replicate long (8 byte) scalar zero to be vector instruct Repl2L_zero(vecX dst, immL0 zero) %{ predicate(n->as_Vector()->length() == 2); @@ -3704,18 +3653,6 @@ ins_pipe( fpu_reg_reg ); %} -instruct Repl8L_zero(vecZ dst, immL0 zero) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateL zero)); - format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} - ins_encode %{ - // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). - int vector_len = 2; - __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); - %} - ins_pipe( fpu_reg_reg ); -%} - // Replicate float (4 byte) scalar to be vector instruct Repl2F(vecD dst, regF src) %{ predicate(n->as_Vector()->length() == 2); @@ -3737,32 +3674,6 @@ ins_pipe( pipe_slow ); %} -instruct Repl8F(vecY dst, regF src) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateF src)); - format %{ "pshufd $dst,$src,0x00\n\t" - "vinsertf128h $dst,$dst,$dst\t! replicate8F" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); - __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl16F(vecZ dst, regF src) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (ReplicateF src)); - format %{ "pshufd $dst,$src,0x00\n\t" - "vinsertf128h $dst,$dst,$dst\t! lower replicate8F\n\t" - "vinsertf64x4h $dst k0,$dst,$dst\t! lower replicate8F" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); - __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Replicate float (4 byte) scalar zero to be vector instruct Repl2F_zero(vecD dst, immF0 zero) %{ predicate(n->as_Vector()->length() == 2); @@ -3795,17 +3706,6 @@ ins_pipe( fpu_reg_reg ); %} -instruct Repl16F_zero(vecZ dst, immF0 zero) %{ - predicate(n->as_Vector()->length() == 16); - match(Set dst (ReplicateF zero)); - format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %} - ins_encode %{ - int vector_len = 2; - __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); - %} - ins_pipe( fpu_reg_reg ); -%} - // Replicate double (8 bytes) scalar to be vector instruct Repl2D(vecX dst, regD src) %{ predicate(n->as_Vector()->length() == 2); @@ -3817,32 +3717,6 @@ ins_pipe( pipe_slow ); %} -instruct Repl4D(vecY dst, regD src) %{ - predicate(n->as_Vector()->length() == 4); - match(Set dst (ReplicateD src)); - format %{ "pshufd $dst,$src,0x44\n\t" - "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); - __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct Repl8D(vecZ dst, regD src) %{ - predicate(n->as_Vector()->length() == 8); - match(Set dst (ReplicateD src)); - format %{ "pshufd $dst,$src,0x44\n\t" - "vinsertf128h $dst,$dst,$dst\t! lower replicate4D\n\t" - "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate4D" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); - __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Replicate double (8 byte) scalar zero to be vector instruct Repl2D_zero(vecX dst, immD0 zero) %{ predicate(n->as_Vector()->length() == 2); @@ -3865,8 +3739,636 @@ ins_pipe( fpu_reg_reg ); %} -instruct Repl8D_zero(vecZ dst, immD0 zero) %{ - predicate(n->as_Vector()->length() == 8); +// ====================EVEX REPLICATE============================================= + +instruct Repl4B_mem_evex(vecS dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB (LoadB mem))); + format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %} + ins_encode %{ + int vector_len = 0; + __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8B_mem_evex(vecD dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB (LoadB mem))); + format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %} + ins_encode %{ + int vector_len = 0; + __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16B_evex(vecX dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB src)); + format %{ "vpbroadcastb $dst,$src\t! replicate16B" %} + ins_encode %{ + int vector_len = 0; + __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16B_mem_evex(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB (LoadB mem))); + format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %} + ins_encode %{ + int vector_len = 0; + __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32B_evex(vecY dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB src)); + format %{ "vpbroadcastb $dst,$src\t! replicate32B" %} + ins_encode %{ + int vector_len = 1; + __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32B_mem_evex(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB (LoadB mem))); + format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %} + ins_encode %{ + int vector_len = 1; + __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl64B_evex(vecZ dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 64 && UseAVX > 2); + match(Set dst (ReplicateB src)); + format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} + ins_encode %{ + int vector_len = 2; + __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ + predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB (LoadB mem))); + format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} + ins_encode %{ + int vector_len = 2; + __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16B_imm_evex(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "vpbroadcastb $dst,$dst\t! replicate16B" %} + ins_encode %{ + int vector_len = 0; + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); + __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32B_imm_evex(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateB con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "vpbroadcastb $dst,$dst\t! replicate32B" %} + ins_encode %{ + int vector_len = 1; + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); + __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl64B_imm_evex(vecZ dst, immI con) %{ + predicate(n->as_Vector()->length() == 64 && UseAVX > 2); + match(Set dst (ReplicateB con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "vpbroadcastb $dst,$dst\t! upper replicate64B" %} + ins_encode %{ + int vector_len = 2; + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); + __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 64 && UseAVX > 2); + match(Set dst (ReplicateB zero)); + format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} + ins_encode %{ + // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). + int vector_len = 2; + __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4S_evex(vecD dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS src)); + format %{ "vpbroadcastw $dst,$src\t! replicate4S" %} + ins_encode %{ + int vector_len = 0; + __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4S_mem_evex(vecD dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS (LoadS mem))); + format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} + ins_encode %{ + int vector_len = 0; + __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8S_evex(vecX dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS src)); + format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} + ins_encode %{ + int vector_len = 0; + __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8S_mem_evex(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS (LoadS mem))); + format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} + ins_encode %{ + int vector_len = 0; + __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16S_evex(vecY dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS src)); + format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} + ins_encode %{ + int vector_len = 1; + __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16S_mem_evex(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS (LoadS mem))); + format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} + ins_encode %{ + int vector_len = 1; + __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32S_evex(vecZ dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 32 && UseAVX > 2); + match(Set dst (ReplicateS src)); + format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} + ins_encode %{ + int vector_len = 2; + __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ + predicate(n->as_Vector()->length() == 32 && UseAVX > 2); + match(Set dst (ReplicateS (LoadS mem))); + format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} + ins_encode %{ + int vector_len = 2; + __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8S_imm_evex(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "vpbroadcastw $dst,$dst\t! replicate8S" %} + ins_encode %{ + int vector_len = 0; + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16S_imm_evex(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); + match(Set dst (ReplicateS con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "vpbroadcastw $dst,$dst\t! replicate16S" %} + ins_encode %{ + int vector_len = 1; + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32S_imm_evex(vecZ dst, immI con) %{ + predicate(n->as_Vector()->length() == 32 && UseAVX > 2); + match(Set dst (ReplicateS con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "vpbroadcastw $dst,$dst\t! replicate32S" %} + ins_encode %{ + int vector_len = 2; + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 32 && UseAVX > 2); + match(Set dst (ReplicateS zero)); + format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} + ins_encode %{ + // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). + int vector_len = 2; + __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4I_evex(vecX dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateI src)); + format %{ "vpbroadcastd $dst,$src\t! replicate4I" %} + ins_encode %{ + int vector_len = 0; + __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4I_mem_evex(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateI (LoadI mem))); + format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} + ins_encode %{ + int vector_len = 0; + __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8I_evex(vecY dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateI src)); + format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} + ins_encode %{ + int vector_len = 1; + __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8I_mem_evex(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateI (LoadI mem))); + format %{ "vpbroadcastd $dst,$mem\t! replicate8I" %} + ins_encode %{ + int vector_len = 1; + __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16I_evex(vecZ dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 16 && UseAVX > 2); + match(Set dst (ReplicateI src)); + format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} + ins_encode %{ + int vector_len = 2; + __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ + predicate(n->as_Vector()->length() == 16 && UseAVX > 2); + match(Set dst (ReplicateI (LoadI mem))); + format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} + ins_encode %{ + int vector_len = 2; + __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4I_imm_evex(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateI con)); + format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" + "vpbroadcastd $dst,$dst\t! replicate4I" %} + ins_encode %{ + int vector_len = 0; + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8I_imm_evex(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateI con)); + format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" + "vpbroadcastd $dst,$dst\t! replicate8I" %} + ins_encode %{ + int vector_len = 1; + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16I_imm_evex(vecZ dst, immI con) %{ + predicate(n->as_Vector()->length() == 16 && UseAVX > 2); + match(Set dst (ReplicateI con)); + format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" + "vpbroadcastd $dst,$dst\t! replicate16I" %} + ins_encode %{ + int vector_len = 2; + __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 16 && UseAVX > 2); + match(Set dst (ReplicateI zero)); + format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} + ins_encode %{ + // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). + int vector_len = 2; + __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Replicate long (8 byte) scalar to be vector +#ifdef _LP64 +instruct Repl4L_evex(vecY dst, rRegL src) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateL src)); + format %{ "vpbroadcastq $dst,$src\t! replicate4L" %} + ins_encode %{ + int vector_len = 1; + __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8L_evex(vecZ dst, rRegL src) %{ + predicate(n->as_Vector()->length() == 8 && UseAVX > 2); + match(Set dst (ReplicateL src)); + format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} + ins_encode %{ + int vector_len = 2; + __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); + %} + ins_pipe( pipe_slow ); +%} +#else // _LP64 +instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateL src)); + effect(TEMP dst, USE src, TEMP tmp); + format %{ "movdl $dst,$src.lo\n\t" + "movdl $tmp,$src.hi\n\t" + "punpckldq $dst,$tmp\n\t" + "vpbroadcastq $dst,$dst\t! replicate4L" %} + ins_encode %{ + int vector_len = 1; + __ movdl($dst$$XMMRegister, $src$$Register); + __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); + __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); + __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ + predicate(n->as_Vector()->length() == 8 && UseAVX > 2); + match(Set dst (ReplicateL src)); + effect(TEMP dst, USE src, TEMP tmp); + format %{ "movdl $dst,$src.lo\n\t" + "movdl $tmp,$src.hi\n\t" + "punpckldq $dst,$tmp\n\t" + "vpbroadcastq $dst,$dst\t! replicate8L" %} + ins_encode %{ + int vector_len = 2; + __ movdl($dst$$XMMRegister, $src$$Register); + __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); + __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); + __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} +#endif // _LP64 + +instruct Repl4L_imm_evex(vecY dst, immL con) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateL con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "vpbroadcastq $dst,$dst\t! replicate4L" %} + ins_encode %{ + int vector_len = 1; + __ movq($dst$$XMMRegister, $constantaddress($con)); + __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8L_imm_evex(vecZ dst, immL con) %{ + predicate(n->as_Vector()->length() == 8 && UseAVX > 2); + match(Set dst (ReplicateL con)); + format %{ "movq $dst,[$constantaddress]\n\t" + "vpbroadcastq $dst,$dst\t! replicate8L" %} + ins_encode %{ + int vector_len = 2; + __ movq($dst$$XMMRegister, $constantaddress($con)); + __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl2L_mem_evex(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateL (LoadL mem))); + format %{ "vpbroadcastd $dst,$mem\t! replicate2L" %} + ins_encode %{ + int vector_len = 0; + __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4L_mem_evex(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateL (LoadL mem))); + format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} + ins_encode %{ + int vector_len = 1; + __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8 && UseAVX > 2); + match(Set dst (ReplicateL (LoadL mem))); + format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} + ins_encode %{ + int vector_len = 2; + __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ + predicate(n->as_Vector()->length() == 8 && UseAVX > 2); + match(Set dst (ReplicateL zero)); + format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} + ins_encode %{ + // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). + int vector_len = 2; + __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8F_evex(vecY dst, regF src) %{ + predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateF src)); + format %{ "vbroadcastss $dst,$src\t! replicate8F" %} + ins_encode %{ + int vector_len = 1; + __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8F_mem_evex(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateF (LoadF mem))); + format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} + ins_encode %{ + int vector_len = 1; + __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16F_evex(vecZ dst, regF src) %{ + predicate(n->as_Vector()->length() == 16 && UseAVX > 2); + match(Set dst (ReplicateF src)); + format %{ "vbroadcastss $dst,$src\t! replicate16F" %} + ins_encode %{ + int vector_len = 2; + __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ + predicate(n->as_Vector()->length() == 16 && UseAVX > 2); + match(Set dst (ReplicateF (LoadF mem))); + format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} + ins_encode %{ + int vector_len = 2; + __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ + predicate(n->as_Vector()->length() == 16 && UseAVX > 2); + match(Set dst (ReplicateF zero)); + format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %} + ins_encode %{ + int vector_len = 2; + __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4D_evex(vecY dst, regD src) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateD src)); + format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} + ins_encode %{ + int vector_len = 1; + __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4D_mem_evex(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); + match(Set dst (ReplicateD (LoadD mem))); + format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} + ins_encode %{ + int vector_len = 1; + __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8D_evex(vecZ dst, regD src) %{ + predicate(n->as_Vector()->length() == 8 && UseAVX > 2); + match(Set dst (ReplicateD src)); + format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} + ins_encode %{ + int vector_len = 2; + __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8 && UseAVX > 2); + match(Set dst (ReplicateD (LoadD mem))); + format %{ "vbroadcastsd $dst,$mem\t! replicate8D" %} + ins_encode %{ + int vector_len = 2; + __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ + predicate(n->as_Vector()->length() == 8 && UseAVX > 2); match(Set dst (ReplicateD zero)); format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} ins_encode %{ @@ -4972,6 +5474,17 @@ ins_pipe( pipe_slow ); %} +instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vadd8B(vecD dst, vecD src) %{ predicate(n->as_Vector()->length() == 8); match(Set dst (AddVB dst src)); @@ -4993,6 +5506,17 @@ ins_pipe( pipe_slow ); %} +instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (AddVB src (LoadVector mem))); + format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vadd16B(vecX dst, vecX src) %{ predicate(n->as_Vector()->length() == 16); match(Set dst (AddVB dst src)); @@ -5091,6 +5615,17 @@ ins_pipe( pipe_slow ); %} +instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vadd4S(vecD dst, vecD src) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (AddVS dst src)); @@ -5112,6 +5647,17 @@ ins_pipe( pipe_slow ); %} +instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (AddVS src (LoadVector mem))); + format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vadd8S(vecX dst, vecX src) %{ predicate(n->as_Vector()->length() == 8); match(Set dst (AddVS dst src)); @@ -5210,6 +5756,17 @@ ins_pipe( pipe_slow ); %} +instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVI src (LoadVector mem))); + format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %} + ins_encode %{ + int vector_len = 0; + __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vadd4I(vecX dst, vecX src) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (AddVI dst src)); @@ -5385,6 +5942,17 @@ ins_pipe( pipe_slow ); %} +instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (AddVF src (LoadVector mem))); + format %{ "vaddps $dst,$src,$mem\t! add packed2F" %} + ins_encode %{ + int vector_len = 0; + __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vadd4F(vecX dst, vecX src) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (AddVF dst src)); @@ -5562,6 +6130,17 @@ ins_pipe( pipe_slow ); %} +instruct vsub4B_mem(vecS dst, vecS src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub8B(vecD dst, vecD src) %{ predicate(n->as_Vector()->length() == 8); match(Set dst (SubVB dst src)); @@ -5583,6 +6162,17 @@ ins_pipe( pipe_slow ); %} +instruct vsub8B_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 8); + match(Set dst (SubVB src (LoadVector mem))); + format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %} + ins_encode %{ + int vector_len = 0; + __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub16B(vecX dst, vecX src) %{ predicate(n->as_Vector()->length() == 16); match(Set dst (SubVB dst src)); @@ -5681,6 +6271,17 @@ ins_pipe( pipe_slow ); %} +instruct vsub2S_mem(vecS dst, vecS src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub4S(vecD dst, vecD src) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (SubVS dst src)); @@ -5702,6 +6303,17 @@ ins_pipe( pipe_slow ); %} +instruct vsub4S_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (SubVS src (LoadVector mem))); + format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub8S(vecX dst, vecX src) %{ predicate(n->as_Vector()->length() == 8); match(Set dst (SubVS dst src)); @@ -5800,6 +6412,17 @@ ins_pipe( pipe_slow ); %} +instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (SubVI src (LoadVector mem))); + format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %} + ins_encode %{ + int vector_len = 0; + __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub4I(vecX dst, vecX src) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (SubVI dst src)); @@ -5975,6 +6598,17 @@ ins_pipe( pipe_slow ); %} +instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (SubVF src (LoadVector mem))); + format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %} + ins_encode %{ + int vector_len = 0; + __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vsub4F(vecX dst, vecX src) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (SubVF dst src)); @@ -6152,6 +6786,17 @@ ins_pipe( pipe_slow ); %} +instruct vmul2S_mem(vecS dst, vecS src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vmul4S(vecD dst, vecD src) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (MulVS dst src)); @@ -6173,6 +6818,17 @@ ins_pipe( pipe_slow ); %} +instruct vmul4S_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (MulVS src (LoadVector mem))); + format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %} + ins_encode %{ + int vector_len = 0; + __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vmul8S(vecX dst, vecX src) %{ predicate(n->as_Vector()->length() == 8); match(Set dst (MulVS dst src)); @@ -6271,6 +6927,49 @@ ins_pipe( pipe_slow ); %} +instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (MulVI src (LoadVector mem))); + format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %} + ins_encode %{ + int vector_len = 0; + __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4I(vecX dst, vecX src) %{ + predicate(UseSSE > 3 && n->as_Vector()->length() == 4); + match(Set dst (MulVI dst src)); + format %{ "pmulld $dst,$src\t! mul packed4I" %} + ins_encode %{ + __ pmulld($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (MulVI src1 src2)); + format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} + ins_encode %{ + int vector_len = 0; + __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 4); + match(Set dst (MulVI src (LoadVector mem))); + format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} + ins_encode %{ + int vector_len = 0; + __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); match(Set dst (MulVL src1 src2)); @@ -6282,34 +6981,13 @@ ins_pipe( pipe_slow ); %} -instruct vmul4I(vecX dst, vecX src) %{ - predicate(UseSSE > 3 && n->as_Vector()->length() == 4); - match(Set dst (MulVI dst src)); - format %{ "pmulld $dst,$src\t! mul packed4I" %} - ins_encode %{ - __ pmulld($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (MulVI src1 src2)); - format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %} - ins_encode %{ - int vector_len = 0; - __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{ - predicate(UseAVX > 0 && n->as_Vector()->length() == 4); - match(Set dst (MulVI src (LoadVector mem))); - format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %} - ins_encode %{ - int vector_len = 0; - __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); +instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq()); + match(Set dst (MulVL src (LoadVector mem))); + format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %} + ins_encode %{ + int vector_len = 0; + __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} @@ -6336,6 +7014,28 @@ ins_pipe( pipe_slow ); %} +instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); + match(Set dst (MulVL src1 src2)); + format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} + ins_encode %{ + int vector_len = 2; + __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); + %} + ins_pipe( pipe_slow ); +%} + +instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ + predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); + match(Set dst (MulVL src (LoadVector mem))); + format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} + ins_encode %{ + int vector_len = 2; + __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{ predicate(UseAVX > 1 && n->as_Vector()->length() == 8); match(Set dst (MulVI src1 src2)); @@ -6347,13 +7047,13 @@ ins_pipe( pipe_slow ); %} -instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); - match(Set dst (MulVL src1 src2)); - format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %} - ins_encode %{ - int vector_len = 2; - __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); +instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ + predicate(UseAVX > 1 && n->as_Vector()->length() == 8); + match(Set dst (MulVI src (LoadVector mem))); + format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} + ins_encode %{ + int vector_len = 1; + __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); %} ins_pipe( pipe_slow ); %} @@ -6369,28 +7069,6 @@ ins_pipe( pipe_slow ); %} -instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{ - predicate(UseAVX > 1 && n->as_Vector()->length() == 8); - match(Set dst (MulVI src (LoadVector mem))); - format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %} - ins_encode %{ - int vector_len = 1; - __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{ - predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq()); - match(Set dst (MulVL src (LoadVector mem))); - format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %} - ins_encode %{ - int vector_len = 2; - __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); - %} - ins_pipe( pipe_slow ); -%} - instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{ predicate(UseAVX > 2 && n->as_Vector()->length() == 16); match(Set dst (MulVI src (LoadVector mem))); @@ -6424,6 +7102,17 @@ ins_pipe( pipe_slow ); %} +instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (MulVF src (LoadVector mem))); + format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %} + ins_encode %{ + int vector_len = 0; + __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vmul4F(vecX dst, vecX src) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (MulVF dst src)); @@ -6601,6 +7290,17 @@ ins_pipe( pipe_slow ); %} +instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length() == 2); + match(Set dst (DivVF src (LoadVector mem))); + format %{ "vdivps $dst,$src,$mem\t! div packed2F" %} + ins_encode %{ + int vector_len = 0; + __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vdiv4F(vecX dst, vecX src) %{ predicate(n->as_Vector()->length() == 4); match(Set dst (DivVF dst src)); @@ -7878,6 +8578,17 @@ ins_pipe( pipe_slow ); %} +instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); + match(Set dst (AndV src (LoadVector mem))); + format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} + ins_encode %{ + int vector_len = 0; + __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vand8B(vecD dst, vecD src) %{ predicate(n->as_Vector()->length_in_bytes() == 8); match(Set dst (AndV dst src)); @@ -7899,6 +8610,17 @@ ins_pipe( pipe_slow ); %} +instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); + match(Set dst (AndV src (LoadVector mem))); + format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} + ins_encode %{ + int vector_len = 0; + __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vand16B(vecX dst, vecX src) %{ predicate(n->as_Vector()->length_in_bytes() == 16); match(Set dst (AndV dst src)); @@ -7998,6 +8720,17 @@ ins_pipe( pipe_slow ); %} +instruct vor4B_mem(vecS dst, vecS src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); + match(Set dst (OrV src (LoadVector mem))); + format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %} + ins_encode %{ + int vector_len = 0; + __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vor8B(vecD dst, vecD src) %{ predicate(n->as_Vector()->length_in_bytes() == 8); match(Set dst (OrV dst src)); @@ -8019,6 +8752,17 @@ ins_pipe( pipe_slow ); %} +instruct vor8B_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); + match(Set dst (OrV src (LoadVector mem))); + format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %} + ins_encode %{ + int vector_len = 0; + __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vor16B(vecX dst, vecX src) %{ predicate(n->as_Vector()->length_in_bytes() == 16); match(Set dst (OrV dst src)); @@ -8118,6 +8862,17 @@ ins_pipe( pipe_slow ); %} +instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); + match(Set dst (XorV src (LoadVector mem))); + format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %} + ins_encode %{ + int vector_len = 0; + __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vxor8B(vecD dst, vecD src) %{ predicate(n->as_Vector()->length_in_bytes() == 8); match(Set dst (XorV dst src)); @@ -8139,6 +8894,17 @@ ins_pipe( pipe_slow ); %} +instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{ + predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); + match(Set dst (XorV src (LoadVector mem))); + format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %} + ins_encode %{ + int vector_len = 0; + __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); + %} + ins_pipe( pipe_slow ); +%} + instruct vxor16B(vecX dst, vecX src) %{ predicate(n->as_Vector()->length_in_bytes() == 16); match(Set dst (XorV dst src));
--- a/src/cpu/zero/vm/globals_zero.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/cpu/zero/vm/globals_zero.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -63,7 +63,8 @@ define_pd_global(bool, PreserveFramePointer, false); -#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \ + \ product(bool, UseFastEmptyMethods, true, \ "Use fast method entry code for empty methods") \ \
--- a/src/os/aix/vm/decoder_aix.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/os/aix/vm/decoder_aix.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright 2013 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -38,8 +38,8 @@ virtual bool demangle(const char* symbol, char* buf, int buflen) { return false; } // demangled by getFuncName - virtual bool decode(address addr, char* buf, int buflen, int* offset, const char* modulepath) { - return (::getFuncName((codeptr_t)addr, buf, buflen, offset, 0, 0, 0) == 0); + virtual bool decode(address addr, char* buf, int buflen, int* offset, const char* modulepath, bool demangle) { + return (::getFuncName((codeptr_t)addr, buf, buflen, offset, 0, 0, 0, demangle) == 0); } virtual bool decode(address addr, char *buf, int buflen, int* offset, const void *base) { ShouldNotReachHere();
--- a/src/os/aix/vm/globals_aix.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/os/aix/vm/globals_aix.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -29,7 +29,7 @@ // // Defines Aix specific flags. They are not available on other platforms. // -#define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \ +#define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct, range, constraint) \ \ /* Use 64K pages for virtual memory (shmat). */ \ product(bool, Use64KPages, true, \
--- a/src/os/aix/vm/os_aix.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/os/aix/vm/os_aix.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -1439,7 +1439,8 @@ } bool os::dll_address_to_function_name(address addr, char *buf, - int buflen, int *offset) { + int buflen, int *offset, + bool demangle) { if (offset) { *offset = -1; } @@ -1454,7 +1455,7 @@ } // Go through Decoder::decode to call getFuncName which reads the name from the traceback table. - return Decoder::decode(addr, buf, buflen, offset); + return Decoder::decode(addr, buf, buflen, offset, demangle); } static int getModuleName(codeptr_t pc, // [in] program counter @@ -1653,7 +1654,7 @@ } } -void os::pd_print_cpu_info(outputStream* st) { +void os::pd_print_cpu_info(outputStream* st, char* buf, size_t buflen) { // cpu st->print("CPU:"); st->print("total %d", os::processor_count()); @@ -3761,10 +3762,6 @@ return fetcher.result(); } -// Not neede on Aix. -// int os::Aix::safe_cond_timedwait(pthread_cond_t *_cond, pthread_mutex_t *_mutex, const struct timespec *_abstime) { -// } - //////////////////////////////////////////////////////////////////////////////// // debug support
--- a/src/os/aix/vm/porting_aix.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/os/aix/vm/porting_aix.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -114,7 +114,8 @@ int* p_displacement, // [out] optional: displacement (-1 if not available) const struct tbtable** p_tb, // [out] optional: ptr to traceback table to get further // information (NULL if not available) - char* p_errmsg, size_t errmsglen // [out] optional: user provided buffer for error messages + char* p_errmsg, size_t errmsglen,// [out] optional: user provided buffer for error messages + bool demangle // [in] whether to demangle the name ) { struct tbtable* tb = 0; unsigned int searchcount = 0; @@ -216,15 +217,17 @@ p_name[0] = '\0'; // If it is a C++ name, try and demangle it using the Demangle interface (see demangle.h). - char* rest; - Name* const name = Demangle(buf, rest); - if (name) { - const char* const demangled_name = name->Text(); - if (demangled_name) { - strncpy(p_name, demangled_name, namelen-1); - p_name[namelen-1] = '\0'; + if (demangle) { + char* rest; + Name* const name = Demangle(buf, rest); + if (name) { + const char* const demangled_name = name->Text(); + if (demangled_name) { + strncpy(p_name, demangled_name, namelen-1); + p_name[namelen-1] = '\0'; + } + delete name; } - delete name; } // Fallback: if demangling did not work, just provide the unmangled name. @@ -325,7 +328,7 @@ int displacement = 0; if (getFuncName((codeptr_t) p, funcname, sizeof(funcname), &displacement, - NULL, NULL, 0) == 0) { + NULL, NULL, 0, true /* demangle */) == 0) { if (funcname[0] != '\0') { const char* const interned = dladdr_fixed_strings.intern(funcname); info->dli_sname = interned;
--- a/src/os/aix/vm/porting_aix.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/os/aix/vm/porting_aix.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -87,7 +87,8 @@ char* p_name, size_t namelen, // [out] optional: user provided buffer for the function name int* p_displacement, // [out] optional: displacement const struct tbtable** p_tb, // [out] optional: ptr to traceback table to get further information - char* p_errmsg, size_t errmsglen // [out] optional: user provided buffer for error messages + char* p_errmsg, size_t errmsglen,// [out] optional: user provided buffer for error messages + bool demangle = true // [in] whether to demangle the name ); // -------------------------------------------------------------------------
--- a/src/os/bsd/vm/decoder_machO.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/os/bsd/vm/decoder_machO.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -42,7 +42,7 @@ virtual bool decode(address pc, char* buf, int buflen, int* offset, const void* base); virtual bool decode(address pc, char* buf, int buflen, int* offset, - const char* module_path = NULL) { + const char* module_path, bool demangle) { ShouldNotReachHere(); return false; }
--- a/src/os/bsd/vm/globals_bsd.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/os/bsd/vm/globals_bsd.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,19 +28,20 @@ // // Defines Bsd specific flags. They are not available on other platforms. // -#define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \ - product(bool, UseOprofile, false, \ - "enable support for Oprofile profiler") \ - \ - product(bool, UseBsdPosixThreadCPUClocks, true, \ - "enable fast Bsd Posix clocks where available") \ -/* NB: The default value of UseBsdPosixThreadCPUClocks may be \ - overridden in Arguments::parse_each_vm_init_arg. */ \ - \ - product(bool, UseHugeTLBFS, false, \ - "Use MAP_HUGETLB for large pages") \ - \ - product(bool, UseSHM, false, \ +#define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct, range, constraint) \ + \ + product(bool, UseOprofile, false, \ + "enable support for Oprofile profiler") \ + \ + /* NB: The default value of UseBsdPosixThreadCPUClocks may be */ \ + /* overridden in Arguments::parse_each_vm_init_arg. */ \ + product(bool, UseBsdPosixThreadCPUClocks, true, \ + "enable fast Bsd Posix clocks where available") \ + \ + product(bool, UseHugeTLBFS, false, \ + "Use MAP_HUGETLB for large pages") \ + \ + product(bool, UseSHM, false, \ "Use SYSV shared memory for large pages") //
--- a/src/os/bsd/vm/os_bsd.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/os/bsd/vm/os_bsd.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -637,11 +637,6 @@ ////////////////////////////////////////////////////////////////////////////// // create new thread -// check if it's safe to start a new thread -static bool _thread_safety_check(Thread* thread) { - return true; -} - #ifdef __APPLE__ // library handle for calling objc_registerThreadWithCollector() // without static linking to the libobjc library @@ -681,15 +676,6 @@ OSThread* osthread = thread->osthread(); Monitor* sync = osthread->startThread_lock(); - // non floating stack BsdThreads needs extra check, see above - if (!_thread_safety_check(thread)) { - // notify parent thread - MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag); - osthread->set_state(ZOMBIE); - sync->notify_all(); - return NULL; - } - osthread->set_thread_id(os::Bsd::gettid()); #ifdef __APPLE__ @@ -1339,7 +1325,8 @@ #define MACH_MAXSYMLEN 256 bool os::dll_address_to_function_name(address addr, char *buf, - int buflen, int *offset) { + int buflen, int *offset, + bool demangle) { // buf is not optional, but offset is optional assert(buf != NULL, "sanity check"); @@ -1349,7 +1336,7 @@ if (dladdr((void*)addr, &dlinfo) != 0) { // see if we have a matching symbol if (dlinfo.dli_saddr != NULL && dlinfo.dli_sname != NULL) { - if (!Decoder::demangle(dlinfo.dli_sname, buf, buflen)) { + if (!(demangle && Decoder::demangle(dlinfo.dli_sname, buf, buflen))) { jio_snprintf(buf, buflen, "%s", dlinfo.dli_sname); } if (offset != NULL) *offset = addr - (address)dlinfo.dli_saddr; @@ -1358,15 +1345,16 @@ // no matching symbol so try for just file info if (dlinfo.dli_fname != NULL && dlinfo.dli_fbase != NULL) { if (Decoder::decode((address)(addr - (address)dlinfo.dli_fbase), - buf, buflen, offset, dlinfo.dli_fname)) { + buf, buflen, offset, dlinfo.dli_fname, demangle)) { return true; } } // Handle non-dynamic manually: if (dlinfo.dli_fbase != NULL && - Decoder::decode(addr, localbuf, MACH_MAXSYMLEN, offset, dlinfo.dli_fbase)) { - if (!Decoder::demangle(localbuf, buf, buflen)) { + Decoder::decode(addr, localbuf, MACH_MAXSYMLEN, offset, + dlinfo.dli_fbase)) { + if (!(demangle && Decoder::demangle(localbuf, buf, buflen))) { jio_snprintf(buf, buflen, "%s", localbuf); } return true; @@ -1706,7 +1694,7 @@ os::Posix::print_load_average(st); } -void os::pd_print_cpu_info(outputStream* st) { +void os::pd_print_cpu_info(outputStream* st, char* buf, size_t buflen) { // Nothing to do for now. } @@ -2276,8 +2264,6 @@ return os::uncommit_memory(addr, size); } -static address _highest_vm_reserved_address = NULL; - // If 'fixed' is true, anon_mmap() will attempt to reserve anonymous memory // at 'requested_addr'. If there are existing memory mappings at the same // location, however, they will be overwritten. If 'fixed' is false, @@ -2300,23 +2286,9 @@ addr = (char*)::mmap(requested_addr, bytes, PROT_NONE, flags, -1, 0); - if (addr != MAP_FAILED) { - // anon_mmap() should only get called during VM initialization, - // don't need lock (actually we can skip locking even it can be called - // from multiple threads, because _highest_vm_reserved_address is just a - // hint about the upper limit of non-stack memory regions.) - if ((address)addr + bytes > _highest_vm_reserved_address) { - _highest_vm_reserved_address = (address)addr + bytes; - } - } - return addr == MAP_FAILED ? NULL : addr; } -// Don't update _highest_vm_reserved_address, because there might be memory -// regions above addr + size. If so, releasing a memory region only creates -// a hole in the address space, it doesn't help prevent heap-stack collision. -// static int anon_munmap(char * addr, size_t size) { return ::munmap(addr, size) == 0; } @@ -2490,15 +2462,7 @@ assert(bytes % os::vm_page_size() == 0, "reserving unexpected size block"); // Repeatedly allocate blocks until the block is allocated at the - // right spot. Give up after max_tries. Note that reserve_memory() will - // automatically update _highest_vm_reserved_address if the call is - // successful. The variable tracks the highest memory address every reserved - // by JVM. It is used to detect heap-stack collision if running with - // fixed-stack BsdThreads. Because here we may attempt to reserve more - // space than needed, it could confuse the collision detecting code. To - // solve the problem, save current _highest_vm_reserved_address and - // calculate the correct value before return. - address old_highest = _highest_vm_reserved_address; + // right spot. // Bsd mmap allows caller to pass an address as hint; give it a try first, // if kernel honors the hint then we can return immediately. @@ -2552,10 +2516,8 @@ } if (i < max_tries) { - _highest_vm_reserved_address = MAX2(old_highest, (address)requested_addr + bytes); return requested_addr; } else { - _highest_vm_reserved_address = old_highest; return NULL; } } @@ -3715,12 +3677,6 @@ return fetcher.result(); } -int os::Bsd::safe_cond_timedwait(pthread_cond_t *_cond, - pthread_mutex_t *_mutex, - const struct timespec *_abstime) { - return pthread_cond_timedwait(_cond, _mutex, _abstime); -} - //////////////////////////////////////////////////////////////////////////////// // debug support @@ -4286,7 +4242,7 @@ // In that case, we should propagate the notify to another waiter. while (_Event < 0) { - status = os::Bsd::safe_cond_timedwait(_cond, _mutex, &abst); + status = pthread_cond_timedwait(_cond, _mutex, &abst); if (status != 0 && WorkAroundNPTLTimedWaitHang) { pthread_cond_destroy(_cond); pthread_cond_init(_cond, NULL); @@ -4492,7 +4448,7 @@ if (time == 0) { status = pthread_cond_wait(_cond, _mutex); } else { - status = os::Bsd::safe_cond_timedwait(_cond, _mutex, &absTime); + status = pthread_cond_timedwait(_cond, _mutex, &absTime); if (status != 0 && WorkAroundNPTLTimedWaitHang) { pthread_cond_destroy(_cond); pthread_cond_init(_cond, NULL);
--- a/src/os/bsd/vm/os_bsd.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/os/bsd/vm/os_bsd.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -30,9 +30,6 @@ // Information about the protection of the page at address '0' on this os. static bool zero_page_read_protected() { return true; } -// pthread_getattr_np comes with BsdThreads-0.9-7 on RedHat 7.1 -typedef int (*pthread_getattr_func_type)(pthread_t, pthread_attr_t *); - #ifdef __APPLE__ // Mac OS X doesn't support clock_gettime. Stub out the type, it is // unused @@ -145,9 +142,6 @@ // none present - // BsdThreads work-around for 6292965 - static int safe_cond_timedwait(pthread_cond_t *_cond, pthread_mutex_t *_mutex, const struct timespec *_abstime); - private: typedef int (*sched_getcpu_func_t)(void); typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
--- a/src/os/linux/vm/globals_linux.hpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/os/linux/vm/globals_linux.hpp Thu Jul 02 11:12:59 2015 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,14 +28,15 @@ // // Defines Linux specific flags. They are not available on other platforms. // -#define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \ +#define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct, range, constraint) \ + \ product(bool, UseOprofile, false, \ "enable support for Oprofile profiler") \ \ + /* NB: The default value of UseLinuxPosixThreadCPUClocks may be */ \ + /* overridden in Arguments::parse_each_vm_init_arg. */ \ product(bool, UseLinuxPosixThreadCPUClocks, true, \ "enable fast Linux Posix clocks where available") \ -/* NB: The default value of UseLinuxPosixThreadCPUClocks may be \ - overridden in Arguments::parse_each_vm_init_arg. */ \ \ product(bool, UseHugeTLBFS, false, \ "Use MAP_HUGETLB for large pages") \
--- a/src/os/linux/vm/os_linux.cpp Tue Jun 16 17:31:53 2015 +0100 +++ b/src/os/linux/vm/os_linux.cpp Thu Jul 02 11:12:59 2015 +0100 @@ -135,8 +135,6 @@ pthread_t os::Linux::_main_thread; int os::Linux::_page_size = -1; const int os::Linux::_vm_default_page_size = (8 * K); -bool os::Linux::_is_floating_stack = false; -bool os::Linux::_is_NPTL = false; bool os::Linux::_supports_fast_thread_cpu_time = false; const char * os::Linux::_glibc_version = NULL; const char * os::Linux::_libpthread_version = NULL; @@ -150,8 +148,6 @@ static sigset_t check_signal_done; static bool check_signals = true; -static pid_t _initial_pid = 0; - // Signal number used to suspend/resume a thread // do not use any signal number less than SIGSEGV, see 4355769 @@ -223,18 +219,10 @@ // // Returns the kernel thread id of the currently running thread. Kernel // thread id is used to access /proc. -// -// (Note that getpid() on LinuxThreads returns kernel thread id too; but -// on NPTL, it returns the same pid for all threads, as required by POSIX.) -// pid_t os::Linux::gettid() { int rslt = syscall(SYS_gettid); - if (rslt == -1) { - // old kernel, no NPTL support - return getpid(); - } else { - return (pid_t)rslt; - } + assert(rslt != -1, "must be."); // old linuxthreads implementation? + return (pid_t)rslt; } // Most versions of linux have a bug where the number of processors are @@ -508,68 +496,48 @@ // detecting pthread library void os::Linux::libpthread_init() { - // Save glibc and pthread version strings. Note that _CS_GNU_LIBC_VERSION - // and _CS_GNU_LIBPTHREAD_VERSION are supported in glibc >= 2.3.2. Use a - // generic name for earlier versions. - // Define macros here so we can build HotSpot on old systems. -#ifndef _CS_GNU_LIBC_VERSION - #define _CS_GNU_LIBC_VERSION 2 + // Save glibc and pthread version strings. +#if !defined(_CS_GNU_LIBC_VERSION) || \ + !defined(_CS_GNU_LIBPTHREAD_VERSION) + #error "glibc too old (< 2.3.2)" #endif -#ifndef _CS_GNU_LIBPTHREAD_VERSION - #define _CS_GNU_LIBPTHREAD_VERSION 3 -#endif size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0); - if (n > 0) { - char *str = (char *)malloc(n, mtInternal); - confstr(_CS_GNU_LIBC_VERSION, str, n); - os::Linux::set_glibc_version(str); - } else { - // _CS_GNU_LIBC_VERSION is not supported, try gnu_get_libc_version() - static char _gnu_libc_version[32]; - jio_snprintf(_gnu_libc_version, sizeof(_gnu_libc_version), - "glibc %s %s", gnu_get_libc_version(), gnu_get_libc_release()); - os::Linux::set_glibc_version(_gnu_libc_version); - } + assert(n > 0, "cannot retrieve glibc version"); + char *str = (char *)malloc(n, mtInternal); + confstr(_CS_GNU_LIBC_VERSION, str, n); + os::Linux::set_glibc_version(str); n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0); - if (n > 0) { - char *str = (char *)malloc(n, mtInternal); - confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n); - // Vanilla RH-9 (glibc 2.3.2) has a bug that confstr() always tells - // us "NPTL-0.29" even we are running with LinuxThreads. Check if this - // is the case. LinuxThreads has a hard limit on max number of threads. - // So sysconf(_SC_THREAD_THREADS_MAX) will return a positive value. - // On the other hand, NPTL does not have such a limit, sysconf() - // will return -1 and errno is not changed. Check if it is really NPTL. - if (strcmp(os::Linux::glibc_version(), "glibc 2.3.2") == 0 && - strstr(str, "NPTL") && - sysconf(_SC_THREAD_THREADS_MAX) > 0) { - free(str); - os::Linux::set_libpthread_version("linuxthreads"); - } else { - os::Linux::set_libpthread_version(str); - } - } else { - // glibc before 2.3.2 only has LinuxThreads. - os::Linux::set_libpthread_version("linuxthreads"); - } - - if (strstr(libpthread_version(), "NPTL")) { - os::Linux::set_is_NPTL(); - } else { - os::Linux::set_is_LinuxThreads(); - } - - // LinuxThreads have two flavors: floating-stack mode, which allows variable - // stack size; and fixed-stack mode. NPTL is always floating-stack. - if (os::Linux::is_NPTL() || os::Linux::supports_variable_stack_size()) { - os::Linux::set_is_floating_stack(); - } + assert(n > 0, "cannot retrieve pthread version"); + str = (char *)malloc(n, mtInternal); + confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n); + os::Linux::set_libpthread_version(str); } ///////////////////////////////////////////////////////////////////////////// -// thread stack +// thread stack expansion + +// os::Linux::manually_expand_stack() takes care of expanding the thread +// stack. Note that this is normally not needed: pthread stacks allocate +// thread stack using mmap() without MAP_NORESERVE, so the stack is already +// committed. Therefore it is not necessary to expand the stack manually. +// +// Manually expanding the stack was historically needed on LinuxThreads +// thread stacks, which were allocated with mmap(MAP_GROWSDOWN). Nowadays +// it is kept to deal with very rare corner cases: +// +// For one, user may run the VM on an own implementation of threads +// whose stacks are - like the old LinuxThreads - implemented using +// mmap(MAP_GROWSDOWN). +// +// Also, this coding may be needed if the VM is running on the primordial +// thread. Normally we avoid running on the primordial thread; however, +// user may still invoke the VM on the primord