OpenJDK / amber / amber

changeset 13172:11268064fd9a
Merge

author:    lana
date:      Tue, 03 Jul 2012 18:23:04 -0700
parents:   1ac5e9a54a6e 600c9a1feb01
children:  89587a8ac2f4
files:
diffstat:  142 files changed, 53972 insertions(+), 3994 deletions(-)
line diff
--- a/.hgtags	Wed Jun 27 21:09:29 2012 +0100
+++ b/.hgtags	Tue Jul 03 18:23:04 2012 -0700
@@ -166,3 +166,4 @@
 5c5a64ec0839df5affe9394b99ff338c363acbca jdk8-b42
 69d8a827cdf9236be9694a46d75c710d71dac7d7 jdk8-b43
 7e981cb0ad6a194f1fa859f9ad47586db461f269 jdk8-b44
+9b19b2302c28f4da6d4078f66234abecfed5688a jdk8-b45
--- a/.hgtags-top-repo	Wed Jun 27 21:09:29 2012 +0100
+++ b/.hgtags-top-repo	Tue Jul 03 18:23:04 2012 -0700
@@ -166,3 +166,4 @@
 1ce5dc16416611c58b7480ca67a2eee5153498a6 jdk8-b42
 661c9aae602bbd9766d12590800c90f1edd1d8dd jdk8-b43
 e4f81a817447c3a4f6868f083c81c2fb1b15d44c jdk8-b44
+633f2378c904c92bb922a6e19e9f62fe8eac14af jdk8-b45
--- a/corba/.hgtags	Wed Jun 27 21:09:29 2012 +0100
+++ b/corba/.hgtags	Tue Jul 03 18:23:04 2012 -0700
@@ -166,3 +166,4 @@
 79cc42c9c71bbd6630ede681642e98f5e4a841fa jdk8-b42
 cd879aff5d3cc1f58829aab3116880aa19525b78 jdk8-b43
 439d9bf8e4ff204cc89c9974c1515a508b2cc6ff jdk8-b44
+747dad9e9d37d244a5c765a1afe9194f7ddae118 jdk8-b45
--- a/hotspot/.hgtags	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/.hgtags	Tue Jul 03 18:23:04 2012 -0700
@@ -256,3 +256,5 @@
 e77b8e0ed1f84e3e268239e276c7ab64fa573baa jdk8-b43
 5ba29a1db46ecb80a321ca873adb56a3fe6ad320 hs24-b14
 831e5c76a20af18f3c08c5a95ed31be0e128a010 jdk8-b44
+9d5f20961bc5846fa8d098d534effafbbdae0a58 jdk8-b45
+40e5a3f2907ed02b335c7caa8ecf068cc801380d hs24-b15
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/bugspot/BugSpot.java	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/bugspot/BugSpot.java	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -657,7 +657,7 @@
         while (fr != null) {
           trace.add(new StackTraceEntry(fr, getCDebugger()));
           try {
-            fr = fr.sender();
+            fr = fr.sender(t);
           } catch (AddressException e) {
             e.printStackTrace();
             showMessageDialog("Error while walking stack; stack trace will be truncated\n(see console for details)",
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/bsd/amd64/BsdAMD64CFrame.java	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/bsd/amd64/BsdAMD64CFrame.java	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.bsd.amd64;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.amd64.*;
 import sun.jvm.hotspot.debugger.bsd.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.cdbg.basic.*;
@@ -51,8 +52,11 @@
     return rbp;
   }
 
-  public CFrame sender() {
-    if (rbp == null) {
+  public CFrame sender(ThreadProxy thread) {
+    AMD64ThreadContext context = (AMD64ThreadContext) thread.getContext();
+    Address rsp = context.getRegisterAsAddress(AMD64ThreadContext.RSP);
+
+    if ( (rbp == null) || rbp.lessThan(rsp) ) {
       return null;
     }
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/bsd/x86/BsdX86CFrame.java	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/bsd/x86/BsdX86CFrame.java	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 import sun.jvm.hotspot.debugger.bsd.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.cdbg.basic.*;
+import sun.jvm.hotspot.debugger.x86.*;
 
 final public class BsdX86CFrame extends BasicCFrame {
   // package/class internals only
@@ -52,8 +53,11 @@
     return ebp;
  }
 
-  public CFrame sender() {
-    if (ebp == null) {
+  public CFrame sender(ThreadProxy thread) {
+    X86ThreadContext context = (X86ThreadContext) thread.getContext();
+    Address esp = context.getRegisterAsAddress(X86ThreadContext.ESP);
+
+    if ( (ebp == null) || ebp.lessThan(esp) ) {
      return null;
    }
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/cdbg/CFrame.java	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/cdbg/CFrame.java	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
 public interface CFrame {
   /** Returns null when no more frames on stack */
-  public CFrame sender();
+  public CFrame sender(ThreadProxy th);
 
   /** Get the program counter of this frame */
   public Address pc();
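The CFrame.sender() signature change above threads the current ThreadProxy through the native stack walk, so each platform-specific frame can consult live register state (for example RSP) while locating its caller. A minimal sketch of a caller adapting to the new API, assuming a top frame obtained from CDebugger.topFrameForThread(thread); the class and method names here are illustrative, not part of this changeset:

    import sun.jvm.hotspot.debugger.ThreadProxy;
    import sun.jvm.hotspot.debugger.cdbg.CFrame;

    public class FrameWalkSketch {
      // Counts native frames for one thread. "top" is assumed to come from
      // CDebugger.topFrameForThread(thread); sender(thread) returns null
      // once the walk reaches the last (or an implausible) frame.
      static int countFrames(CFrame top, ThreadProxy thread) {
        int n = 0;
        for (CFrame fr = top; fr != null; fr = fr.sender(thread)) {
          n++;
        }
        return n;
      }
    }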
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/cdbg/basic/amd64/AMD64CFrame.java	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/cdbg/basic/amd64/AMD64CFrame.java	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.cdbg.basic.amd64;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.amd64.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.cdbg.basic.*;
 
@@ -43,8 +44,11 @@
     this.pc = pc;
   }
 
-  public CFrame sender() {
-    if (rbp == null) {
+  public CFrame sender(ThreadProxy thread) {
+    AMD64ThreadContext context = (AMD64ThreadContext) thread.getContext();
+    Address rsp = context.getRegisterAsAddress(AMD64ThreadContext.RSP);
+
+    if ( (rbp == null) || rbp.lessThan(rsp) ) {
      return null;
    }
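The extra rbp.lessThan(rsp) guard added to the x86 and AMD64 walkers encodes a sanity check: on a downward-growing stack, a saved frame pointer that sits below the current stack pointer cannot belong to a live caller frame, so the walk stops rather than chasing a garbage %rbp. An illustrative sketch of that predicate, using plain long values as stand-ins for the SA Address type (hypothetical helper, not part of this changeset):

    static boolean plausibleSender(long rbp, long rsp) {
      // A zero frame pointer, or one below the stack pointer, ends the walk.
      return rbp != 0 && Long.compareUnsigned(rbp, rsp) >= 0;
    }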
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/cdbg/basic/x86/X86CFrame.java	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/cdbg/basic/x86/X86CFrame.java	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.cdbg.basic.x86;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.x86.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.cdbg.basic.*;
 
@@ -43,8 +44,11 @@
     this.pc = pc;
   }
 
-  public CFrame sender() {
-    if (ebp == null) {
+  public CFrame sender(ThreadProxy thread) {
+    X86ThreadContext context = (X86ThreadContext) thread.getContext();
+    Address esp = context.getRegisterAsAddress(X86ThreadContext.ESP);
+
+    if ( (ebp == null) || ebp.lessThan(esp) ) {
      return null;
    }
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/amd64/LinuxAMD64CFrame.java	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/amd64/LinuxAMD64CFrame.java	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.linux.amd64;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.amd64.*;
 import sun.jvm.hotspot.debugger.linux.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.cdbg.basic.*;
@@ -51,8 +52,11 @@
     return rbp;
   }
 
-  public CFrame sender() {
-    if (rbp == null) {
+  public CFrame sender(ThreadProxy thread) {
+    AMD64ThreadContext context = (AMD64ThreadContext) thread.getContext();
+    Address rsp = context.getRegisterAsAddress(AMD64ThreadContext.RSP);
+
+    if ( (rbp == null) || rbp.lessThan(rsp) ) {
      return null;
    }
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/sparc/LinuxSPARCCFrame.java	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/sparc/LinuxSPARCCFrame.java	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -57,7 +57,7 @@
     return sp;
   }
 
-  public CFrame sender() {
+  public CFrame sender(ThreadProxy thread) {
     if (sp == null) {
       return null;
     }
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/x86/LinuxX86CFrame.java	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/x86/LinuxX86CFrame.java	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 import sun.jvm.hotspot.debugger.linux.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.cdbg.basic.*;
+import sun.jvm.hotspot.debugger.x86.*;
 
 final public class LinuxX86CFrame extends BasicCFrame {
   // package/class internals only
@@ -52,8 +53,11 @@
     return ebp;
   }
 
-  public CFrame sender() {
-    if (ebp == null) {
+  public CFrame sender(ThreadProxy thread) {
+    X86ThreadContext context = (X86ThreadContext) thread.getContext();
+    Address esp = context.getRegisterAsAddress(X86ThreadContext.ESP);
+
+    if ( (ebp == null) || ebp.lessThan(esp) ) {
      return null;
    }
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcCFrame.java	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcCFrame.java	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,7 +37,7 @@
     return fp;
   }
 
-  public CFrame sender() {
+  public CFrame sender(ThreadProxy t) {
     return sender;
   }
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/OopUtilities.java	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/OopUtilities.java	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -141,18 +141,19 @@
   public static String stringOopToString(Oop stringOop) {
     if (offsetField == null) {
       InstanceKlass k = (InstanceKlass) stringOop.getKlass();
-      offsetField = (IntField) k.findField("offset", "I");
-      countField  = (IntField) k.findField("count",  "I");
+      offsetField = (IntField) k.findField("offset", "I"); // optional
+      countField  = (IntField) k.findField("count",  "I"); // optional
       valueField  = (OopField) k.findField("value",  "[C");
       if (Assert.ASSERTS_ENABLED) {
-        Assert.that(offsetField != null &&
-                    countField != null &&
-                    valueField != null, "must find all java.lang.String fields");
+        Assert.that(valueField != null, "Field \'value\' of java.lang.String not found");
       }
     }
-    return charArrayToString((TypeArray) valueField.getValue(stringOop),
-                             offsetField.getValue(stringOop),
-                             countField.getValue(stringOop));
+    if (offsetField != null && countField != null) {
+      return charArrayToString((TypeArray) valueField.getValue(stringOop),
+                               offsetField.getValue(stringOop),
+                               countField.getValue(stringOop));
+    }
+    return charArrayToString((TypeArray) valueField.getValue(stringOop));
   }
 
   public static String stringOopToEscapedString(Oop stringOop) {
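The relaxation above makes the offset and count lookups optional because newer JDKs (around 7u6) dropped those fields from java.lang.String; only value is guaranteed, and the decoding falls back accordingly. A self-contained sketch of that fallback logic (hypothetical helper, not SA code; nulls stand in for missing fields):

    static String decodeString(char[] value, Integer offset, Integer count) {
      if (offset != null && count != null) {
        // old layout: the string is the window [offset, offset + count)
        return new String(value, offset, count);
      }
      // new layout: the value array holds exactly the string's characters
      return new String(value);
    }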
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/tools/PStack.java	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/tools/PStack.java	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -158,7 +158,7 @@
             printUnknown(out);
           }
         }
-        f = f.sender();
+        f = f.sender(th);
       }
     } catch (Exception exp) {
       exp.printStackTrace();
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/ObjectReader.java Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/ObjectReader.java Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -85,6 +85,21 @@ this(new ProcImageClassLoader()); } + static void debugPrintln(String msg) { + if (DEBUG) { + System.err.println("DEBUG>" + msg); + } + } + + static void debugPrintStackTrace(Exception exp) { + if (DEBUG) { + StackTraceElement[] els = exp.getStackTrace(); + for (int i = 0; i < els.length; i++) { + System.err.println("DEBUG>" + els[i].toString()); + } + } + } + public Object readObject(Oop oop) throws ClassNotFoundException { if (oop instanceof Instance) { return readInstance((Instance) oop); @@ -120,13 +135,96 @@ } protected Symbol javaLangString; + protected Symbol javaUtilHashtableEntry; + protected Symbol javaUtilHashtable; + protected Symbol javaUtilProperties; + + protected Symbol getVMSymbol(String name) { + return VM.getVM().getSymbolTable().probe(name); + } + protected Symbol javaLangString() { if (javaLangString == null) { - javaLangString = VM.getVM().getSymbolTable().probe("java/lang/String"); + javaLangString = getVMSymbol("java/lang/String"); } return javaLangString; } + protected Symbol javaUtilHashtableEntry() { + if (javaUtilHashtableEntry == null) { + javaUtilHashtableEntry = getVMSymbol("java/util/Hashtable$Entry"); + } + return javaUtilHashtableEntry; + } + + protected Symbol javaUtilHashtable() { + if (javaUtilHashtable == null) { + javaUtilHashtable = getVMSymbol("java/util/Hashtable"); + } + return javaUtilHashtable; + } + + protected Symbol javaUtilProperties() { + if (javaUtilProperties == null) { + javaUtilProperties = getVMSymbol("java/util/Properties"); + } + return javaUtilProperties; + } + + private void setHashtableEntry(java.util.Hashtable p, Oop oop) { + InstanceKlass ik = (InstanceKlass)oop.getKlass(); + OopField keyField = (OopField)ik.findField("key", "Ljava/lang/Object;"); + OopField valueField = (OopField)ik.findField("value", "Ljava/lang/Object;"); + OopField nextField = (OopField)ik.findField("next", "Ljava/util/Hashtable$Entry;"); + if (DEBUG) { + if (Assert.ASSERTS_ENABLED) { + Assert.that(ik.getName().equals(javaUtilHashtableEntry()), "Not a Hashtable$Entry?"); + Assert.that(keyField != null && valueField != null && nextField != null, "Invalid fields!"); + } + } + + Object key = null; + Object value = null; + Oop next = null; + try { + key = readObject(keyField.getValue(oop)); + value = readObject(valueField.getValue(oop)); + next = (Oop)nextField.getValue(oop); + // For Properties, should use setProperty(k, v). Since it only runs in SA + // using put(k, v) should be OK. 
+ p.put(key, value); + if (next != null) { + setHashtableEntry(p, next); + } + } catch (ClassNotFoundException ce) { + if( DEBUG) { + debugPrintln("Class not found " + ce); + debugPrintStackTrace(ce); + } + } + } + + protected Object getHashtable(Instance oop, boolean isProperties) { + InstanceKlass k = (InstanceKlass)oop.getKlass(); + OopField tableField = (OopField)k.findField("table", "[Ljava/util/Hashtable$Entry;"); + if (tableField == null) { + debugPrintln("Could not find field of [Ljava/util/Hashtable$Entry;"); + return null; + } + java.util.Hashtable table = (isProperties) ? new java.util.Properties() + : new java.util.Hashtable(); + ObjArray kvs = (ObjArray)tableField.getValue(oop); + long size = kvs.getLength(); + debugPrintln("Hashtable$Entry Size = " + size); + for (long i=0; i<size; i++) { + Oop entry = kvs.getObjAt(i); + if (entry != null && entry.isInstance()) { + setHashtableEntry(table, entry); + } + } + return table; + } + public Object readInstance(Instance oop) throws ClassNotFoundException { Object result = getFromObjTable(oop); if (result == null) { @@ -134,11 +232,21 @@ // Handle java.lang.String instances differently. As part of JSR-133, fields of immutable // classes have been made final. The algorithm below will not be able to read Strings from // debuggee (can't use reflection to set final fields). But, need to read Strings is very - // important. FIXME: need a framework to handle many other special cases. + // important. + // Same for Hashtable, key and hash are final, could not be set in the algorithm too. + // FIXME: need a framework to handle many other special cases. if (kls.getName().equals(javaLangString())) { return OopUtilities.stringOopToString(oop); } + if (kls.getName().equals(javaUtilHashtable())) { + return getHashtable(oop, false); + } + + if (kls.getName().equals(javaUtilProperties())) { + return getHashtable(oop, true); + } + Class clz = readClass(kls); try { result = clz.newInstance(); @@ -164,8 +272,8 @@ break; } catch (Exception exp) { if (DEBUG) { - System.err.println("Can't create object using " + c); - exp.printStackTrace(); + debugPrintln("Can't create object using " + c); + debugPrintStackTrace(exp); } } } @@ -329,8 +437,8 @@ arrayObj[ifd.getIndex()] = readObject(field.getValue(getObj())); } catch (Exception e) { if (DEBUG) { - System.err.println("Array element set failed for " + ifd); - e.printStackTrace(); + debugPrintln("Array element set failed for " + ifd); + debugPrintStackTrace(e); } } } @@ -348,8 +456,8 @@ private void printFieldSetError(java.lang.reflect.Field f, Exception ex) { if (DEBUG) { - if (f != null) System.err.println("Field set failed for " + f); - ex.printStackTrace(); + if (f != null) debugPrintln("Field set failed for " + f); + debugPrintStackTrace(ex); } } @@ -601,7 +709,7 @@ return Class.forName(className, true, cl); } catch (Exception e) { if (DEBUG) { - System.err.println("Can't load class " + className); + debugPrintln("Can't load class " + className); } throw new RuntimeException(e); }
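The Hashtable/Properties special case added above exists for the same reason as the String one: key and hash are final, so ObjectReader's generic reflective path cannot rebuild entries, and the reader instead walks the table[] array and the Hashtable$Entry.next chain directly. A hedged usage sketch — readObject(Oop) and the no-arg constructor are existing ObjectReader API per this diff, while locating propsOop (the Oop of a java.util.Properties instance in the target VM) is assumed to happen elsewhere:

    import sun.jvm.hotspot.oops.Oop;
    import sun.jvm.hotspot.utilities.ObjectReader;

    public class ReadPropertiesSketch {
      static java.util.Properties read(Oop propsOop) throws ClassNotFoundException {
        ObjectReader reader = new ObjectReader();
        return (java.util.Properties) reader.readObject(propsOop);
      }
    }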
--- a/hotspot/make/bsd/makefiles/universal.gmk	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/make/bsd/makefiles/universal.gmk	Tue Jul 03 18:23:04 2012 -0700
@@ -110,4 +110,5 @@
 
 .PHONY:	universal_product universal_fastdebug universal_debug \
 	all_product_universal all_fastdebug_universal all_debug_universal \
-	universalize export_universal copy_universal
+	universalize export_universal copy_universal \
+	$(UNIVERSAL_LIPO_LIST) $(UNIVERSAL_COPY_LIST)
--- a/hotspot/make/hotspot_version	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/make/hotspot_version	Tue Jul 03 18:23:04 2012 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=24
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=14
+HS_BUILD_NUMBER=15
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/hotspot/make/jprt.properties	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/make/jprt.properties	Tue Jul 03 18:23:04 2012 -0700
@@ -102,6 +102,11 @@
 jprt.my.linux.armvfp.jdk7u6=${jprt.my.linux.armvfp.jdk7}
 jprt.my.linux.armvfp=${jprt.my.linux.armvfp.${jprt.tools.default.release}}
 
+jprt.my.linux.armv6.jdk8=linux_armv6_2.6
+jprt.my.linux.armv6.jdk7=linux_armv6_2.6
+jprt.my.linux.armv6.jdk7u6=${jprt.my.linux.armv6.jdk7}
+jprt.my.linux.armv6=${jprt.my.linux.armv6.${jprt.tools.default.release}}
+
 jprt.my.linux.armsflt.jdk8=linux_armsflt_2.6
 jprt.my.linux.armsflt.jdk7=linux_armsflt_2.6
 jprt.my.linux.armsflt.jdk7u6=${jprt.my.linux.armsflt.jdk7}
@@ -134,7 +139,7 @@
     ${jprt.my.macosx.x64}-{product|fastdebug|debug}, \
     ${jprt.my.windows.i586}-{product|fastdebug|debug}, \
     ${jprt.my.windows.x64}-{product|fastdebug|debug}, \
-    ${jprt.my.linux.armvfp}-{product|fastdebug}
+    ${jprt.my.linux.armv6}-{product|fastdebug}
 
 jprt.build.targets.open= \
     ${jprt.my.solaris.i586}-{productOpen}, \
--- a/hotspot/make/pic.make	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/make/pic.make	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2006, 2007, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,13 @@
 
 ifneq ($(OSNAME), windows)
   ifndef LP64
+    PARTIAL_NONPIC=1
+  endif
+  PIC_ARCH = ppc
+  ifneq ("$(filter $(PIC_ARCH),$(BUILDARCH))","")
+    PARTIAL_NONPIC=0
+  endif
+  ifeq ($(PARTIAL_NONPIC),1)
     NONPIC_DIRS  = memory oops gc_implementation gc_interface
     NONPIC_DIRS  := $(foreach dir,$(NONPIC_DIRS), $(GAMMADIR)/src/share/vm/$(dir))
     # Look for source files under NONPIC_DIRS
--- a/hotspot/make/solaris/makefiles/add_gnu_debuglink.make	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/make/solaris/makefiles/add_gnu_debuglink.make	Tue Jul 03 18:23:04 2012 -0700
@@ -24,8 +24,11 @@
 
 # Rules to build add_gnu_debuglink, used by vm.make on Solaris
 
-GENERATED = ../generated
-ADD_GNU_DEBUGLINK = $(GENERATED)/add_gnu_debuglink
+# Allow $(ADD_GNU_DEBUGLINK) to be called from any directory.
+# We don't set or use the GENERATED macro to avoid affecting
+# other HotSpot Makefiles.
+TOPDIR = $(shell echo `pwd`)
+ADD_GNU_DEBUGLINK = $(TOPDIR)/../generated/add_gnu_debuglink
 
 ADD_GNU_DEBUGLINK_DIR = $(GAMMADIR)/src/os/solaris/add_gnu_debuglink
 ADD_GNU_DEBUGLINK_SRC = $(ADD_GNU_DEBUGLINK_DIR)/add_gnu_debuglink.c
--- a/hotspot/make/solaris/makefiles/defs.make	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/make/solaris/makefiles/defs.make	Tue Jul 03 18:23:04 2012 -0700
@@ -203,10 +203,18 @@
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.diz
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_db.diz
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_dtrace.diz
+      ifeq ($(ARCH_DATA_MODEL),32)
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_db.diz
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_dtrace.diz
+      endif
     else
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.debuginfo
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_db.debuginfo
      EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_dtrace.debuginfo
+      ifeq ($(ARCH_DATA_MODEL),32)
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_db.debuginfo
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_dtrace.debuginfo
+      endif
     endif
   endif
 endif
--- a/hotspot/make/solaris/makefiles/dtrace.make Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/make/solaris/makefiles/dtrace.make Tue Jul 03 18:23:04 2012 -0700 @@ -94,23 +94,24 @@ # Making 64/libjvm_db.so: 64-bit version of libjvm_db.so which handles 32-bit libjvm.so ifneq ("${ISA}","${BUILDARCH}") -XLIBJVM_DB = 64/$(LIBJVM_DB) -XLIBJVM_DB_G = 64/$(LIBJVM_DB_G) -XLIBJVM_DTRACE = 64/$(LIBJVM_DTRACE) -XLIBJVM_DTRACE_G = 64/$(LIBJVM_DTRACE_G) +XLIBJVM_DIR = 64 +XLIBJVM_DB = $(XLIBJVM_DIR)/$(LIBJVM_DB) +XLIBJVM_DB_G = $(XLIBJVM_DIR)/$(LIBJVM_DB_G) +XLIBJVM_DTRACE = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE) +XLIBJVM_DTRACE_G = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_G) -XLIBJVM_DB_DEBUGINFO = 64/$(LIBJVM_DB_DEBUGINFO) -XLIBJVM_DB_DIZ = 64/$(LIBJVM_DB_DIZ) -XLIBJVM_DB_G_DEBUGINFO = 64/$(LIBJVM_DB_G_DEBUGINFO) -XLIBJVM_DB_G_DIZ = 64/$(LIBJVM_DB_G_DIZ) -XLIBJVM_DTRACE_DEBUGINFO = 64/$(LIBJVM_DTRACE_DEBUGINFO) -XLIBJVM_DTRACE_DIZ = 64/$(LIBJVM_DTRACE_DIZ) -XLIBJVM_DTRACE_G_DEBUGINFO = 64/$(LIBJVM_DTRACE_G_DEBUGINFO) -XLIBJVM_DTRACE_G_DIZ = 64/$(LIBJVM_DTRACE_G_DIZ) +XLIBJVM_DB_DEBUGINFO = $(XLIBJVM_DIR)/$(LIBJVM_DB_DEBUGINFO) +XLIBJVM_DB_DIZ = $(XLIBJVM_DIR)/$(LIBJVM_DB_DIZ) +XLIBJVM_DB_G_DEBUGINFO = $(XLIBJVM_DIR)/$(LIBJVM_DB_G_DEBUGINFO) +XLIBJVM_DB_G_DIZ = $(XLIBJVM_DIR)/$(LIBJVM_DB_G_DIZ) +XLIBJVM_DTRACE_DEBUGINFO = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_DEBUGINFO) +XLIBJVM_DTRACE_DIZ = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_DIZ) +XLIBJVM_DTRACE_G_DEBUGINFO = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_G_DEBUGINFO) +XLIBJVM_DTRACE_G_DIZ = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_G_DIZ) $(XLIBJVM_DB): $(ADD_GNU_DEBUGLINK) $(FIX_EMPTY_SEC_HDR_FLAGS) $(DTRACE_SRCDIR)/$(JVM_DB).c $(JVMOFFS).h $(LIBJVM_DB_MAPFILE) @echo Making $@ - $(QUIETLY) mkdir -p 64/ ; \ + $(QUIETLY) mkdir -p $(XLIBJVM_DIR) ; \ $(CC) $(SYMFLAG) $(ARCHFLAG/$(ISA)) -D$(TYPE) -I. -I$(GENERATED) \ $(SHARED_FLAG) $(LFLAGS_JVM_DB) -o $@ $(DTRACE_SRCDIR)/$(JVM_DB).c -lc [ -f $(XLIBJVM_DB_G) ] || { ln -s $(LIBJVM_DB) $(XLIBJVM_DB_G); } @@ -124,8 +125,10 @@ $(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(XLIBJVM_DB_DEBUGINFO) # $(OBJCOPY) --add-gnu-debuglink=... corrupts SUNW_* sections. # Use $(ADD_GNU_DEBUGLINK) until a fixed $(OBJCOPY) is available. 
-# $(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(XLIBJVM_DB_DEBUGINFO) $@ - $(QUIETLY) $(ADD_GNU_DEBUGLINK) $(XLIBJVM_DB_DEBUGINFO) $@ +# $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB) ; +# Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not +# in the link name: + ( cd $(XLIBJVM_DIR) && $(ADD_GNU_DEBUGLINK) $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB) ) ifeq ($(STRIP_POLICY),all_strip) $(QUIETLY) $(STRIP) $@ else @@ -134,17 +137,19 @@ # implied else here is no stripping at all endif endif - [ -f $(XLIBJVM_DB_G_DEBUGINFO) ] || { ln -s $(XLIBJVM_DB_DEBUGINFO) $(XLIBJVM_DB_G_DEBUGINFO); } + [ -f $(XLIBJVM_DB_G_DEBUGINFO) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO); } ifeq ($(ZIP_DEBUGINFO_FILES),1) - $(ZIPEXE) -q -y $(XLIBJVM_DB_DIZ) $(XLIBJVM_DB_DEBUGINFO) $(XLIBJVM_DB_G_DEBUGINFO) +# Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not +# in the archived name: + ( cd $(XLIBJVM_DIR) && $(ZIPEXE) -q -y $(LIBJVM_DB_DIZ) $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO) ) $(RM) $(XLIBJVM_DB_DEBUGINFO) $(XLIBJVM_DB_G_DEBUGINFO) - [ -f $(XLIBJVM_DB_G_DIZ) ] || { ln -s $(XLIBJVM_DB_DIZ) $(XLIBJVM_DB_G_DIZ); } + [ -f $(XLIBJVM_DB_G_DIZ) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DB_DIZ) $(LIBJVM_DB_G_DIZ); } endif endif $(XLIBJVM_DTRACE): $(ADD_GNU_DEBUGLINK) $(FIX_EMPTY_SEC_HDR_FLAGS) $(DTRACE_SRCDIR)/$(JVM_DTRACE).c $(DTRACE_SRCDIR)/$(JVM_DTRACE).h $(LIBJVM_DTRACE_MAPFILE) @echo Making $@ - $(QUIETLY) mkdir -p 64/ ; \ + $(QUIETLY) mkdir -p $(XLIBJVM_DIR) ; \ $(CC) $(SYMFLAG) $(ARCHFLAG/$(ISA)) -D$(TYPE) -I. \ $(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c -lc -lthread -ldoor [ -f $(XLIBJVM_DTRACE_G) ] || { ln -s $(LIBJVM_DTRACE) $(XLIBJVM_DTRACE_G); } @@ -153,8 +158,10 @@ $(QUIETLY) $(FIX_EMPTY_SEC_HDR_FLAGS) $@ $(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(XLIBJVM_DTRACE_DEBUGINFO) # $(OBJCOPY) --add-gnu-debuglink=... corrupts SUNW_* sections. -# $(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(XLIBJVM_DTRACE_DEBUGINFO) $@ - $(QUIETLY) $(ADD_GNU_DEBUGLINK) $(XLIBJVM_DTRACE_DEBUGINFO) $@ +# $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE) ; +# Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not +# in the link name: + ( cd $(XLIBJVM_DIR) && $(ADD_GNU_DEBUGLINK) $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE) ) ifeq ($(STRIP_POLICY),all_strip) $(QUIETLY) $(STRIP) $@ else @@ -163,11 +170,13 @@ # implied else here is no stripping at all endif endif - [ -f $(XLIBJVM_DTRACE_G_DEBUGINFO) ] || { ln -s $(XLIBJVM_DTRACE_DEBUGINFO) $(XLIBJVM_DTRACE_G_DEBUGINFO); } + [ -f $(XLIBJVM_DTRACE_G_DEBUGINFO) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO); } ifeq ($(ZIP_DEBUGINFO_FILES),1) - $(ZIPEXE) -q -y $(XLIBJVM_DTRACE_DIZ) $(XLIBJVM_DTRACE_DEBUGINFO) $(XLIBJVM_DTRACE_G_DEBUGINFO) +# Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not +# in the archived name: + ( cd $(XLIBJVM_DIR) && $(ZIPEXE) -q -y $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO) ) $(RM) $(XLIBJVM_DTRACE_DEBUGINFO) $(XLIBJVM_DTRACE_G_DEBUGINFO) - [ -f $(XLIBJVM_DTRACE_G_DIZ) ] || { ln -s $(XLIBJVM_DTRACE_DIZ) $(XLIBJVM_DTRACE_G_DIZ); } + [ -f $(XLIBJVM_DTRACE_G_DIZ) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_G_DIZ); } endif endif
--- a/hotspot/make/solaris/makefiles/fix_empty_sec_hdr_flags.make	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/make/solaris/makefiles/fix_empty_sec_hdr_flags.make	Tue Jul 03 18:23:04 2012 -0700
@@ -24,8 +24,11 @@
 
 # Rules to build fix_empty_sec_hdr_flags, used by vm.make on Solaris
 
-GENERATED = ../generated
-FIX_EMPTY_SEC_HDR_FLAGS = $(GENERATED)/fix_empty_sec_hdr_flags
+# Allow $(FIX_EMPTY_SEC_HDR_FLAGS) to be called from any directory.
+# We don't set or use the GENERATED macro to avoid affecting
+# other HotSpot Makefiles.
+TOPDIR = $(shell echo `pwd`)
+FIX_EMPTY_SEC_HDR_FLAGS = $(TOPDIR)/../generated/fix_empty_sec_hdr_flags
 
 FIX_EMPTY_SEC_HDR_FLAGS_DIR = $(GAMMADIR)/src/os/solaris/fix_empty_sec_hdr_flags
 FIX_EMPTY_SEC_HDR_FLAGS_SRC = $(FIX_EMPTY_SEC_HDR_FLAGS_DIR)/fix_empty_sec_hdr_flags.c
--- a/hotspot/src/cpu/sparc/vm/sparc.ad Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/cpu/sparc/vm/sparc.ad Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ // -// Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -678,18 +678,26 @@ static inline jdouble replicate_immI(int con, int count, int width) { // Load a constant replicated "count" times with width "width" + assert(count*width == 8 && width <= 4, "sanity"); int bit_width = width * 8; - jlong elt_val = con; - elt_val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits - jlong val = elt_val; + jlong val = con; + val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits for (int i = 0; i < count - 1; i++) { - val <<= bit_width; - val |= elt_val; + val |= (val << bit_width); } jdouble dval = *((jdouble*) &val); // coerce to double type return dval; } +static inline jdouble replicate_immF(float con) { + // Replicate float con 2 times and pack into vector. + int val = *((int*)&con); + jlong lval = val; + lval = (lval << 32) | (lval & 0xFFFFFFFFl); + jdouble dval = *((jdouble*) &lval); // coerce to double type + return dval; +} + // Standard Sparc opcode form2 field breakdown static inline void emit2_19(CodeBuffer &cbuf, int f30, int f29, int f25, int f22, int f20, int f19, int f0 ) { f0 &= (1<<19)-1; // Mask displacement to 19 bits @@ -791,6 +799,7 @@ case Assembler::stdf_op3: st_op = Op_StoreD; break; case Assembler::ldsb_op3: ld_op = Op_LoadB; break; + case Assembler::ldub_op3: ld_op = Op_LoadUB; break; case Assembler::lduh_op3: ld_op = Op_LoadUS; break; case Assembler::ldsh_op3: ld_op = Op_LoadS; break; case Assembler::ldx_op3: // may become LoadP or stay LoadI @@ -799,7 +808,6 @@ case Assembler::ldd_op3: ld_op = Op_LoadL; break; case Assembler::ldf_op3: ld_op = Op_LoadF; break; case Assembler::lddf_op3: ld_op = Op_LoadD; break; - case Assembler::ldub_op3: ld_op = Op_LoadB; break; case Assembler::prefetch_op3: ld_op = Op_LoadI; break; default: ShouldNotReachHere(); @@ -840,10 +848,7 @@ !(n->ideal_Opcode()==Op_PrefetchRead && ld_op==Op_LoadI) && !(n->ideal_Opcode()==Op_PrefetchWrite && ld_op==Op_LoadI) && !(n->ideal_Opcode()==Op_PrefetchAllocation && ld_op==Op_LoadI) && - !(n->ideal_Opcode()==Op_Load2I && ld_op==Op_LoadD) && - !(n->ideal_Opcode()==Op_Load4C && ld_op==Op_LoadD) && - !(n->ideal_Opcode()==Op_Load4S && ld_op==Op_LoadD) && - !(n->ideal_Opcode()==Op_Load8B && ld_op==Op_LoadD) && + !(n->ideal_Opcode()==Op_LoadVector && ld_op==Op_LoadD) && !(n->rule() == loadUB_rule)) { verify_oops_warning(n, n->ideal_Opcode(), ld_op); } @@ -855,9 +860,7 @@ !(n->ideal_Opcode()==Op_StoreI && st_op==Op_StoreF) && !(n->ideal_Opcode()==Op_StoreF && st_op==Op_StoreI) && !(n->ideal_Opcode()==Op_StoreL && st_op==Op_StoreI) && - !(n->ideal_Opcode()==Op_Store2I && st_op==Op_StoreD) && - !(n->ideal_Opcode()==Op_Store4C && st_op==Op_StoreD) && - !(n->ideal_Opcode()==Op_Store8B && st_op==Op_StoreD) && + !(n->ideal_Opcode()==Op_StoreVector && st_op==Op_StoreD) && !(n->ideal_Opcode()==Op_StoreD && st_op==Op_StoreI && n->rule() == storeD0_rule)) { verify_oops_warning(n, n->ideal_Opcode(), st_op); } @@ -1849,16 +1852,45 @@ address last_rethrow = NULL; // debugging aid for Rethrow encoding #endif +// Map Types to machine register types +const int Matcher::base2reg[Type::lastype] = { + 
Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN, + Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */ + 0, Op_RegD, 0, 0, /* Vectors */ + Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */ + 0, 0/*abio*/, + Op_RegP /* Return address */, 0, /* the memories */ + Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD, + 0 /*bottom*/ +}; + // Vector width in bytes -const uint Matcher::vector_width_in_bytes(void) { +const int Matcher::vector_width_in_bytes(BasicType bt) { + assert(MaxVectorSize == 8, ""); return 8; } // Vector ideal reg -const uint Matcher::vector_ideal_reg(void) { +const int Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize == 8, ""); return Op_RegD; } +// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} + +const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); // Same as max. +} + +// SPARC doesn't support misaligned vectors store/load. +const bool Matcher::misaligned_vectors_ok() { + return false; +} + // USII supports fxtof through the whole range of number, USIII doesn't const bool Matcher::convL2FSupported(void) { return VM_Version::has_fast_fxtof(); @@ -3125,50 +3157,6 @@ __ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad) ); %} - enc_class enc_repl8b( iRegI src, iRegL dst ) %{ - MacroAssembler _masm(&cbuf); - Register src_reg = reg_to_register_object($src$$reg); - Register dst_reg = reg_to_register_object($dst$$reg); - __ sllx(src_reg, 56, dst_reg); - __ srlx(dst_reg, 8, O7); - __ or3 (dst_reg, O7, dst_reg); - __ srlx(dst_reg, 16, O7); - __ or3 (dst_reg, O7, dst_reg); - __ srlx(dst_reg, 32, O7); - __ or3 (dst_reg, O7, dst_reg); - %} - - enc_class enc_repl4b( iRegI src, iRegL dst ) %{ - MacroAssembler _masm(&cbuf); - Register src_reg = reg_to_register_object($src$$reg); - Register dst_reg = reg_to_register_object($dst$$reg); - __ sll(src_reg, 24, dst_reg); - __ srl(dst_reg, 8, O7); - __ or3(dst_reg, O7, dst_reg); - __ srl(dst_reg, 16, O7); - __ or3(dst_reg, O7, dst_reg); - %} - - enc_class enc_repl4s( iRegI src, iRegL dst ) %{ - MacroAssembler _masm(&cbuf); - Register src_reg = reg_to_register_object($src$$reg); - Register dst_reg = reg_to_register_object($dst$$reg); - __ sllx(src_reg, 48, dst_reg); - __ srlx(dst_reg, 16, O7); - __ or3 (dst_reg, O7, dst_reg); - __ srlx(dst_reg, 32, O7); - __ or3 (dst_reg, O7, dst_reg); - %} - - enc_class enc_repl2i( iRegI src, iRegL dst ) %{ - MacroAssembler _masm(&cbuf); - Register src_reg = reg_to_register_object($src$$reg); - Register dst_reg = reg_to_register_object($dst$$reg); - __ sllx(src_reg, 32, dst_reg); - __ srlx(dst_reg, 32, O7); - __ or3 (dst_reg, O7, dst_reg); - %} - %} //----------FRAME-------------------------------------------------------------- @@ -5932,50 +5920,6 @@ ins_pipe(iload_mem); %} -// Load Aligned Packed Byte into a Double Register -instruct loadA8B(regD dst, memory mem) %{ - match(Set dst (Load8B mem)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "LDDF $mem,$dst\t! packed8B" %} - opcode(Assembler::lddf_op3); - ins_encode(simple_form3_mem_reg( mem, dst ) ); - ins_pipe(floadD_mem); -%} - -// Load Aligned Packed Char into a Double Register -instruct loadA4C(regD dst, memory mem) %{ - match(Set dst (Load4C mem)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "LDDF $mem,$dst\t! 
packed4C" %} - opcode(Assembler::lddf_op3); - ins_encode(simple_form3_mem_reg( mem, dst ) ); - ins_pipe(floadD_mem); -%} - -// Load Aligned Packed Short into a Double Register -instruct loadA4S(regD dst, memory mem) %{ - match(Set dst (Load4S mem)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "LDDF $mem,$dst\t! packed4S" %} - opcode(Assembler::lddf_op3); - ins_encode(simple_form3_mem_reg( mem, dst ) ); - ins_pipe(floadD_mem); -%} - -// Load Aligned Packed Int into a Double Register -instruct loadA2I(regD dst, memory mem) %{ - match(Set dst (Load2I mem)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "LDDF $mem,$dst\t! packed2I" %} - opcode(Assembler::lddf_op3); - ins_encode(simple_form3_mem_reg( mem, dst ) ); - ins_pipe(floadD_mem); -%} - // Load Range instruct loadRange(iRegI dst, memory mem) %{ match(Set dst (LoadRange mem)); @@ -6599,17 +6543,6 @@ ins_pipe(fstoreF_mem_zero); %} -// Store Aligned Packed Bytes in Double register to memory -instruct storeA8B(memory mem, regD src) %{ - match(Set mem (Store8B mem src)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "STDF $src,$mem\t! packed8B" %} - opcode(Assembler::stdf_op3); - ins_encode(simple_form3_mem_reg( mem, src ) ); - ins_pipe(fstoreD_mem_reg); -%} - // Convert oop pointer into compressed form instruct encodeHeapOop(iRegN dst, iRegP src) %{ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); @@ -6654,62 +6587,6 @@ %} -// Store Zero into Aligned Packed Bytes -instruct storeA8B0(memory mem, immI0 zero) %{ - match(Set mem (Store8B mem zero)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "STX $zero,$mem\t! packed8B" %} - opcode(Assembler::stx_op3); - ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); - ins_pipe(fstoreD_mem_zero); -%} - -// Store Aligned Packed Chars/Shorts in Double register to memory -instruct storeA4C(memory mem, regD src) %{ - match(Set mem (Store4C mem src)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "STDF $src,$mem\t! packed4C" %} - opcode(Assembler::stdf_op3); - ins_encode(simple_form3_mem_reg( mem, src ) ); - ins_pipe(fstoreD_mem_reg); -%} - -// Store Zero into Aligned Packed Chars/Shorts -instruct storeA4C0(memory mem, immI0 zero) %{ - match(Set mem (Store4C mem (Replicate4C zero))); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "STX $zero,$mem\t! packed4C" %} - opcode(Assembler::stx_op3); - ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); - ins_pipe(fstoreD_mem_zero); -%} - -// Store Aligned Packed Ints in Double register to memory -instruct storeA2I(memory mem, regD src) %{ - match(Set mem (Store2I mem src)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "STDF $src,$mem\t! packed2I" %} - opcode(Assembler::stdf_op3); - ins_encode(simple_form3_mem_reg( mem, src ) ); - ins_pipe(fstoreD_mem_reg); -%} - -// Store Zero into Aligned Packed Ints -instruct storeA2I0(memory mem, immI0 zero) %{ - match(Set mem (Store2I mem zero)); - ins_cost(MEMORY_REF_COST); - size(4); - format %{ "STX $zero,$mem\t! 
packed2I" %} - opcode(Assembler::stx_op3); - ins_encode(simple_form3_mem_reg( mem, R_G0 ) ); - ins_pipe(fstoreD_mem_zero); -%} - - //----------MemBar Instructions----------------------------------------------- // Memory barrier flavors @@ -8880,150 +8757,6 @@ ins_pipe(ialu_reg_imm); %} -// Replicate scalar to packed byte values in Double register -instruct Repl8B_reg_helper(iRegL dst, iRegI src) %{ - effect(DEF dst, USE src); - format %{ "SLLX $src,56,$dst\n\t" - "SRLX $dst, 8,O7\n\t" - "OR $dst,O7,$dst\n\t" - "SRLX $dst,16,O7\n\t" - "OR $dst,O7,$dst\n\t" - "SRLX $dst,32,O7\n\t" - "OR $dst,O7,$dst\t! replicate8B" %} - ins_encode( enc_repl8b(src, dst)); - ins_pipe(ialu_reg); -%} - -// Replicate scalar to packed byte values in Double register -instruct Repl8B_reg(stackSlotD dst, iRegI src) %{ - match(Set dst (Replicate8B src)); - expand %{ - iRegL tmp; - Repl8B_reg_helper(tmp, src); - regL_to_stkD(dst, tmp); - %} -%} - -// Replicate scalar constant to packed byte values in Double register -instruct Repl8B_immI(regD dst, immI13 con, o7RegI tmp) %{ - match(Set dst (Replicate8B con)); - effect(KILL tmp); - format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl8B($con)" %} - ins_encode %{ - // XXX This is a quick fix for 6833573. - //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 8, 1)), $dst$$FloatRegister); - RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 8, 1)), $tmp$$Register); - __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); - %} - ins_pipe(loadConFD); -%} - -// Replicate scalar to packed char values into stack slot -instruct Repl4C_reg_helper(iRegL dst, iRegI src) %{ - effect(DEF dst, USE src); - format %{ "SLLX $src,48,$dst\n\t" - "SRLX $dst,16,O7\n\t" - "OR $dst,O7,$dst\n\t" - "SRLX $dst,32,O7\n\t" - "OR $dst,O7,$dst\t! replicate4C" %} - ins_encode( enc_repl4s(src, dst) ); - ins_pipe(ialu_reg); -%} - -// Replicate scalar to packed char values into stack slot -instruct Repl4C_reg(stackSlotD dst, iRegI src) %{ - match(Set dst (Replicate4C src)); - expand %{ - iRegL tmp; - Repl4C_reg_helper(tmp, src); - regL_to_stkD(dst, tmp); - %} -%} - -// Replicate scalar constant to packed char values in Double register -instruct Repl4C_immI(regD dst, immI con, o7RegI tmp) %{ - match(Set dst (Replicate4C con)); - effect(KILL tmp); - format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4C($con)" %} - ins_encode %{ - // XXX This is a quick fix for 6833573. - //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister); - RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register); - __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); - %} - ins_pipe(loadConFD); -%} - -// Replicate scalar to packed short values into stack slot -instruct Repl4S_reg_helper(iRegL dst, iRegI src) %{ - effect(DEF dst, USE src); - format %{ "SLLX $src,48,$dst\n\t" - "SRLX $dst,16,O7\n\t" - "OR $dst,O7,$dst\n\t" - "SRLX $dst,32,O7\n\t" - "OR $dst,O7,$dst\t! 
replicate4S" %} - ins_encode( enc_repl4s(src, dst) ); - ins_pipe(ialu_reg); -%} - -// Replicate scalar to packed short values into stack slot -instruct Repl4S_reg(stackSlotD dst, iRegI src) %{ - match(Set dst (Replicate4S src)); - expand %{ - iRegL tmp; - Repl4S_reg_helper(tmp, src); - regL_to_stkD(dst, tmp); - %} -%} - -// Replicate scalar constant to packed short values in Double register -instruct Repl4S_immI(regD dst, immI con, o7RegI tmp) %{ - match(Set dst (Replicate4S con)); - effect(KILL tmp); - format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4S($con)" %} - ins_encode %{ - // XXX This is a quick fix for 6833573. - //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister); - RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register); - __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); - %} - ins_pipe(loadConFD); -%} - -// Replicate scalar to packed int values in Double register -instruct Repl2I_reg_helper(iRegL dst, iRegI src) %{ - effect(DEF dst, USE src); - format %{ "SLLX $src,32,$dst\n\t" - "SRLX $dst,32,O7\n\t" - "OR $dst,O7,$dst\t! replicate2I" %} - ins_encode( enc_repl2i(src, dst)); - ins_pipe(ialu_reg); -%} - -// Replicate scalar to packed int values in Double register -instruct Repl2I_reg(stackSlotD dst, iRegI src) %{ - match(Set dst (Replicate2I src)); - expand %{ - iRegL tmp; - Repl2I_reg_helper(tmp, src); - regL_to_stkD(dst, tmp); - %} -%} - -// Replicate scalar zero constant to packed int values in Double register -instruct Repl2I_immI(regD dst, immI con, o7RegI tmp) %{ - match(Set dst (Replicate2I con)); - effect(KILL tmp); - format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2I($con)" %} - ins_encode %{ - // XXX This is a quick fix for 6833573. - //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 2, 4)), $dst$$FloatRegister); - RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 2, 4)), $tmp$$Register); - __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); - %} - ins_pipe(loadConFD); -%} - //----------Control Flow Instructions------------------------------------------ // Compare Instructions // Compare Integers @@ -10742,6 +10475,308 @@ ins_pipe(istore_mem_reg); %} +// ====================VECTOR INSTRUCTIONS===================================== + +// Load Aligned Packed values into a Double Register +instruct loadV8(regD dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "LDDF $mem,$dst\t! load vector (8 bytes)" %} + ins_encode %{ + __ ldf(FloatRegisterImpl::D, $mem$$Address, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(floadD_mem); +%} + +// Store Vector in Double register to memory +instruct storeV8(memory mem, regD src) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STDF $src,$mem\t! 
store vector (8 bytes)" %} + ins_encode %{ + __ stf(FloatRegisterImpl::D, as_DoubleFloatRegister($src$$reg), $mem$$Address); + %} + ins_pipe(fstoreD_mem_reg); +%} + +// Store Zero into vector in memory +instruct storeV8B_zero(memory mem, immI0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateB zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! store zero vector (8 bytes)" %} + ins_encode %{ + __ stx(G0, $mem$$Address); + %} + ins_pipe(fstoreD_mem_zero); +%} + +instruct storeV4S_zero(memory mem, immI0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateS zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! store zero vector (4 shorts)" %} + ins_encode %{ + __ stx(G0, $mem$$Address); + %} + ins_pipe(fstoreD_mem_zero); +%} + +instruct storeV2I_zero(memory mem, immI0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateI zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! store zero vector (2 ints)" %} + ins_encode %{ + __ stx(G0, $mem$$Address); + %} + ins_pipe(fstoreD_mem_zero); +%} + +instruct storeV2F_zero(memory mem, immF0 zero) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem (ReplicateF zero))); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STX $zero,$mem\t! store zero vector (2 floats)" %} + ins_encode %{ + __ stx(G0, $mem$$Address); + %} + ins_pipe(fstoreD_mem_zero); +%} + +// Replicate scalar to packed byte values into Double register +instruct Repl8B_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{ + predicate(n->as_Vector()->length() == 8 && UseVIS >= 3); + match(Set dst (ReplicateB src)); + effect(DEF dst, USE src, TEMP tmp, KILL tmp2); + format %{ "SLLX $src,56,$tmp\n\t" + "SRLX $tmp, 8,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,16,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\t! replicate8B\n\t" + "MOVXTOD $tmp,$dst\t! MoveL2D" %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ sllx(Rsrc, 56, Rtmp); + __ srlx(Rtmp, 8, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 16, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(ialu_reg); +%} + +// Replicate scalar to packed byte values into Double stack +instruct Repl8B_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{ + predicate(n->as_Vector()->length() == 8 && UseVIS < 3); + match(Set dst (ReplicateB src)); + effect(DEF dst, USE src, TEMP tmp, KILL tmp2); + format %{ "SLLX $src,56,$tmp\n\t" + "SRLX $tmp, 8,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,16,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\t! replicate8B\n\t" + "STX $tmp,$dst\t! 
regL to stkD" %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ sllx(Rsrc, 56, Rtmp); + __ srlx(Rtmp, 8, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 16, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ set ($dst$$disp + STACK_BIAS, Rtmp2); + __ stx (Rtmp, Rtmp2, $dst$$base$$Register); + %} + ins_pipe(ialu_reg); +%} + +// Replicate scalar constant to packed byte values in Double register +instruct Repl8B_immI(regD dst, immI13 con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl8B($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. + //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 8, 1)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 8, 1)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + +// Replicate scalar to packed char/short values into Double register +instruct Repl4S_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{ + predicate(n->as_Vector()->length() == 4 && UseVIS >= 3); + match(Set dst (ReplicateS src)); + effect(DEF dst, USE src, TEMP tmp, KILL tmp2); + format %{ "SLLX $src,48,$tmp\n\t" + "SRLX $tmp,16,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\t! replicate4S\n\t" + "MOVXTOD $tmp,$dst\t! MoveL2D" %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ sllx(Rsrc, 48, Rtmp); + __ srlx(Rtmp, 16, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(ialu_reg); +%} + +// Replicate scalar to packed char/short values into Double stack +instruct Repl4S_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{ + predicate(n->as_Vector()->length() == 4 && UseVIS < 3); + match(Set dst (ReplicateS src)); + effect(DEF dst, USE src, TEMP tmp, KILL tmp2); + format %{ "SLLX $src,48,$tmp\n\t" + "SRLX $tmp,16,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\t! replicate4S\n\t" + "STX $tmp,$dst\t! regL to stkD" %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ sllx(Rsrc, 48, Rtmp); + __ srlx(Rtmp, 16, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ set ($dst$$disp + STACK_BIAS, Rtmp2); + __ stx (Rtmp, Rtmp2, $dst$$base$$Register); + %} + ins_pipe(ialu_reg); +%} + +// Replicate scalar constant to packed char/short values in Double register +instruct Repl4S_immI(regD dst, immI con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4S($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. 
+ //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + +// Replicate scalar to packed int values into Double register +instruct Repl2I_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{ + predicate(n->as_Vector()->length() == 2 && UseVIS >= 3); + match(Set dst (ReplicateI src)); + effect(DEF dst, USE src, TEMP tmp, KILL tmp2); + format %{ "SLLX $src,32,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\t! replicate2I\n\t" + "MOVXTOD $tmp,$dst\t! MoveL2D" %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ sllx(Rsrc, 32, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(ialu_reg); +%} + +// Replicate scalar to packed int values into Double stack +instruct Repl2I_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{ + predicate(n->as_Vector()->length() == 2 && UseVIS < 3); + match(Set dst (ReplicateI src)); + effect(DEF dst, USE src, TEMP tmp, KILL tmp2); + format %{ "SLLX $src,32,$tmp\n\t" + "SRLX $tmp,32,$tmp2\n\t" + "OR $tmp,$tmp2,$tmp\t! replicate2I\n\t" + "STX $tmp,$dst\t! regL to stkD" %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + Register Rtmp2 = $tmp2$$Register; + __ sllx(Rsrc, 32, Rtmp); + __ srlx(Rtmp, 32, Rtmp2); + __ or3 (Rtmp, Rtmp2, Rtmp); + __ set ($dst$$disp + STACK_BIAS, Rtmp2); + __ stx (Rtmp, Rtmp2, $dst$$base$$Register); + %} + ins_pipe(ialu_reg); +%} + +// Replicate scalar zero constant to packed int values in Double register +instruct Repl2I_immI(regD dst, immI con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2I($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. + //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 2, 4)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 2, 4)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + +// Replicate scalar to packed float values into Double stack +instruct Repl2F_stk(stackSlotD dst, regF src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + ins_cost(MEMORY_REF_COST*2); + format %{ "STF $src,$dst.hi\t! packed2F\n\t" + "STF $src,$dst.lo" %} + opcode(Assembler::stf_op3); + ins_encode(simple_form3_mem_reg(dst, src), form3_mem_plus_4_reg(dst, src)); + ins_pipe(fstoreF_stk_reg); +%} + +// Replicate scalar zero constant to packed float values in Double register +instruct Repl2F_immF(regD dst, immF con, o7RegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF con)); + effect(KILL tmp); + format %{ "LDDF [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2F($con)" %} + ins_encode %{ + // XXX This is a quick fix for 6833573. 
+ //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immF($con$$constant)), $dst$$FloatRegister); + RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immF($con$$constant)), $tmp$$Register); + __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg)); + %} + ins_pipe(loadConFD); +%} + //----------PEEPHOLE RULES----------------------------------------------------- // These must follow all instruction definitions as they use the names // defined in the instructions definitions.
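The reworked replicate_immI helper near the top of this sparc.ad diff packs count copies of a width-byte immediate into one 64-bit pattern, which the Repl*_immI instructions above then load from the constant table. A worked Java transcription of that arithmetic (a sketch only; jlong becomes long, and the bit-coercion to jdouble is omitted):

    static long replicate(int con, int count, int width) {
      assert count * width == 8 && width <= 4 : "sanity";
      int bitWidth = width * 8;
      long val = con & ((1L << bitWidth) - 1);  // mask off sign-extension bits
      for (int i = 0; i < count - 1; i++) {
        val |= (val << bitWidth);               // extend the pattern by one copy per pass
      }
      return val;  // e.g. replicate(0x1f, 8, 1) == 0x1f1f1f1f1f1f1f1fL, the Repl8B constant
    }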
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -217,6 +217,8 @@ // Currently not supported anywhere. FLAG_SET_DEFAULT(UseFPUForSpilling, false); + MaxVectorSize = 8; + assert((InteriorEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); #endif
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Tue Jul 03 18:23:04 2012 -0700 @@ -1637,6 +1637,13 @@ emit_byte(0xC0 | encode); } +void Assembler::movlhps(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE); + emit_byte(0x16); + emit_byte(0xC0 | encode); +} + void Assembler::movb(Register dst, Address src) { NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); InstructionMark im(this); @@ -1686,6 +1693,14 @@ emit_operand(dst, src); } +void Assembler::movdl(Address dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66); + emit_byte(0x7E); + emit_operand(src, dst); +} + void Assembler::movdqa(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); @@ -1716,6 +1731,35 @@ emit_operand(src, dst); } +// Move Unaligned 256bit Vector +void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) { + assert(UseAVX, ""); + bool vector256 = true; + int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256); + emit_byte(0x6F); + emit_byte(0xC0 | encode); +} + +void Assembler::vmovdqu(XMMRegister dst, Address src) { + assert(UseAVX, ""); + InstructionMark im(this); + bool vector256 = true; + vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256); + emit_byte(0x6F); + emit_operand(dst, src); +} + +void Assembler::vmovdqu(Address dst, XMMRegister src) { + assert(UseAVX, ""); + InstructionMark im(this); + bool vector256 = true; + // swap src<->dst for encoding + assert(src != xnoreg, "sanity"); + vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256); + emit_byte(0x7F); + emit_operand(src, dst); +} + // Uses zero extension on 64bit void Assembler::movl(Register dst, int32_t imm32) { @@ -3112,6 +3156,13 @@ emit_operand(dst, src); } +void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256); + emit_byte(0x57); + emit_byte(0xC0 | encode); +} + void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) { assert(VM_Version::supports_avx(), ""); InstructionMark im(this); @@ -3120,6 +3171,30 @@ emit_operand(dst, src); } +void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, vector256); + emit_byte(0x57); + emit_byte(0xC0 | encode); +} + +void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + bool vector256 = true; + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); + emit_byte(0x18); + emit_byte(0xC0 | encode); + // 0x00 - insert into lower 128 bits + // 0x01 - insert into upper 128 bits + emit_byte(0x01); +} + +void Assembler::vzeroupper() { + assert(VM_Version::supports_avx(), ""); + (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); + emit_byte(0x77); +} + #ifndef _LP64 // 32bit only pieces of the assembler
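The new vzeroupper encoding exists to cut the AVX/SSE state-transition penalty described in the header comment in the .hpp hunk below. A hedged sketch of an intended call site, assuming a MacroAssembler-style emitter (the helper name is hypothetical, not from this changeset):

  // Hypothetical helper: clear the dirty upper YMM halves before leaving
  // AVX-using compiled code for native code that may run legacy (non-VEX)
  // SSE instructions.
  static void prepare_for_native_call(MacroAssembler* masm) {
    if (UseAVX > 0) {
      masm->vzeroupper();  // zeroes bits 255:128 of all YMM registers
    }
  }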
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -591,8 +591,9 @@
 
   void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
                   VexSimdPrefix pre, bool vector256 = false) {
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               pre, VEX_OPCODE_0F, false, vector256);
+    int dst_enc = dst->encoding();
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
   }
 
   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
@@ -600,9 +601,12 @@
                              bool vex_w, bool vector256);
 
   int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                             VexSimdPrefix pre, bool vector256 = false) {
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 pre, VEX_OPCODE_0F, false, vector256);
+                             VexSimdPrefix pre, bool vector256 = false,
+                             VexOpcode opc = VEX_OPCODE_0F) {
+    int src_enc = src->encoding();
+    int dst_enc = dst->encoding();
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);
   }
 
   void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
@@ -1261,6 +1265,7 @@
   void movdl(XMMRegister dst, Register src);
   void movdl(Register dst, XMMRegister src);
   void movdl(XMMRegister dst, Address src);
+  void movdl(Address dst, XMMRegister src);
 
   // Move Double Quadword
   void movdq(XMMRegister dst, Register src);
@@ -1274,6 +1279,14 @@
   void movdqu(XMMRegister dst, Address src);
   void movdqu(XMMRegister dst, XMMRegister src);
 
+  // Move Unaligned 256bit Vector
+  void vmovdqu(Address dst, XMMRegister src);
+  void vmovdqu(XMMRegister dst, Address src);
+  void vmovdqu(XMMRegister dst, XMMRegister src);
+
+  // Move lower 64bit to high 64bit in 128bit register
+  void movlhps(XMMRegister dst, XMMRegister src);
+
   void movl(Register dst, int32_t imm32);
   void movl(Address dst, int32_t imm32);
   void movl(Register dst, Register src);
@@ -1615,6 +1628,17 @@
   void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
   void vxorps(XMMRegister dst, XMMRegister nds, Address src);
 
+  // AVX Vector instructions.
+  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+
+  // AVX instruction which is used to clear upper 128 bits of YMM registers and
+  // to avoid transition penalty between AVX and SSE states. There is no
+  // penalty if legacy SSE instructions are encoded using VEX prefix because
+  // they always clear upper 128 bits. It should be used before calling
+  // runtime code and native libraries.
+  void vzeroupper();
 
  protected:
   // Next instructions require address alignment 16 bytes SSE mode.
@@ -2529,9 +2553,13 @@ void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); } void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + // AVX Vector instructions + + void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); } void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); } void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); } void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); } void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
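The nds->is_valid() guards added earlier in this header matter because instructions such as vmovdqu pass xnoreg as the unused second source, whose encoding() would otherwise assert. A small illustrative model (not HotSpot code) of why mapping an invalid register to encoding 0 is safe for VEX:

  // VEX stores the second-source register in the 4-bit vvvv field in
  // one's-complement form, and an unused operand must encode as all ones.
  // An invalid register treated as encoding 0 therefore yields exactly
  // the "no operand" pattern after inversion.
  static int vvvv_field(int nds_enc) {
    return (~nds_enc) & 0xF;  // 0 -> 0b1111, i.e. vvvv unused
  }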
--- a/hotspot/src/cpu/x86/vm/register_x86.cpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/cpu/x86/vm/register_x86.cpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,7 +35,7 @@ const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + 2 * FloatRegisterImpl::number_of_registers; const int ConcreteRegisterImpl::max_xmm = ConcreteRegisterImpl::max_fpr + - 2 * XMMRegisterImpl::number_of_registers; + 8 * XMMRegisterImpl::number_of_registers; const char* RegisterImpl::name() const { const char* names[number_of_registers] = { #ifndef AMD64
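The factor changes from 2 to 8 because each XMM register is now modeled as a full 256-bit YMM register, i.e. eight 32-bit VMReg slots instead of two. A quick arithmetic check (register counts as on this platform; the assert values are illustrative):

  #include <cassert>

  int main() {
    const int number_of_xmm = 16;  // AMD64; 8 on 32-bit x86
    const int slots_per_xmm = 8;   // 256-bit YMM = 8 x 32-bit VMReg slots
    assert(slots_per_xmm * number_of_xmm == 128);  // was 2 * 16 = 32
    return 0;
  }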
--- a/hotspot/src/cpu/x86/vm/register_x86.hpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/cpu/x86/vm/register_x86.hpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -158,7 +158,7 @@ XMMRegister successor() const { return as_XMMRegister(encoding() + 1); } // accessors - int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + int encoding() const { assert(is_valid(), err_msg("invalid register (%d)", (int)(intptr_t)this )); return (intptr_t)this; } bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } const char* name() const; }; @@ -216,7 +216,7 @@ RegisterImpl::number_of_registers + // "H" half of a 64bit register #endif // AMD64 2 * FloatRegisterImpl::number_of_registers + - 2 * XMMRegisterImpl::number_of_registers + + 8 * XMMRegisterImpl::number_of_registers + 1 // eflags };
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Tue Jul 03 18:23:04 2012 -0700
@@ -467,6 +467,32 @@
   if (!supports_avx ()) // Drop to 0 if no AVX support
     UseAVX = 0;
 
+#ifdef COMPILER2
+  if (UseFPUForSpilling) {
+    if (UseSSE < 2) {
+      // Only supported with SSE2+
+      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+    }
+  }
+  if (MaxVectorSize > 0) {
+    if (!is_power_of_2(MaxVectorSize)) {
+      warning("MaxVectorSize must be a power of 2");
+      FLAG_SET_DEFAULT(MaxVectorSize, 32);
+    }
+    if (MaxVectorSize > 32) {
+      FLAG_SET_DEFAULT(MaxVectorSize, 32);
+    }
+    if (MaxVectorSize > 16 && UseAVX == 0) {
+      // Only supported with AVX+
+      FLAG_SET_DEFAULT(MaxVectorSize, 16);
+    }
+    if (UseSSE < 2) {
+      // Only supported with SSE2+
+      FLAG_SET_DEFAULT(MaxVectorSize, 0);
+    }
+  }
+#endif
+
   // On new cpus instructions which update whole XMM register should be used
   // to prevent partial register stall due to dependencies on high half.
   //
@@ -544,6 +570,12 @@
       }
     }
 
+#ifdef COMPILER2
+    if (MaxVectorSize > 16) {
+      // Limit vector size to 16 bytes on current AMD cpus.
+      FLAG_SET_DEFAULT(MaxVectorSize, 16);
+    }
+#endif // COMPILER2
   }
 
   if( is_intel() ) { // Intel cpus specific settings
@@ -606,15 +638,6 @@
       FLAG_SET_DEFAULT(UsePopCountInstruction, false);
     }
 
-#ifdef COMPILER2
-  if (UseFPUForSpilling) {
-    if (UseSSE < 2) {
-      // Only supported with SSE2+
-      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
-    }
-  }
-#endif
-
   assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
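Taken together, the checks above reduce a positive MaxVectorSize request to the largest size the CPU can actually use. A host-side model of the clamping order (illustrative only; is_pow2 stands in for HotSpot's is_power_of_2):

  #include <cstdio>

  static bool is_pow2(int x) { return x > 0 && (x & (x - 1)) == 0; }

  // Mirrors the flag logic above: default to 32 on a bad value, cap at one
  // YMM register, require AVX for 32-byte vectors and SSE2 for any vectors.
  static int effective_max_vector(int requested, int use_sse, int use_avx) {
    int v = requested;
    if (!is_pow2(v))            v = 32;
    if (v > 32)                 v = 32;
    if (v > 16 && use_avx == 0) v = 16;
    if (use_sse < 2)            v = 0;
    return v;
  }

  int main() {
    printf("%d\n", effective_max_vector(32, /*UseSSE=*/4, /*UseAVX=*/0)); // 16
    return 0;
  }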
--- a/hotspot/src/cpu/x86/vm/vmreg_x86.cpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/cpu/x86/vm/vmreg_x86.cpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -48,8 +48,9 @@ XMMRegister xreg = ::as_XMMRegister(0); for ( ; i < ConcreteRegisterImpl::max_xmm ; ) { - regName[i++] = xreg->name(); - regName[i++] = xreg->name(); + for (int j = 0 ; j < 8 ; j++) { + regName[i++] = xreg->name(); + } xreg = xreg->successor(); } for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) {
--- a/hotspot/src/cpu/x86/vm/vmreg_x86.inline.hpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/cpu/x86/vm/vmreg_x86.inline.hpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,7 +39,7 @@ } inline VMReg XMMRegisterImpl::as_VMReg() { - return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_fpr); + return VMRegImpl::as_VMReg((encoding() << 3) + ConcreteRegisterImpl::max_fpr); } @@ -75,7 +75,7 @@ inline XMMRegister VMRegImpl::as_XMMRegister() { assert( is_XMMRegister() && is_even(value()), "must be" ); // Yuk - return ::as_XMMRegister((value() - ConcreteRegisterImpl::max_fpr) >> 1); + return ::as_XMMRegister((value() - ConcreteRegisterImpl::max_fpr) >> 3); } inline bool VMRegImpl::is_concrete() {
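With eight VMReg slots per XMM register, the scale factor in both directions becomes a shift by 3. A round-trip check of the mapping (the max_fpr value below is a placeholder for ConcreteRegisterImpl::max_fpr, not the real constant):

  #include <cassert>

  // Models XMMRegisterImpl::as_VMReg() and VMRegImpl::as_XMMRegister() above.
  static int xmm_to_vmreg(int xmm_enc, int max_fpr) { return (xmm_enc << 3) + max_fpr; }
  static int vmreg_to_xmm(int value,   int max_fpr) { return (value - max_fpr) >> 3; }

  int main() {
    const int max_fpr = 32;  // placeholder only
    for (int r = 0; r < 16; r++) {
      assert(vmreg_to_xmm(xmm_to_vmreg(r, max_fpr), max_fpr) == r);
    }
    return 0;
  }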
--- a/hotspot/src/cpu/x86/vm/x86.ad	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/cpu/x86/vm/x86.ad	Tue Jul 03 18:23:04 2012 -0700
@@ -24,6 +24,456 @@
 
 // X86 Common Architecture Description File
 
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+
+register %{
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def"  name ( register save type, C convention save type,
+//                   ideal register type, encoding );
+// Register Save Types:
+//
+// NS  = No-Save:       The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method, &
+//                      that they do not need to be saved at call sites.
+//
+// SOC = Save-On-Call:  The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method,
+//                      but that they must be saved at call sites.
+//
+// SOE = Save-On-Entry: The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, but they do not need to be saved at call
+//                      sites.
+//
+// AS  = Always-Save:   The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+// XMM registers. 256-bit registers of 8 words each, labeled a-h.
+// Word a in each register holds a Float, words a and b hold a Double.
+// The whole registers are used in SSE4.2 version intrinsics,
+// array copy stubs and superword operations (see UseSSE42Intrinsics,
+// UseXMMForArrayCopy and UseSuperword flags).
+// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
+// Linux ABI: No register preserved across function calls +// XMM0-XMM7 might hold parameters +// Windows ABI: XMM6-XMM15 preserved across function calls +// XMM0-XMM3 might hold parameters + +reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); +reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()); +reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()); +reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()); +reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()); +reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); +reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()); +reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()); +reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()); +reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()); +reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); +reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()); +reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()); +reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()); +reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()); +reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); +reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()); +reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()); +reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()); +reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()); +reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); +reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()); +reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()); +reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()); +reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()); +reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM4h( SOC, SOC, Op_RegF, 4, 
xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); +reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()); +reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()); +reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()); +reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()); +reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +#ifdef _WIN64 + +reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()); +reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()); +reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()); +reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()); +reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()); +reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()); +reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()); +reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()); +reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()); +reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()); +reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()); +reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()); +reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()); +reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()); +reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()); +reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()); +reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()); +reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()); +reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()); +reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()); +reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM10 ( SOC, SOE, Op_RegF, 10, 
xmm10->as_VMReg()); +reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()); +reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()); +reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()); +reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()); +reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()); +reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()); +reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()); +reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()); +reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()); +reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()); +reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()); +reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()); +reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()); +reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()); +reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()); +reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()); +reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()); +reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()); +reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()); +reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()); +reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()); +reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()); +reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()); +reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()); +reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()); 
+reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()); +reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()); +reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()); +reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()); +reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +#else // _WIN64 + +reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); +reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()); +reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()); +reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()); +reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()); +reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); +reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()); +reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()); +reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()); +reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()); +reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +#ifdef _LP64 + +reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); +reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()); +reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()); +reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()); +reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()); +reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); +reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()); +reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()); +reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()); +reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()); +reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); +reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()); +reg_def XMM10c( 
SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()); +reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()); +reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()); +reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); +reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()); +reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()); +reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()); +reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()); +reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); +reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()); +reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()); +reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()); +reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()); +reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); +reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()); +reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()); +reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()); +reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()); +reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); +reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()); +reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()); +reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()); +reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()); +reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); +reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()); +reg_def XMM15c( SOC, SOC, Op_RegF, 15, 
xmm15->as_VMReg()->next()->next()); +reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()); +reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()); +reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()); +reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()); +reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next()); + +#endif // _LP64 + +#endif // _WIN64 + +#ifdef _LP64 +reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); +#else +reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); +#endif // _LP64 + +alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, + XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, + XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, + XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, + XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, + XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, + XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, + XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h +#ifdef _LP64 + ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, + XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, + XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, + XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, + XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, + XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, + XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, + XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h +#endif + ); + +// flags allocation class should be last. +alloc_class chunk2(RFLAGS); + +// Singleton class for condition codes +reg_class int_flags(RFLAGS); + +// Class for all float registers +reg_class float_reg(XMM0, + XMM1, + XMM2, + XMM3, + XMM4, + XMM5, + XMM6, + XMM7 +#ifdef _LP64 + ,XMM8, + XMM9, + XMM10, + XMM11, + XMM12, + XMM13, + XMM14, + XMM15 +#endif + ); + +// Class for all double registers +reg_class double_reg(XMM0, XMM0b, + XMM1, XMM1b, + XMM2, XMM2b, + XMM3, XMM3b, + XMM4, XMM4b, + XMM5, XMM5b, + XMM6, XMM6b, + XMM7, XMM7b +#ifdef _LP64 + ,XMM8, XMM8b, + XMM9, XMM9b, + XMM10, XMM10b, + XMM11, XMM11b, + XMM12, XMM12b, + XMM13, XMM13b, + XMM14, XMM14b, + XMM15, XMM15b +#endif + ); + +// Class for all 32bit vector registers +reg_class vectors_reg(XMM0, + XMM1, + XMM2, + XMM3, + XMM4, + XMM5, + XMM6, + XMM7 +#ifdef _LP64 + ,XMM8, + XMM9, + XMM10, + XMM11, + XMM12, + XMM13, + XMM14, + XMM15 +#endif + ); + +// Class for all 64bit vector registers +reg_class vectord_reg(XMM0, XMM0b, + XMM1, XMM1b, + XMM2, XMM2b, + XMM3, XMM3b, + XMM4, XMM4b, + XMM5, XMM5b, + XMM6, XMM6b, + XMM7, XMM7b +#ifdef _LP64 + ,XMM8, XMM8b, + XMM9, XMM9b, + XMM10, XMM10b, + XMM11, XMM11b, + XMM12, XMM12b, + XMM13, XMM13b, + XMM14, XMM14b, + XMM15, XMM15b +#endif + ); + +// Class for all 128bit vector registers +reg_class vectorx_reg(XMM0, XMM0b, XMM0c, XMM0d, + XMM1, XMM1b, XMM1c, XMM1d, + XMM2, XMM2b, XMM2c, XMM2d, + XMM3, XMM3b, XMM3c, XMM3d, + XMM4, XMM4b, XMM4c, XMM4d, + XMM5, XMM5b, XMM5c, XMM5d, + XMM6, XMM6b, XMM6c, XMM6d, + XMM7, XMM7b, XMM7c, XMM7d +#ifdef _LP64 + ,XMM8, XMM8b, XMM8c, XMM8d, + XMM9, XMM9b, XMM9c, XMM9d, + XMM10, XMM10b, XMM10c, XMM10d, + XMM11, XMM11b, XMM11c, XMM11d, + XMM12, XMM12b, XMM12c, XMM12d, + XMM13, XMM13b, XMM13c, XMM13d, + XMM14, XMM14b, XMM14c, XMM14d, + XMM15, XMM15b, XMM15c, XMM15d 
+#endif
+                      );
+
+// Class for all 256bit vector registers
+reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
+                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
+                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
+                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
+                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
+                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
+                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
+                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
+#ifdef _LP64
+                      ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
+                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
+                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
+                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
+                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
+                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
+                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
+                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
+#endif
+                      );
+
+%}
+
 source %{
   // Float masks come from different places depending on platform.
 #ifdef _LP64
@@ -38,6 +488,252 @@
 static address double_signflip() { return (address)double_signflip_pool; }
 #endif
 
+// Map Types to machine register types
+const int Matcher::base2reg[Type::lastype] = {
+  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
+  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
+  Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
+  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
+  0, 0/*abio*/,
+  Op_RegP /* Return address */, 0, /* the memories */
+  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
+  0 /*bottom*/
+};
+
+// Max vector size in bytes. 0 if not supported.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  if (UseSSE < 2) return 0;
+  // SSE2 supports 128bit vectors for all types.
+  // AVX2 supports 256bit vectors for all types.
+  int size = (UseAVX > 1) ? 32 : 16;
+  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
+  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
+    size = 32;
+  // Use flag to limit vector size.
+  size = MIN2(size,(int)MaxVectorSize);
+  // Minimum 2 values in vector (or 4 for bytes).
+  switch (bt) {
+  case T_DOUBLE:
+  case T_LONG:
+    if (size < 16) return 0;
+  case T_FLOAT:
+  case T_INT:
+    if (size < 8) return 0;
+  case T_BOOLEAN:
+  case T_BYTE:
+  case T_CHAR:
+  case T_SHORT:
+    if (size < 4) return 0;
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  return size;
+}
+
+// Limits on vector size (number of elements) loaded into vector.
+const int Matcher::max_vector_size(const BasicType bt) {
+  return vector_width_in_bytes(bt)/type2aelembytes(bt);
+}
+const int Matcher::min_vector_size(const BasicType bt) {
+  int max_size = max_vector_size(bt);
+  // Min size which can be loaded into vector is 4 bytes.
+  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
+  return MIN2(size,max_size);
+}
+
+// Vector ideal reg corresponding to specified size in bytes
+const int Matcher::vector_ideal_reg(int size) {
+  assert(MaxVectorSize >= size, "");
+  switch(size) {
+    case  4: return Op_VecS;
+    case  8: return Op_VecD;
+    case 16: return Op_VecX;
+    case 32: return Op_VecY;
+  }
+  ShouldNotReachHere();
+  return 0;
+}
+
+// x86 supports misaligned vector store/load.
+const bool Matcher::misaligned_vectors_ok() {
+  return !AlignVector; // can be changed by flag
+}
+
+// Helper methods for MachSpillCopyNode::implementation().
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
+  // In 64-bit VM size calculation is very complex. Emitting instructions
+  // into scratch buffer is used to get size in 64-bit VM.
+  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
+  assert(ireg == Op_VecS || // 32bit vector
+         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
+         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
+         "no non-adjacent vector moves" );
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    int offset = __ offset();
+    switch (ireg) {
+    case Op_VecS: // copy whole register
+    case Op_VecD:
+    case Op_VecX:
+      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+      break;
+    case Op_VecY:
+      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+    int size = __ offset() - offset;
+#ifdef ASSERT
+    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
+    assert(!do_size || size == 4, "incorrect size calculation");
+#endif
+    return size;
+#ifndef PRODUCT
+  } else if (!do_size) {
+    switch (ireg) {
+    case Op_VecS:
+    case Op_VecD:
+    case Op_VecX:
+      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
+      break;
+    case Op_VecY:
+      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+#endif
+  }
+  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
+  return 4;
+}
+
+static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+                            int stack_offset, int reg, uint ireg, outputStream* st) {
+  // In 64-bit VM size calculation is very complex. Emitting instructions
+  // into scratch buffer is used to get size in 64-bit VM.
+  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    int offset = __ offset();
+    if (is_load) {
+      switch (ireg) {
+      case Op_VecS:
+        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        break;
+      case Op_VecD:
+        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        break;
+      case Op_VecX:
+        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        break;
+      case Op_VecY:
+        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else { // store
+      switch (ireg) {
+      case Op_VecS:
+        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        break;
+      case Op_VecD:
+        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        break;
+      case Op_VecX:
+        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        break;
+      case Op_VecY:
+        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    }
+    int size = __ offset() - offset;
+#ifdef ASSERT
+    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
+    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
+    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
+#endif
+    return size;
+#ifndef PRODUCT
+  } else if (!do_size) {
+    if (is_load) {
+      switch (ireg) {
+      case Op_VecS:
+        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+        break;
+      case Op_VecD:
+        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+        break;
+      case Op_VecX:
+        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+        break;
+      case Op_VecY:
+        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else { // store
+      switch (ireg) {
+      case Op_VecS:
+        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+        break;
+      case Op_VecD:
+        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+        break;
+      case Op_VecX:
+        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+        break;
+      case Op_VecY:
+        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    }
+#endif
+  }
+  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
+  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
+  return 5+offset_size;
+}
+
+static inline jfloat replicate4_imm(int con, int width) {
+  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
+  assert(width == 1 || width == 2, "only byte or short types here");
+  int bit_width = width * 8;
+  jint val = con;
+  val &= (1 << bit_width) - 1;  // mask off sign bits
+  while(bit_width < 32) {
+    val |= (val << bit_width);
+    bit_width <<= 1;
+  }
+  jfloat fval = *((jfloat*) &val);  // coerce to float type
+  return fval;
+}
+
+static inline jdouble replicate8_imm(int con, int width) {
+  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
+  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
+  int bit_width = width * 8;
+  jlong val = con;
+  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
+  while(bit_width < 64) {
+    val |= (val << bit_width);
+    bit_width <<= 1;
+  }
+  jdouble dval = *((jdouble*) &val);  // coerce to double type
+  return dval;
+}
+
 #ifndef PRODUCT
   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
     st->print("nop \t# %d bytes pad for loops and calls", _count);
@@ -103,6 +799,46 @@
 %}
 
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+ +// Vectors +operand vecS() %{ + constraint(ALLOC_IN_RC(vectors_reg)); + match(VecS); + + format %{ %} + interface(REG_INTER); +%} + +operand vecD() %{ + constraint(ALLOC_IN_RC(vectord_reg)); + match(VecD); + + format %{ %} + interface(REG_INTER); +%} + +operand vecX() %{ + constraint(ALLOC_IN_RC(vectorx_reg)); + match(VecX); + + format %{ %} + interface(REG_INTER); +%} + +operand vecY() %{ + constraint(ALLOC_IN_RC(vectory_reg)); + match(VecY); + + format %{ %} + interface(REG_INTER); +%} + + // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) // ============================================================================ @@ -852,3 +1588,797 @@ ins_pipe(pipe_slow); %} + +// ====================VECTOR INSTRUCTIONS===================================== + +// Load vectors (4 bytes long) +instruct loadV4(vecS dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 4); + match(Set dst (LoadVector mem)); + ins_cost(125); + format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $mem$$Address); + %} + ins_pipe( pipe_slow ); +%} + +// Load vectors (8 bytes long) +instruct loadV8(vecD dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(125); + format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + %} + ins_pipe( pipe_slow ); +%} + +// Load vectors (16 bytes long) +instruct loadV16(vecX dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + ins_cost(125); + format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} + ins_encode %{ + __ movdqu($dst$$XMMRegister, $mem$$Address); + %} + ins_pipe( pipe_slow ); +%} + +// Load vectors (32 bytes long) +instruct loadV32(vecY dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 32); + match(Set dst (LoadVector mem)); + ins_cost(125); + format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} + ins_encode %{ + __ vmovdqu($dst$$XMMRegister, $mem$$Address); + %} + ins_pipe( pipe_slow ); +%} + +// Store vectors +instruct storeV4(memory mem, vecS src) %{ + predicate(n->as_StoreVector()->memory_size() == 4); + match(Set mem (StoreVector mem src)); + ins_cost(145); + format %{ "movd $mem,$src\t! store vector (4 bytes)" %} + ins_encode %{ + __ movdl($mem$$Address, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct storeV8(memory mem, vecD src) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(145); + format %{ "movq $mem,$src\t! store vector (8 bytes)" %} + ins_encode %{ + __ movq($mem$$Address, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct storeV16(memory mem, vecX src) %{ + predicate(n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + ins_cost(145); + format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} + ins_encode %{ + __ movdqu($mem$$Address, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct storeV32(memory mem, vecY src) %{ + predicate(n->as_StoreVector()->memory_size() == 32); + match(Set mem (StoreVector mem src)); + ins_cost(145); + format %{ "vmovdqu $mem,$src\t! 
store vector (32 bytes)" %} + ins_encode %{ + __ vmovdqu($mem$$Address, $src$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate byte scalar to be vector +instruct Repl4B(vecS dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateB src)); + format %{ "movd $dst,$src\n\t" + "punpcklbw $dst,$dst\n\t" + "pshuflw $dst,$dst,0x00\t! replicate4B" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8B(vecD dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + format %{ "movd $dst,$src\n\t" + "punpcklbw $dst,$dst\n\t" + "pshuflw $dst,$dst,0x00\t! replicate8B" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16B(vecX dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB src)); + format %{ "movd $dst,$src\n\t" + "punpcklbw $dst,$dst\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "movlhps $dst,$dst\t! replicate16B" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32B(vecY dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateB src)); + format %{ "movd $dst,$src\n\t" + "punpcklbw $dst,$dst\n\t" + "pshuflw $dst,$dst,0x00\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate32B" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); + __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate byte scalar immediate to be vector by loading from const table. +instruct Repl4B_imm(vecS dst, immI con) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateB con)); + format %{ "movss $dst,[$constantaddress]\t! replicate4B($con)" %} + ins_encode %{ + __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8B_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate8B($con)" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16B_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate16B($con)\n\t" + "movlhps $dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl32B_imm(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateB con)); + format %{ "movsd $dst,[$constantaddress]\t! 
replicate32B($con)\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate byte scalar zero to be vector
+instruct Repl4B_zero(vecS dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateB zero));
+  format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8B_zero(vecD dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateB zero));
+  format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl16B_zero(vecX dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateB zero));
+  format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl32B_zero(vecY dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 32);
+  match(Set dst (ReplicateB zero));
+  format %{ "vxorpd  $dst,$dst,$dst\t! replicate32B zero" %}
+  ins_encode %{
+    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+    bool vector256 = true;
+    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate char/short (2 byte) scalar to be vector
+instruct Repl2S(vecS dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4S(vecD dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8S(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "movlhps $dst,$dst\t! replicate8S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate16S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
+instruct Repl2S_imm(vecS dst, immI con) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateS con)); + format %{ "movss $dst,[$constantaddress]\t! replicate2S($con)" %} + ins_encode %{ + __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4S_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate4S($con)" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8S_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate8S($con)\n\t" + "movlhps $dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl16S_imm(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateS con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate16S($con)\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate char/short (2 byte) scalar zero to be vector +instruct Repl2S_zero(vecS dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateS zero)); + format %{ "pxor $dst,$dst\t! replicate2S zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4S_zero(vecD dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS zero)); + format %{ "pxor $dst,$dst\t! replicate4S zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8S_zero(vecX dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS zero)); + format %{ "pxor $dst,$dst\t! replicate8S zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl16S_zero(vecY dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateS zero)); + format %{ "vxorpd $dst,$dst,$dst\t! replicate16S zero" %} + ins_encode %{ + // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). + bool vector256 = true; + __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Replicate integer (4 byte) scalar to be vector +instruct Repl2I(vecD dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + format %{ "movd $dst,$src\n\t" + "pshufd $dst,$dst,0x00\t! replicate2I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4I(vecX dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI src)); + format %{ "movd $dst,$src\n\t" + "pshufd $dst,$dst,0x00\t! 
replicate4I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8I(vecY dst, rRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI src)); + format %{ "movd $dst,$src\n\t" + "pshufd $dst,$dst,0x00\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate8I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. +instruct Repl2I_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate2I($con)" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4I_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate4I($con)\n\t" + "movlhps $dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8I_imm(vecY dst, immI con) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate8I($con)\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Integer could be loaded into xmm register directly from memory. +instruct Repl2I_mem(vecD dst, memory mem) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI (LoadVector mem))); + format %{ "movd $dst,$mem\n\t" + "pshufd $dst,$dst,0x00\t! replicate2I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $mem$$Address); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4I_mem(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI (LoadVector mem))); + format %{ "movd $dst,$mem\n\t" + "pshufd $dst,$dst,0x00\t! replicate4I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $mem$$Address); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8I_mem(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI (LoadVector mem))); + format %{ "movd $dst,$mem\n\t" + "pshufd $dst,$dst,0x00\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate8I" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $mem$$Address); + __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate integer (4 byte) scalar zero to be vector +instruct Repl2I_zero(vecD dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI zero)); + format %{ "pxor $dst,$dst\t! 
replicate2I zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4I_zero(vecX dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI zero)); + format %{ "pxor $dst,$dst\t! replicate4I zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8I_zero(vecY dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI zero)); + format %{ "vxorpd $dst,$dst,$dst\t! replicate8I zero" %} + ins_encode %{ + // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). + bool vector256 = true; + __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Replicate long (8 byte) scalar to be vector +#ifdef _LP64 +instruct Repl2L(vecX dst, rRegL src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + format %{ "movdq $dst,$src\n\t" + "movlhps $dst,$dst\t! replicate2L" %} + ins_encode %{ + __ movdq($dst$$XMMRegister, $src$$Register); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4L(vecY dst, rRegL src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL src)); + format %{ "movdq $dst,$src\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate4L" %} + ins_encode %{ + __ movdq($dst$$XMMRegister, $src$$Register); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} +#else // _LP64 +instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + effect(TEMP dst, USE src, TEMP tmp); + format %{ "movdl $dst,$src.lo\n\t" + "movdl $tmp,$src.hi\n\t" + "punpckldq $dst,$tmp\n\t" + "movlhps $dst,$dst\t! replicate2L"%} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); + __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL src)); + effect(TEMP dst, USE src, TEMP tmp); + format %{ "movdl $dst,$src.lo\n\t" + "movdl $tmp,$src.hi\n\t" + "punpckldq $dst,$tmp\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate4L" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); + __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} +#endif // _LP64 + +// Replicate long (8 byte) scalar immediate to be vector by loading from const table. +instruct Repl2L_imm(vecX dst, immL con) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL con)); + format %{ "movsd $dst,[$constantaddress]\t! 
replicate2L($con)\n\t" + "movlhps $dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress($con)); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4L_imm(vecY dst, immL con) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL con)); + format %{ "movsd $dst,[$constantaddress]\t! replicate4L($con)\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst" %} + ins_encode %{ + __ movdbl($dst$$XMMRegister, $constantaddress($con)); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Long could be loaded into xmm register directly from memory. +instruct Repl2L_mem(vecX dst, memory mem) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL (LoadVector mem))); + format %{ "movq $dst,$mem\n\t" + "movlhps $dst,$dst\t! replicate2L" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4L_mem(vecY dst, memory mem) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL (LoadVector mem))); + format %{ "movq $dst,$mem\n\t" + "movlhps $dst,$dst\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate4L" %} + ins_encode %{ + __ movq($dst$$XMMRegister, $mem$$Address); + __ movlhps($dst$$XMMRegister, $dst$$XMMRegister); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate long (8 byte) scalar zero to be vector +instruct Repl2L_zero(vecX dst, immL0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL zero)); + format %{ "pxor $dst,$dst\t! replicate2L zero" %} + ins_encode %{ + __ pxor($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4L_zero(vecY dst, immL0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL zero)); + format %{ "vxorpd $dst,$dst,$dst\t! replicate4L zero" %} + ins_encode %{ + // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). + bool vector256 = true; + __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Replicate float (4 byte) scalar to be vector +instruct Repl2F(vecD dst, regF src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + format %{ "pshufd $dst,$dst,0x00\t! replicate2F" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4F(vecX dst, regF src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateF src)); + format %{ "pshufd $dst,$dst,0x00\t! replicate4F" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl8F(vecY dst, regF src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateF src)); + format %{ "pshufd $dst,$src,0x00\n\t" + "vinsertf128h $dst,$dst,$dst\t! 
replicate8F" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate float (4 byte) scalar zero to be vector +instruct Repl2F_zero(vecD dst, immF0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF zero)); + format %{ "xorps $dst,$dst\t! replicate2F zero" %} + ins_encode %{ + __ xorps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4F_zero(vecX dst, immF0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateF zero)); + format %{ "xorps $dst,$dst\t! replicate4F zero" %} + ins_encode %{ + __ xorps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl8F_zero(vecY dst, immF0 zero) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateF zero)); + format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} + ins_encode %{ + bool vector256 = true; + __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); + %} + ins_pipe( fpu_reg_reg ); +%} + +// Replicate double (8 bytes) scalar to be vector +instruct Repl2D(vecX dst, regD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateD src)); + format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); + %} + ins_pipe( pipe_slow ); +%} + +instruct Repl4D(vecY dst, regD src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateD src)); + format %{ "pshufd $dst,$src,0x44\n\t" + "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} + ins_encode %{ + __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); + __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate double (8 byte) scalar zero to be vector +instruct Repl2D_zero(vecX dst, immD0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateD zero)); + format %{ "xorpd $dst,$dst\t! replicate2D zero" %} + ins_encode %{ + __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe( fpu_reg_reg ); +%} + +instruct Repl4D_zero(vecY dst, immD0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateD zero)); + format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} + ins_encode %{ + bool vector256 = true; + __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); + %} + ins_pipe( fpu_reg_reg ); +%} +
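A note on the Repl*_imm patterns above: they do not broadcast at run time. Instead, replicate4_imm/replicate8_imm pre-broadcast the immediate into a constant-table entry, so the generated code pays only a single movss/movsd load, and the wider forms merely duplicate lanes with movlhps/vinsertf128h. As a point of reference, here is a minimal scalar sketch of what such a helper computes; the name replicate8_imm_sketch, the masking, and the bit-doubling loop are illustrative assumptions, not the HotSpot source:

#include <assert.h>
#include <stdint.h>

// Sketch: repeat the low `width` bytes of `con` across a 64-bit word.
// The patterns above use element widths of 1, 2 and 4 bytes.
static uint64_t replicate8_imm_sketch(int con, int width) {
  assert(width == 1 || width == 2 || width == 4);
  uint64_t val = (uint64_t)(uint32_t)con & ((1ULL << (width * 8)) - 1);
  for (int bits = width * 8; bits < 64; bits <<= 1) {
    val |= val << bits;  // double the replicated span: 8 -> 16 -> 32 -> 64 bits
  }
  return val;
}

For example, a byte immediate of 0x41 yields 0x4141414141414141, which a pattern like Repl8B_imm can then load from the constant table with one movsd.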
--- a/hotspot/src/cpu/x86/vm/x86_32.ad Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/cpu/x86/vm/x86_32.ad Tue Jul 03 18:23:04 2012 -0700 @@ -74,9 +74,6 @@ reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg()); reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg()); -// Special Registers -reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); - // Float registers. We treat TOS/FPR0 special. It is invisible to the // allocator, and only shows up in the encodings. reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); @@ -105,27 +102,6 @@ reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()); reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next()); -// XMM registers. 128-bit registers or 4 words each, labeled a-d. -// Word a in each register holds a Float, words ab hold a Double. -// We currently do not use the SIMD capabilities, so registers cd -// are unused at the moment. -reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); -reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()); -reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); -reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()); -reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); -reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()); -reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); -reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()); -reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); -reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()); -reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); -reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()); -reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); -reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()); -reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); -reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()); - // Specify priority of register selection within phases of register // allocation. Highest priority is first. A useful heuristic is to // give registers a low priority when they are required by machine @@ -138,15 +114,6 @@ FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H, FPR6L, FPR6H, FPR7L, FPR7H ); -alloc_class chunk1( XMM0a, XMM0b, - XMM1a, XMM1b, - XMM2a, XMM2b, - XMM3a, XMM3b, - XMM4a, XMM4b, - XMM5a, XMM5b, - XMM6a, XMM6b, - XMM7a, XMM7b, EFLAGS); - //----------Architecture Description Register Classes-------------------------- // Several register classes are automatically defined based upon information in @@ -159,12 +126,12 @@ // Class for all registers reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); // Class for general registers -reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX); +reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX); // Class for general registers which may be used for implicit null checks on win95 // Also safe for use by tailjump. We don't want to allocate in rbp, -reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX); +reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX); // Class of "X" registers -reg_class x_reg(EBX, ECX, EDX, EAX); +reg_class int_x_reg(EBX, ECX, EDX, EAX); // Class of registers that can appear in an address with no offset. // EBP and ESP require an extra instruction byte for zero offset. 
// Used in fast-unlock @@ -193,8 +160,6 @@ reg_class sp_reg(ESP); // Singleton class for instruction pointer // reg_class ip_reg(EIP); -// Singleton class for condition codes -reg_class int_flags(EFLAGS); // Class of integer register pairs reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI ); // Class of integer register pairs that aligns with calling convention @@ -206,29 +171,18 @@ // Floating point registers. Notice FPR0 is not a choice. // FPR0 is not ever allocated; we use clever encodings to fake // 2-address instructions out of Intel's FP stack. -reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); - -// make a register class for SSE registers -reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a); - -// make a double register class for SSE2 registers -reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b, - XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b ); - -reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, - FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, - FPR7L,FPR7H ); - -reg_class flt_reg0( FPR1L ); -reg_class dbl_reg0( FPR1L,FPR1H ); -reg_class dbl_reg1( FPR2L,FPR2H ); -reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, - FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); - -// XMM6 and XMM7 could be used as temporary registers for long, float and -// double values for SSE2. -reg_class xdb_reg6( XMM6a,XMM6b ); -reg_class xdb_reg7( XMM7a,XMM7b ); +reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); + +reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, + FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, + FPR7L,FPR7H ); + +reg_class fp_flt_reg0( FPR1L ); +reg_class fp_dbl_reg0( FPR1L,FPR1H ); +reg_class fp_dbl_reg1( FPR2L,FPR2H ); +reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, + FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); + %} @@ -412,7 +366,7 @@ } } - // eRegI ereg, memory mem) %{ // emit_reg_mem + // rRegI ereg, memory mem) %{ // emit_reg_mem void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) { // There is no index & no scale, use form without SIB byte if ((index == 0x4) && @@ -787,7 +741,7 @@ #endif } int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); - // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes. + // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as the SIMD prefix. return size+5+offset_size; } @@ -821,7 +775,7 @@ } #endif } - // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes. + // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as the SIMD prefix. // Only MOVAPS SSE prefix uses 1 byte. int sz = 4; if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && @@ -903,6 +857,108 @@ return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); } +// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. +static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, + int src_hi, int dst_hi, uint ireg, outputStream* st); + +static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st); + +static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, + int dst_offset, uint ireg, outputStream* st) { + int calc_size = 0; + int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); + int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 
1 : 4); + switch (ireg) { + case Op_VecS: + calc_size = 3+src_offset_size + 3+dst_offset_size; + break; + case Op_VecD: + calc_size = 3+src_offset_size + 3+dst_offset_size; + src_offset += 4; + dst_offset += 4; + src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); + dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); + calc_size += 3+src_offset_size + 3+dst_offset_size; + break; + case Op_VecX: + calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; + break; + case Op_VecY: + calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; + break; + default: + ShouldNotReachHere(); + } + if (cbuf) { + MacroAssembler _masm(cbuf); + int offset = __ offset(); + switch (ireg) { + case Op_VecS: + __ pushl(Address(rsp, src_offset)); + __ popl (Address(rsp, dst_offset)); + break; + case Op_VecD: + __ pushl(Address(rsp, src_offset)); + __ popl (Address(rsp, dst_offset)); + __ pushl(Address(rsp, src_offset+4)); + __ popl (Address(rsp, dst_offset+4)); + break; + case Op_VecX: + __ movdqu(Address(rsp, -16), xmm0); + __ movdqu(xmm0, Address(rsp, src_offset)); + __ movdqu(Address(rsp, dst_offset), xmm0); + __ movdqu(xmm0, Address(rsp, -16)); + break; + case Op_VecY: + __ vmovdqu(Address(rsp, -32), xmm0); + __ vmovdqu(xmm0, Address(rsp, src_offset)); + __ vmovdqu(Address(rsp, dst_offset), xmm0); + __ vmovdqu(xmm0, Address(rsp, -32)); + break; + default: + ShouldNotReachHere(); + } + int size = __ offset() - offset; + assert(size == calc_size, "incorrect size calculation"); + return size; +#ifndef PRODUCT + } else if (!do_size) { + switch (ireg) { + case Op_VecS: + st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" + "popl [rsp + #%d]", + src_offset, dst_offset); + break; + case Op_VecD: + st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" + "popl [rsp + #%d]\n\t" + "pushl [rsp + #%d]\n\t" + "popl [rsp + #%d]", + src_offset, dst_offset, src_offset+4, dst_offset+4); + break; + case Op_VecX: + st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" + "movdqu xmm0, [rsp + #%d]\n\t" + "movdqu [rsp + #%d], xmm0\n\t" + "movdqu xmm0, [rsp - #16]", + src_offset, dst_offset); + break; + case Op_VecY: + st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" + "vmovdqu xmm0, [rsp + #%d]\n\t" + "vmovdqu [rsp + #%d], xmm0\n\t" + "vmovdqu xmm0, [rsp - #32]", + src_offset, dst_offset); + break; + default: + ShouldNotReachHere(); + } +#endif + } + return calc_size; +} + uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { // Get registers to move OptoReg::Name src_second = ra_->get_reg_second(in(1)); @@ -923,6 +979,29 @@ if( src_first == dst_first && src_second == dst_second ) return size; // Self copy, no move + if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); + assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); + assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); + assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity"); + if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { + // mem -> mem + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); + } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { + return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); + } else if (src_first_rc == rc_xmm && 
dst_first_rc == rc_stack ) { + int stack_offset = ra_->reg2offset(dst_first); + return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); + } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { + int stack_offset = ra_->reg2offset(src_first); + return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); + } else { + ShouldNotReachHere(); + } + } + // -------------------------------------- // Check for mem-mem move. push/pop to move. if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { @@ -1313,16 +1392,6 @@ return true; } -// Vector width in bytes -const uint Matcher::vector_width_in_bytes(void) { - return UseSSE >= 2 ? 8 : 0; -} - -// Vector ideal reg -const uint Matcher::vector_ideal_reg(void) { - return Op_RegD; -} - // Is this branch offset short enough that a short branch can be used? // // NOTE: If the platform does not provide any short branch variants, then @@ -1452,7 +1521,7 @@ // arguments in those registers not be available to the callee. bool Matcher::can_be_java_arg( int reg ) { if( reg == ECX_num || reg == EDX_num ) return true; - if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true; + if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; return false; } @@ -1565,16 +1634,16 @@ emit_opcode(cbuf,0x66); %} - enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) + enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} - enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{ // OpcRegReg(Many) + enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) emit_opcode(cbuf,$opcode$$constant); emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} - enc_class mov_r32_imm0( eRegI dst ) %{ + enc_class mov_r32_imm0( rRegI dst ) %{ emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 %} @@ -1621,7 +1690,7 @@ %} // Dense encoding for older common ops - enc_class Opc_plus(immI opcode, eRegI reg) %{ + enc_class Opc_plus(immI opcode, rRegI reg) %{ emit_opcode(cbuf, $opcode$$constant + $reg$$reg); %} @@ -1637,7 +1706,7 @@ } %} - enc_class OpcSErm (eRegI dst, immI imm) %{ // OpcSEr/m + enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m // Emit primary opcode and set sign-extend bit // Check for 8-bit immediate, and set sign extend bit in opcode if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { @@ -1682,7 +1751,7 @@ else emit_d32(cbuf,con); %} - enc_class OpcSReg (eRegI dst) %{ // BSWAP + enc_class OpcSReg (rRegI dst) %{ // BSWAP emit_cc(cbuf, $secondary, $dst$$reg ); %} @@ -1700,7 +1769,7 @@ emit_rm(cbuf, 0x3, destlo, desthi); %} - enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ... + enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 
emit_rm(cbuf, 0x3, $secondary, $div$$reg ); %} @@ -1891,20 +1960,20 @@ // runtime_call_Relocation::spec(), RELOC_IMM32 ); // %} - enc_class RegOpcImm (eRegI dst, immI8 shift) %{ // SHL, SAR, SHR + enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR $$$emit8$primary; emit_rm(cbuf, 0x3, $secondary, $dst$$reg); $$$emit8$shift$$constant; %} - enc_class LdImmI (eRegI dst, immI src) %{ // Load Immediate + enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate // Load immediate does not have a zero or sign extended version // for 8-bit immediates emit_opcode(cbuf, 0xB8 + $dst$$reg); $$$emit32$src$$constant; %} - enc_class LdImmP (eRegI dst, immI src) %{ // Load Immediate + enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate // Load immediate does not have a zero or sign extended version // for 8-bit immediates emit_opcode(cbuf, $primary + $dst$$reg); @@ -1943,15 +2012,15 @@ // Encode a reg-reg copy. If it is useless, then empty encoding. - enc_class enc_Copy( eRegI dst, eRegI src ) %{ + enc_class enc_Copy( rRegI dst, rRegI src ) %{ encode_Copy( cbuf, $dst$$reg, $src$$reg ); %} - enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{ + enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ encode_Copy( cbuf, $dst$$reg, $src$$reg ); %} - enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) + enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} @@ -1973,7 +2042,7 @@ emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); %} - enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{ + enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); %} @@ -2068,7 +2137,7 @@ cbuf.set_insts_mark(); // Mark start of opcode for reloc info in mem operand %} - enc_class RegMem (eRegI ereg, memory mem) %{ // emit_reg_mem + enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem int reg_encoding = $ereg$$reg; int base = $mem$$base; int index = $mem$$index; @@ -2132,7 +2201,7 @@ // Clone of RegMem but accepts an extra parameter to access each // half of a double in memory; it never needs relocation info. 
- enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{ + enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ emit_opcode(cbuf,$opcode$$constant); int reg_encoding = $rm_reg$$reg; int base = $mem$$base; @@ -2168,7 +2237,7 @@ encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop); %} - enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{ // emit_reg_lea + enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea int reg_encoding = $dst$$reg; int base = $src0$$reg; // 0xFFFFFFFF indicates no base int index = 0x04; // 0x04 indicates no index @@ -2178,7 +2247,7 @@ encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); %} - enc_class min_enc (eRegI dst, eRegI src) %{ // MIN + enc_class min_enc (rRegI dst, rRegI src) %{ // MIN // Compare dst,src emit_opcode(cbuf,0x3B); emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); @@ -2190,7 +2259,7 @@ emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} - enc_class max_enc (eRegI dst, eRegI src) %{ // MAX + enc_class max_enc (rRegI dst, rRegI src) %{ // MAX // Compare dst,src emit_opcode(cbuf,0x3B); emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); @@ -2221,7 +2290,7 @@ encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop); %} - enc_class neg_reg(eRegI dst) %{ + enc_class neg_reg(rRegI dst) %{ // NEG $dst emit_opcode(cbuf,0xF7); emit_rm(cbuf, 0x3, 0x03, $dst$$reg ); @@ -2251,7 +2320,7 @@ emit_rm(cbuf, 0x3, $p$$reg, tmpReg); %} - enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT + enc_class enc_cmpLTP_mem(rRegI p, rRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT int tmpReg = $tmp$$reg; // SUB $p,$q @@ -2390,12 +2459,12 @@ %} // Special case for moving an integer register to a stack slot. - enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS + enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp ); %} // Special case for moving a register to a stack slot. 
- enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS + enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS // Opcode already emitted emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte @@ -2640,7 +2709,7 @@ // equal_result = 0; // nan_result = -1; - enc_class CmpF_Result(eRegI dst) %{ + enc_class CmpF_Result(rRegI dst) %{ // fnstsw_ax(); emit_opcode( cbuf, 0xDF); emit_opcode( cbuf, 0xE0); @@ -2685,7 +2754,7 @@ // done: %} - enc_class convert_int_long( regL dst, eRegI src ) %{ + enc_class convert_int_long( regL dst, rRegI src ) %{ // mov $dst.lo,$src int dst_encoding = $dst$$reg; int src_encoding = $src$$reg; @@ -2754,7 +2823,7 @@ emit_rm( cbuf, 0x3, 0x4, $src$$reg); %} - enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{ + enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ // Basic idea: lo(result) = lo(x_lo * y_lo) // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) // MOV $tmp,$src.lo @@ -2780,7 +2849,7 @@ emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); %} - enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{ + enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ // Basic idea: lo(result) = lo(src * y_lo) // hi(result) = hi(src * y_lo) + lo(src * y_hi) // IMUL $tmp,EDX,$src @@ -2836,7 +2905,7 @@ emit_d8(cbuf, 4*4); %} - enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{ + enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ // MOV $tmp,$src.lo emit_opcode(cbuf, 0x8B); emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); @@ -2857,7 +2926,7 @@ emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); %} - enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{ + enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits emit_opcode( cbuf, 0x3B ); emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); @@ -2869,7 +2938,7 @@ emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); %} - enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{ + enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ // XOR $tmp,$tmp emit_opcode(cbuf,0x33); // XOR emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); @@ -3762,9 +3831,9 @@ // in SSE2+ mode we want to keep the FPU stack clean so pretend // that C functions return float and double results in XMM0. 
if( ideal_reg == Op_RegD && UseSSE>=2 ) - return OptoRegPair(XMM0b_num,XMM0a_num); + return OptoRegPair(XMM0b_num,XMM0_num); if( ideal_reg == Op_RegF && UseSSE>=2 ) - return OptoRegPair(OptoReg::Bad,XMM0a_num); + return OptoRegPair(OptoReg::Bad,XMM0_num); return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); %} @@ -3775,9 +3844,9 @@ static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; if( ideal_reg == Op_RegD && UseSSE>=2 ) - return OptoRegPair(XMM0b_num,XMM0a_num); + return OptoRegPair(XMM0b_num,XMM0_num); if( ideal_reg == Op_RegF && UseSSE>=1 ) - return OptoRegPair(OptoReg::Bad,XMM0a_num); + return OptoRegPair(OptoReg::Bad,XMM0_num); return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); %} @@ -4147,8 +4216,8 @@ // Register Operands // Integer Register -operand eRegI() %{ - constraint(ALLOC_IN_RC(e_reg)); +operand rRegI() %{ + constraint(ALLOC_IN_RC(int_reg)); match(RegI); match(xRegI); match(eAXRegI); @@ -4163,8 +4232,8 @@ %} // Subset of Integer Register -operand xRegI(eRegI reg) %{ - constraint(ALLOC_IN_RC(x_reg)); +operand xRegI(rRegI reg) %{ + constraint(ALLOC_IN_RC(int_x_reg)); match(reg); match(eAXRegI); match(eBXRegI); @@ -4179,7 +4248,7 @@ operand eAXRegI(xRegI reg) %{ constraint(ALLOC_IN_RC(eax_reg)); match(reg); - match(eRegI); + match(rRegI); format %{ "EAX" %} interface(REG_INTER); @@ -4189,7 +4258,7 @@ operand eBXRegI(xRegI reg) %{ constraint(ALLOC_IN_RC(ebx_reg)); match(reg); - match(eRegI); + match(rRegI); format %{ "EBX" %} interface(REG_INTER); @@ -4198,7 +4267,7 @@ operand eCXRegI(xRegI reg) %{ constraint(ALLOC_IN_RC(ecx_reg)); match(reg); - match(eRegI); + match(rRegI); format %{ "ECX" %} interface(REG_INTER); @@ -4207,7 +4276,7 @@ operand eDXRegI(xRegI reg) %{ constraint(ALLOC_IN_RC(edx_reg)); match(reg); - match(eRegI); + match(rRegI); format %{ "EDX" %} interface(REG_INTER); @@ -4216,7 +4285,7 @@ operand eDIRegI(xRegI reg) %{ constraint(ALLOC_IN_RC(edi_reg)); match(reg); - match(eRegI); + match(rRegI); format %{ "EDI" %} interface(REG_INTER); @@ -4263,7 +4332,7 @@ operand eSIRegI(xRegI reg) %{ constraint(ALLOC_IN_RC(esi_reg)); match(reg); - match(eRegI); + match(rRegI); format %{ "ESI" %} interface(REG_INTER); @@ -4284,7 +4353,7 @@ %} operand eRegP() %{ - constraint(ALLOC_IN_RC(e_reg)); + constraint(ALLOC_IN_RC(int_reg)); match(RegP); match(eAXRegP); match(eBXRegP); @@ -4297,7 +4366,7 @@ // On windows95, EBP is not safe to use for implicit null tests. 
operand eRegP_no_EBP() %{ - constraint(ALLOC_IN_RC(e_reg_no_rbp)); + constraint(ALLOC_IN_RC(int_reg_no_rbp)); match(RegP); match(eAXRegP); match(eBXRegP); @@ -4477,7 +4546,7 @@ // Float register operands operand regDPR() %{ predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(dbl_reg)); + constraint(ALLOC_IN_RC(fp_dbl_reg)); match(RegD); match(regDPR1); match(regDPR2); @@ -4487,7 +4556,7 @@ operand regDPR1(regDPR reg) %{ predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(dbl_reg0)); + constraint(ALLOC_IN_RC(fp_dbl_reg0)); match(reg); format %{ "FPR1" %} interface(REG_INTER); @@ -4495,7 +4564,7 @@ operand regDPR2(regDPR reg) %{ predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(dbl_reg1)); + constraint(ALLOC_IN_RC(fp_dbl_reg1)); match(reg); format %{ "FPR2" %} interface(REG_INTER); @@ -4503,45 +4572,16 @@ operand regnotDPR1(regDPR reg) %{ predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(dbl_notreg0)); + constraint(ALLOC_IN_RC(fp_dbl_notreg0)); match(reg); format %{ %} interface(REG_INTER); %} -// XMM Double register operands -operand regD() %{ - predicate( UseSSE>=2 ); - constraint(ALLOC_IN_RC(xdb_reg)); - match(RegD); - match(regD6); - match(regD7); - format %{ %} - interface(REG_INTER); -%} - -// XMM6 double register operands -operand regD6(regD reg) %{ - predicate( UseSSE>=2 ); - constraint(ALLOC_IN_RC(xdb_reg6)); - match(reg); - format %{ "XMM6" %} - interface(REG_INTER); -%} - -// XMM7 double register operands -operand regD7(regD reg) %{ - predicate( UseSSE>=2 ); - constraint(ALLOC_IN_RC(xdb_reg7)); - match(reg); - format %{ "XMM7" %} - interface(REG_INTER); -%} - // Float register operands operand regFPR() %{ predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(flt_reg)); + constraint(ALLOC_IN_RC(fp_flt_reg)); match(RegF); match(regFPR1); format %{ %} @@ -4551,21 +4591,30 @@ // Float register operands operand regFPR1(regFPR reg) %{ predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(flt_reg0)); + constraint(ALLOC_IN_RC(fp_flt_reg0)); match(reg); format %{ "FPR1" %} interface(REG_INTER); %} -// XMM register operands +// XMM Float register operands operand regF() %{ predicate( UseSSE>=1 ); - constraint(ALLOC_IN_RC(xmm_reg)); + constraint(ALLOC_IN_RC(float_reg)); match(RegF); format %{ %} interface(REG_INTER); %} +// XMM Double register operands +operand regD() %{ + predicate( UseSSE>=2 ); + constraint(ALLOC_IN_RC(double_reg)); + match(RegD); + format %{ %} + interface(REG_INTER); +%} + //----------Memory Operands---------------------------------------------------- // Direct Memory Operand @@ -4583,7 +4632,7 @@ // Indirect Memory Operand operand indirect(eRegP reg) %{ - constraint(ALLOC_IN_RC(e_reg)); + constraint(ALLOC_IN_RC(int_reg)); match(reg); format %{ "[$reg]" %} @@ -4622,7 +4671,7 @@ %} // Indirect Memory Plus Long Offset Operand -operand indOffset32X(eRegI reg, immP off) %{ +operand indOffset32X(rRegI reg, immP off) %{ match(AddP off reg); format %{ "[$reg + $off]" %} @@ -4635,7 +4684,7 @@ %} // Indirect Memory Plus Index Register Plus Offset Operand -operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{ +operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ match(AddP (AddP reg ireg) off); op_cost(10); @@ -4649,7 +4698,7 @@ %} // Indirect Memory Plus Index Register Plus Offset Operand -operand indIndex(eRegP reg, eRegI ireg) %{ +operand indIndex(eRegP reg, rRegI ireg) %{ match(AddP reg ireg); op_cost(10); @@ -4667,7 +4716,7 @@ // // ------------------------------------------------------------------------- // // Scaled Memory Operands // // Indirect Memory Times Scale Plus 
Offset Operand -// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{ +// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ // match(AddP off (LShiftI ireg scale)); // // op_cost(10); @@ -4681,7 +4730,7 @@ // %} // Indirect Memory Times Scale Plus Index Register -operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{ +operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ match(AddP reg (LShiftI ireg scale)); op_cost(10); @@ -4695,7 +4744,7 @@ %} // Indirect Memory Times Scale Plus Index Register Plus Offset Operand -operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{ +operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ match(AddP (AddP reg (LShiftI ireg scale)) off); op_cost(10); @@ -4823,7 +4872,7 @@ // Indirect Memory Operand operand indirect_win95_safe(eRegP_no_EBP reg) %{ - constraint(ALLOC_IN_RC(e_reg)); + constraint(ALLOC_IN_RC(int_reg)); match(reg); op_cost(100); @@ -4867,7 +4916,7 @@ %} // Indirect Memory Plus Index Register Plus Offset Operand -operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off) +operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) %{ match(AddP (AddP reg ireg) off); @@ -4882,7 +4931,7 @@ %} // Indirect Memory Times Scale Plus Index Register -operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale) +operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) %{ match(AddP reg (LShiftI ireg scale)); @@ -4897,7 +4946,7 @@ %} // Indirect Memory Times Scale Plus Index Register Plus Offset Operand -operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale) +operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) %{ match(AddP (AddP reg (LShiftI ireg scale)) off); @@ -5086,7 +5135,7 @@ // Or: _mem if it requires the big decoder and a memory unit. 
// Integer ALU reg operation -pipe_class ialu_reg(eRegI dst) %{ +pipe_class ialu_reg(rRegI dst) %{ single_instruction; dst : S4(write); dst : S3(read); @@ -5104,7 +5153,7 @@ %} // Integer ALU reg operation using big decoder -pipe_class ialu_reg_fat(eRegI dst) %{ +pipe_class ialu_reg_fat(rRegI dst) %{ single_instruction; dst : S4(write); dst : S3(read); @@ -5122,7 +5171,7 @@ %} // Integer ALU reg-reg operation -pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{ +pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ single_instruction; dst : S4(write); src : S3(read); @@ -5140,7 +5189,7 @@ %} // Integer ALU reg-reg operation -pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{ +pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ single_instruction; dst : S4(write); src : S3(read); @@ -5158,7 +5207,7 @@ %} // Integer ALU reg-mem operation -pipe_class ialu_reg_mem(eRegI dst, memory mem) %{ +pipe_class ialu_reg_mem(rRegI dst, memory mem) %{ single_instruction; dst : S5(write); mem : S3(read); @@ -5187,7 +5236,7 @@ %} // Integer Store to Memory -pipe_class ialu_mem_reg(memory mem, eRegI src) %{ +pipe_class ialu_mem_reg(memory mem, rRegI src) %{ single_instruction; mem : S3(read); src : S5(read); @@ -5216,7 +5265,7 @@ %} // Integer ALU0 reg-reg operation -pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{ +pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ single_instruction; dst : S4(write); src : S3(read); @@ -5225,7 +5274,7 @@ %} // Integer ALU0 reg-mem operation -pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{ +pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ single_instruction; dst : S5(write); mem : S3(read); @@ -5235,7 +5284,7 @@ %} // Integer ALU reg-reg operation -pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{ +pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{ single_instruction; cr : S4(write); src1 : S3(read); @@ -5245,7 +5294,7 @@ %} // Integer ALU reg-imm operation -pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{ +pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ single_instruction; cr : S4(write); src1 : S3(read); @@ -5254,7 +5303,7 @@ %} // Integer ALU reg-mem operation -pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{ +pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ single_instruction; cr : S4(write); src1 : S3(read); @@ -5265,7 +5314,7 @@ %} // Conditional move reg-reg -pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{ +pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ instruction_count(4); y : S4(read); q : S3(read); @@ -5274,7 +5323,7 @@ %} // Conditional move reg-reg -pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{ +pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ single_instruction; dst : S4(write); src : S3(read); @@ -5283,7 +5332,7 @@ %} // Conditional move reg-mem -pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{ +pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ single_instruction; dst : S4(write); src : S3(read); @@ -5534,7 +5583,7 @@ // in the encode section of the architecture description. 
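The BSWAP patterns that follow match the ReverseBytesI/ReverseBytesUS/ReverseBytesS ideal nodes, i.e. Integer.reverseBytes and its short/char variants. For reference, here is a portable sketch of the operation the 32-bit form performs in a single BSWAP instruction (illustrative C, not HotSpot code):

#include <stdint.h>

// Reverse the byte order of a 32-bit value; hardware BSWAP computes the same.
static uint32_t reverse_bytes_i(uint32_t x) {
  return ((x & 0x000000FFu) << 24) |
         ((x & 0x0000FF00u) <<  8) |
         ((x & 0x00FF0000u) >>  8) |
         ((x & 0xFF000000u) >> 24);
}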
//----------BSWAP-Instruction-------------------------------------------------- -instruct bytes_reverse_int(eRegI dst) %{ +instruct bytes_reverse_int(rRegI dst) %{ match(Set dst (ReverseBytesI dst)); format %{ "BSWAP $dst" %} @@ -5555,7 +5604,7 @@ ins_pipe( ialu_reg_reg); %} -instruct bytes_reverse_unsigned_short(eRegI dst, eFlagsReg cr) %{ +instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ match(Set dst (ReverseBytesUS dst)); effect(KILL cr); @@ -5568,7 +5617,7 @@ ins_pipe( ialu_reg ); %} -instruct bytes_reverse_short(eRegI dst, eFlagsReg cr) %{ +instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ match(Set dst (ReverseBytesS dst)); effect(KILL cr); @@ -5584,7 +5633,7 @@ //---------- Zeros Count Instructions ------------------------------------------ -instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ predicate(UseCountLeadingZerosInstruction); match(Set dst (CountLeadingZerosI src)); effect(KILL cr); @@ -5596,7 +5645,7 @@ ins_pipe(ialu_reg); %} -instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ predicate(!UseCountLeadingZerosInstruction); match(Set dst (CountLeadingZerosI src)); effect(KILL cr); @@ -5621,7 +5670,7 @@ ins_pipe(ialu_reg); %} -instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{ +instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ predicate(UseCountLeadingZerosInstruction); match(Set dst (CountLeadingZerosL src)); effect(TEMP dst, KILL cr); @@ -5644,7 +5693,7 @@ ins_pipe(ialu_reg); %} -instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{ +instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ predicate(!UseCountLeadingZerosInstruction); match(Set dst (CountLeadingZerosL src)); effect(TEMP dst, KILL cr); @@ -5680,7 +5729,7 @@ ins_pipe(ialu_reg); %} -instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (CountTrailingZerosI src)); effect(KILL cr); @@ -5699,7 +5748,7 @@ ins_pipe(ialu_reg); %} -instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{ +instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ match(Set dst (CountTrailingZerosL src)); effect(TEMP dst, KILL cr); @@ -5731,7 +5780,7 @@ //---------- Population Count Instructions ------------------------------------- -instruct popCountI(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountI src)); effect(KILL cr); @@ -5743,7 +5792,7 @@ ins_pipe(ialu_reg); %} -instruct popCountI_mem(eRegI dst, memory mem, eFlagsReg cr) %{ +instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountI (LoadI mem))); effect(KILL cr); @@ -5756,7 +5805,7 @@ %} // Note: Long.bitCount(long) returns an int. -instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ +instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountL src)); effect(KILL cr, TEMP tmp, TEMP dst); @@ -5773,7 +5822,7 @@ %} // Note: Long.bitCount(long) returns an int. 
-instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{ +instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ predicate(UsePopCountInstruction); match(Set dst (PopCountL (LoadL mem))); effect(KILL cr, TEMP tmp, TEMP dst); @@ -5877,7 +5926,7 @@ %} // Load Short (16bit signed) -instruct loadS(eRegI dst, memory mem) %{ +instruct loadS(rRegI dst, memory mem) %{ match(Set dst (LoadS mem)); ins_cost(125); @@ -5891,7 +5940,7 @@ %} // Load Short (16 bit signed) to Byte (8 bit signed) -instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{ +instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ins_cost(125); @@ -5922,7 +5971,7 @@ %} // Load Unsigned Short/Char (16bit unsigned) -instruct loadUS(eRegI dst, memory mem) %{ +instruct loadUS(rRegI dst, memory mem) %{ match(Set dst (LoadUS mem)); ins_cost(125); @@ -5936,7 +5985,7 @@ %} // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) -instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{ +instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); ins_cost(125); @@ -5997,7 +6046,7 @@ %} // Load Integer -instruct loadI(eRegI dst, memory mem) %{ +instruct loadI(rRegI dst, memory mem) %{ match(Set dst (LoadI mem)); ins_cost(125); @@ -6011,7 +6060,7 @@ %} // Load Integer (32 bit signed) to Byte (8 bit signed) -instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{ +instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ins_cost(125); @@ -6023,7 +6072,7 @@ %} // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) -instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{ +instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ match(Set dst (AndI (LoadI mem) mask)); ins_cost(125); @@ -6035,7 +6084,7 @@ %} // Load Integer (32 bit signed) to Short (16 bit signed) -instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{ +instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ins_cost(125); @@ -6047,7 +6096,7 @@ %} // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) -instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{ +instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ match(Set dst (AndI (LoadI mem) mask)); ins_cost(125); @@ -6208,7 +6257,7 @@ %} // Load Range -instruct loadRange(eRegI dst, memory mem) %{ +instruct loadRange(rRegI dst, memory mem) %{ match(Set dst (LoadRange mem)); ins_cost(125); @@ -6305,66 +6354,6 @@ ins_pipe( fpu_reg_mem ); %} -// Load Aligned Packed Byte to XMM register -instruct loadA8B(regD dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (Load8B mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed8B" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Short to XMM register -instruct loadA4S(regD dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (Load4S mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed4S" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Char to XMM register -instruct loadA4C(regD dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (Load4C mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! 
packed4C" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Integer to XMM register -instruct load2IU(regD dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (Load2I mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed2I" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Single to XMM -instruct loadA2F(regD dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (Load2F mem)); - ins_cost(145); - format %{ "MOVQ $dst,$mem\t! packed2F" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - // Load Effective Address instruct leaP8(eRegP dst, indOffset8 mem) %{ match(Set dst mem); @@ -6417,7 +6406,7 @@ %} // Load Constant -instruct loadConI(eRegI dst, immI src) %{ +instruct loadConI(rRegI dst, immI src) %{ match(Set dst src); format %{ "MOV $dst,$src" %} @@ -6426,7 +6415,7 @@ %} // Load Constant zero -instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{ +instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ match(Set dst src); effect(KILL cr); @@ -6594,7 +6583,7 @@ %} // Load Stack Slot -instruct loadSSI(eRegI dst, stackSlotI src) %{ +instruct loadSSI(rRegI dst, stackSlotI src) %{ match(Set dst src); ins_cost(125); @@ -6821,7 +6810,7 @@ %} // Store Char/Short -instruct storeC(memory mem, eRegI src) %{ +instruct storeC(memory mem, rRegI src) %{ match(Set mem (StoreC mem src)); ins_cost(125); @@ -6832,7 +6821,7 @@ %} // Store Integer -instruct storeI(memory mem, eRegI src) %{ +instruct storeI(memory mem, rRegI src) %{ match(Set mem (StoreI mem src)); ins_cost(125); @@ -6976,42 +6965,6 @@ ins_pipe( ialu_mem_imm ); %} -// Store Aligned Packed Byte XMM register to memory -instruct storeA8B(memory mem, regD src) %{ - predicate(UseSSE>=1); - match(Set mem (Store8B mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed8B" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Store Aligned Packed Char/Short XMM register to memory -instruct storeA4C(memory mem, regD src) %{ - predicate(UseSSE>=1); - match(Set mem (Store4C mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed4C" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Store Aligned Packed Integer XMM register to memory -instruct storeA2I(memory mem, regD src) %{ - predicate(UseSSE>=1); - match(Set mem (Store2I mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed2I" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Store CMS card-mark Immediate instruct storeImmCM(memory mem, immI8 src) %{ match(Set mem (StoreCM mem src)); @@ -7073,18 +7026,6 @@ ins_pipe( pipe_slow ); %} -// Store Aligned Packed Single Float XMM register to memory -instruct storeA2F(memory mem, regD src) %{ - predicate(UseSSE>=1); - match(Set mem (Store2F mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! 
packed2F" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Store Float instruct storeFPR( memory mem, regFPR1 src) %{ predicate(UseSSE==0); @@ -7146,7 +7087,7 @@ %} // Store Integer to stack slot -instruct storeSSI(stackSlotI dst, eRegI src) %{ +instruct storeSSI(stackSlotI dst, rRegI src) %{ match(Set dst src); ins_cost(100); @@ -7271,7 +7212,7 @@ ins_pipe(empty); %} -instruct castP2X(eRegI dst, eRegP src ) %{ +instruct castP2X(rRegI dst, eRegP src ) %{ match(Set dst (CastP2X src)); ins_cost(50); format %{ "MOV $dst, $src\t# CastP2X" %} @@ -7281,7 +7222,7 @@ //----------Conditional Move--------------------------------------------------- // Conditional move -instruct jmovI_reg(cmpOp cop, eFlagsReg cr, eRegI dst, eRegI src) %{ +instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ predicate(!VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -7298,7 +7239,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src) %{ +instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ predicate(!VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -7315,7 +7256,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{ +instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -7325,7 +7266,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{ +instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -7335,7 +7276,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{ +instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ins_cost(200); @@ -7345,7 +7286,7 @@ %} // Conditional move -instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{ +instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); ins_cost(250); @@ -7356,7 +7297,7 @@ %} // Conditional move -instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{ +instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); ins_cost(250); @@ -7366,7 +7307,7 @@ ins_pipe( pipe_cmov_mem ); %} -instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{ +instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); ins_cost(250); @@ -7620,7 +7561,7 @@ //----------Arithmetic Instructions-------------------------------------------- //----------Addition Instructions---------------------------------------------- // Integer Addition Instructions -instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (AddI dst 
src)); effect(KILL cr); @@ -7631,7 +7572,7 @@ ins_pipe( ialu_reg_reg ); %} -instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ +instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (AddI dst src)); effect(KILL cr); @@ -7641,7 +7582,7 @@ ins_pipe( ialu_reg ); %} -instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{ +instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{ predicate(UseIncDec); match(Set dst (AddI dst src)); effect(KILL cr); @@ -7653,7 +7594,7 @@ ins_pipe( ialu_reg ); %} -instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{ +instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ match(Set dst (AddI src0 src1)); ins_cost(110); @@ -7673,7 +7614,7 @@ ins_pipe( ialu_reg_reg ); %} -instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{ +instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ predicate(UseIncDec); match(Set dst (AddI dst src)); effect(KILL cr); @@ -7685,7 +7626,7 @@ ins_pipe( ialu_reg ); %} -instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{ +instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ match(Set dst (AddP dst src)); effect(KILL cr); @@ -7707,7 +7648,7 @@ ins_pipe( ialu_reg ); %} -instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ +instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (AddI dst (LoadI src))); effect(KILL cr); @@ -7718,7 +7659,7 @@ ins_pipe( ialu_reg_mem ); %} -instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ +instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (AddI (LoadI dst) src))); effect(KILL cr); @@ -7780,7 +7721,7 @@ ins_pipe( empty ); %} -instruct castII( eRegI dst ) %{ +instruct castII( rRegI dst ) %{ match(Set dst (CastII dst)); format %{ "#castII of $dst" %} ins_encode( /*empty encoding*/ ); @@ -7814,7 +7755,7 @@ // Conditional-store of an int value. // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 
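For readers tracing the storeIConditional pattern that follows, the CMPXCHG semantics the comment above refers to can be modeled in a few lines of C++. This is an illustrative sketch only (the function name and shape are invented for exposition; the VM emits the real atomic instruction through the matcher):

    // Models LOCK CMPXCHG [mem], newval with the expected value preloaded in EAX,
    // which is why the instruct binds oldval to the eAXRegI operand class.
    // In hardware the read-compare-write below is a single atomic step.
    bool cmpxchg_model(int* mem, int& eax, int newval) {
        int observed = *mem;
        if (observed == eax) {
            *mem = newval;
            return true;     // ZF = 1: the conditional store happened
        }
        eax = observed;      // EAX receives the value actually in memory
        return false;        // ZF = 0: the store was suppressed
    }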
-instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{ +instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ match(Set cr (StoreIConditional mem (Binary oldval newval))); effect(KILL oldval); format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} @@ -7847,7 +7788,7 @@ // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them -instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ +instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" @@ -7860,7 +7801,7 @@ ins_pipe( pipe_cmpxchg ); %} -instruct compareAndSwapP( eRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ +instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" @@ -7872,7 +7813,7 @@ ins_pipe( pipe_cmpxchg ); %} -instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ +instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" @@ -7886,7 +7827,7 @@ //----------Subtraction Instructions------------------------------------------- // Integer Subtraction Instructions -instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (SubI dst src)); effect(KILL cr); @@ -7897,7 +7838,7 @@ ins_pipe( ialu_reg_reg ); %} -instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ +instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (SubI dst src)); effect(KILL cr); @@ -7908,7 +7849,7 @@ ins_pipe( ialu_reg ); %} -instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ +instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (SubI dst (LoadI src))); effect(KILL cr); @@ -7919,7 +7860,7 @@ ins_pipe( ialu_reg_mem ); %} -instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ +instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (SubI (LoadI dst) src))); effect(KILL cr); @@ -7931,7 +7872,7 @@ %} // Subtract from a pointer -instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{ +instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ match(Set dst (AddP dst (SubI zero src))); effect(KILL cr); @@ -7942,7 +7883,7 @@ ins_pipe( ialu_reg_reg ); %} -instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{ +instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ match(Set dst (SubI zero dst)); effect(KILL cr); @@ -7957,7 +7898,7 @@ //----------Multiplication/Division Instructions------------------------------- // Integer Multiplication Instructions // Multiply Register -instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (MulI dst src)); 
effect(KILL cr); @@ -7970,7 +7911,7 @@ %} // Multiply 32-bit Immediate -instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{ +instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ match(Set dst (MulI src imm)); effect(KILL cr); @@ -8026,7 +7967,7 @@ %} // Multiply Memory 32-bit Immediate -instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{ +instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ match(Set dst (MulI (LoadI src) imm)); effect(KILL cr); @@ -8038,7 +7979,7 @@ %} // Multiply Memory -instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{ +instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (MulI dst (LoadI src))); effect(KILL cr); @@ -8075,7 +8016,7 @@ %} // Multiply Register Long -instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ +instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ match(Set dst (MulL dst src)); effect(KILL cr, TEMP tmp); ins_cost(4*100+3*400); @@ -8093,7 +8034,7 @@ %} // Multiply Register Long where the left operand's high 32 bits are zero -instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ +instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ predicate(is_operand_hi32_zero(n->in(1))); match(Set dst (MulL dst src)); effect(KILL cr, TEMP tmp); @@ -8114,7 +8055,7 @@ %} // Multiply Register Long where the right operand's high 32 bits are zero -instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ +instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ predicate(is_operand_hi32_zero(n->in(2))); match(Set dst (MulL dst src)); effect(KILL cr, TEMP tmp); @@ -8150,7 +8091,7 @@ %} // Multiply Register Long by small constant -instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{ +instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ match(Set dst (MulL dst src)); effect(KILL cr, TEMP tmp); ins_cost(2*100+2*400); @@ -8248,7 +8189,7 @@ %} // Divide Register Long (no special case since divisor != -1) -instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{ +instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ match(Set dst (DivL dst imm)); effect( TEMP tmp, TEMP tmp2, KILL cr ); ins_cost(1000); @@ -8319,7 +8260,7 @@ %} // Remainder Register Long (remainder fits into 32 bits) -instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{ +instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ match(Set dst (ModL dst imm)); effect( TEMP tmp, TEMP tmp2, KILL cr ); ins_cost(1000); @@ -8387,7 +8328,7 @@ // Integer Shift Instructions // Shift Left by one -instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{ +instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ match(Set dst (LShiftI dst shift)); effect(KILL cr); @@ -8399,7 +8340,7 @@ %} // Shift Left by 8-bit immediate -instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{ +instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ match(Set dst (LShiftI dst shift)); effect(KILL cr); @@ -8411,7 +8352,7 @@ %} // Shift Left by variable -instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{ +instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ match(Set dst (LShiftI dst shift)); effect(KILL cr); @@ -8423,7 +8364,7 @@ %} // Arithmetic shift right by one -instruct 
sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{ +instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ match(Set dst (RShiftI dst shift)); effect(KILL cr); @@ -8445,7 +8386,7 @@ %} // Arithmetic Shift Right by 8-bit immediate -instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{ +instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ match(Set dst (RShiftI dst shift)); effect(KILL cr); @@ -8468,7 +8409,7 @@ %} // Arithmetic Shift Right by variable -instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{ +instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ match(Set dst (RShiftI dst shift)); effect(KILL cr); @@ -8480,7 +8421,7 @@ %} // Logical shift right by one -instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{ +instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ match(Set dst (URShiftI dst shift)); effect(KILL cr); @@ -8492,7 +8433,7 @@ %} // Logical Shift Right by 8-bit immediate -instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{ +instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ match(Set dst (URShiftI dst shift)); effect(KILL cr); @@ -8506,7 +8447,7 @@ // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. // This idiom is used by the compiler for the i2b bytecode. -instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{ +instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); size(3); @@ -8519,7 +8460,7 @@ // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. // This idiom is used by the compiler for the i2s bytecode. -instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{ +instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); size(3); @@ -8532,7 +8473,7 @@ // Logical Shift Right by variable -instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{ +instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ match(Set dst (URShiftI dst shift)); effect(KILL cr); @@ -8548,7 +8489,7 @@ //----------Integer Logical Instructions--------------------------------------- // And Instructions // And Register with Register -instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (AndI dst src)); effect(KILL cr); @@ -8560,7 +8501,7 @@ %} // And Register with Immediate -instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ +instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (AndI dst src)); effect(KILL cr); @@ -8572,7 +8513,7 @@ %} // And Register with Memory -instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ +instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (AndI dst (LoadI src))); effect(KILL cr); @@ -8584,7 +8525,7 @@ %} // And Memory with Register -instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ +instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (AndI (LoadI dst) src))); effect(KILL cr); @@ -8610,7 +8551,7 @@ // Or Instructions // Or Register with Register -instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (OrI dst src)); effect(KILL cr); @@ -8621,7 +8562,7 @@ ins_pipe( ialu_reg_reg ); %} -instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{ +instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ match(Set dst (OrI dst (CastP2X 
src))); effect(KILL cr); @@ -8634,7 +8575,7 @@ // Or Register with Immediate -instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ +instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (OrI dst src)); effect(KILL cr); @@ -8646,7 +8587,7 @@ %} // Or Register with Memory -instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ +instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (OrI dst (LoadI src))); effect(KILL cr); @@ -8658,7 +8599,7 @@ %} // Or Memory with Register -instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ +instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (OrI (LoadI dst) src))); effect(KILL cr); @@ -8684,7 +8625,7 @@ // ROL/ROR // ROL expand -instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{ +instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ effect(USE_DEF dst, USE shift, KILL cr); format %{ "ROL $dst, $shift" %} @@ -8693,7 +8634,7 @@ ins_pipe( ialu_reg ); %} -instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{ +instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ effect(USE_DEF dst, USE shift, KILL cr); format %{ "ROL $dst, $shift" %} @@ -8713,7 +8654,7 @@ // end of ROL expand // ROL 32bit by one once -instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ +instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); expand %{ @@ -8722,7 +8663,7 @@ %} // ROL 32bit var by imm8 once -instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ +instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); @@ -8750,7 +8691,7 @@ %} // ROR expand -instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{ +instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ effect(USE_DEF dst, USE shift, KILL cr); format %{ "ROR $dst, $shift" %} @@ -8759,7 +8700,7 @@ ins_pipe( ialu_reg ); %} -instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{ +instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ effect (USE_DEF dst, USE shift, KILL cr); format %{ "ROR $dst, $shift" %} @@ -8779,7 +8720,7 @@ // end of ROR expand // ROR right once -instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ +instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); expand %{ @@ -8788,7 +8729,7 @@ %} // ROR 32bit by immI8 once -instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ +instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); @@ -8817,7 +8758,7 @@ // Xor Instructions // Xor Register with Register -instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{ +instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ match(Set dst (XorI dst src)); effect(KILL cr); @@ -8829,7 +8770,7 @@ %} // Xor Register with Immediate -1 -instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{ +instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ match(Set dst (XorI dst imm)); size(2); @@ -8841,7 +8782,7 @@ %} // Xor Register with Immediate 
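A note on the xorI_eReg_im1 form above: it is matched separately because XOR with an all-ones immediate is bitwise complement, which admits the two-byte encoding declared by its size(2), shorter than the general XOR-with-immediate form that follows next. A quick C++ check of the identity (illustrative only, not part of the changeset):

    #include <cassert>
    #include <cstdint>

    int main() {
        // x ^ -1 flips every bit, i.e. it equals ~x -- the identity that lets
        // the matcher treat XorI with immI_M1 as its own, shorter instruction.
        for (int32_t x : {0, 1, -1, 0x12345678, INT32_MIN}) {
            assert((x ^ -1) == ~x);
        }
        return 0;
    }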
-instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{ +instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ match(Set dst (XorI dst src)); effect(KILL cr); @@ -8853,7 +8794,7 @@ %} // Xor Register with Memory -instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{ +instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ match(Set dst (XorI dst (LoadI src))); effect(KILL cr); @@ -8865,7 +8806,7 @@ %} // Xor Memory with Register -instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{ +instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ match(Set dst (StoreI dst (XorI (LoadI dst) src))); effect(KILL cr); @@ -8890,14 +8831,14 @@ //----------Convert Int to Boolean--------------------------------------------- -instruct movI_nocopy(eRegI dst, eRegI src) %{ +instruct movI_nocopy(rRegI dst, rRegI src) %{ effect( DEF dst, USE src ); format %{ "MOV $dst,$src" %} ins_encode( enc_Copy( dst, src) ); ins_pipe( ialu_reg_reg ); %} -instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{ +instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ effect( USE_DEF dst, USE src, KILL cr ); size(4); @@ -8908,7 +8849,7 @@ ins_pipe( ialu_reg_reg_long ); %} -instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{ +instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ match(Set dst (Conv2B src)); expand %{ @@ -8917,14 +8858,14 @@ %} %} -instruct movP_nocopy(eRegI dst, eRegP src) %{ +instruct movP_nocopy(rRegI dst, eRegP src) %{ effect( DEF dst, USE src ); format %{ "MOV $dst,$src" %} ins_encode( enc_Copy( dst, src) ); ins_pipe( ialu_reg_reg ); %} -instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{ +instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ effect( USE_DEF dst, USE src, KILL cr ); format %{ "NEG $dst\n\t" "ADC $dst,$src" %} @@ -8933,7 +8874,7 @@ ins_pipe( ialu_reg_reg_long ); %} -instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{ +instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ match(Set dst (Conv2B src)); expand %{ @@ -8958,7 +8899,7 @@ ins_pipe( pipe_slow ); %} -instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{ +instruct cmpLTMask0( rRegI dst, immI0 zero, eFlagsReg cr ) %{ match(Set dst (CmpLTMask dst zero)); effect( DEF dst, KILL cr ); ins_cost(100); @@ -9430,7 +9371,7 @@ %} // Compare vs zero into -1,0,1 -instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ +instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE<=1); match(Set dst (CmpD3 src1 zero)); effect(KILL cr, KILL rax); @@ -9444,7 +9385,7 @@ %} // Compare into -1,0,1 -instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ +instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE<=1); match(Set dst (CmpD3 src1 src2)); effect(KILL cr, KILL rax); @@ -10222,7 +10163,7 @@ %} // Compare vs zero into -1,0,1 -instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ +instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE == 0); match(Set dst (CmpF3 src1 zero)); effect(KILL cr, KILL rax); @@ -10236,7 +10177,7 @@ %} // Compare into -1,0,1 -instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ +instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ predicate(UseSSE == 0); match(Set dst (CmpF3 src1 src2)); effect(KILL cr, KILL rax); @@ -11156,7 +11097,7 @@ ins_pipe( 
fpu_reg_mem ); %} -instruct convI2D_reg(regD dst, eRegI src) %{ +instruct convI2D_reg(regD dst, rRegI src) %{ predicate( UseSSE>=2 && !UseXmmI2D ); match(Set dst (ConvI2D src)); format %{ "CVTSI2SD $dst,$src" %} @@ -11176,7 +11117,7 @@ ins_pipe( pipe_slow ); %} -instruct convXI2D_reg(regD dst, eRegI src) +instruct convXI2D_reg(regD dst, rRegI src) %{ predicate( UseSSE>=2 && UseXmmI2D ); match(Set dst (ConvI2D src)); @@ -11264,7 +11205,7 @@ %} // Convert an int to a float in xmm; no rounding step needed. -instruct convI2F_reg(regF dst, eRegI src) %{ +instruct convI2F_reg(regF dst, rRegI src) %{ predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); match(Set dst (ConvI2F src)); format %{ "CVTSI2SS $dst, $src" %} @@ -11274,7 +11215,7 @@ ins_pipe( pipe_slow ); %} - instruct convXI2F_reg(regF dst, eRegI src) + instruct convXI2F_reg(regF dst, rRegI src) %{ predicate( UseSSE>=2 && UseXmmI2F ); match(Set dst (ConvI2F src)); @@ -11288,7 +11229,7 @@ ins_pipe(pipe_slow); // XXX %} -instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{ +instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ match(Set dst (ConvI2L src)); effect(KILL cr); ins_cost(375); @@ -11300,7 +11241,7 @@ %} // Zero-extend convert int to long -instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{ +instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ match(Set dst (AndL (ConvI2L src) mask) ); effect( KILL flags ); ins_cost(250); @@ -11380,7 +11321,7 @@ ins_pipe( pipe_slow ); %} -instruct convL2I_reg( eRegI dst, eRegL src ) %{ +instruct convL2I_reg( rRegI dst, eRegL src ) %{ match(Set dst (ConvL2I src)); effect( DEF dst, USE src ); format %{ "MOV $dst,$src.lo" %} @@ -11389,7 +11330,7 @@ %} -instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{ +instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ match(Set dst (MoveF2I src)); effect( DEF dst, USE src ); ins_cost(100); @@ -11424,7 +11365,7 @@ ins_pipe( pipe_slow ); %} -instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{ +instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ predicate(UseSSE>=2); match(Set dst (MoveF2I src)); effect( DEF dst, USE src ); @@ -11436,7 +11377,7 @@ ins_pipe( pipe_slow ); %} -instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{ +instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ match(Set dst (MoveI2F src)); effect( DEF dst, USE src ); @@ -11476,7 +11417,7 @@ ins_pipe( pipe_slow ); %} -instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{ +instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ predicate(UseSSE>=2); match(Set dst (MoveI2F src)); effect( DEF dst, USE src ); @@ -11610,186 +11551,6 @@ ins_pipe( pipe_slow ); %} -// Replicate scalar to packed byte (1 byte) values in xmm -instruct Repl8B_reg(regD dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate8B src)); - format %{ "MOVDQA $dst,$src\n\t" - "PUNPCKLBW $dst,$dst\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} - ins_encode %{ - if ($dst$$reg != $src$$reg) { - __ movdqa($dst$$XMMRegister, $src$$XMMRegister); - } - __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( pipe_slow ); -%} - -// Replicate scalar to packed byte (1 byte) values in xmm -instruct Repl8B_eRegI(regD dst, eRegI src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate8B src)); - format %{ "MOVD $dst,$src\n\t" - "PUNPCKLBW $dst,$dst\n\t" - "PSHUFLW $dst,$dst,0x00\t! 
replicate8B" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( pipe_slow ); -%} - -// Replicate scalar zero to packed byte (1 byte) values in xmm -instruct Repl8B_immI0(regD dst, immI0 zero) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate8B zero)); - format %{ "PXOR $dst,$dst\t! replicate8B" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed short (2 byte) values in xmm -instruct Repl4S_reg(regD dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate4S src)); - format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} - ins_encode %{ - __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed short (2 byte) values in xmm -instruct Repl4S_eRegI(regD dst, eRegI src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate4S src)); - format %{ "MOVD $dst,$src\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar zero to packed short (2 byte) values in xmm -instruct Repl4S_immI0(regD dst, immI0 zero) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate4S zero)); - format %{ "PXOR $dst,$dst\t! replicate4S" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed char (2 byte) values in xmm -instruct Repl4C_reg(regD dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate4C src)); - format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} - ins_encode %{ - __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed char (2 byte) values in xmm -instruct Repl4C_eRegI(regD dst, eRegI src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate4C src)); - format %{ "MOVD $dst,$src\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar zero to packed char (2 byte) values in xmm -instruct Repl4C_immI0(regD dst, immI0 zero) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate4C zero)); - format %{ "PXOR $dst,$dst\t! replicate4C" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed integer (4 byte) values in xmm -instruct Repl2I_reg(regD dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate2I src)); - format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed integer (4 byte) values in xmm -instruct Repl2I_eRegI(regD dst, eRegI src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate2I src)); - format %{ "MOVD $dst,$src\n\t" - "PSHUFD $dst,$dst,0x00\t! 
replicate2I" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar zero to packed integer (4 byte) values in xmm -instruct Repl2I_immI0(regD dst, immI0 zero) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate2I zero)); - format %{ "PXOR $dst,$dst\t! replicate2I" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_reg(regD dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate2F src)); - format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_regF(regD dst, regF src) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate2F src)); - format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_immF0(regD dst, immF0 zero) %{ - predicate(UseSSE>=2); - match(Set dst (Replicate2F zero)); - format %{ "PXOR $dst,$dst\t! replicate2F" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} // ======================================================================= // fast clearing of an array @@ -11898,7 +11659,7 @@ //----------Control Flow Instructions------------------------------------------ // Signed compare Instructions -instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{ +instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ match(Set cr (CmpI op1 op2)); effect( DEF cr, USE op1, USE op2 ); format %{ "CMP $op1,$op2" %} @@ -11907,7 +11668,7 @@ ins_pipe( ialu_cr_reg_reg ); %} -instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{ +instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ match(Set cr (CmpI op1 op2)); effect( DEF cr, USE op1 ); format %{ "CMP $op1,$op2" %} @@ -11918,7 +11679,7 @@ %} // Cisc-spilled version of cmpI_eReg -instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{ +instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ match(Set cr (CmpI op1 (LoadI op2))); format %{ "CMP $op1,$op2" %} @@ -11928,7 +11689,7 @@ ins_pipe( ialu_cr_reg_mem ); %} -instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{ +instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ match(Set cr (CmpI src zero)); effect( DEF cr, USE src ); @@ -11938,7 +11699,7 @@ ins_pipe( ialu_cr_reg_imm ); %} -instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{ +instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ match(Set cr (CmpI (AndI src con) zero)); format %{ "TEST $src,$con" %} @@ -11947,7 +11708,7 @@ ins_pipe( ialu_cr_reg_imm ); %} -instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{ +instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ match(Set cr (CmpI (AndI src mem) zero)); format %{ "TEST $src,$mem" %} @@ -11958,7 +11719,7 @@ // Unsigned compare Instructions; really, same as signed except they // produce an eFlagsRegU instead of eFlagsReg. 
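The comment above is the crux: CMP sets all of the arithmetic flags, and signedness lives entirely in which flags the subsequent test reads (SF and OF for signed, CF and ZF for unsigned). Keeping eFlagsReg and eFlagsRegU as distinct operand classes lets the matcher pair each compare with a branch of matching signedness. A small C++ illustration of why the two interpretations disagree (sketch only, not part of the changeset):

    #include <cstdint>
    #include <cstdio>

    int main() {
        // The same bit pattern 0x80000000 is a huge unsigned value but the most
        // negative signed one, so "greater than 1" gets opposite answers.
        uint32_t ua = 0x80000000u, ub = 1u;
        int32_t  sa = (int32_t)ua, sb = (int32_t)ub;
        printf("unsigned 0x80000000 > 1 : %d\n", ua > ub ? 1 : 0);  // prints 1
        printf("signed   0x80000000 > 1 : %d\n", sa > sb ? 1 : 0);  // prints 0
        return 0;
    }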
-instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{ +instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ match(Set cr (CmpU op1 op2)); format %{ "CMPu $op1,$op2" %} @@ -11967,7 +11728,7 @@ ins_pipe( ialu_cr_reg_reg ); %} -instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{ +instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ match(Set cr (CmpU op1 op2)); format %{ "CMPu $op1,$op2" %} @@ -11977,7 +11738,7 @@ %} // // Cisc-spilled version of cmpU_eReg -instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{ +instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ match(Set cr (CmpU op1 (LoadI op2))); format %{ "CMPu $op1,$op2" %} @@ -11988,7 +11749,7 @@ %} // // Cisc-spilled version of cmpU_eReg -//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{ +//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ // match(Set cr (CmpU (LoadI op1) op2)); // // format %{ "CMPu $op1,$op2" %} @@ -11997,7 +11758,7 @@ // ins_encode( OpcP, RegMem( op1, op2) ); //%} -instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{ +instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{ match(Set cr (CmpU src zero)); format %{ "TESTu $src,$src" %} @@ -12093,7 +11854,7 @@ // *** Min and Max using the conditional move are slower than the // *** branch version on a Pentium III. // // Conditional move for min -//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ +//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ // effect( USE_DEF op2, USE op1, USE cr ); // format %{ "CMOVlt $op2,$op1\t! min" %} // opcode(0x4C,0x0F); @@ -12102,7 +11863,7 @@ //%} // //// Min Register with Register (P6 version) -//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ +//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ // predicate(VM_Version::supports_cmov() ); // match(Set op2 (MinI op1 op2)); // ins_cost(200); @@ -12114,7 +11875,7 @@ //%} // Min Register with Register (generic version) -instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{ +instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ match(Set dst (MinI dst src)); effect(KILL flags); ins_cost(300); @@ -12129,7 +11890,7 @@ // *** Min and Max using the conditional move are slower than the // *** branch version on a Pentium III. // // Conditional move for max -//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ +//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ // effect( USE_DEF op2, USE op1, USE cr ); // format %{ "CMOVgt $op2,$op1\t! max" %} // opcode(0x4F,0x0F); @@ -12138,7 +11899,7 @@ //%} // // // Max Register with Register (P6 version) -//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ +//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ // predicate(VM_Version::supports_cmov() ); // match(Set op2 (MaxI op1 op2)); // ins_cost(200); @@ -12150,7 +11911,7 @@ //%} // Max Register with Register (generic version) -instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{ +instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ match(Set dst (MaxI dst src)); effect(KILL flags); ins_cost(300); @@ -12211,7 +11972,7 @@ // ============================================================================ // Branch Instructions // Jump Table -instruct jumpXtnd(eRegI switch_val) %{ +instruct jumpXtnd(rRegI switch_val) %{ match(Jump switch_val); ins_cost(350); format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} @@ -12629,7 +12390,7 @@ // Manifest a CmpL result in the normal flags. Only good for LT or GE // compares. 
Can be used for LE or GT compares by reversing arguments. // NOT GOOD FOR EQ/NE tests. -instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{ +instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ match( Set flags (CmpL src1 src2 )); effect( TEMP tmp ); ins_cost(300); @@ -12675,7 +12436,7 @@ %} // Compare 2 longs and CMOVE ints. -instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{ +instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); ins_cost(200); @@ -12685,7 +12446,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{ +instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); ins_cost(250); @@ -12746,7 +12507,7 @@ //====== // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. -instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{ +instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{ match( Set flags (CmpL src zero )); effect(TEMP tmp); ins_cost(200); @@ -12803,7 +12564,7 @@ %} // Compare 2 longs and CMOVE ints. -instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{ +instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); ins_cost(200); @@ -12813,7 +12574,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{ +instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); ins_cost(250); @@ -12875,7 +12636,7 @@ //====== // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. // Same as cmpL_reg_flags_LEGT except must negate src -instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{ +instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{ match( Set flags (CmpL src zero )); effect( TEMP tmp ); ins_cost(300); @@ -12889,7 +12650,7 @@ // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands // requires a commuted test to get the same result. 
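The "commuted test" remark above is plain comparison algebra: evaluating the compare on (src2, src1) instead of (src1, src2) answers the mirrored question, so an LE condition against the original operand order must be tested as GE against the swapped order, and GT as LT. That is what the cmpOp_commute operand used by the instructs below encodes. In C++ terms (illustrative sketch):

    #include <cassert>

    int main() {
        // Swapping compare operands requires commuting the condition:
        //   a <= b  <=>  b >= a      and      a > b  <=>  b < a
        long long a = -5, b = 7;
        assert((a <= b) == (b >= a));
        assert((a >  b) == (b <  a));
        return 0;
    }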
-instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{ +instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{ match( Set flags (CmpL src1 src2 )); effect( TEMP tmp ); ins_cost(300); @@ -12936,7 +12697,7 @@ %} // Compare 2 longs and CMOVE ints. -instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{ +instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); ins_cost(200); @@ -12946,7 +12707,7 @@ ins_pipe( pipe_cmov_reg ); %} -instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{ +instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); ins_cost(250); @@ -13275,11 +13036,11 @@ // ---------EXAMPLE---------------------------------------------------------- // // // pertinent parts of existing instructions in architecture description -// instruct movI(eRegI dst, eRegI src) %{ +// instruct movI(rRegI dst, rRegI src) %{ // match(Set dst (CopyI src)); // %} // -// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{ +// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{ // match(Set dst (AddI dst src)); // effect(KILL cr); // %} @@ -13324,11 +13085,11 @@ // %} // // Change load of spilled value to only a spill -// instruct storeI(memory mem, eRegI src) %{ +// instruct storeI(memory mem, rRegI src) %{ // match(Set mem (StoreI mem src)); // %} // -// instruct loadI(eRegI dst, memory mem) %{ +// instruct loadI(rRegI dst, memory mem) %{ // match(Set dst (LoadI mem)); // %} //
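The commented EXAMPLE that closes the 32-bit file above sketches a peephole that turns a spill followed by an immediate reload of the same stack slot into just the spill. The transform can be modeled over a toy instruction list as below; the Ins type and elide_reload name are hypothetical, and a real peephole must also check that the reload targets the register that still holds the value:

    #include <vector>

    struct Ins { enum Kind { StoreI, LoadI } kind; int slot; };

    // Drop a LoadI that immediately follows a StoreI to the same stack slot:
    // the stored value is still live in a register, so only the spill is kept.
    void elide_reload(std::vector<Ins>& code) {
        std::vector<Ins> out;
        for (const Ins& ins : code) {
            if (ins.kind == Ins::LoadI && !out.empty() &&
                out.back().kind == Ins::StoreI && out.back().slot == ins.slot) {
                continue;
            }
            out.push_back(ins);
        }
        code.swap(out);
    }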
--- a/hotspot/src/cpu/x86/vm/x86_64.ad Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/cpu/x86/vm/x86_64.ad Tue Jul 03 18:23:04 2012 -0700 @@ -131,102 +131,6 @@ // Floating Point Registers -// XMM registers. 128-bit registers or 4 words each, labeled (a)-d. -// Word a in each register holds a Float, words ab hold a Double. We -// currently do not use the SIMD capabilities, so registers cd are -// unused at the moment. -// XMM8-XMM15 must be encoded with REX. -// Linux ABI: No register preserved across function calls -// XMM0-XMM7 might hold parameters -// Windows ABI: XMM6-XMM15 preserved across function calls -// XMM0-XMM3 might hold parameters - -reg_def XMM0 (SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); -reg_def XMM0_H (SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()); - -reg_def XMM1 (SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); -reg_def XMM1_H (SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()); - -reg_def XMM2 (SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); -reg_def XMM2_H (SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()); - -reg_def XMM3 (SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); -reg_def XMM3_H (SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()); - -reg_def XMM4 (SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); -reg_def XMM4_H (SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()); - -reg_def XMM5 (SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); -reg_def XMM5_H (SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()); - -#ifdef _WIN64 - -reg_def XMM6 (SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()); -reg_def XMM6_H (SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()); - -reg_def XMM7 (SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()); -reg_def XMM7_H (SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()); - -reg_def XMM8 (SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()); -reg_def XMM8_H (SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()); - -reg_def XMM9 (SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()); -reg_def XMM9_H (SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()); - -reg_def XMM10 (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()); -reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()); - -reg_def XMM11 (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()); -reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()); - -reg_def XMM12 (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()); -reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()); - -reg_def XMM13 (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()); -reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()); - -reg_def XMM14 (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()); -reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()); - -reg_def XMM15 (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()); -reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()); - -#else - -reg_def XMM6 (SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); -reg_def XMM6_H (SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()); - -reg_def XMM7 (SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); -reg_def XMM7_H (SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()); - -reg_def XMM8 (SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); -reg_def XMM8_H (SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()); - -reg_def XMM9 (SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); -reg_def XMM9_H (SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()); - -reg_def XMM10 (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); -reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()); - -reg_def XMM11 (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); -reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()); - -reg_def XMM12 (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); -reg_def XMM12_H(SOC, SOC, Op_RegF, 12, 
xmm12->as_VMReg()->next()); - -reg_def XMM13 (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); -reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()); - -reg_def XMM14 (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); -reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()); - -reg_def XMM15 (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); -reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()); - -#endif // _WIN64 - -reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); - // Specify priority of register selection within phases of register // allocation. Highest priority is first. A useful heuristic is to // give registers a low priority when they are required by machine @@ -252,26 +156,6 @@ R15, R15_H, RSP, RSP_H); -// XXX probably use 8-15 first on Linux -alloc_class chunk1(XMM0, XMM0_H, - XMM1, XMM1_H, - XMM2, XMM2_H, - XMM3, XMM3_H, - XMM4, XMM4_H, - XMM5, XMM5_H, - XMM6, XMM6_H, - XMM7, XMM7_H, - XMM8, XMM8_H, - XMM9, XMM9_H, - XMM10, XMM10_H, - XMM11, XMM11_H, - XMM12, XMM12_H, - XMM13, XMM13_H, - XMM14, XMM14_H, - XMM15, XMM15_H); - -alloc_class chunk2(RFLAGS); - //----------Architecture Description Register Classes-------------------------- // Several register classes are automatically defined based upon information in @@ -501,46 +385,7 @@ // Singleton class for instruction pointer // reg_class ip_reg(RIP); -// Singleton class for condition codes -reg_class int_flags(RFLAGS); - -// Class for all float registers -reg_class float_reg(XMM0, - XMM1, - XMM2, - XMM3, - XMM4, - XMM5, - XMM6, - XMM7, - XMM8, - XMM9, - XMM10, - XMM11, - XMM12, - XMM13, - XMM14, - XMM15); - -// Class for all double registers -reg_class double_reg(XMM0, XMM0_H, - XMM1, XMM1_H, - XMM2, XMM2_H, - XMM3, XMM3_H, - XMM4, XMM4_H, - XMM5, XMM5_H, - XMM6, XMM6_H, - XMM7, XMM7_H, - XMM8, XMM8_H, - XMM9, XMM9_H, - XMM10, XMM10_H, - XMM11, XMM11_H, - XMM12, XMM12_H, - XMM13, XMM13_H, - XMM14, XMM14_H, - XMM15, XMM15_H); -%} - +%} //----------SOURCE BLOCK------------------------------------------------------- // This is a block of C++ code which provides values, functions, and @@ -1027,12 +872,84 @@ return rc_float; } +// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 
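The two shared methods are declared next; the third helper, vec_stack_to_stack_helper, is defined right here. Its Op_VecS case is the interesting one: no scratch register is allocated for the spill copy, so the emitted code parks RAX one word below RSP, routes the 32-bit payload through it, and restores it afterwards. A C++ model of that borrow-a-scratch pattern (illustrative only; all names are invented):

    #include <cstdint>
    #include <cstring>

    // Models the Op_VecS mem->mem sequence emitted below:
    //   movq [rsp-8], rax ; movl rax, [rsp+src] ;
    //   movl [rsp+dst], rax ; movq rax, [rsp-8]
    void copy_vecS(std::uint8_t* stack, std::size_t src_off, std::size_t dst_off,
                   std::uint64_t& rax) {
        std::uint64_t saved = rax;                   // spill RAX below the frame
        std::uint32_t payload;
        std::memcpy(&payload, stack + src_off, sizeof payload);
        rax = payload;                               // the copy travels via RAX
        std::memcpy(stack + dst_off, &payload, sizeof payload);
        rax = saved;                                 // restore RAX
    }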
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, + int src_hi, int dst_hi, uint ireg, outputStream* st); + +static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st); + +static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, + int dst_offset, uint ireg, outputStream* st) { + if (cbuf) { + MacroAssembler _masm(cbuf); + switch (ireg) { + case Op_VecS: + __ movq(Address(rsp, -8), rax); + __ movl(rax, Address(rsp, src_offset)); + __ movl(Address(rsp, dst_offset), rax); + __ movq(rax, Address(rsp, -8)); + break; + case Op_VecD: + __ pushq(Address(rsp, src_offset)); + __ popq (Address(rsp, dst_offset)); + break; + case Op_VecX: + __ pushq(Address(rsp, src_offset)); + __ popq (Address(rsp, dst_offset)); + __ pushq(Address(rsp, src_offset+8)); + __ popq (Address(rsp, dst_offset+8)); + break; + case Op_VecY: + __ vmovdqu(Address(rsp, -32), xmm0); + __ vmovdqu(xmm0, Address(rsp, src_offset)); + __ vmovdqu(Address(rsp, dst_offset), xmm0); + __ vmovdqu(xmm0, Address(rsp, -32)); + break; + default: + ShouldNotReachHere(); + } +#ifndef PRODUCT + } else { + switch (ireg) { + case Op_VecS: + st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t" + "movl rax, [rsp + #%d]\n\t" + "movl [rsp + #%d], rax\n\t" + "movq rax, [rsp - #8]", + src_offset, dst_offset); + break; + case Op_VecD: + st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t" + "popq [rsp + #%d]", + src_offset, dst_offset); + break; + case Op_VecX: + st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t" + "popq [rsp + #%d]\n\t" + "pushq [rsp + #%d]\n\t" + "popq [rsp + #%d]", + src_offset, dst_offset, src_offset+8, dst_offset+8); + break; + case Op_VecY: + st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" + "vmovdqu xmm0, [rsp + #%d]\n\t" + "vmovdqu [rsp + #%d], xmm0\n\t" + "vmovdqu xmm0, [rsp - #32]", + src_offset, dst_offset); + break; + default: + ShouldNotReachHere(); + } +#endif + } +} + uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, PhaseRegAlloc* ra_, bool do_size, - outputStream* st) const -{ - + outputStream* st) const { + assert(cbuf != NULL || st != NULL, "sanity"); // Get registers to move OptoReg::Name src_second = ra_->get_reg_second(in(1)); OptoReg::Name src_first = ra_->get_reg_first(in(1)); @@ -1050,7 +967,30 @@ if (src_first == dst_first && src_second == dst_second) { // Self copy, no move return 0; - } else if (src_first_rc == rc_stack) { + } + if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); + assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); + assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity"); + if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { + // mem -> mem + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); + } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) { + vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st); + } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) { + int stack_offset = ra_->reg2offset(dst_first); + vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st); + } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) { + int stack_offset = ra_->reg2offset(src_first); + vec_spill_helper(cbuf, false, true, stack_offset, dst_first, ireg, st); + } else { + 
ShouldNotReachHere(); + } + return 0; + } + if (src_first_rc == rc_stack) { // mem -> if (dst_first_rc == rc_stack) { // mem -> mem @@ -1061,23 +1001,16 @@ int src_offset = ra_->reg2offset(src_first); int dst_offset = ra_->reg2offset(dst_first); if (cbuf) { - emit_opcode(*cbuf, 0xFF); - encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false); - - emit_opcode(*cbuf, 0x8F); - encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false); - + MacroAssembler _masm(cbuf); + __ pushq(Address(rsp, src_offset)); + __ popq (Address(rsp, dst_offset)); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t" - "popq [rsp + #%d]", - src_offset, - dst_offset); + "popq [rsp + #%d]", + src_offset, dst_offset); #endif } - return - 3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + - 3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)); } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1086,46 +1019,22 @@ int src_offset = ra_->reg2offset(src_first); int dst_offset = ra_->reg2offset(dst_first); if (cbuf) { - emit_opcode(*cbuf, Assembler::REX_W); - emit_opcode(*cbuf, 0x89); - emit_opcode(*cbuf, 0x44); - emit_opcode(*cbuf, 0x24); - emit_opcode(*cbuf, 0xF8); - - emit_opcode(*cbuf, 0x8B); - encode_RegMem(*cbuf, - RAX_enc, - RSP_enc, 0x4, 0, src_offset, - false); - - emit_opcode(*cbuf, 0x89); - encode_RegMem(*cbuf, - RAX_enc, - RSP_enc, 0x4, 0, dst_offset, - false); - - emit_opcode(*cbuf, Assembler::REX_W); - emit_opcode(*cbuf, 0x8B); - emit_opcode(*cbuf, 0x44); - emit_opcode(*cbuf, 0x24); - emit_opcode(*cbuf, 0xF8); - + MacroAssembler _masm(cbuf); + __ movq(Address(rsp, -8), rax); + __ movl(rax, Address(rsp, src_offset)); + __ movl(Address(rsp, dst_offset), rax); + __ movq(rax, Address(rsp, -8)); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t" - "movl rax, [rsp + #%d]\n\t" - "movl [rsp + #%d], rax\n\t" - "movq rax, [rsp - #8]", - src_offset, - dst_offset); + "movl rax, [rsp + #%d]\n\t" + "movl [rsp + #%d], rax\n\t" + "movq rax, [rsp - #8]", + src_offset, dst_offset); #endif } - return - 5 + // movq - 3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl - 3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl - 5; // movq } + return 0; } else if (dst_first_rc == rc_int) { // mem -> gpr if ((src_first & 1) == 0 && src_first + 1 == src_second && @@ -1133,52 +1042,32 @@ // 64-bit int offset = ra_->reg2offset(src_first); if (cbuf) { - if (Matcher::_regEncode[dst_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_W); - } else { - emit_opcode(*cbuf, Assembler::REX_WR); - } - emit_opcode(*cbuf, 0x8B); - encode_RegMem(*cbuf, - Matcher::_regEncode[dst_first], - RSP_enc, 0x4, 0, offset, - false); + MacroAssembler _masm(cbuf); + __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset)); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movq %s, [rsp + #%d]\t# spill", Matcher::regName[dst_first], offset); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 
1 : 4)) + 4; // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); int offset = ra_->reg2offset(src_first); if (cbuf) { - if (Matcher::_regEncode[dst_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } - emit_opcode(*cbuf, 0x8B); - encode_RegMem(*cbuf, - Matcher::_regEncode[dst_first], - RSP_enc, 0x4, 0, offset, - false); + MacroAssembler _masm(cbuf); + __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset)); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movl %s, [rsp + #%d]\t# spill", Matcher::regName[dst_first], offset); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[dst_first] < 8) - ? 3 - : 4); // REX } + return 0; } else if (dst_first_rc == rc_float) { // mem-> xmm if ((src_first & 1) == 0 && src_first + 1 == src_second && @@ -1189,18 +1078,13 @@ MacroAssembler _masm(cbuf); __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("%s %s, [rsp + #%d]\t# spill", UseXmmLoadAndClearUpper ? "movsd " : "movlpd", Matcher::regName[dst_first], offset); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[dst_first] >= 8) - ? 6 - : (5 + ((UseAVX>0)?1:0))); // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1210,18 +1094,14 @@ MacroAssembler _masm(cbuf); __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movss %s, [rsp + #%d]\t# spill", Matcher::regName[dst_first], offset); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[dst_first] >= 8) - ? 6 - : (5 + ((UseAVX>0)?1:0))); // REX } + return 0; } } else if (src_first_rc == rc_int) { // gpr -> @@ -1232,113 +1112,65 @@ // 64-bit int offset = ra_->reg2offset(dst_first); if (cbuf) { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_W); - } else { - emit_opcode(*cbuf, Assembler::REX_WR); - } - emit_opcode(*cbuf, 0x89); - encode_RegMem(*cbuf, - Matcher::_regEncode[src_first], - RSP_enc, 0x4, 0, offset, - false); + MacroAssembler _masm(cbuf); + __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movq [rsp + #%d], %s\t# spill", offset, Matcher::regName[src_first]); #endif } - return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); int offset = ra_->reg2offset(dst_first); if (cbuf) { - if (Matcher::_regEncode[src_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } - emit_opcode(*cbuf, 0x89); - encode_RegMem(*cbuf, - Matcher::_regEncode[src_first], - RSP_enc, 0x4, 0, offset, - false); + MacroAssembler _masm(cbuf); + __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movl [rsp + #%d], %s\t# spill", offset, Matcher::regName[src_first]); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[src_first] < 8) - ? 
3 - : 4); // REX } + return 0; } else if (dst_first_rc == rc_int) { // gpr -> gpr if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) { // 64-bit if (cbuf) { - if (Matcher::_regEncode[dst_first] < 8) { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_W); - } else { - emit_opcode(*cbuf, Assembler::REX_WB); - } - } else { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_WR); - } else { - emit_opcode(*cbuf, Assembler::REX_WRB); - } - } - emit_opcode(*cbuf, 0x8B); - emit_rm(*cbuf, 0x3, - Matcher::_regEncode[dst_first] & 7, - Matcher::_regEncode[src_first] & 7); + MacroAssembler _masm(cbuf); + __ movq(as_Register(Matcher::_regEncode[dst_first]), + as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movq %s, %s\t# spill", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return 3; // REX + return 0; } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); if (cbuf) { - if (Matcher::_regEncode[dst_first] < 8) { - if (Matcher::_regEncode[src_first] >= 8) { - emit_opcode(*cbuf, Assembler::REX_B); - } - } else { - if (Matcher::_regEncode[src_first] < 8) { - emit_opcode(*cbuf, Assembler::REX_R); - } else { - emit_opcode(*cbuf, Assembler::REX_RB); - } - } - emit_opcode(*cbuf, 0x8B); - emit_rm(*cbuf, 0x3, - Matcher::_regEncode[dst_first] & 7, - Matcher::_regEncode[src_first] & 7); + MacroAssembler _masm(cbuf); + __ movl(as_Register(Matcher::_regEncode[dst_first]), + as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movl %s, %s\t# spill", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return - (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8) - ? 2 - : 3; // REX + return 0; } } else if (dst_first_rc == rc_float) { // gpr -> xmm @@ -1349,13 +1181,12 @@ MacroAssembler _masm(cbuf); __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movdq %s, %s\t# spill", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return 5; // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1364,17 +1195,14 @@ MacroAssembler _masm(cbuf); __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movdl %s, %s\t# spill", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return - (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) - ? 5 - : (4 + ((UseAVX>0)?1:0)); // REX } + return 0; } } else if (src_first_rc == rc_float) { // xmm -> @@ -1388,17 +1216,12 @@ MacroAssembler _masm(cbuf); __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movsd [rsp + #%d], %s\t# spill", offset, Matcher::regName[src_first]); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[src_first] >= 8) - ? 
6 - : (5 + ((UseAVX>0)?1:0))); // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1408,18 +1231,14 @@ MacroAssembler _masm(cbuf); __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movss [rsp + #%d], %s\t# spill", offset, Matcher::regName[src_first]); #endif } - return - ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + - ((Matcher::_regEncode[src_first] >=8) - ? 6 - : (5 + ((UseAVX>0)?1:0))); // REX } + return 0; } else if (dst_first_rc == rc_int) { // xmm -> gpr if ((src_first & 1) == 0 && src_first + 1 == src_second && @@ -1429,13 +1248,12 @@ MacroAssembler _masm(cbuf); __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movdq %s, %s\t# spill", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return 5; // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1444,17 +1262,14 @@ MacroAssembler _masm(cbuf); __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("movdl %s, %s\t# spill", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return - (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) - ? 5 - : (4 + ((UseAVX>0)?1:0)); // REX } + return 0; } else if (dst_first_rc == rc_float) { // xmm -> xmm if ((src_first & 1) == 0 && src_first + 1 == src_second && @@ -1464,17 +1279,13 @@ MacroAssembler _masm(cbuf); __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("%s %s, %s\t# spill", UseXmmRegToRegMoveAll ? "movapd" : "movsd ", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return - (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) - ? 5 - : (4 + ((UseAVX>0)?1:0)); // REX } else { // 32-bit assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); @@ -1483,42 +1294,35 @@ MacroAssembler _masm(cbuf); __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first])); #ifndef PRODUCT - } else if (!do_size) { + } else { st->print("%s %s, %s\t# spill", UseXmmRegToRegMoveAll ? "movaps" : "movss ", Matcher::regName[dst_first], Matcher::regName[src_first]); #endif } - return ((UseAVX>0) ? 5: - ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8) - ? (UseXmmRegToRegMoveAll ? 4 : 5) - : (UseXmmRegToRegMoveAll ? 
3 : 4))); // REX } + return 0; } } assert(0," foo "); Unimplemented(); - return 0; } #ifndef PRODUCT -void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const -{ +void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { implementation(NULL, ra_, false, st); } #endif -void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const -{ +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { implementation(&cbuf, ra_, false, NULL); } -uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const -{ - return implementation(NULL, ra_, true, NULL); +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); } //============================================================================= @@ -1735,16 +1539,6 @@ return true; } -// Vector width in bytes -const uint Matcher::vector_width_in_bytes(void) { - return 8; -} - -// Vector ideal reg -const uint Matcher::vector_ideal_reg(void) { - return Op_RegD; -} - // Is this branch offset short enough that a short branch can be used? // // NOTE: If the platform does not provide any short branch variants, then @@ -1831,21 +1625,21 @@ bool Matcher::can_be_java_arg(int reg) { return - reg == RDI_num || reg == RDI_H_num || - reg == RSI_num || reg == RSI_H_num || - reg == RDX_num || reg == RDX_H_num || - reg == RCX_num || reg == RCX_H_num || - reg == R8_num || reg == R8_H_num || - reg == R9_num || reg == R9_H_num || - reg == R12_num || reg == R12_H_num || - reg == XMM0_num || reg == XMM0_H_num || - reg == XMM1_num || reg == XMM1_H_num || - reg == XMM2_num || reg == XMM2_H_num || - reg == XMM3_num || reg == XMM3_H_num || - reg == XMM4_num || reg == XMM4_H_num || - reg == XMM5_num || reg == XMM5_H_num || - reg == XMM6_num || reg == XMM6_H_num || - reg == XMM7_num || reg == XMM7_H_num; + reg == RDI_num || reg == RDI_H_num || + reg == RSI_num || reg == RSI_H_num || + reg == RDX_num || reg == RDX_H_num || + reg == RCX_num || reg == RCX_H_num || + reg == R8_num || reg == R8_H_num || + reg == R9_num || reg == R9_H_num || + reg == R12_num || reg == R12_H_num || + reg == XMM0_num || reg == XMM0b_num || + reg == XMM1_num || reg == XMM1b_num || + reg == XMM2_num || reg == XMM2b_num || + reg == XMM3_num || reg == XMM3b_num || + reg == XMM4_num || reg == XMM4b_num || + reg == XMM5_num || reg == XMM5b_num || + reg == XMM6_num || reg == XMM6b_num || + reg == XMM7_num || reg == XMM7b_num; } bool Matcher::is_spillable_arg(int reg) @@ -3220,10 +3014,11 @@ OptoReg::Bad, // Op_RegI RAX_H_num, // Op_RegP OptoReg::Bad, // Op_RegF - XMM0_H_num, // Op_RegD + XMM0b_num, // Op_RegD RAX_H_num // Op_RegL }; - assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type"); + // Excluded flags and vector registers. + assert(ARRAY_SIZE(hi) == _last_machine_leaf - 5, "missing type"); return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); %} %} @@ -3985,7 +3780,6 @@ interface(REG_INTER); %} - //----------Memory Operands---------------------------------------------------- // Direct Memory Operand // operand direct(immP addr) @@ -5416,61 +5210,6 @@ ins_pipe(pipe_slow); // XXX %} -// Load Aligned Packed Byte to XMM register -instruct loadA8B(regD dst, memory mem) %{ - match(Set dst (Load8B mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! 
packed8B" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Short to XMM register -instruct loadA4S(regD dst, memory mem) %{ - match(Set dst (Load4S mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed4S" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Char to XMM register -instruct loadA4C(regD dst, memory mem) %{ - match(Set dst (Load4C mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed4C" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Integer to XMM register -instruct load2IU(regD dst, memory mem) %{ - match(Set dst (Load2I mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed2I" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Aligned Packed Single to XMM -instruct loadA2F(regD dst, memory mem) %{ - match(Set dst (Load2F mem)); - ins_cost(125); - format %{ "MOVQ $dst,$mem\t! packed2F" %} - ins_encode %{ - __ movq($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - // Load Effective Address instruct leaP8(rRegP dst, indOffset8 mem) %{ @@ -6200,39 +5939,6 @@ ins_pipe(ialu_mem_imm); %} -// Store Aligned Packed Byte XMM register to memory -instruct storeA8B(memory mem, regD src) %{ - match(Set mem (Store8B mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed8B" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Store Aligned Packed Char/Short XMM register to memory -instruct storeA4C(memory mem, regD src) %{ - match(Set mem (Store4C mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed4C" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Store Aligned Packed Integer XMM register to memory -instruct storeA2I(memory mem, regD src) %{ - match(Set mem (Store2I mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed2I" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Store CMS card-mark Immediate instruct storeImmCM0_reg(memory mem, immI0 zero) %{ @@ -6258,17 +5964,6 @@ ins_pipe(ialu_mem_imm); %} -// Store Aligned Packed Single Float XMM register to memory -instruct storeA2F(memory mem, regD src) %{ - match(Set mem (Store2F mem src)); - ins_cost(145); - format %{ "MOVQ $mem,$src\t! packed2F" %} - ins_encode %{ - __ movq($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - // Store Float instruct storeF(memory mem, regF src) %{ @@ -10377,172 +10072,6 @@ ins_pipe( pipe_slow ); %} -// Replicate scalar to packed byte (1 byte) values in xmm -instruct Repl8B_reg(regD dst, regD src) %{ - match(Set dst (Replicate8B src)); - format %{ "MOVDQA $dst,$src\n\t" - "PUNPCKLBW $dst,$dst\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate8B" %} - ins_encode %{ - if ($dst$$reg != $src$$reg) { - __ movdqa($dst$$XMMRegister, $src$$XMMRegister); - } - __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( pipe_slow ); -%} - -// Replicate scalar to packed byte (1 byte) values in xmm -instruct Repl8B_rRegI(regD dst, rRegI src) %{ - match(Set dst (Replicate8B src)); - format %{ "MOVD $dst,$src\n\t" - "PUNPCKLBW $dst,$dst\n\t" - "PSHUFLW $dst,$dst,0x00\t! 
replicate8B" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( pipe_slow ); -%} - -// Replicate scalar zero to packed byte (1 byte) values in xmm -instruct Repl8B_immI0(regD dst, immI0 zero) %{ - match(Set dst (Replicate8B zero)); - format %{ "PXOR $dst,$dst\t! replicate8B" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed shore (2 byte) values in xmm -instruct Repl4S_reg(regD dst, regD src) %{ - match(Set dst (Replicate4S src)); - format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %} - ins_encode %{ - __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed shore (2 byte) values in xmm -instruct Repl4S_rRegI(regD dst, rRegI src) %{ - match(Set dst (Replicate4S src)); - format %{ "MOVD $dst,$src\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate4S" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar zero to packed short (2 byte) values in xmm -instruct Repl4S_immI0(regD dst, immI0 zero) %{ - match(Set dst (Replicate4S zero)); - format %{ "PXOR $dst,$dst\t! replicate4S" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed char (2 byte) values in xmm -instruct Repl4C_reg(regD dst, regD src) %{ - match(Set dst (Replicate4C src)); - format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %} - ins_encode %{ - __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed char (2 byte) values in xmm -instruct Repl4C_rRegI(regD dst, rRegI src) %{ - match(Set dst (Replicate4C src)); - format %{ "MOVD $dst,$src\n\t" - "PSHUFLW $dst,$dst,0x00\t! replicate4C" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar zero to packed char (2 byte) values in xmm -instruct Repl4C_immI0(regD dst, immI0 zero) %{ - match(Set dst (Replicate4C zero)); - format %{ "PXOR $dst,$dst\t! replicate4C" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed integer (4 byte) values in xmm -instruct Repl2I_reg(regD dst, regD src) %{ - match(Set dst (Replicate2I src)); - format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed integer (4 byte) values in xmm -instruct Repl2I_rRegI(regD dst, rRegI src) %{ - match(Set dst (Replicate2I src)); - format %{ "MOVD $dst,$src\n\t" - "PSHUFD $dst,$dst,0x00\t! replicate2I" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar zero to packed integer (2 byte) values in xmm -instruct Repl2I_immI0(regD dst, immI0 zero) %{ - match(Set dst (Replicate2I zero)); - format %{ "PXOR $dst,$dst\t! 
replicate2I" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_reg(regD dst, regD src) %{ - match(Set dst (Replicate2F src)); - format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_regF(regD dst, regF src) %{ - match(Set dst (Replicate2F src)); - format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %} - ins_encode %{ - __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Replicate scalar to packed single precision floating point values in xmm -instruct Repl2F_immF0(regD dst, immF0 zero) %{ - match(Set dst (Replicate2F zero)); - format %{ "PXOR $dst,$dst\t! replicate2F" %} - ins_encode %{ - __ pxor($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( fpu_reg_reg ); -%} - // ======================================================================= // fast clearing of an array
--- a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Tue Jul 03 18:23:04 2012 -0700
@@ -516,7 +516,12 @@
     }
   }
 
-  if (thread->thread_state() == _thread_in_Java) {
+  // We test if stub is already set (by the stack overflow code
+  // above) so it is not overwritten by the code that follows. This
+  // check is not required on other platforms, because on other
+  // platforms we check for SIGSEGV only or SIGBUS only, whereas here
+  // we have to check for both SIGSEGV and SIGBUS.
+  if (thread->thread_state() == _thread_in_Java && stub == NULL) {
    // Java thread running in Java code => find exception handler if any
    // a fault inside compiled code, the interpreter, or a stub
 
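The guard matters because this BSD handler fields both SIGSEGV and SIGBUS, so the stack-overflow classification and the generic in-Java classification can both fire for a single signal. A claim-once sketch of the pattern (hypothetical names, not the VM's types):

    #include <cstdio>
    #include <cstddef>

    // Each classifier may claim the fault by setting 'stub'; the added
    // 'stub == NULL' test keeps the broader in-Java case from clobbering
    // the more specific stack-overflow case that ran before it.
    static const char* classify(bool in_guard_zone, bool in_java) {
      const char* stub = NULL;
      if (in_guard_zone) {
        stub = "stack_overflow_stub";   // specific case claims the signal first
      }
      if (in_java && stub == NULL) {    // the new guard
        stub = "java_fault_stub";
      }
      return stub;
    }

    int main() {
      printf("%s\n", classify(true, true));   // stack_overflow_stub survives
      printf("%s\n", classify(false, true));  // java_fault_stub
      return 0;
    }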
--- a/hotspot/src/share/vm/adlc/adlparse.cpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/share/vm/adlc/adlparse.cpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -115,6 +115,12 @@ parse_err(SYNERR, "expected one of - instruct, operand, ins_attrib, op_attrib, source, register, pipeline, encode\n Found %s",ident); } } + // Add reg_class spill_regs after parsing. + RegisterForm *regBlock = _AD.get_registers(); + if (regBlock == NULL) { + parse_err(SEMERR, "Did not declare 'register' definitions"); + } + regBlock->addSpillRegClass(); // Done with parsing, check consistency. @@ -768,11 +774,12 @@ //------------------------------reg_parse-------------------------------------- void ADLParser::reg_parse(void) { - - // Create the RegisterForm for the architecture description. - RegisterForm *regBlock = new RegisterForm(); // Build new Source object - regBlock->_linenum = linenum(); - _AD.addForm(regBlock); + RegisterForm *regBlock = _AD.get_registers(); // Information about registers encoding + if (regBlock == NULL) { + // Create the RegisterForm for the architecture description. + regBlock = new RegisterForm(); // Build new Source object + _AD.addForm(regBlock); + } skipws(); // Skip leading whitespace if (_curchar == '%' && *(_ptr+1) == '{') { @@ -796,15 +803,11 @@ parse_err(SYNERR, "Missing %c{ ... %c} block after register keyword.\n",'%','%'); return; } - - // Add reg_class spill_regs - regBlock->addSpillRegClass(); } //------------------------------encode_parse----------------------------------- void ADLParser::encode_parse(void) { EncodeForm *encBlock; // Information about instruction/operand encoding - char *desc = NULL; // String representation of encode rule _AD.getForm(&encBlock); if ( encBlock == NULL) {
--- a/hotspot/src/share/vm/adlc/archDesc.cpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/share/vm/adlc/archDesc.cpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ // -// Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -911,12 +911,24 @@ // Find last character in idealOp, it specifies the type char last_char = 0; const char *ptr = idealOp; - for( ; *ptr != '\0'; ++ptr) { + for (; *ptr != '\0'; ++ptr) { last_char = *ptr; } + // Match Vector types. + if (strncmp(idealOp, "Vec",3)==0) { + switch(last_char) { + case 'S': return "TypeVect::VECTS"; + case 'D': return "TypeVect::VECTD"; + case 'X': return "TypeVect::VECTX"; + case 'Y': return "TypeVect::VECTY"; + default: + internal_err("Vector type %s with unrecognized type\n",idealOp); + } + } + // !!!!! - switch( last_char ) { + switch(last_char) { case 'I': return "TypeInt::INT"; case 'P': return "TypePtr::BOTTOM"; case 'N': return "TypeNarrowOop::BOTTOM";
--- a/hotspot/src/share/vm/adlc/forms.cpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/share/vm/adlc/forms.cpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -265,47 +265,22 @@ if( strcmp(opType,"LoadN")==0 ) return Form::idealN; if( strcmp(opType,"LoadRange")==0 ) return Form::idealI; if( strcmp(opType,"LoadS")==0 ) return Form::idealS; - if( strcmp(opType,"Load16B")==0 ) return Form::idealB; - if( strcmp(opType,"Load8B")==0 ) return Form::idealB; - if( strcmp(opType,"Load4B")==0 ) return Form::idealB; - if( strcmp(opType,"Load8C")==0 ) return Form::idealC; - if( strcmp(opType,"Load4C")==0 ) return Form::idealC; - if( strcmp(opType,"Load2C")==0 ) return Form::idealC; - if( strcmp(opType,"Load8S")==0 ) return Form::idealS; - if( strcmp(opType,"Load4S")==0 ) return Form::idealS; - if( strcmp(opType,"Load2S")==0 ) return Form::idealS; - if( strcmp(opType,"Load2D")==0 ) return Form::idealD; - if( strcmp(opType,"Load4F")==0 ) return Form::idealF; - if( strcmp(opType,"Load2F")==0 ) return Form::idealF; - if( strcmp(opType,"Load4I")==0 ) return Form::idealI; - if( strcmp(opType,"Load2I")==0 ) return Form::idealI; - if( strcmp(opType,"Load2L")==0 ) return Form::idealL; + if( strcmp(opType,"LoadVector")==0 ) return Form::idealV; assert( strcmp(opType,"Load") != 0, "Must type Loads" ); return Form::none; } Form::DataType Form::is_store_to_memory(const char *opType) const { if( strcmp(opType,"StoreB")==0) return Form::idealB; - if( strcmp(opType,"StoreCM")==0) return Form::idealB; + if( strcmp(opType,"StoreCM")==0) return Form::idealB; if( strcmp(opType,"StoreC")==0) return Form::idealC; if( strcmp(opType,"StoreD")==0) return Form::idealD; if( strcmp(opType,"StoreF")==0) return Form::idealF; if( strcmp(opType,"StoreI")==0) return Form::idealI; if( strcmp(opType,"StoreL")==0) return Form::idealL; if( strcmp(opType,"StoreP")==0) return Form::idealP; - if( strcmp(opType,"StoreN")==0) return Form::idealN; - if( strcmp(opType,"Store16B")==0) return Form::idealB; - if( strcmp(opType,"Store8B")==0) return Form::idealB; - if( strcmp(opType,"Store4B")==0) return Form::idealB; - if( strcmp(opType,"Store8C")==0) return Form::idealC; - if( strcmp(opType,"Store4C")==0) return Form::idealC; - if( strcmp(opType,"Store2C")==0) return Form::idealC; - if( strcmp(opType,"Store2D")==0) return Form::idealD; - if( strcmp(opType,"Store4F")==0) return Form::idealF; - if( strcmp(opType,"Store2F")==0) return Form::idealF; - if( strcmp(opType,"Store4I")==0) return Form::idealI; - if( strcmp(opType,"Store2I")==0) return Form::idealI; - if( strcmp(opType,"Store2L")==0) return Form::idealL; + if( strcmp(opType,"StoreN")==0) return Form::idealN; + if( strcmp(opType,"StoreVector")==0 ) return Form::idealV; assert( strcmp(opType,"Store") != 0, "Must type Stores" ); return Form::none; }
--- a/hotspot/src/share/vm/adlc/forms.hpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/share/vm/adlc/forms.hpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -172,7 +172,8 @@ idealB = 6, // Byte type idealC = 7, // Char type idealS = 8, // String type - idealN = 9 // Narrow oop types + idealN = 9, // Narrow oop types + idealV = 10 // Vector type }; // Convert ideal name to a DataType, return DataType::none if not a 'ConX' Form::DataType ideal_to_const_type(const char *ideal_type_name) const;
--- a/hotspot/src/share/vm/adlc/formsopt.cpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/share/vm/adlc/formsopt.cpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -66,7 +66,7 @@ // for spill-slots/regs. void RegisterForm::addSpillRegClass() { // Stack slots start at the next available even register number. - _reg_ctr = (_reg_ctr+1) & ~1; + _reg_ctr = (_reg_ctr+7) & ~7; const char *rc_name = "stack_slots"; RegClass *reg_class = new RegClass(rc_name); reg_class->_stack_or_reg = true; @@ -150,9 +150,14 @@ int RegisterForm::RegMask_Size() { // Need at least this many words int words_for_regs = (_reg_ctr + 31)>>5; - // Add a few for incoming & outgoing arguments to calls. + // The array of Register Mask bits should be large enough to cover + // all the machine registers and all parameters that need to be passed + // on the stack (stack registers) up to some interesting limit. Methods + // that need more parameters will NOT be compiled. On Intel, the limit + // is something like 90+ parameters. + // Add a few (3 words == 96 bits) for incoming & outgoing arguments to calls. // Round up to the next doubleword size. - return (words_for_regs + 2 + 1) & ~1; + return (words_for_regs + 3 + 1) & ~1; } void RegisterForm::dump() { // Debug printer
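The new spill alignment rounds the stack-slot origin up to a multiple of 8 registers, presumably so spill slots stay aligned for the widest vector register classes introduced elsewhere in this change, and RegMask_Size() grows the argument allowance from 2 words to 3 (96 bits). A runnable restatement of the new sizing expression (the register count fed in is a made-up example):

    #include <cstdio>

    // Same arithmetic as the new RegMask_Size(): words for the machine
    // registers, plus 3 words for stack-passed arguments, rounded up to
    // an even (doubleword) word count.
    static int reg_mask_size(int reg_ctr) {
      int words_for_regs = (reg_ctr + 31) >> 5;  // 32 register bits per word
      return (words_for_regs + 3 + 1) & ~1;
    }

    int main() {
      // e.g. 80 registers -> 3 words -> (3 + 3 + 1) & ~1 = 6 words = 192 mask bits
      printf("%d\n", reg_mask_size(80));
      return 0;
    }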
--- a/hotspot/src/share/vm/adlc/formssel.cpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/share/vm/adlc/formssel.cpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -432,6 +432,14 @@ return _matrule->is_ideal_store(); } +// Return 'true' if this instruction matches an ideal vector node +bool InstructForm::is_vector() const { + if( _matrule == NULL ) return false; + + return _matrule->is_vector(); +} + + // Return the input register that must match the output register // If this is not required, return 0 uint InstructForm::two_address(FormDict &globals) { @@ -751,6 +759,9 @@ if (needs_base_oop_edge(globals)) return true; + if (is_vector()) return true; + if (is_mach_constant()) return true; + return false; } @@ -3381,11 +3392,8 @@ "StoreI","StoreL","StoreP","StoreN","StoreD","StoreF" , "StoreB","StoreC","Store" ,"StoreFP", "LoadI", "LoadUI2L", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF" , - "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" , - "Store4I","Store2I","Store2L","Store2D","Store4F","Store2F","Store16B", - "Store8B","Store4B","Store8C","Store4C","Store2C", - "Load4I" ,"Load2I" ,"Load2L" ,"Load2D" ,"Load4F" ,"Load2F" ,"Load16B" , - "Load8B" ,"Load4B" ,"Load8C" ,"Load4C" ,"Load2C" ,"Load8S", "Load4S","Load2S", + "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" , + "StoreVector", "LoadVector", "LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned", "LoadPLocked", "StorePConditional", "StoreIConditional", "StoreLConditional", @@ -3822,6 +3830,10 @@ strcmp(opType,"RegL")==0 || strcmp(opType,"RegF")==0 || strcmp(opType,"RegD")==0 || + strcmp(opType,"VecS")==0 || + strcmp(opType,"VecD")==0 || + strcmp(opType,"VecX")==0 || + strcmp(opType,"VecY")==0 || strcmp(opType,"Reg" )==0) ) { return 1; } @@ -3938,19 +3950,12 @@ strcmp(opType,"ReverseBytesL")==0 || strcmp(opType,"ReverseBytesUS")==0 || strcmp(opType,"ReverseBytesS")==0 || - strcmp(opType,"Replicate16B")==0 || - strcmp(opType,"Replicate8B")==0 || - strcmp(opType,"Replicate4B")==0 || - strcmp(opType,"Replicate8C")==0 || - strcmp(opType,"Replicate4C")==0 || - strcmp(opType,"Replicate8S")==0 || - strcmp(opType,"Replicate4S")==0 || - strcmp(opType,"Replicate4I")==0 || - strcmp(opType,"Replicate2I")==0 || - strcmp(opType,"Replicate2L")==0 || - strcmp(opType,"Replicate4F")==0 || - strcmp(opType,"Replicate2F")==0 || - strcmp(opType,"Replicate2D")==0 || + strcmp(opType,"ReplicateB")==0 || + strcmp(opType,"ReplicateS")==0 || + strcmp(opType,"ReplicateI")==0 || + strcmp(opType,"ReplicateL")==0 || + strcmp(opType,"ReplicateF")==0 || + strcmp(opType,"ReplicateD")==0 || 0 /* 0 to line up columns nicely */ ) return 1; } @@ -4034,6 +4039,23 @@ return ideal_load; } +bool MatchRule::is_vector() const { + if( _rChild ) { + const char *opType = _rChild->_opType; + if( strcmp(opType,"ReplicateB")==0 || + strcmp(opType,"ReplicateS")==0 || + strcmp(opType,"ReplicateI")==0 || + strcmp(opType,"ReplicateL")==0 || + strcmp(opType,"ReplicateF")==0 || + strcmp(opType,"ReplicateD")==0 || + strcmp(opType,"LoadVector")==0 || + strcmp(opType,"StoreVector")==0 || + 0 /* 0 to line up columns nicely */ ) + return true; + } + return false; +} + bool MatchRule::skip_antidep_check() const { // Some loads operate on what is effectively immutable memory so 
we
--- a/hotspot/src/share/vm/adlc/formssel.hpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/share/vm/adlc/formssel.hpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -160,6 +160,7 @@ virtual bool is_ideal_safepoint() const; // node matches 'SafePoint' virtual bool is_ideal_nop() const; // node matches 'Nop' virtual bool is_ideal_control() const; // control node + virtual bool is_vector() const; // vector instruction virtual Form::CallType is_ideal_call() const; // matches ideal 'Call' virtual Form::DataType is_ideal_load() const; // node matches ideal 'LoadXNode' @@ -1011,6 +1012,7 @@ bool is_ideal_goto() const; // node matches ideal 'Goto' bool is_ideal_loopEnd() const; // node matches ideal 'LoopEnd' bool is_ideal_bool() const; // node matches ideal 'Bool' + bool is_vector() const; // vector instruction Form::DataType is_ideal_load() const;// node matches ideal 'LoadXNode' // Should antidep checks be disabled for this rule // See definition of MatchRule::skip_antidep_check
--- a/hotspot/src/share/vm/adlc/main.cpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/share/vm/adlc/main.cpp Tue Jul 03 18:23:04 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -250,6 +250,7 @@ AD.addInclude(AD._HPP_file, "opto/node.hpp"); AD.addInclude(AD._HPP_file, "opto/regalloc.hpp"); AD.addInclude(AD._HPP_file, "opto/subnode.hpp"); + AD.addInclude(AD._HPP_file, "opto/vectornode.hpp"); AD.addInclude(AD._CPP_CLONE_file, "precompiled.hpp"); AD.addInclude(AD._CPP_CLONE_file, "adfiles", get_basename(AD._HPP_file._name)); AD.addInclude(AD._CPP_EXPAND_file, "precompiled.hpp");
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/classfile/altHashing.cpp	Tue Jul 03 18:23:04 2012 -0700
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/altHashing.hpp"
+#include "classfile/symbolTable.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "oops/markOop.hpp"
+#include "runtime/thread.hpp"
+
+// Get the hash code of the class's mirror if it exists, otherwise just
+// return a random number, which is one of the possible hash codes used for
+// objects. We don't want to call the synchronizer hash code to install
+// this value because it may safepoint.
+intptr_t object_hash(klassOop k) {
+  intptr_t hc = k->java_mirror()->mark()->hash();
+  return hc != markOopDesc::no_hash ? hc : os::random();
+}
+
+// Seed value used for each alternative hash calculation.
+jint AltHashing::compute_seed() { + jlong nanos = os::javaTimeNanos(); + jlong now = os::javaTimeMillis(); + jint SEED_MATERIAL[8] = { + (jint) object_hash(SystemDictionary::String_klass()), + (jint) object_hash(SystemDictionary::System_klass()), + (jint) os::random(), // current thread isn't a java thread + (jint) (((julong)nanos) >> 32), + (jint) nanos, + (jint) (((julong)now) >> 32), + (jint) now, + (jint) (os::javaTimeNanos() >> 2) + }; + + return murmur3_32(SEED_MATERIAL, 8); +} + + +// Murmur3 hashing for Symbol +jint AltHashing::murmur3_32(jint seed, const jbyte* data, int len) { + jint h1 = seed; + int count = len; + int offset = 0; + + // body + while (count >= 4) { + jint k1 = (data[offset] & 0x0FF) + | (data[offset + 1] & 0x0FF) << 8 + | (data[offset + 2] & 0x0FF) << 16 + | data[offset + 3] << 24; + + count -= 4; + offset += 4; + + k1 *= 0xcc9e2d51; + k1 = Integer_rotateLeft(k1, 15); + k1 *= 0x1b873593; + + h1 ^= k1; + h1 = Integer_rotateLeft(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + // tail + + if (count > 0) { + jint k1 = 0; + + switch (count) { + case 3: + k1 ^= (data[offset + 2] & 0xff) << 16; + // fall through + case 2: + k1 ^= (data[offset + 1] & 0xff) << 8; + // fall through + case 1: + k1 ^= (data[offset] & 0xff); + // fall through + default: + k1 *= 0xcc9e2d51; + k1 = Integer_rotateLeft(k1, 15); + k1 *= 0x1b873593; + h1 ^= k1; + } + } + + // finalization + h1 ^= len; + + // finalization mix force all bits of a hash block to avalanche + h1 ^= ((unsigned int)h1) >> 16; + h1 *= 0x85ebca6b; + h1 ^= ((unsigned int)h1) >> 13; + h1 *= 0xc2b2ae35; + h1 ^= ((unsigned int)h1) >> 16; + + return h1; +} + +// Murmur3 hashing for Strings +jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) { + jint h1 = seed; + + int off = 0; + int count = len; + + // body + while (count >= 2) { + jchar d1 = data[off++] & 0xFFFF; + jchar d2 = data[off++]; + jint k1 = (d1 | d2 << 16); + + count -= 2; + + k1 *= 0xcc9e2d51; + k1 = Integer_rotateLeft(k1, 15); + k1 *= 0x1b873593; + + h1 ^= k1; + h1 = Integer_rotateLeft(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + // tail + + if (count > 0) { + int k1 = data[off]; + + k1 *= 0xcc9e2d51; + k1 = Integer_rotateLeft(k1, 15); + k1 *= 0x1b873593; + h1 ^= k1; + } + + // finalization + h1 ^= len * 2; // (Character.SIZE / Byte.SIZE); + + // finalization mix force all bits of a hash block to avalanche + h1 ^= ((unsigned int)h1) >> 16; + h1 *= 0x85ebca6b; + h1 ^= ((unsigned int)h1) >> 13; + h1 *= 0xc2b2ae35; + h1 ^= ((unsigned int)h1) >> 16; + + return h1; +} + +// Hash used for the seed. +jint AltHashing::murmur3_32(jint seed, const int* data, int len) { + jint h1 = seed; + + int off = 0; + int end = len; + + // body + while (off < end) { + jint k1 = data[off++]; + + k1 *= 0xcc9e2d51; + k1 = Integer_rotateLeft(k1, 15); + k1 *= 0x1b873593; + + h1 ^= k1; + h1 = Integer_rotateLeft(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + // tail (always empty, as body is always 32-bit chunks) + + // finalization + + h1 ^= len * 4; // (Integer.SIZE / Byte.SIZE); + + // finalization mix force all bits of a hash block to avalanche + h1 ^= ((juint)h1) >> 16; + h1 *= 0x85ebca6b; + h1 ^= ((juint)h1) >> 13; + h1 *= 0xc2b2ae35; + h1 ^= ((juint)h1) >> 16; + + return h1; +} + +jint AltHashing::murmur3_32(const int* data, int len) { + return murmur3_32(0, data, len); +} + +#ifndef PRODUCT +// Overloaded versions for internal test. 
+jint AltHashing::murmur3_32(const jbyte* data, int len) {
+  return murmur3_32(0, data, len);
+}
+
+jint AltHashing::murmur3_32(const jchar* data, int len) {
+  return murmur3_32(0, data, len);
+}
+
+// Internal test for alternate hashing. Translated from JDK version
+// test/sun/misc/Hashing.java
+static const jbyte ONE_BYTE[] = { (jbyte) 0x80};
+static const jbyte TWO_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81};
+static const jchar ONE_CHAR[] = { (jchar) 0x8180};
+static const jbyte THREE_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82};
+static const jbyte FOUR_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82, (jbyte) 0x83};
+static const jchar TWO_CHAR[] = { (jchar) 0x8180, (jchar) 0x8382};
+static const jint ONE_INT[] = { 0x83828180};
+static const jbyte SIX_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82, (jbyte) 0x83, (jbyte) 0x84, (jbyte) 0x85};
+static const jchar THREE_CHAR[] = { (jchar) 0x8180, (jchar) 0x8382, (jchar) 0x8584};
+static const jbyte EIGHT_BYTE[] = {
+  (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82,
+  (jbyte) 0x83, (jbyte) 0x84, (jbyte) 0x85,
+  (jbyte) 0x86, (jbyte) 0x87};
+static const jchar FOUR_CHAR[] = {
+  (jchar) 0x8180, (jchar) 0x8382,
+  (jchar) 0x8584, (jchar) 0x8786};
+
+static const jint TWO_INT[] = { 0x83828180, 0x87868584};
+
+static const juint MURMUR3_32_X86_CHECK_VALUE = 0xB0F57EE3;
+
+void AltHashing::testMurmur3_32_ByteArray() {
+  // printf("testMurmur3_32_ByteArray\n");
+
+  jbyte* vector = new jbyte[256];
+  jbyte* hashes = new jbyte[4 * 256];
+
+  for (int i = 0; i < 256; i++) {
+    vector[i] = (jbyte) i;
+  }
+
+  // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255}
+  for (int i = 0; i < 256; i++) {
+    jint hash = murmur3_32(256 - i, vector, i);
+    hashes[i * 4] = (jbyte) hash;
+    hashes[i * 4 + 1] = (jbyte) (((juint)hash) >> 8);
+    hashes[i * 4 + 2] = (jbyte) (((juint)hash) >> 16);
+    hashes[i * 4 + 3] = (jbyte) (((juint)hash) >> 24);
+  }
+
+  // Hash the computed hashes to get a single, constant result.
+  juint final_hash = murmur3_32(hashes, 4*256);
+
+  assert (MURMUR3_32_X86_CHECK_VALUE == final_hash,
+    err_msg(
+        "Calculated hash result not as expected. Expected %08X got %08X\n",
+        MURMUR3_32_X86_CHECK_VALUE,
+        final_hash));
+}
+
+void AltHashing::testEquivalentHashes() {
+  jint jbytes, jchars, ints;
+
+  // printf("testEquivalentHashes\n");
+
+  jbytes = murmur3_32(TWO_BYTE, 2);
+  jchars = murmur3_32(ONE_CHAR, 1);
+  assert (jbytes == jchars,
+    err_msg("Hashes did not match. b:%08x != c:%08x\n", jbytes, jchars));
+
+  jbytes = murmur3_32(FOUR_BYTE, 4);
+  jchars = murmur3_32(TWO_CHAR, 2);
+  ints = murmur3_32(ONE_INT, 1);
+  assert ((jbytes == jchars) && (jbytes == ints),
+    err_msg("Hashes did not match. b:%08x != c:%08x != i:%08x\n", jbytes, jchars, ints));
+
+  jbytes = murmur3_32(SIX_BYTE, 6);
+  jchars = murmur3_32(THREE_CHAR, 3);
+  assert (jbytes == jchars,
+    err_msg("Hashes did not match. b:%08x != c:%08x\n", jbytes, jchars));
+
+  jbytes = murmur3_32(EIGHT_BYTE, 8);
+  jchars = murmur3_32(FOUR_CHAR, 4);
+  ints = murmur3_32(TWO_INT, 2);
+  assert ((jbytes == jchars) && (jbytes == ints),
+    err_msg("Hashes did not match. b:%08x != c:%08x != i:%08x\n", jbytes, jchars, ints));
+}
+
+// Checks that the alternate hashcode implementation is correct.
+void AltHashing::test_alt_hash() {
+  testMurmur3_32_ByteArray();
+  testEquivalentHashes();
+}
+#endif // PRODUCT
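The equivalence tests above hinge on murmur3_32 consuming its input least-significant byte first, so a byte array, a jchar array, and a jint array describing the same little-endian bit pattern must hash identically. A standalone look at just the packing step (illustrative only):

    #include <cstdio>
    #include <stdint.h>

    // Bytes {0x80,0x81} pack to the jchar 0x8180 and {0x80..0x83} to the
    // jint 0x83828180 (the ONE_CHAR and ONE_INT vectors in the test).
    int main() {
      uint8_t b[4] = { 0x80, 0x81, 0x82, 0x83 };
      uint16_t as_char = (uint16_t)(b[0] | (b[1] << 8));
      uint32_t as_int  = (uint32_t)b[0] | ((uint32_t)b[1] << 8)
                       | ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
      printf("0x%04X 0x%08X\n", as_char, as_int);  // 0x8180 0x83828180
      return 0;
    }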
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/src/share/vm/classfile/altHashing.hpp Tue Jul 03 18:23:04 2012 -0700 @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_CLASSFILE_ALTHASHING_HPP +#define SHARE_VM_CLASSFILE_ALTHASHING_HPP + +#include "prims/jni.h" +#include "classfile/symbolTable.hpp" + +/** + * Hashing utilities. + * + * Implementation of Murmur3 hashing. + * This code was translated from src/share/classes/sun/misc/Hashing.java + * code in the JDK. + */ + +class AltHashing : AllStatic { + + // utility function copied from java/lang/Integer + static jint Integer_rotateLeft(jint i, int distance) { + return (i << distance) | (((juint)i) >> (32-distance)); + } + static jint murmur3_32(const int* data, int len); + static jint murmur3_32(jint seed, const int* data, int len); + +#ifndef PRODUCT + // Hashing functions used for internal testing + static jint murmur3_32(const jbyte* data, int len); + static jint murmur3_32(const jchar* data, int len); + static void testMurmur3_32_ByteArray(); + static void testEquivalentHashes(); +#endif // PRODUCT + + public: + static jint compute_seed(); + static jint murmur3_32(jint seed, const jbyte* data, int len); + static jint murmur3_32(jint seed, const jchar* data, int len); + NOT_PRODUCT(static void test_alt_hash();) +}; +#endif // SHARE_VM_CLASSFILE_ALTHASHING_HPP
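The header's Integer_rotateLeft helper reproduces java.lang.Integer.rotateLeft so the C++ hash matches the JDK's Java implementation bit for bit. A standalone sketch of the same rotation (valid for distances 1..31, which covers the 13 and 15 that murmur3 uses):

    #include <cstdio>
    #include <stdint.h>

    // Bits shifted out on the left re-enter on the right.
    static int32_t rotate_left(int32_t i, int distance) {
      return (int32_t)(((uint32_t)i << distance) | ((uint32_t)i >> (32 - distance)));
    }

    int main() {
      // The sign bit of 0x80000001 wraps around: result is 0x00000003.
      printf("0x%08X\n", (uint32_t)rotate_left((int32_t)0x80000001u, 1));
      return 0;
    }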
--- a/hotspot/src/share/vm/classfile/javaClasses.cpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/classfile/javaClasses.cpp	Tue Jul 03 18:23:04 2012 -0700
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "classfile/altHashing.hpp"
 #include "classfile/javaClasses.hpp"
 #include "classfile/symbolTable.hpp"
 #include "classfile/vmSymbols.hpp"
@@ -347,13 +348,26 @@
   return result;
 }
 
-unsigned int java_lang_String::hash_string(oop java_string) {
+unsigned int java_lang_String::to_hash(oop java_string) {
+  int length = java_lang_String::length(java_string);
+  // A zero-length string will hash to zero with the String.toHash() function.
+  if (length == 0) return 0;
+
   typeArrayOop value = java_lang_String::value(java_string);
   int offset = java_lang_String::offset(java_string);
+  return java_lang_String::to_hash(value->char_at_addr(offset), length);
+}
+
+unsigned int java_lang_String::hash_string(oop java_string) {
   int length = java_lang_String::length(java_string);
-
-  if (length == 0) return 0;
-  return hash_string(value->char_at_addr(offset), length);
+  // A zero-length string doesn't necessarily hash to zero.
+  if (length == 0) {
+    return StringTable::hash_string(NULL, 0);
+  }
+
+  typeArrayOop value = java_lang_String::value(java_string);
+  int offset = java_lang_String::offset(java_string);
+  return StringTable::hash_string(value->char_at_addr(offset), length);
 }
 
 Symbol* java_lang_String::as_symbol(Handle java_string, TRAPS) {
--- a/hotspot/src/share/vm/classfile/javaClasses.hpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/classfile/javaClasses.hpp	Tue Jul 03 18:23:04 2012 -0700
@@ -158,20 +158,16 @@
   static jchar* as_unicode_string(oop java_string, int& length);
 
   // Compute the hash value for a java.lang.String object which would
-  // contain the characters passed in. This hash value is used for at
-  // least two purposes.
+  // contain the characters passed in.
   //
-  // (a) As the hash value used by the StringTable for bucket selection
-  //     and comparison (stored in the HashtableEntry structures). This
-  //     is used in the String.intern() method.
+  // As the hash value used by the String object itself, in
+  // String.hashCode(). This value is normally calculated in Java code
+  // in the String.hashCode() method, but is precomputed for String
+  // objects in the shared archive file.
+  // hash P(31) from Kernighan & Ritchie
   //
-  // (b) As the hash value used by the String object itself, in
-  //     String.hashCode(). This value is normally calculate in Java code
-  //     in the String.hashCode method(), but is precomputed for String
-  //     objects in the shared archive file.
-  //
-  // For this reason, THIS ALGORITHM MUST MATCH String.hashCode().
-  static unsigned int hash_string(jchar* s, int len) {
+  // For this reason, THIS ALGORITHM MUST MATCH String.toHash().
+  template <typename T> static unsigned int to_hash(T* s, int len) {
     unsigned int h = 0;
     while (len-- > 0) {
       h = 31*h + (unsigned int) *s;
@@ -179,6 +175,10 @@
     }
     return h;
   }
+  static unsigned int to_hash(oop java_string);
+
+  // This is the string hash code used by the StringTable, which may be
+  // the same as String.toHash or an alternate hash code.
   static unsigned int hash_string(oop java_string);
 
   static bool equals(oop java_string, jchar* chars, int len);
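The to_hash template is the String.hashCode() polynomial h = s[0]*31^(n-1) + ... + s[n-1], evaluated by Horner's rule. A worked standalone example (plain char input, just to show the arithmetic):

    #include <cstdio>

    // For "abc": 31*(31*97 + 98) + 99 = 96354, matching "abc".hashCode()
    // in Java.
    int main() {
      unsigned int h = 0;
      for (const char* p = "abc"; *p; ++p) {
        h = 31 * h + (unsigned int) *p;  // one Horner step per character
      }
      printf("%u\n", h);  // 96354
      return 0;
    }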
--- a/hotspot/src/share/vm/classfile/symbolTable.cpp Wed Jun 27 21:09:29 2012 +0100 +++ b/hotspot/src/share/vm/classfile/symbolTable.cpp Tue Jul 03 18:23:04 2012 -0700 @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "classfile/altHashing.hpp" #include "classfile/javaClasses.hpp" #include "classfile/symbolTable.hpp" #include "classfile/systemDictionary.hpp" @@ -34,19 +35,19 @@ #include "oops/oop.inline2.hpp" #include "runtime/mutexLocker.hpp" #include "utilities/hashtable.inline.hpp" +#include "utilities/numberSeq.hpp" // -------------------------------------------------------------------------- SymbolTable* SymbolTable::_the_table = NULL; // Static arena for symbols that are not deallocated Arena* SymbolTable::_arena = NULL; +bool SymbolTable::_needs_rehashing = false; +jint SymbolTable::_seed = 0; Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) { - // Don't allow symbols to be created which cannot fit in a Symbol*. - if (len > Symbol::max_length()) { - THROW_MSG_0(vmSymbols::java_lang_InternalError(), - "name is too long to represent"); - } + assert (len <= Symbol::max_length(), "should be checked by caller"); + Symbol* sym; // Allocate symbols in the C heap when dumping shared spaces in case there // are temporary symbols we can remove. @@ -91,9 +92,14 @@ int total = 0; size_t memory_total = 0; for (int i = 0; i < the_table()->table_size(); ++i) { - for (HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i); *p != NULL; ) { - HashtableEntry<Symbol*>* entry = *p; - if (entry->is_shared()) { + HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i); + HashtableEntry<Symbol*>* entry = the_table()->bucket(i); + while (entry != NULL) { + // Shared entries are normally at the end of the bucket and if we run into + // a shared entry, then there is nothing more to remove. However, if we + // have rehashed the table, then the shared entries are no longer at the + // end of the bucket. + if (entry->is_shared() && !use_alternate_hashcode()) { break; } Symbol* s = entry->literal(); @@ -102,6 +108,7 @@ assert(s != NULL, "just checking"); // If reference count is zero, remove. if (s->refcount() == 0) { + assert(!entry->is_shared(), "shared entries should be kept live"); delete s; removed++; *p = entry->next(); @@ -109,6 +116,8 @@ } else { p = entry->next_addr(); } + // get next entry + entry = (HashtableEntry<Symbol*>*)HashtableEntry<Symbol*>::make_ptr(*p); } } symbols_removed += removed; @@ -121,12 +130,42 @@ } } +unsigned int SymbolTable::new_hash(Symbol* sym) { + ResourceMark rm; + // Use alternate hashing algorithm on this symbol. + return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length()); +} + +// Create a new table and using alternate hash code, populate the new table +// with the existing strings. Set flag to use the alternate hash code afterwards. +void SymbolTable::rehash_table() { + assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + // This should never happen with -Xshare:dump but it might in testing mode. + if (DumpSharedSpaces) return; + // Create a new symbol table + SymbolTable* new_table = new SymbolTable(); + + // Initialize the global seed for hashing. + _seed = AltHashing::compute_seed(); + assert(seed() != 0, "shouldn't be zero"); + + the_table()->move_to(new_table); + + // Delete the table and buckets (entries are reused in new table). + delete _the_table; + // Don't check if we need rehashing until the table gets unbalanced again. + // Then rehash with a new global seed. 
+  _needs_rehashing = false;
+  _the_table = new_table;
+}
 
 // Lookup a symbol in a bucket.
 
 Symbol* SymbolTable::lookup(int index, const char* name,
                             int len, unsigned int hash) {
+  int count = 0;
   for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) {
+    count++;  // count all entries in this bucket, not just ones with same hash
     if (e->hash() == hash) {
       Symbol* sym = e->literal();
       if (sym->equals(name, len)) {
@@ -136,9 +175,20 @@
       }
     }
   }
+  // If the bucket size is too deep check if this hash code is insufficient.
+  if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
+    _needs_rehashing = check_rehash_table(count);
+  }
   return NULL;
 }
 
+// Pick hashing algorithm.
+unsigned int SymbolTable::hash_symbol(const char* s, int len) {
+  return use_alternate_hashcode() ?
+           AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
+           java_lang_String::to_hash(s, len);
+}
+
 
 // We take care not to be blocking while holding the
 // SymbolTable_lock. Otherwise, the system might deadlock, since the
@@ -156,6 +206,9 @@
   // Found
   if (s != NULL) return s;
 
+  // Grab SymbolTable_lock first.
+  MutexLocker ml(SymbolTable_lock, THREAD);
+
   // Otherwise, add to symbol to table
   return the_table()->basic_add(index, (u1*)name, len, hashValue, true, CHECK_NULL);
 }
@@ -193,6 +246,9 @@
   // We can't include the code in No_Safepoint_Verifier because of the
   // ResourceMark.
 
+  // Grab SymbolTable_lock first.
+  MutexLocker ml(SymbolTable_lock, THREAD);
+
   return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, CHECK_NULL);
 }
 
@@ -261,6 +317,9 @@
                                int names_count,
                                const char** names, int* lengths, int* cp_indices,
                                unsigned int* hashValues, TRAPS) {
+  // Grab SymbolTable_lock first.
+  MutexLocker ml(SymbolTable_lock, THREAD);
+
   SymbolTable* table = the_table();
   bool added = table->basic_add(class_loader, cp, names_count, names, lengths,
                                 cp_indices, hashValues, CHECK);
@@ -281,18 +340,39 @@
   if (result != NULL) {
     return result;
   }
+  // Grab SymbolTable_lock first.
+  MutexLocker ml(SymbolTable_lock, THREAD);
+
   SymbolTable* table = the_table();
   int index = table->hash_to_index(hash);
   return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD);
 }
 
-Symbol* SymbolTable::basic_add(int index, u1 *name, int len,
-                               unsigned int hashValue, bool c_heap, TRAPS) {
+Symbol* SymbolTable::basic_add(int index_arg, u1 *name, int len,
+                               unsigned int hashValue_arg, bool c_heap, TRAPS) {
   assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
          "proposed name of symbol must be stable");
 
-  // Grab SymbolTable_lock first.
-  MutexLocker ml(SymbolTable_lock, THREAD);
+  // Don't allow symbols to be created which cannot fit in a Symbol*.
+  if (len > Symbol::max_length()) {
+    THROW_MSG_0(vmSymbols::java_lang_InternalError(),
+                "name is too long to represent");
+  }
+
+  // Cannot hit a safepoint in this function because the "this" pointer can move.
+  No_Safepoint_Verifier nsv;
+
+  // Check if the symbol table has been rehashed, if so, need to recalculate
+  // the hash value and index.
+  unsigned int hashValue;
+  int index;
+  if (use_alternate_hashcode()) {
+    hashValue = hash_symbol((const char*)name, len);
+    index = hash_to_index(hashValue);
+  } else {
+    hashValue = hashValue_arg;
+    index = index_arg;
+  }
 
   // Since look-up was done lock-free, we need to check if another
   // thread beat us in the race to insert the symbol.
@@ -328,14 +408,22 @@
     }
   }
 
-  // Hold SymbolTable_lock through the symbol creation
-  MutexLocker ml(SymbolTable_lock, THREAD);
+  // Cannot hit a safepoint in this function because the "this" pointer can move.
+  No_Safepoint_Verifier nsv;
 
   for (int i=0; i<names_count; i++) {
+    // Check if the symbol table has been rehashed, if so, need to recalculate
+    // the hash value.
+    unsigned int hashValue;
+    if (use_alternate_hashcode()) {
+      hashValue = hash_symbol(names[i], lengths[i]);
+    } else {
+      hashValue = hashValues[i];
+    }
     // Since look-up was done lock-free, we need to check if another
     // thread beat us in the race to insert the symbol.
-    int index = hash_to_index(hashValues[i]);
-    Symbol* test = lookup(index, names[i], lengths[i], hashValues[i]);
+    int index = hash_to_index(hashValue);
+    Symbol* test = lookup(index, names[i], lengths[i], hashValue);
     if (test != NULL) {
       // A race occurred and another thread introduced the symbol, this one
       // will be dropped and collected. Use test instead.
@@ -347,7 +435,7 @@
       bool c_heap = class_loader() != NULL;
       Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
       assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized");  // why wouldn't it be???
-      HashtableEntry<Symbol*>* entry = new_entry(hashValues[i], sym);
+      HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
       add_entry(index, entry);
       cp->symbol_at_put(cp_indices[i], sym);
     }
@@ -370,6 +458,24 @@
   }
 }
 
+void SymbolTable::dump(outputStream* st) {
+  NumberSeq summary;
+  for (int i = 0; i < the_table()->table_size(); ++i) {
+    int count = 0;
+    for (HashtableEntry<Symbol*>* e = the_table()->bucket(i);
+         e != NULL; e = e->next()) {
+      count++;
+    }
+    summary.add((double)count);
+  }
+  st->print_cr("SymbolTable statistics:");
+  st->print_cr("Number of buckets       : %7d", summary.num());
+  st->print_cr("Average bucket size     : %7.0f", summary.avg());
+  st->print_cr("Variance of bucket size : %7.0f", summary.variance());
+  st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
+  st->print_cr("Maximum bucket size     : %7.0f", summary.maximum());
+}
+
 //---------------------------------------------------------------------------
 // Non-product code
 
@@ -468,7 +574,6 @@
       }
     }
   }
-
 #endif // PRODUCT
 
 // --------------------------------------------------------------------------
 
@@ -514,38 +619,53 @@
 // --------------------------------------------------------------------------
 StringTable* StringTable::_the_table = NULL;
 
+bool StringTable::_needs_rehashing = false;
+jint StringTable::_seed = 0;
+
+// Pick hashing algorithm
+unsigned int StringTable::hash_string(const jchar* s, int len) {
+  return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
+                                    java_lang_String::to_hash(s, len);
+}
+
 oop StringTable::lookup(int index, jchar* name,
                         int len, unsigned int hash) {
+  int count = 0;
   for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) {
+    count++;
     if (l->hash() == hash) {
       if (java_lang_String::equals(l->literal(), name, len)) {
        return l->literal();
      }
    }
  }
+  // If the bucket size is too deep check if this hash code is insufficient.
+  if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
+    _needs_rehashing = check_rehash_table(count);
+  }
   return NULL;
 }
 
-oop StringTable::basic_add(int index, Handle string_or_null, jchar* name,
-                           int len, unsigned int hashValue, TRAPS) {
-  debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
-  assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
-         "proposed name of symbol must be stable");
-
-  Handle string;
-  // try to reuse the string if possible
-  if (!string_or_null.is_null() && (!JavaObjectsInPerm || string_or_null()->is_perm())) {
-    string = string_or_null;
-  } else {
-    string = java_lang_String::create_tenured_from_unicode(name, len, CHECK_NULL);
-  }
-
-  // Allocation must be done before grapping the SymbolTable_lock lock
-  MutexLocker ml(StringTable_lock, THREAD);
+oop StringTable::basic_add(int index_arg, Handle string, jchar* name,
+                           int len, unsigned int hashValue_arg, TRAPS) {
 
   assert(java_lang_String::equals(string(), name, len),
          "string must be properly initialized");
+  // Cannot hit a safepoint in this function because the "this" pointer can move.
+  No_Safepoint_Verifier nsv;
+
+  // Check if the symbol table has been rehashed, if so, need to recalculate
+  // the hash value and index before second lookup.
+  unsigned int hashValue;
+  int index;
+  if (use_alternate_hashcode()) {
+    hashValue = hash_string(name, len);
+    index = hash_to_index(hashValue);
+  } else {
+    hashValue = hashValue_arg;
+    index = index_arg;
+  }
 
   // Since look-up was done lock-free, we need to check if another
   // thread beat us in the race to insert the symbol.
@@ -566,7 +686,7 @@
   ResourceMark rm;
   int length;
   jchar* chars = symbol->as_unicode(length);
-  unsigned int hashValue = java_lang_String::hash_string(chars, length);
+  unsigned int hashValue = hash_string(chars, length);
   int index = the_table()->hash_to_index(hashValue);
   return the_table()->lookup(index, chars, length, hashValue);
 }
@@ -574,15 +694,31 @@
 
 oop StringTable::intern(Handle string_or_null, jchar* name,
                         int len, TRAPS) {
-  unsigned int hashValue = java_lang_String::hash_string(name, len);
+  unsigned int hashValue = hash_string(name, len);
   int index = the_table()->hash_to_index(hashValue);
-  oop string = the_table()->lookup(index, name, len, hashValue);
+  oop found_string = the_table()->lookup(index, name, len, hashValue);
 
   // Found
-  if (string != NULL) return string;
+  if (found_string != NULL) return found_string;
+
+  debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
+  assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
+         "proposed name of symbol must be stable");
+
+  Handle string;
+  // try to reuse the string if possible
+  if (!string_or_null.is_null() && (!JavaObjectsInPerm || string_or_null()->is_perm())) {
+    string = string_or_null;
+  } else {
+    string = java_lang_String::create_tenured_from_unicode(name, len, CHECK_NULL);
+  }
+
+  // Grab the StringTable_lock before getting the_table() because it could
+  // change at safepoint.
+  MutexLocker ml(StringTable_lock, THREAD);
 
   // Otherwise, add to symbol to table
-  return the_table()->basic_add(index, string_or_null, name, len,
+  return the_table()->basic_add(index, string, name, len,
                                 hashValue, CHECK_NULL);
 }
 
@@ -625,18 +761,24 @@
 // entries at a safepoint.
   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
   for (int i = 0; i < the_table()->table_size(); ++i) {
-    for (HashtableEntry<oop>** p = the_table()->bucket_addr(i); *p != NULL; ) {
-      HashtableEntry<oop>* entry = *p;
-      if (entry->is_shared()) {
+    HashtableEntry<oop>** p = the_table()->bucket_addr(i);
+    HashtableEntry<oop>* entry = the_table()->bucket(i);
+    while (entry != NULL) {
+      // Shared entries are normally at the end of the bucket and if we run into
+      // a shared entry, then there is nothing more to remove. However, if we
+      // have rehashed the table, then the shared entries are no longer at the
+      // end of the bucket.
+      if (entry->is_shared() && !use_alternate_hashcode()) {
         break;
       }
       assert(entry->literal() != NULL, "just checking");
-      if (is_alive->do_object_b(entry->literal())) {
+      if (entry->is_shared() || is_alive->do_object_b(entry->literal())) {
        p = entry->next_addr();
      } else {
        *p = entry->next();
        the_table()->free_entry(entry);
      }
+      entry = (HashtableEntry<oop>*)HashtableEntry<oop>::make_ptr(*p);
    }
  }
}
@@ -675,3 +817,53 @@
     }
   }
 }
+
+void StringTable::dump(outputStream* st) {
+  NumberSeq summary;
+  for (int i = 0; i < the_table()->table_size(); ++i) {
+    HashtableEntry<oop>* p = the_table()->bucket(i);
+    int count = 0;
+    for ( ; p != NULL; p = p->next()) {
+      count++;
+    }
+    summary.add((double)count);
+  }
+  st->print_cr("StringTable statistics:");
+  st->print_cr("Number of buckets       : %7d", summary.num());
+  st->print_cr("Average bucket size     : %7.0f", summary.avg());
+  st->print_cr("Variance of bucket size : %7.0f", summary.variance());
+  st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
+  st->print_cr("Maximum bucket size     : %7.0f", summary.maximum());
+}
+
+
+unsigned int StringTable::new_hash(oop string) {
+  ResourceMark rm;
+  int length;
+  jchar* chars = java_lang_String::as_unicode_string(string, length);
+  // Use alternate hashing algorithm on the string
+  return AltHashing::murmur3_32(seed(), chars, length);
+}
+
+// Create a new table and using alternate hash code, populate the new table
+// with the existing strings.  Set flag to use the alternate hash code afterwards.
+void StringTable::rehash_table() {
+  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+  // This should never happen with -Xshare:dump but it might in testing mode.
+  if (DumpSharedSpaces) return;
+  StringTable* new_table = new StringTable();
+
+  // Initialize new global seed for hashing.
+  _seed = AltHashing::compute_seed();
+  assert(seed() != 0, "shouldn't be zero");
+
+  // Rehash the table
+  the_table()->move_to(new_table);
+
+  // Delete the table and buckets (entries are reused in new table).
+  delete _the_table;
+  // Don't check if we need rehashing until the table gets unbalanced again.
+  // Then rehash with a new global seed.
+  _needs_rehashing = false;
+  _the_table = new_table;
+}
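
The rehashing scheme introduced above works in two phases: every lock-free lookup counts the full depth of the bucket it probes and latches _needs_rehashing once a chain reaches BasicHashtable::rehash_count, and a later safepoint operation draws a fresh seed from AltHashing::compute_seed() and redistributes every entry under the seeded murmur3_32 hash. A minimal standalone sketch of that pattern, with a hypothetical Table class and standard-library hashing standing in for the VM's AltHashing and safepoint machinery (illustration only, not the HotSpot code):

    #include <cstdint>
    #include <functional>
    #include <random>
    #include <vector>

    // Sketch of "flag a deep bucket, rehash with a fresh seed".
    // Single-threaded sketch; nodes are never reclaimed.
    class Table {
      struct Node { uint32_t hash; int key; Node* next; };
      static const int kRehashDepth = 100;  // stand-in for BasicHashtable::rehash_count

      std::vector<Node*> buckets_;
      uint64_t seed_ = 0;                   // 0 => default hash, cf. use_alternate_hashcode()
      bool needs_rehashing_ = false;

      uint32_t hash_key(int key) const {    // cf. SymbolTable::hash_symbol()
        return seed_ == 0
            ? (uint32_t)std::hash<int>()(key)
            : (uint32_t)std::hash<uint64_t>()((uint64_t)key ^ seed_);
      }

     public:
      explicit Table(size_t n) : buckets_(n, nullptr) {}

      void insert(int key) {
        uint32_t h = hash_key(key);
        size_t idx = h % buckets_.size();
        buckets_[idx] = new Node{h, key, buckets_[idx]};
      }

      bool lookup(int key) {
        uint32_t h = hash_key(key);
        int depth = 0;  // count *all* entries in the bucket, not just hash matches
        for (Node* e = buckets_[h % buckets_.size()]; e != nullptr; e = e->next) {
          depth++;
          if (e->hash == h && e->key == key) return true;
        }
        if (depth >= kRehashDepth) needs_rehashing_ = true;  // cf. check_rehash_table()
        return false;
      }

      // Call from a quiescent point (HotSpot: a safepoint).
      void rehash_if_needed() {
        if (!needs_rehashing_) return;
        seed_ = std::mt19937_64(std::random_device{}())();
        if (seed_ == 0) seed_ = 1;  // keep the "unseeded" sentinel meaningful
        std::vector<Node*> fresh(buckets_.size(), nullptr);
        for (Node* head : buckets_) {
          while (head != nullptr) {
            Node* next = head->next;
            head->hash = hash_key(head->key);  // recompute under the new seed
            size_t idx = head->hash % fresh.size();
            head->next = fresh[idx];
            fresh[idx] = head;
            head = next;
          }
        }
        buckets_.swap(fresh);
        needs_rehashing_ = false;  // re-arm detection, as in rehash_table()
      }
    };

The zero seed plays the role of use_alternate_hashcode(): while it is zero the cheap default hash is in use, and every stored hash field must be recomputed once the seed changes -- which is why basic_add() above re-derives hashValue and index when the table has been rehashed since the lock-free lookup.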
--- a/hotspot/src/share/vm/classfile/symbolTable.hpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/classfile/symbolTable.hpp	Tue Jul 03 18:23:04 2012 -0700
@@ -40,6 +40,7 @@
 // - symbolTableEntrys are allocated in blocks to reduce the space overhead.
 
 class BoolObjectClosure;
+class outputStream;
 
 
 // Class to hold a newly created or referenced Symbol* temporarily in scope.
@@ -78,6 +79,10 @@
   // The symbol table
   static SymbolTable* _the_table;
 
+  // Set if one bucket is out of balance due to hash algorithm deficiency
+  static bool _needs_rehashing;
+  static jint _seed;
+
   // For statistics
   static int symbols_removed;
   static int symbols_counted;
@@ -119,6 +124,11 @@
   static Arena* arena() { return _arena; }  // called for statistics
 
   static void initialize_symbols(int arena_alloc_size = 0);
+
+  static bool use_alternate_hashcode()  { return _seed != 0; }
+  static jint seed()                    { return _seed; }
+
+  unsigned int new_hash(Symbol* sym);
 public:
   enum {
     symbol_alloc_batch_size = 8,
@@ -146,6 +156,8 @@
     initialize_symbols();
   }
 
+  static unsigned int hash_symbol(const char* s, int len);
+
   static Symbol* lookup(const char* name, int len, TRAPS);
   // lookup only, won't add. Also calculate hash.
   static Symbol* lookup_only(const char* name, int len, unsigned int& hash);
@@ -208,6 +220,7 @@
 
   // Debugging
   static void verify();
+  static void dump(outputStream* st);
 
   // Sharing
   static void copy_buckets(char** top, char*end) {
@@ -219,8 +232,13 @@
   static void reverse(void* boundary = NULL) {
     the_table()->Hashtable<Symbol*>::reverse(boundary);
   }
+
+  // Rehash the symbol table if it gets out of balance
+  static void rehash_table();
+  static bool needs_rehashing()         { return _needs_rehashing; }
 };
 
+
 class StringTable : public Hashtable<oop> {
   friend class VMStructs;
 
@@ -228,6 +246,10 @@
   // The string table
   static StringTable* _the_table;
 
+  // Set if one bucket is out of balance due to hash algorithm deficiency
+  static bool _needs_rehashing;
+  static jint _seed;
+
   static oop intern(Handle string_or_null, jchar* chars, int length, TRAPS);
   oop basic_add(int index, Handle string_or_null, jchar* name, int len,
                 unsigned int hashValue, TRAPS);
@@ -241,6 +263,10 @@
     : Hashtable<oop>((int)StringTableSize, sizeof (HashtableEntry<oop>),
                      t, number_of_entries) {}
 
+  static bool use_alternate_hashcode() { return _seed != 0; }
+  static jint seed()                   { return _seed; }
+
+  unsigned int new_hash(oop s);
 public:
   // The string table
   static StringTable* the_table() { return _the_table; }
@@ -265,6 +291,14 @@
   // Invoke "f->do_oop" on the locations of all oops in the table.
   static void oops_do(OopClosure* f);
 
+  // Hashing algorithm, used as the hash value used by the
+  //  StringTable for bucket selection and comparison (stored in the
+  //  HashtableEntry structures).  This is used in the String.intern() method.
+  static unsigned int hash_string(const jchar* s, int len);
+
+  // Internal test.
+  static void test_alt_hash() PRODUCT_RETURN;
+
   // Probing
   static oop lookup(Symbol* symbol);
 
@@ -275,6 +309,7 @@
 
   // Debugging
   static void verify();
+  static void dump(outputStream* st);
 
   // Sharing
   static void copy_buckets(char** top, char*end) {
@@ -286,6 +321,9 @@
   static void reverse() {
     the_table()->Hashtable<oop>::reverse();
   }
+
+  // Rehash the symbol table if it gets out of balance
+  static void rehash_table();
+  static bool needs_rehashing() { return _needs_rehashing; }
 };
-
 #endif // SHARE_VM_CLASSFILE_SYMBOLTABLE_HPP
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp	Tue Jul 03 18:23:04 2012 -0700
@@ -111,6 +111,10 @@
   template(getBootClassPathEntryForClass_name,     "getBootClassPathEntryForClass")  \
   template(sun_misc_PostVMInitHook,                "sun/misc/PostVMInitHook")        \
                                                                                      \
+  /* Java runtime version access */                                                  \
+  template(sun_misc_Version,                       "sun/misc/Version")               \
+  template(java_runtime_name_name,                 "java_runtime_name")              \
+                                                                                     \
   /* class file format tags */                                                       \
   template(tag_source_file,                        "SourceFile")                     \
   template(tag_inner_classes,                      "InnerClasses")                   \
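
For readers unfamiliar with the vmSymbols.hpp convention: template(name, string) above is not a C++ template but an X-macro row -- the same table is expanded with different macro definitions to generate the symbol id enum, the Symbol* accessors, and the string data from one list, so the two new entries stay in sync everywhere. A toy reduction of the pattern (hypothetical names, not the real VM_SYMBOLS_DO machinery):

    #include <cstdio>

    // One row per symbol; 'template' is whatever macro we expand the table with.
    #define MY_SYMBOLS_DO(template)                            \
      template(sun_misc_Version,        "sun/misc/Version")    \
      template(java_runtime_name_name,  "java_runtime_name")   \
      template(tag_source_file,         "SourceFile")

    // Expansion 1: an enum of symbol ids.
    #define AS_ID(name, str) name##_id,
    enum SymbolId { MY_SYMBOLS_DO(AS_ID) symbol_id_count };
    #undef AS_ID

    // Expansion 2: the parallel string table, kept in sync automatically.
    #define AS_STRING(name, str) str,
    static const char* symbol_strings[] = { MY_SYMBOLS_DO(AS_STRING) };
    #undef AS_STRING

    int main() {
      for (int i = 0; i < symbol_id_count; i++) {
        printf("%d -> %s\n", i, symbol_strings[i]);
      }
      return 0;
    }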
--- a/hotspot/src/share/vm/code/vmreg.cpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/code/vmreg.cpp	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,7 +27,7 @@
 #include "code/vmreg.hpp"
 
 // First VMReg value that could refer to a stack slot
-VMReg VMRegImpl::stack0 = (VMReg)(intptr_t)((ConcreteRegisterImpl::number_of_registers + 1) & ~1);
+VMReg VMRegImpl::stack0 = (VMReg)(intptr_t)((ConcreteRegisterImpl::number_of_registers + 7) & ~7);
 
 // VMRegs are 4 bytes wide on all platforms
 const int VMRegImpl::stack_slot_size = 4;
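
The one functional change in this file is the rounding of stack0: the register count was previously rounded up to an even value ((n + 1) & ~1) and is now rounded up to a multiple of eight ((n + 7) & ~7). The general (n + k-1) & ~(k-1) idiom rounds an integer up to the next multiple of any power-of-two k; a quick self-check with a hypothetical helper (not a HotSpot API):

    #include <cassert>
    #include <cstdint>

    // Round n up to the next multiple of 'align', which must be a power of two.
    static inline intptr_t round_up_pow2(intptr_t n, intptr_t align) {
      return (n + (align - 1)) & ~(align - 1);
    }

    int main() {
      assert(round_up_pow2(53, 2) == 54);   // old stack0 rounding: (n + 1) & ~1
      assert(round_up_pow2(53, 8) == 56);   // new stack0 rounding: (n + 7) & ~7
      assert(round_up_pow2(56, 8) == 56);   // already-aligned values are unchanged
      return 0;
    }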
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Jul 03 18:23:04 2012 -0700
@@ -4750,9 +4750,6 @@
       _g1h->g1_policy()->record_thread_age_table(pss.age_table());
       _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
 
-      // Clean up any par-expanded rem sets.
-      HeapRegionRemSet::par_cleanup();
-
       if (ParallelGCVerbose) {
         MutexLocker x(stats_lock());
         pss.print_termination_stats(worker_id);
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Tue Jul 03 18:23:04 2012 -0700
@@ -53,6 +53,9 @@
   develop(bool, G1TraceMarkStackOverflow, false,                            \
           "If true, extra debugging code for CM restart for ovflw.")        \
                                                                             \
+  develop(bool, G1TraceHeapRegionRememberedSet, false,                      \
+          "Enables heap region remembered set debug logs")                  \
+                                                                            \
   diagnostic(bool, G1SummarizeConcMark, false,                              \
           "Summarize concurrent mark info")                                 \
                                                                             \
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Tue Jul 03 18:23:04 2012 -0700
@@ -30,13 +30,10 @@
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "memory/allocation.hpp"
 #include "memory/space.inline.hpp"
+#include "oops/oop.inline.hpp"
 #include "utilities/bitMap.inline.hpp"
 #include "utilities/globalDefinitions.hpp"
 
-#define HRRS_VERBOSE 0
-
-#define PRT_COUNT_OCCUPIED 1
-
 // OtherRegionsTable
 
 class PerRegionTable: public CHeapObj {
@@ -45,14 +42,10 @@
 
   HeapRegion*     _hr;
   BitMap          _bm;
-#if PRT_COUNT_OCCUPIED
   jint            _occupied;
-#endif
-
-  PerRegionTable* _next_free;
-
-  PerRegionTable* next_free() { return _next_free; }
-  void set_next_free(PerRegionTable* prt) { _next_free = prt; }
+  // next pointer for free/allocated list
+  PerRegionTable* _next;
 
   static PerRegionTable* _free_list;
 
@@ -69,63 +62,25 @@
   // We need access in order to union things into the base table.
   BitMap* bm() { return &_bm; }
 
-#if PRT_COUNT_OCCUPIED
   void recount_occupied() {
     _occupied = (jint) bm()->count_one_bits();
   }
-#endif
 
   PerRegionTable(HeapRegion* hr) :
     _hr(hr),
-#if PRT_COUNT_OCCUPIED
     _occupied(0),
-#endif
     _bm(HeapRegion::CardsPerRegion, false /* in-resource-area */)
   {}
 
-  static void free(PerRegionTable* prt) {
-    while (true) {
-      PerRegionTable* fl = _free_list;
-      prt->set_next_free(fl);
-      PerRegionTable* res =
-        (PerRegionTable*)
-        Atomic::cmpxchg_ptr(prt, &_free_list, fl);
-      if (res == fl) return;
-    }
-    ShouldNotReachHere();
-  }
-
-  static PerRegionTable* alloc(HeapRegion* hr) {
-    PerRegionTable* fl = _free_list;
-    while (fl != NULL) {
-      PerRegionTable* nxt = fl->next_free();
-      PerRegionTable* res =
-        (PerRegionTable*)
-        Atomic::cmpxchg_ptr(nxt, &_free_list, fl);
-      if (res == fl) {
-        fl->init(hr);
-        return fl;
-      } else {
-        fl = _free_list;
-      }
-    }
-    assert(fl == NULL, "Loop condition.");
-    return new PerRegionTable(hr);
-  }
-
   void add_card_work(CardIdx_t from_card, bool par) {
     if (!_bm.at(from_card)) {
       if (par) {
         if (_bm.par_at_put(from_card, 1)) {
-#if PRT_COUNT_OCCUPIED
           Atomic::inc(&_occupied);
-#endif
         }
       } else {
         _bm.at_put(from_card, 1);
-#if PRT_COUNT_OCCUPIED
         _occupied++;
-#endif
       }
     }
   }
@@ -134,10 +89,13 @@
     // Must make this robust in case "from" is not in "_hr", because of
     // concurrency.
 
-#if HRRS_VERBOSE
-    gclog_or_tty->print_cr("    PRT::Add_reference_work(" PTR_FORMAT "->" PTR_FORMAT").",
-                           from, *from);
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print_cr("    PRT::Add_reference_work(" PTR_FORMAT "->" PTR_FORMAT").",
+                             from,
+                             UseCompressedOops
+                             ? oopDesc::load_decode_heap_oop((narrowOop*)from)
+                             : oopDesc::load_decode_heap_oop((oop*)from));
+    }
 
     HeapRegion* loc_hr = hr();
     // If the test below fails, then this table was reused concurrently
@@ -162,23 +120,16 @@
 
   HeapRegion* hr() const { return _hr; }
 
-#if PRT_COUNT_OCCUPIED
   jint occupied() const {
     // Overkill, but if we ever need it...
     // guarantee(_occupied == _bm.count_one_bits(), "Check");
     return _occupied;
   }
-#else
-  jint occupied() const {
-    return _bm.count_one_bits();
-  }
-#endif
 
   void init(HeapRegion* hr) {
     _hr = hr;
-#if PRT_COUNT_OCCUPIED
+    _next = NULL;
     _occupied = 0;
-#endif
     _bm.clear();
   }
 
@@ -194,9 +145,7 @@
     HeapWord* hr_bot = hr()->bottom();
     size_t hr_first_card_index = ctbs->index_for(hr_bot);
     bm()->set_intersection_at_offset(*card_bm, hr_first_card_index);
-#if PRT_COUNT_OCCUPIED
     recount_occupied();
-#endif
   }
 
   void add_card(CardIdx_t from_card_index) {
@@ -218,16 +167,6 @@
     return sizeof(this) + _bm.size_in_words() * HeapWordSize;
   }
 
-  static size_t fl_mem_size() {
-    PerRegionTable* cur = _free_list;
-    size_t res = 0;
-    while (cur != NULL) {
-      res += sizeof(PerRegionTable);
-      cur = cur->next_free();
-    }
-    return res;
-  }
-
   // Requires "from" to be in "hr()".
   bool contains_reference(OopOrNarrowOopStar from) const {
     assert(hr()->is_in_reserved(from), "Precondition.");
     size_t card_ind = pointer_delta(from, hr()->bottom(),
                                     CardTableModRefBS::card_size);
     return _bm.at(card_ind);
   }
-};
 
-PerRegionTable* PerRegionTable::_free_list = NULL;
+  PerRegionTable* next() const { return _next; }
+  void set_next(PerRegionTable* nxt) { _next = nxt; }
+  PerRegionTable** next_addr() { return &_next; }
 
-
-#define COUNT_PAR_EXPANDS 0
-
-#if COUNT_PAR_EXPANDS
-static jint n_par_expands = 0;
-static jint n_par_contracts = 0;
-static jint par_expand_list_len = 0;
-static jint max_par_expand_list_len = 0;
-
-static void print_par_expand() {
-  Atomic::inc(&n_par_expands);
-  Atomic::inc(&par_expand_list_len);
-  if (par_expand_list_len > max_par_expand_list_len) {
-    max_par_expand_list_len = par_expand_list_len;
-  }
-  if ((n_par_expands % 10) == 0) {
-    gclog_or_tty->print_cr("\n\n%d par expands: %d contracts, "
-                           "len = %d, max_len = %d\n.",
-                           n_par_expands, n_par_contracts, par_expand_list_len,
-                           max_par_expand_list_len);
-  }
-}
-#endif
-
-class PosParPRT: public PerRegionTable {
-  PerRegionTable** _par_tables;
-
-  enum SomePrivateConstants {
-    ReserveParTableExpansion = 1
-  };
-
-  void par_contract() {
-    assert(_par_tables != NULL, "Precondition.");
-    int n = HeapRegionRemSet::num_par_rem_sets()-1;
-    for (int i = 0; i < n; i++) {
-      _par_tables[i]->union_bitmap_into(bm());
-      PerRegionTable::free(_par_tables[i]);
-      _par_tables[i] = NULL;
-    }
-#if PRT_COUNT_OCCUPIED
-    // We must recount the "occupied."
-    recount_occupied();
-#endif
-    FREE_C_HEAP_ARRAY(PerRegionTable*, _par_tables);
-    _par_tables = NULL;
-#if COUNT_PAR_EXPANDS
-    Atomic::inc(&n_par_contracts);
-    Atomic::dec(&par_expand_list_len);
-#endif
-  }
-
-  static PerRegionTable** _par_table_fl;
-
-  PosParPRT* _next;
-
-  static PosParPRT* _free_list;
-
-  PerRegionTable** par_tables() const {
-    assert(uintptr_t(NULL) == 0, "Assumption.");
-    if (uintptr_t(_par_tables) <= ReserveParTableExpansion)
-      return NULL;
-    else
-      return _par_tables;
-  }
-
-  PosParPRT* _next_par_expanded;
-  PosParPRT* next_par_expanded() { return _next_par_expanded; }
-  void set_next_par_expanded(PosParPRT* ppprt) { _next_par_expanded = ppprt; }
-  static PosParPRT* _par_expanded_list;
-
-public:
-
-  PosParPRT(HeapRegion* hr) : PerRegionTable(hr), _par_tables(NULL) {}
-
-  jint occupied() const {
-    jint res = PerRegionTable::occupied();
-    if (par_tables() != NULL) {
-      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
-        res += par_tables()[i]->occupied();
-      }
-    }
-    return res;
-  }
-
-  void init(HeapRegion* hr) {
-    PerRegionTable::init(hr);
-    _next = NULL;
-    if (par_tables() != NULL) {
-      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
-        par_tables()[i]->init(hr);
-      }
-    }
-  }
-
-  static void free(PosParPRT* prt) {
+  static void free(PerRegionTable* prt) {
     while (true) {
-      PosParPRT* fl = _free_list;
+      PerRegionTable* fl = _free_list;
       prt->set_next(fl);
-      PosParPRT* res =
-        (PosParPRT*)
+      PerRegionTable* res =
+        (PerRegionTable*)
         Atomic::cmpxchg_ptr(prt, &_free_list, fl);
       if (res == fl) return;
     }
     ShouldNotReachHere();
   }
 
-  static PosParPRT* alloc(HeapRegion* hr) {
-    PosParPRT* fl = _free_list;
+  static PerRegionTable* alloc(HeapRegion* hr) {
+    PerRegionTable* fl = _free_list;
     while (fl != NULL) {
-      PosParPRT* nxt = fl->next();
-      PosParPRT* res =
-        (PosParPRT*)
+      PerRegionTable* nxt = fl->next();
+      PerRegionTable* res =
+        (PerRegionTable*)
         Atomic::cmpxchg_ptr(nxt, &_free_list, fl);
       if (res == fl) {
         fl->init(hr);
@@ -360,148 +206,26 @@
       }
     }
     assert(fl == NULL, "Loop condition.");
-    return new PosParPRT(hr);
-  }
-
-  PosParPRT* next() const { return _next; }
-  void set_next(PosParPRT* nxt) { _next = nxt; }
-  PosParPRT** next_addr() { return &_next; }
-
-  bool should_expand(int tid) {
-    // Given that we now defer RSet updates for after a GC we don't
-    // really need to expand the tables any more. This code should be
-    // cleaned up in the future (see CR 6921087).
-    return false;
-  }
-
-  void par_expand() {
-    int n = HeapRegionRemSet::num_par_rem_sets()-1;
-    if (n <= 0) return;
-    if (_par_tables == NULL) {
-      PerRegionTable* res =
-        (PerRegionTable*)
-        Atomic::cmpxchg_ptr((PerRegionTable*)ReserveParTableExpansion,
-                            &_par_tables, NULL);
-      if (res != NULL) return;
-      // Otherwise, we reserved the right to do the expansion.
-
-      PerRegionTable** ptables = NEW_C_HEAP_ARRAY(PerRegionTable*, n);
-      for (int i = 0; i < n; i++) {
-        PerRegionTable* ptable = PerRegionTable::alloc(hr());
-        ptables[i] = ptable;
-      }
-      // Here we do not need an atomic.
-      _par_tables = ptables;
-#if COUNT_PAR_EXPANDS
-      print_par_expand();
-#endif
-      // We must put this table on the expanded list.
-      PosParPRT* exp_head = _par_expanded_list;
-      while (true) {
-        set_next_par_expanded(exp_head);
-        PosParPRT* res =
-          (PosParPRT*)
-          Atomic::cmpxchg_ptr(this, &_par_expanded_list, exp_head);
-        if (res == exp_head) return;
-        // Otherwise.
-        exp_head = res;
-      }
-      ShouldNotReachHere();
-    }
-  }
-
-  void add_reference(OopOrNarrowOopStar from, int tid) {
-    // Expand if necessary.
-    PerRegionTable** pt = par_tables();
-    if (pt != NULL) {
-      // We always have to assume that mods to table 0 are in parallel,
-      // because of the claiming scheme in parallel expansion.  A thread
-      // with tid != 0 that finds the table to be NULL, but doesn't succeed
-      // in claiming the right of expanding it, will end up in the else
-      // clause of the above if test.  That thread could be delayed, and a
-      // thread 0 add reference could see the table expanded, and come
-      // here.  Both threads would be adding in parallel.  But we get to
-      // not use atomics for tids > 0.
-      if (tid == 0) {
-        PerRegionTable::add_reference(from);
-      } else {
-        pt[tid-1]->seq_add_reference(from);
-      }
-    } else {
-      // Not expanded -- add to the base table.
-      PerRegionTable::add_reference(from);
-    }
-  }
-
-  void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) {
-    assert(_par_tables == NULL, "Precondition");
-    PerRegionTable::scrub(ctbs, card_bm);
-  }
-
-  size_t mem_size() const {
-    size_t res =
-      PerRegionTable::mem_size() + sizeof(this) - sizeof(PerRegionTable);
-    if (_par_tables != NULL) {
-      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
-        res += _par_tables[i]->mem_size();
-      }
-    }
-    return res;
+    return new PerRegionTable(hr);
   }
 
   static size_t fl_mem_size() {
-    PosParPRT* cur = _free_list;
+    PerRegionTable* cur = _free_list;
     size_t res = 0;
     while (cur != NULL) {
-      res += sizeof(PosParPRT);
+      res += sizeof(PerRegionTable);
       cur = cur->next();
     }
     return res;
   }
-
-  bool contains_reference(OopOrNarrowOopStar from) const {
-    if (PerRegionTable::contains_reference(from)) return true;
-    if (_par_tables != NULL) {
-      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
-        if (_par_tables[i]->contains_reference(from)) return true;
-      }
-    }
-    return false;
-  }
-
-  static void par_contract_all();
 };
 
-void PosParPRT::par_contract_all() {
-  PosParPRT* hd = _par_expanded_list;
-  while (hd != NULL) {
-    PosParPRT* nxt = hd->next_par_expanded();
-    PosParPRT* res =
-      (PosParPRT*)
-      Atomic::cmpxchg_ptr(nxt, &_par_expanded_list, hd);
-    if (res == hd) {
-      // We claimed the right to contract this table.
-      hd->set_next_par_expanded(NULL);
-      hd->par_contract();
-      hd = _par_expanded_list;
-    } else {
-      hd = res;
-    }
-  }
-}
-
-PosParPRT* PosParPRT::_free_list = NULL;
-PosParPRT* PosParPRT::_par_expanded_list = NULL;
-
-jint OtherRegionsTable::_cache_probes = 0;
-jint OtherRegionsTable::_cache_hits = 0;
+PerRegionTable* PerRegionTable::_free_list = NULL;
 
 size_t OtherRegionsTable::_max_fine_entries = 0;
 size_t OtherRegionsTable::_mod_max_fine_entries_mask = 0;
-#if SAMPLE_FOR_EVICTION
 size_t OtherRegionsTable::_fine_eviction_stride = 0;
 size_t OtherRegionsTable::_fine_eviction_sample_size = 0;
-#endif
 
 OtherRegionsTable::OtherRegionsTable(HeapRegion* hr) :
   _g1h(G1CollectedHeap::heap()),
@@ -511,34 +235,36 @@
              false /* in-resource-area */),
   _fine_grain_regions(NULL),
   _n_fine_entries(0), _n_coarse_entries(0),
-#if SAMPLE_FOR_EVICTION
   _fine_eviction_start(0),
-#endif
   _sparse_table(hr)
 {
-  typedef PosParPRT* PosParPRTPtr;
+  typedef PerRegionTable* PerRegionTablePtr;
+
   if (_max_fine_entries == 0) {
     assert(_mod_max_fine_entries_mask == 0, "Both or none.");
     size_t max_entries_log = (size_t)log2_long((jlong)G1RSetRegionEntries);
     _max_fine_entries = (size_t)(1 << max_entries_log);
    _mod_max_fine_entries_mask = _max_fine_entries - 1;
-#if SAMPLE_FOR_EVICTION
+
     assert(_fine_eviction_sample_size == 0
           && _fine_eviction_stride == 0, "All init at same time.");
     _fine_eviction_sample_size = MAX2((size_t)4, max_entries_log);
     _fine_eviction_stride = _max_fine_entries / _fine_eviction_sample_size;
-#endif
   }
-  _fine_grain_regions = new PosParPRTPtr[_max_fine_entries];
-  if (_fine_grain_regions == NULL)
+
+  _fine_grain_regions = new PerRegionTablePtr[_max_fine_entries];
+
+  if (_fine_grain_regions == NULL) {
     vm_exit_out_of_memory(sizeof(void*)*_max_fine_entries,
                           "Failed to allocate _fine_grain_entries.");
+  }
+
   for (size_t i = 0; i < _max_fine_entries; i++) {
     _fine_grain_regions[i] = NULL;
   }
 }
 
-int** OtherRegionsTable::_from_card_cache = NULL;
+int**  OtherRegionsTable::_from_card_cache = NULL;
 size_t OtherRegionsTable::_from_card_cache_max_regions = 0;
 size_t OtherRegionsTable::_from_card_cache_mem_size = 0;
 
@@ -579,38 +305,26 @@
 
 void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, int tid) {
   size_t cur_hrs_ind = (size_t) hr()->hrs_index();
 
-#if HRRS_VERBOSE
-  gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").",
-                                                  from,
-                                                  UseCompressedOops
-                                                  ? oopDesc::load_decode_heap_oop((narrowOop*)from)
-                                                  : oopDesc::load_decode_heap_oop((oop*)from));
-#endif
+  if (G1TraceHeapRegionRememberedSet) {
+    gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").",
+                                                    from,
+                                                    UseCompressedOops
                                                    ? oopDesc::load_decode_heap_oop((narrowOop*)from)
                                                    : oopDesc::load_decode_heap_oop((oop*)from));
+  }
 
   int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift);
 
-#if HRRS_VERBOSE
-  gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)",
-                hr()->bottom(), from_card,
-                _from_card_cache[tid][cur_hrs_ind]);
-#endif
+  if (G1TraceHeapRegionRememberedSet) {
+    gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)",
+                  hr()->bottom(), from_card,
+                  _from_card_cache[tid][cur_hrs_ind]);
+  }
 
-#define COUNT_CACHE 0
-#if COUNT_CACHE
-  jint p = Atomic::add(1, &_cache_probes);
-  if ((p % 10000) == 0) {
-    jint hits = _cache_hits;
-    gclog_or_tty->print_cr("%d/%d = %5.2f%% RS cache hits.",
-                           _cache_hits, p, 100.0* (float)hits/(float)p);
-  }
-#endif
   if (from_card == _from_card_cache[tid][cur_hrs_ind]) {
-#if HRRS_VERBOSE
-    gclog_or_tty->print_cr("  from-card cache hit.");
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print_cr("  from-card cache hit.");
    }
     assert(contains_reference(from), "We just added it!");
     return;
  } else {
@@ -623,16 +337,16 @@
 
   // If the region is already coarsened, return.
   if (_coarse_map.at(from_hrs_ind)) {
-#if HRRS_VERBOSE
-    gclog_or_tty->print_cr("  coarse map hit.");
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print_cr("  coarse map hit.");
+    }
     assert(contains_reference(from), "We just added it!");
     return;
   }
 
   // Otherwise find a per-region table to add it to.
   size_t ind = from_hrs_ind & _mod_max_fine_entries_mask;
-  PosParPRT* prt = find_region_table(ind, from_hr);
+  PerRegionTable* prt = find_region_table(ind, from_hr);
   if (prt == NULL) {
     MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
     // Confirm that it's really not there...
@@ -649,35 +363,35 @@
           _sparse_table.add_card(from_hrs_ind, card_index)) {
       if (G1RecordHRRSOops) {
         HeapRegionRemSet::record(hr(), from);
-#if HRRS_VERBOSE
-        gclog_or_tty->print("    Added card " PTR_FORMAT " to region "
-                            "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
-                            align_size_down(uintptr_t(from),
-                                            CardTableModRefBS::card_size),
-                            hr()->bottom(), from);
-#endif
+        if (G1TraceHeapRegionRememberedSet) {
+          gclog_or_tty->print("    Added card " PTR_FORMAT " to region "
                              "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
                              align_size_down(uintptr_t(from),
                                              CardTableModRefBS::card_size),
                              hr()->bottom(), from);
+        }
       }
-#if HRRS_VERBOSE
-      gclog_or_tty->print_cr("   added card to sparse table.");
-#endif
+      if (G1TraceHeapRegionRememberedSet) {
+        gclog_or_tty->print_cr("   added card to sparse table.");
+      }
      assert(contains_reference_locked(from), "We just added it!");
      return;
    } else {
-#if HRRS_VERBOSE
-      gclog_or_tty->print_cr("   [tid %d] sparse table entry "
-                    "overflow(f: %d, t: %d)",
-                    tid, from_hrs_ind, cur_hrs_ind);
-#endif
+      if (G1TraceHeapRegionRememberedSet) {
+        gclog_or_tty->print_cr("   [tid %d] sparse table entry "
                      "overflow(f: %d, t: %d)",
                      tid, from_hrs_ind, cur_hrs_ind);
+      }
     }
 
     if (_n_fine_entries == _max_fine_entries) {
       prt = delete_region_table();
     } else {
-      prt = PosParPRT::alloc(from_hr);
+      prt = PerRegionTable::alloc(from_hr);
     }
     prt->init(from_hr);
 
-    PosParPRT* first_prt = _fine_grain_regions[ind];
+    PerRegionTable* first_prt = _fine_grain_regions[ind];
     prt->set_next(first_prt);  // XXX Maybe move to init?
     _fine_grain_regions[ind] = prt;
     _n_fine_entries++;
@@ -704,38 +418,25 @@
   // OtherRegionsTable for why this is OK.
   assert(prt != NULL, "Inv");
 
-  if (prt->should_expand(tid)) {
-    MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
-    HeapRegion* prt_hr = prt->hr();
-    if (prt_hr == from_hr) {
-      // Make sure the table still corresponds to the same region
-      prt->par_expand();
-      prt->add_reference(from, tid);
-    }
-    // else: The table has been concurrently coarsened, evicted, and
-    // the table data structure re-used for another table. So, we
-    // don't need to add the reference any more given that the table
-    // has been coarsened and the whole region will be scanned anyway.
-  } else {
-    prt->add_reference(from, tid);
-  }
+  prt->add_reference(from);
+
   if (G1RecordHRRSOops) {
     HeapRegionRemSet::record(hr(), from);
-#if HRRS_VERBOSE
-    gclog_or_tty->print("Added card " PTR_FORMAT " to region "
-                        "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
-                        align_size_down(uintptr_t(from),
-                                        CardTableModRefBS::card_size),
-                        hr()->bottom(), from);
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print("Added card " PTR_FORMAT " to region "
                          "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
                          align_size_down(uintptr_t(from),
                                          CardTableModRefBS::card_size),
                          hr()->bottom(), from);
+    }
   }
   assert(contains_reference(from), "We just added it!");
 }
 
-PosParPRT*
+PerRegionTable*
 OtherRegionsTable::find_region_table(size_t ind, HeapRegion* hr) const {
   assert(0 <= ind && ind < _max_fine_entries, "Preconditions.");
-  PosParPRT* prt = _fine_grain_regions[ind];
+  PerRegionTable* prt = _fine_grain_regions[ind];
   while (prt != NULL && prt->hr() != hr) {
     prt = prt->next();
   }
@@ -743,32 +444,16 @@
   return prt;
 }
 
-
-#define DRT_CENSUS 0
-
-#if DRT_CENSUS
-static const int HistoSize = 6;
-static int global_histo[HistoSize] = { 0, 0, 0, 0, 0, 0 };
-static int coarsenings = 0;
-static int occ_sum = 0;
-#endif
-
 jint OtherRegionsTable::_n_coarsenings = 0;
 
-PosParPRT* OtherRegionsTable::delete_region_table() {
-#if DRT_CENSUS
-  int histo[HistoSize] = { 0, 0, 0, 0, 0, 0 };
-  const int histo_limits[] = { 1, 4, 16, 64, 256, 2048 };
-#endif
-
+PerRegionTable* OtherRegionsTable::delete_region_table() {
   assert(_m.owned_by_self(), "Precondition");
   assert(_n_fine_entries == _max_fine_entries, "Precondition");
-  PosParPRT* max = NULL;
+  PerRegionTable* max = NULL;
   jint max_occ = 0;
-  PosParPRT** max_prev;
+  PerRegionTable** max_prev;
   size_t max_ind;
 
-#if SAMPLE_FOR_EVICTION
   size_t i = _fine_eviction_start;
   for (size_t k = 0; k < _fine_eviction_sample_size; k++) {
     size_t ii = i;
@@ -778,8 +463,8 @@
       if (ii == _max_fine_entries) ii = 0;
       guarantee(ii != i, "We must find one.");
     }
-    PosParPRT** prev = &_fine_grain_regions[ii];
-    PosParPRT* cur = *prev;
+    PerRegionTable** prev = &_fine_grain_regions[ii];
+    PerRegionTable* cur = *prev;
     while (cur != NULL) {
       jint cur_occ = cur->occupied();
       if (max == NULL || cur_occ > max_occ) {
@@ -794,64 +479,27 @@
     i = i + _fine_eviction_stride;
     if (i >= _n_fine_entries) i = i - _n_fine_entries;
   }
+
   _fine_eviction_start++;
-  if (_fine_eviction_start >= _n_fine_entries)
+
+  if (_fine_eviction_start >= _n_fine_entries) {
     _fine_eviction_start -= _n_fine_entries;
-#else
-  for (int i = 0; i < _max_fine_entries; i++) {
-    PosParPRT** prev = &_fine_grain_regions[i];
-    PosParPRT* cur = *prev;
-    while (cur != NULL) {
-      jint cur_occ = cur->occupied();
-#if DRT_CENSUS
-      for (int k = 0; k < HistoSize; k++) {
-        if (cur_occ <= histo_limits[k]) {
-          histo[k]++; global_histo[k]++; break;
-        }
-      }
-#endif
-      if (max == NULL || cur_occ > max_occ) {
-        max = cur;
-        max_prev = prev;
-        max_ind = i;
-        max_occ = cur_occ;
-      }
-      prev = cur->next_addr();
-      cur = cur->next();
-    }
-  }
-#endif
-  // XXX
+  }
+
   guarantee(max != NULL, "Since _n_fine_entries > 0");
 
-#if DRT_CENSUS
-  gclog_or_tty->print_cr("In a coarsening: histo of occs:");
-  for (int k = 0; k < HistoSize; k++) {
-    gclog_or_tty->print_cr("  <= %4d: %5d.", histo_limits[k], histo[k]);
-  }
-  coarsenings++;
-  occ_sum += max_occ;
-  if ((coarsenings % 100) == 0) {
-    gclog_or_tty->print_cr("\ncoarsenings = %d; global summary:", coarsenings);
-    for (int k = 0; k < HistoSize; k++) {
-      gclog_or_tty->print_cr("  <= %4d: %5d.", histo_limits[k], global_histo[k]);
-    }
-    gclog_or_tty->print_cr("Avg occ of deleted region = %6.2f.",
-                           (float)occ_sum/(float)coarsenings);
-  }
-#endif
-
   // Set the corresponding coarse bit.
   size_t max_hrs_index = (size_t) max->hr()->hrs_index();
   if (!_coarse_map.at(max_hrs_index)) {
     _coarse_map.at_put(max_hrs_index, true);
     _n_coarse_entries++;
-#if 0
-    gclog_or_tty->print("Coarsened entry in region [" PTR_FORMAT "...] "
-               "for region [" PTR_FORMAT "...] (%d coarse entries).\n",
-               hr()->bottom(),
-               max->hr()->bottom(),
-               _n_coarse_entries);
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print("Coarsened entry in region [" PTR_FORMAT "...] "
                  "for region [" PTR_FORMAT "...] (%d coarse entries).\n",
                  hr()->bottom(),
                  max->hr()->bottom(),
                  _n_coarse_entries);
+    }
   }
 
   // Unsplice.
@@ -883,10 +531,10 @@
 
   // Now do the fine-grained maps.
   for (size_t i = 0; i < _max_fine_entries; i++) {
-    PosParPRT* cur = _fine_grain_regions[i];
-    PosParPRT** prev = &_fine_grain_regions[i];
+    PerRegionTable* cur = _fine_grain_regions[i];
+    PerRegionTable** prev = &_fine_grain_regions[i];
     while (cur != NULL) {
-      PosParPRT* nxt = cur->next();
+      PerRegionTable* nxt = cur->next();
       // If the entire region is dead, eliminate.
       if (G1RSScrubVerbose) {
         gclog_or_tty->print_cr("     For other region %u:",
@@ -899,7 +547,7 @@
         if (G1RSScrubVerbose) {
           gclog_or_tty->print_cr("          deleted via region map.");
         }
-        PosParPRT::free(cur);
+        PerRegionTable::free(cur);
       } else {
         // Do fine-grain elimination.
         if (G1RSScrubVerbose) {
@@ -914,7 +562,7 @@
           *prev = nxt;
           cur->set_next(NULL);
           _n_fine_entries--;
-          PosParPRT::free(cur);
+          PerRegionTable::free(cur);
         } else {
           prev = cur->next_addr();
         }
@@ -940,7 +588,7 @@
 size_t OtherRegionsTable::occ_fine() const {
   size_t sum = 0;
   for (size_t i = 0; i < _max_fine_entries; i++) {
-    PosParPRT* cur = _fine_grain_regions[i];
+    PerRegionTable* cur = _fine_grain_regions[i];
     while (cur != NULL) {
       sum += cur->occupied();
       cur = cur->next();
@@ -962,13 +610,13 @@
   MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
   size_t sum = 0;
   for (size_t i = 0; i < _max_fine_entries; i++) {
-    PosParPRT* cur = _fine_grain_regions[i];
+    PerRegionTable* cur = _fine_grain_regions[i];
     while (cur != NULL) {
      sum += cur->mem_size();
      cur = cur->next();
    }
  }
-  sum += (sizeof(PosParPRT*) * _max_fine_entries);
+  sum += (sizeof(PerRegionTable*) * _max_fine_entries);
   sum += (_coarse_map.size_in_words() * HeapWordSize);
   sum += (_sparse_table.mem_size());
   sum += sizeof(*this) - sizeof(_sparse_table); // Avoid double counting above.
@@ -980,7 +628,7 @@
 }
 
 size_t OtherRegionsTable::fl_mem_size() {
-  return PerRegionTable::fl_mem_size() + PosParPRT::fl_mem_size();
+  return PerRegionTable::fl_mem_size();
 }
 
 void OtherRegionsTable::clear_fcc() {
@@ -992,10 +640,10 @@
 void OtherRegionsTable::clear() {
   MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
   for (size_t i = 0; i < _max_fine_entries; i++) {
-    PosParPRT* cur = _fine_grain_regions[i];
+    PerRegionTable* cur = _fine_grain_regions[i];
     while (cur != NULL) {
-      PosParPRT* nxt = cur->next();
-      PosParPRT::free(cur);
+      PerRegionTable* nxt = cur->next();
+      PerRegionTable::free(cur);
       cur = nxt;
     }
     _fine_grain_regions[i] = NULL;
@@ -1035,8 +683,8 @@
 bool OtherRegionsTable::del_single_region_table(size_t ind,
                                                 HeapRegion* hr) {
   assert(0 <= ind && ind < _max_fine_entries, "Preconditions.");
-  PosParPRT** prev_addr = &_fine_grain_regions[ind];
-  PosParPRT* prt = *prev_addr;
+  PerRegionTable** prev_addr = &_fine_grain_regions[ind];
+  PerRegionTable* prt = *prev_addr;
   while (prt != NULL && prt->hr() != hr) {
     prev_addr = prt->next_addr();
     prt = prt->next();
@@ -1044,7 +692,7 @@
   if (prt != NULL) {
     assert(prt->hr() == hr, "Loop postcondition.");
     *prev_addr = prt->next();
-    PosParPRT::free(prt);
+    PerRegionTable::free(prt);
     _n_fine_entries--;
     return true;
   } else {
@@ -1065,7 +713,7 @@
   // Is this region in the coarse map?
   if (_coarse_map.at(hr_ind)) return true;
 
-  PosParPRT* prt = find_region_table(hr_ind & _mod_max_fine_entries_mask,
+  PerRegionTable* prt = find_region_table(hr_ind & _mod_max_fine_entries_mask,
                                      hr);
   if (prt != NULL) {
     return prt->contains_reference(from);
@@ -1145,7 +793,7 @@
       G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index);
     gclog_or_tty->print_cr("  Card " PTR_FORMAT, card_start);
   }
-  // XXX
+
  if (iter.n_yielded() != occupied()) {
    gclog_or_tty->print_cr("Yielded disagrees with occupied:");
    gclog_or_tty->print_cr("  %6d yielded (%6d coarse, %6d fine).",
@@ -1163,10 +811,6 @@
   SparsePRT::cleanup_all();
 }
 
-void HeapRegionRemSet::par_cleanup() {
-  PosParPRT::par_contract_all();
-}
-
 void HeapRegionRemSet::clear() {
   _other_regions.clear();
   assert(occupied() == 0, "Should be clear.");
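
The free()/alloc() pair that survives on PerRegionTable (inherited from the deleted PosParPRT versions) manages the global free list with a compare-and-swap retry loop rather than a lock -- a classic Treiber stack. A sketch of the same push/pop discipline in portable C++, with std::atomic standing in for Atomic::cmpxchg_ptr and a hypothetical Node type (illustration, not the VM code):

    #include <atomic>

    struct Node { Node* next; };

    static std::atomic<Node*> free_list{nullptr};

    // Mirrors PerRegionTable::free(): push onto the list head, retrying
    // until the CAS observes an unchanged head.
    void push(Node* n) {
      Node* head = free_list.load(std::memory_order_relaxed);
      do {
        n->next = head;
      } while (!free_list.compare_exchange_weak(head, n,
                                                std::memory_order_release,
                                                std::memory_order_relaxed));
    }

    // Mirrors PerRegionTable::alloc(): pop the head, or return nullptr so
    // the caller can fall back to heap allocation, as the VM code does with
    // "return new PerRegionTable(hr)".
    Node* pop() {
      Node* head = free_list.load(std::memory_order_acquire);
      while (head != nullptr &&
             !free_list.compare_exchange_weak(head, head->next,
                                              std::memory_order_acquire,
                                              std::memory_order_acquire)) {
        // on failure, compare_exchange_weak reloads 'head' for the retry
      }
      return head;
    }

A bare CAS pop like this is ABA-prone in general if nodes can be recycled while another thread is mid-pop; the G1 code narrows that window by returning tables to the list only from paths such as scrub() and clear() that run under the table lock or at a safepoint. A general-purpose implementation would need hazard pointers or version-tagged pointers instead.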
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Tue Jul 03 18:23:04 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@
 class G1BlockOffsetSharedArray;
 class HeapRegion;
 class HeapRegionRemSetIterator;
-class PosParPRT;
+class PerRegionTable;
 class SparsePRT;
 
 // Essentially a wrapper around SparsePRTCleanupTask. See
@@ -79,15 +79,14 @@
   size_t _n_coarse_entries;
   static jint _n_coarsenings;
 
-  PosParPRT** _fine_grain_regions;
-  size_t _n_fine_entries;
+  PerRegionTable** _fine_grain_regions;
+  size_t           _n_fine_entries;
 
-#define SAMPLE_FOR_EVICTION 1
-#if SAMPLE_FOR_EVICTION
+  // Used to sample a subset of the fine grain PRTs to determine which
+  // PRT to evict and coarsen.
   size_t        _fine_eviction_start;
   static size_t _fine_eviction_stride;
   static size_t _fine_eviction_sample_size;
-#endif
 
   SparsePRT   _sparse_table;
 
@@ -98,21 +97,18 @@
   // Requires "prt" to be the first element of the bucket list appropriate
   // for "hr".  If this list contains an entry for "hr", return it,
   // otherwise return "NULL".
-  PosParPRT* find_region_table(size_t ind, HeapRegion* hr) const;
+  PerRegionTable* find_region_table(size_t ind, HeapRegion* hr) const;
 
-  // Find, delete, and return a candidate PosParPRT, if any exists,
+  // Find, delete, and return a candidate PerRegionTable, if any exists,
   // adding the deleted region to the coarse bitmap.  Requires the caller
   // to hold _m, and the fine-grain table to be full.
-  PosParPRT* delete_region_table();
+  PerRegionTable* delete_region_table();
 
   // If a PRT for "hr" is in the bucket list indicated by "ind" (which must
   // be the correct index for "hr"), delete it and return true; else return
   // false.
   bool del_single_region_table(size_t ind, HeapRegion* hr);
 
-  static jint _cache_probes;
-  static jint _cache_hits;
-
   // Indexed by thread X heap region, to minimize thread contention.
   static int** _from_card_cache;
   static size_t _from_card_cache_max_regions;
@@ -127,10 +123,6 @@
   // sense.
   void add_reference(OopOrNarrowOopStar from, int tid);
 
-  void add_reference(OopOrNarrowOopStar from) {
-    return add_reference(from, 0);
-  }
-
   // Removes any entries shown by the given bitmaps to contain only dead
   // objects.
   void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
@@ -233,14 +225,12 @@
 
   static jint n_coarsenings() { return OtherRegionsTable::n_coarsenings(); }
 
-  /* Used in the sequential case.  Returns "true" iff this addition causes
-     the size limit to be reached. */
+  // Used in the sequential case.
   void add_reference(OopOrNarrowOopStar from) {
-    _other_regions.add_reference(from);
+    _other_regions.add_reference(from, 0);
   }
 
-  /* Used in the parallel case.  Returns "true" iff this addition causes
-     the size limit to be reached. */
+  // Used in the parallel case.
   void add_reference(OopOrNarrowOopStar from, int tid) {
     _other_regions.add_reference(from, tid);
   }
@@ -253,15 +243,6 @@
   // entries for this region in other remsets.
   void clear();
 
-  // Forget any entries due to pointers from "from_hr".
-  void clear_incoming_entry(HeapRegion* from_hr) {
-    _other_regions.clear_incoming_entry(from_hr);
-  }
-
-#if 0
-  virtual void cleanup() = 0;
-#endif
-
   // Attempt to claim the region.  Returns true iff this call caused an
   // atomic transition from Unclaimed to Claimed.
   bool claim_iter();
@@ -290,12 +271,6 @@
   // Initialize the given iterator to iterate over this rem set.
   void init_iterator(HeapRegionRemSetIterator* iter) const;
 
-#if 0
-  // Apply the "do_card" method to the start address of every card in the
-  // rem set.  Returns false if some application of the closure aborted.
-  virtual bool card_iterate(CardClosure* iter) = 0;
-#endif
-
   // The actual # of bytes this hr_remset takes up.
   size_t mem_size() {
     return _other_regions.mem_size()
@@ -322,10 +297,7 @@
   void print() const;
 
   // Called during a stop-world phase to perform any deferred cleanups.
-  // The second version may be called by parallel threads after then finish
-  // collection work.
   static void cleanup();
-  static void par_cleanup();
 
   // Declare the heap size (in # of regions) to the HeapRegionRemSet(s).
   // (Uses it to initialize from_card_cache).
@@ -367,7 +339,7 @@
 
   // Local caching of HRRS fields.
   const BitMap*   _coarse_map;
-  PosParPRT**     _fine_grain_regions;
+  PerRegionTable** _fine_grain_regions;
 
   G1BlockOffsetSharedArray* _bosa;
   G1CollectedHeap*          _g1h;
@@ -404,8 +376,9 @@
 
   // Index of bucket-list we're working on.
   int _fine_array_index;
+
   // Per Region Table we're doing within current bucket list.
-  PosParPRT* _fine_cur_prt;
+  PerRegionTable* _fine_cur_prt;
 
   /*SparsePRT::*/ SparsePRTIter _sparse_iter;
 
@@ -435,12 +408,4 @@
   }
 };
 
-#if 0
-class CardClosure: public Closure {
-public:
-  virtual void do_card(HeapWord* card_start) = 0;
-};
-
-#endif
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONREMSET_HPP
--- a/hotspot/src/share/vm/memory/dump.cpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/memory/dump.cpp	Tue Jul 03 18:23:04 2012 -0700
@@ -62,8 +62,8 @@
 // written later, increasing the likelihood that the shared page contain
 // the hash can be shared.
 //
-// NOTE THAT the algorithm in StringTable::hash_string() MUST MATCH the
-// algorithm in java.lang.String.hashCode().
+// NOTE THAT we have to call java_lang_String::to_hash() to match the
+// algorithm in java.lang.String.toHash().
 
 class StringHashCodeClosure: public OopClosure {
  private:
@@ -80,7 +80,7 @@
     oop obj = *p;
     if (obj->klass() == SystemDictionary::String_klass() &&
         java_lang_String::has_hash_field()) {
-      int hash = java_lang_String::hash_string(obj);
+      int hash = java_lang_String::to_hash(obj);
       obj->int_field_put(hash_offset, hash);
     }
   }
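
The hash precomputed into each String here has to agree bit-for-bit with what the Java side computes at runtime; otherwise the first hashCode() call would rewrite the field and dirty the archive page the dump was trying to keep shareable. For reference, the classic java.lang.String.hashCode() polynomial is h = s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]; a standalone check (illustration only -- the to_hash() the new comment refers to is HotSpot's C++ mirror of the Java-side algorithm):

    #include <cassert>

    // The 31-multiplier polynomial used by java.lang.String.hashCode().
    // A value precomputed at dump time must match this exactly.
    unsigned int string_hash(const unsigned short* s, int len) {
      unsigned int h = 0;
      for (int i = 0; i < len; i++) {
        h = 31 * h + s[i];
      }
      return h;
    }

    int main() {
      const unsigned short abc[] = {'a', 'b', 'c'};
      assert(string_hash(abc, 3) == 96354);  // "abc".hashCode() in Java
      return 0;
    }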
--- a/hotspot/src/share/vm/memory/universe.hpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/memory/universe.hpp	Tue Jul 03 18:23:04 2012 -0700
@@ -273,7 +273,7 @@
   }
 
   static klassOop typeArrayKlassObj(BasicType t) {
-    assert((uint)t < T_VOID+1, "range check");
+    assert((uint)t < T_VOID+1, err_msg("range check for type: %s", type2name(t)));
     assert(_typeArrayKlassObjs[t] != NULL, "domain check");
     return _typeArrayKlassObjs[t];
   }
--- a/hotspot/src/share/vm/opto/c2_globals.hpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp	Tue Jul 03 18:23:04 2012 -0700
@@ -81,6 +81,13 @@
   product(intx, MaxLoopPad, (OptoLoopAlignment-1),                          \
           "Align a loop if padding size in bytes is less or equal to this value") \
                                                                             \
+  product(intx, MaxVectorSize, 32,                                          \
+          "Max vector size in bytes, "                                      \
+          "actual size could be less depending on elements type")           \
+                                                                            \
+  product(bool, AlignVector, false,                                         \
+          "Perform vector store/load alignment in loop")                    \
+                                                                            \
   product(intx, NumberOfLoopInstrToAlign, 4,                                \
           "Number of first instructions in a loop to align")                \
                                                                             \
@@ -292,9 +299,12 @@
   develop(bool, SuperWordRTDepCheck, false,                                 \
           "Enable runtime dependency checks.")                              \
                                                                             \
-  product(bool, TraceSuperWord, false,                                      \
+  notproduct(bool, TraceSuperWord, false,                                   \
           "Trace superword transforms")                                     \
                                                                             \
+  notproduct(bool, TraceNewVectors, false,                                  \
+          "Trace creation of Vector nodes")                                 \
+                                                                            \
   product_pd(bool, OptoBundling,                                            \
           "Generate nops to fill i-cache lines")                            \
                                                                             \
--- a/hotspot/src/share/vm/opto/callGenerator.cpp	Wed Jun 27 21:09:29 2012 +0100
+++ b/hotspot/src/share/vm/opto/callGenerator.cpp	Tue Jul 03 18:23:04 2012 -0700
@@ -172,9 +172,11 @@
 
 JVMState* DynamicCallGenerator::generate(JVMState* jvms) {
   GraphKit kit(jvms);
+  Compile* C = kit.C;
+  PhaseGVN& gvn = kit.gvn();
 
-  if (kit.C->log() != NULL) {
-    kit.C->log()->elem("dynamic_call bci='%d'", jvms->bci());
+  if (C->log() != NULL) {
+    C->log()->elem("dynamic_call bci='%d'", jvms->bci());
   }
 
   // Get the constant pool cache from the caller class.
@@ -190,18 +192,21 @@
   size_t call_site_offset = cpcache->get_f1_offset(index);
 
   // Load the CallSite object from the constant pool cache.
-  const TypeOopPtr* cpcache_ptr = TypeOopPtr::make_from_constant(cpcache);
-  Node* cpcache_adr = kit.makecon(cpcache_ptr);
-  Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, cpcache_adr, call_site_offset);
-  Node* call_site = kit.make_load(kit.control(), call_site_adr, TypeInstPtr::BOTTOM, T_OBJECT, Compile::AliasIdxRaw);
+  const TypeOopPtr* cpcache_type   = TypeOopPtr::make_from_constant(cpcache);  // returns TypeAryPtr of type T_OBJECT
+  const TypeOopPtr* call_site_type = TypeOopPtr::make_from_klass(C->env()->CallSite_klass());
+  Node* cpcache_adr   = kit.makecon(cpcache_type);
+  Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, call_site_offset);
+  // The oops in the constant pool cache are not compress