changeset 57999:8f8232cadaff vectorSnippets

Expression Language prototype work for building MethodHandle-based kernels
author henryjen
date Thu, 14 Dec 2017 20:21:25 -0800
parents 7f913ccc2b21
children 44dd59eb191f
files test/jdk/panama/Expressions/pom.xml test/jdk/panama/Expressions/src/main/java/com/oracle/vector/CPUID.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/PatchableVecUtils.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/Ops.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/Shape.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/Shapes.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/Val.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/builder/MHMeta.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/comp/ExpComp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/comp/ExpVarOrder.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/DoubleScalarBinOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/DoubleScalarOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/Expression.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/Expressions.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/FloatScalarBinOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/FloatScalarOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/ITE.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/IndexableVal.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/IndexedVal.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/IntScalarBinOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/IntScalarOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/LongScalarBinOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/LongScalarOp.java 
test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VAdd.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VBroadcast.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VConst.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VDiv.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VMask.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VMul.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VProd.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VSub.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VSum.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/bexp/BExp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/bexp/BOpExp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/bexp/VBinBExp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/bexp/VUnBExp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/DoubleBinOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/DoubleOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/FloatBinOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/FloatOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/IntBinOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/IntOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/LongBinOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/LongOp.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/types/Float256.java 
test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/types/VectorClass.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/types/Vectors.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/stmt/Assignment.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/stmt/Builder.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/stmt/For.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/stmt/Statement.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/stmt/While.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/visitor/ExpressionEvaluator.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/visitor/StatementVisitor.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/ops/OpProvider.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/ops/OpProviders.java test/jdk/panama/Expressions/src/main/java/com/oracle/vector/ops/TestOps.java test/jdk/panama/Expressions/src/test/jdk/java/ArrTest.java test/jdk/panama/Expressions/src/test/jdk/java/CrossLane.java test/jdk/panama/Expressions/src/test/jdk/java/LoopExample/AddKernel.java test/jdk/panama/Expressions/src/test/jdk/java/LoopExample/AddKernelEL.java test/jdk/panama/Expressions/src/test/jdk/java/LoopExample/DeriveValueType.java test/jdk/panama/Expressions/src/test/jdk/java/LoopExample/Float256.java test/jdk/panama/Expressions/src/test/jdk/java/test/jdk/MaskTest.java test/jdk/panama/Expressions/src/test/jdk/java/test/jdk/ReduceTest.java test/jdk/panama/Expressions/src/test/jdk/java/test/jdk/ZipTest.java
diffstat 66 files changed, 10122 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/pom.xml	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>panama</groupId>
+    <artifactId>expressions</artifactId>
+    <version>.01-SNAPSHOT</version>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.6.1</version>
+                <configuration>
+                    <fork>true</fork>
+                    <source>1.9</source>
+                    <target>1.9</target>
+                    <verbose>true</verbose>
+                    <compilerArgs>
+                        <!-- The odd quoting in the arguments below is a deliberate hack to work
+                        around limitations in how compiler options can be expressed here. -->
+                        <compilerArgument>--add-exports"
+                            "java.base/jdk.internal.misc=ALL-UNNAMED</compilerArgument>
+                        <compilerArgument>--add-exports"
+                            "java.base/jdk.internal.vm.annotation=ALL-UNNAMED</compilerArgument>
+                        <compilerArgument>--add-exports"
+                            "jdk.vm.ci/jdk.vm.ci.code=ALL-UNNAMED</compilerArgument>
+                        <compilerArgument>--add-exports"
+                            "jdk.vm.ci/jdk.vm.ci.amd64=ALL-UNNAMED</compilerArgument>
+                    </compilerArgs>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/CPUID.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.vector;
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodType;
+
+public class CPUID {
+    static boolean isX64() {
+        String arch = System.getProperties().getProperty("os.arch");
+        return "x86_64".equals(arch) || "amd64".equals(arch);
+    }
+
+    private static final MethodHandle MHcpuid = jdk.vm.ci.panama.MachineCodeSnippet.make(
+            "cpuid", MethodType.methodType(Long2.class, int.class /*esi*/, int.class /*edx*/),
+            /*isSupported=*/isX64(),
+            0x53,                                // push rbx          ;; callee-saved reg
+            0x8B, 0xC6,                          // mov eax,esi       ;; put cpuid arguments (eax, ecx)
+            0x8B, 0xCA,                          // mov ecx,edx
+            0x0F, 0xA2,                          // cpuid
+            0x66, 0x0F, 0x3A, 0x22, 0xC0, 0x00,  // pinsrd xmm0,eax,0 ;; pack result
+            0x66, 0x0F, 0x3A, 0x22, 0xC3, 0x01,  // pinsrd xmm0,ebx,1
+            0x66, 0x0F, 0x3A, 0x22, 0xC1, 0x02,  // pinsrd xmm0,ecx,2
+            0x66, 0x0F, 0x3A, 0x22, 0xC2, 0x03,  // pinsrd xmm0,edx,3
+            0x5B);                               // pop rbx           ;; restore
+
+
+    private final int eax;
+    private final int ebx;
+    private final int ecx;
+    private final int edx;
+
+    private CPUID(int eax, int ebx, int ecx, int edx) {
+        this.eax = eax;
+        this.ebx = ebx;
+        this.ecx = ecx;
+        this.edx = edx;
+    }
+
+    private static CPUID cpuid(int eax, int ecx) {
+        try {
+            Long2 res = (Long2)MHcpuid.invokeExact(eax, ecx);
+            return new CPUID(
+                    (int)(res.extract(0) & 0xFFFFFFFF), // eax
+                    (int)(res.extract(0) >>> 32),       // ebx
+                    (int)(res.extract(1) & 0xFFFFFFFF), // ecx
+                    (int)(res.extract(1) >>> 32));      // edx
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    enum Capabilities {
+        SSE, SSE2, SSE3, SSSE3, SSE41, SSE42,
+        AVX, AVX2, AVX512, BMI2
+    }
+
+    static boolean has(Capabilities cap) {
+        switch(cap) {
+            case SSE:    return ((cpuid(0x01, 0).edx >>> 25) & 1) != 0; // CPUID.EAX=01H:EDX.SSE  [bit 25] = 1
+            case SSE2:   return ((cpuid(0x01, 0).edx >>> 26) & 1) != 0; // CPUID.EAX=01H:EDX.SSE2 [bit 26] = 1
+            case SSE3:   return ((cpuid(0x01, 0).ecx >>>  0) & 1) != 0; // CPUID.EAX=01H:ECX.SSE3 [bit  0] = 1
+            case SSSE3:  return ((cpuid(0x01, 0).ecx >>>  9) & 1) != 0; // CPUID.EAX=01H:ECX.SSSE3[bit  9] = 1
+            case SSE41:  return ((cpuid(0x01, 0).ecx >>> 19) & 1) != 0; // CPUID.EAX=01H:ECX.SSE41[bit 19] = 1
+            case SSE42:  return ((cpuid(0x01, 0).ecx >>> 20) & 1) != 0; // CPUID.EAX=01H:ECX.SSE42[bit 20] = 1
+            case AVX:    return ((cpuid(0x01, 0).ecx >>> 28) & 1) != 0; // CPUID.EAX=01H:ECX.AVX  [bit 28] = 1
+            case AVX2:   return ((cpuid(0x07, 0).ebx >>>  5) & 1) != 0; // CPUID.EAX=07H.EBX.AVX2 [bit  5] = 1
+            case BMI2:   return ((cpuid(0x07, 0).ebx >>>  8) & 1) != 0; // CPUID.EAX=07H.EBX.BMI  [bit  8] = 1
+            case AVX512: return false; // TODO
+            default:
+                throw new Error("Unknown capability: "+cap.toString());
+        }
+    }
+
+    @Override
+    public String toString() {
+        return String.format("CPUID{ eax=%08x; ebx=%08x; ecx=%08x; edx=%08x }",
+                eax, ebx, ecx, edx);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        CPUID cpuid = (CPUID) o;
+
+        if (eax != cpuid.eax) return false;
+        if (ebx != cpuid.ebx) return false;
+        if (ecx != cpuid.ecx) return false;
+        return edx == cpuid.edx;
+
+    }
+
+    @Override
+    public int hashCode() {
+        int result = eax;
+        result = 31 * result + ebx;
+        result = 31 * result + ecx;
+        result = 31 * result + edx;
+        return result;
+    }
+
+    public static void main(String[] args) {
+        if (!isX64())  return; // Not supported
+
+        int max = cpuid(0, 0).eax;
+        for (int i = 0; i < max; i++) {
+            System.out.printf("0x%02xH: %s\n", i, cpuid(i, 0));
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/PatchableVecUtils.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,5205 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector;
+
+import jdk.internal.misc.Unsafe;
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.vm.ci.code.Register;
+import jdk.vm.ci.panama.MachineCodeSnippet;
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodType;
+import jdk.internal.vm.annotation.Stable;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+import java.util.stream.IntStream;
+import java.util.HashSet;
+
+import static com.oracle.vector.CPUID.*;
+import static com.oracle.vector.CPUID.Capabilities.*;
+
+import static com.oracle.vector.PatchableVecUtils.VEXLength.*;
+import static com.oracle.vector.PatchableVecUtils.VEXWBit.*;
+import static com.oracle.vector.PatchableVecUtils.VEXRBit.*;
+import static com.oracle.vector.PatchableVecUtils.VEXBBit.*;
+import static com.oracle.vector.PatchableVecUtils.VEXXBit.*;
+import static com.oracle.vector.PatchableVecUtils.SIMDPrefix.*;
+import static com.oracle.vector.PatchableVecUtils.VEXOpcode.*;
+import static jdk.vm.ci.amd64.AMD64.*;
+
+public class PatchableVecUtils {
+    // Shared MethodType descriptors for the machine-code snippets declared in this class.
+    // In each methodType(...) call the first argument is the return type and the remaining
+    // arguments are the parameter types, in order.
+    private static final MethodType MT_L2_UNARY = MethodType.methodType(Long2.class, Long2.class);
+    private static final MethodType MT_L2_BINARY = MethodType.methodType(Long2.class, Long2.class, Long2.class);
+    private static final MethodType MT_L4_UNARY = MethodType.methodType(Long4.class, Long4.class);
+    private static final MethodType MT_L4_BINARY = MethodType.methodType(Long4.class, Long4.class, Long4.class);
+    private static final MethodType MT_INT_BINARY = MethodType.methodType(int.class, int.class, int.class);
+    private static final MethodType MT_LONG_BINARY = MethodType.methodType(long.class, long.class, long.class);
+    private static final MethodType MT_L2_L2_INT = MethodType.methodType(Long2.class, Long2.class, int.class);
+    private static final MethodType MT_L4_L4_INT = MethodType.methodType(Long4.class, Long4.class, int.class);
+    // NOTE(review): this uses the boxed Long.class while MT_L4_L4_LONG and MT_L2_LONG_BINARY use
+    // the primitive long.class. Looks like a typo for long.class - confirm against the users of
+    // this constant before changing it.
+    private static final MethodType MT_L2_L2_LONG = MethodType.methodType(Long2.class, Long2.class, Long.class);
+    private static final MethodType MT_L4_L4_LONG = MethodType.methodType(Long4.class, Long4.class, long.class);
+    private static final MethodType MT_L4_L4_LONG_INT = MethodType.methodType(Long4.class, Long4.class, long.class, int.class);
+    private static final MethodType MT_L2_INT = MethodType.methodType(Long2.class,int.class);
+    private static final MethodType MT_L4_INT = MethodType.methodType(Long4.class,int.class);
+    private static final MethodType MT_L2_BYTE = MethodType.methodType(Long2.class,byte.class);
+    private static final MethodType MT_L4_BYTE = MethodType.methodType(Long4.class,byte.class);
+    private static final MethodType MT_L2_SHORT = MethodType.methodType(Long2.class,short.class);
+    private static final MethodType MT_L4_SHORT = MethodType.methodType(Long4.class,short.class);
+    private static final MethodType MT_L2_LONG = MethodType.methodType(Long2.class,long.class);
+    private static final MethodType MT_L4_LONG = MethodType.methodType(Long4.class, long.class);
+    private static final MethodType MT_L2_LONG_BINARY = MethodType.methodType(Long2.class, Long2.class, long.class);
+    // Despite the name, this descriptor is (Long4, Long2) -> Long4.
+    private static final MethodType MT_L4_L2 = MethodType.methodType(Long4.class, Long4.class, Long2.class);
+    // Despite the name, this descriptor is (Long4) -> Long2.
+    private static final MethodType MT_L4_L2_ = MethodType.methodType(Long2.class, Long4.class);
+    private static final MethodType MT_INT_L2 = MethodType.methodType(int.class, Long2.class);
+    private static final MethodType MT_FLOAT_FLOAT_L4 = MethodType.methodType(float.class,float.class,Long4.class);
+    private static final MethodType MT_FLOAT_FLOAT_L4_L4 = MethodType.methodType(float.class,float.class,Long4.class,Long4.class);
+
+
+    // Descriptors taking an (Object, long) pair alongside a vector value or result.
+    private static final MethodType MT_VOID_OBJ_LONG_L4 = MethodType.methodType(void.class, Object.class, long.class, Long4.class);
+    private static final MethodType MT_VOID_OBJ_LONG_L2 = MethodType.methodType(void.class, Object.class, long.class, Long2.class);
+    private static final MethodType MT_L4_OBJ_LONG = MethodType.methodType(Long4.class, Object.class, long.class);
+    private static final MethodType MT_L2_OBJ_LONG = MethodType.methodType(Long2.class, Object.class, long.class);
+    private static final MethodType MT_LONG_L2 = MethodType.methodType(long.class, Long2.class);
+    private static final MethodType MT_L4_L4_FLOATARY_INT = MethodType.methodType(Long4.class, Long4.class, float[].class, int.class);
+    private static final MethodType MT_L4_L4_DOUBLEARY_INT = MethodType.methodType(Long4.class, Long4.class, double[].class, int.class);
+    private static final MethodType MT_L2_FLOAT = MethodType.methodType(Long2.class, float.class);
+    private static final MethodType MT_L4_FLOAT = MethodType.methodType(Long4.class, float.class);
+    private static final MethodType MT_L2_DOUBLE = MethodType.methodType(Long2.class, double.class);
+    private static final MethodType MT_L4_DOUBLE = MethodType.methodType(Long4.class, double.class);
+    private static final MethodType MT_FLOAT_FLOAT_L4_FLOATARY_INT = MethodType.methodType(float.class,float.class,Long4.class,float[].class,int.class);
+    private static final MethodType MT_L4_L4_L4_L4 = MethodType.methodType(Long4.class,Long4.class,Long4.class,Long4.class);
+    private static final MethodType MT_L2_L2_L2_L2 = MethodType.methodType(Long2.class,Long2.class,Long2.class,Long2.class);
+
+    private static final MethodType MT_FLOAT_L4 = MethodType.methodType(float.class,Long4.class);
+    private static final MethodType MT_DOUBLE_L4 = MethodType.methodType(double.class,Long4.class);
+
+    private static final MethodType MT_BYTE_L2 = MethodType.methodType(byte.class,Long2.class);
+    private static final MethodType MT_L2_L2_BYTE = MethodType.methodType(Long2.class, Long2.class, byte.class);
+    private static final MethodType MT_SHORT_L2 = MethodType.methodType(short.class,Long2.class);
+    private static final MethodType MT_L2_L2_SHORT = MethodType.methodType(Long2.class, Long2.class, short.class);
+
+    static private final Register[] ub = {r8, r9, r10, r11, r12, r13, r14, r15, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15};
+
+    /**
+     * Tells whether {@code r} is one of the registers listed in {@code ub}
+     * (r8-r15 and xmm8-xmm15). Registers are compared by identity.
+     */
+    static private boolean isUB64(Register r) {
+        return Arrays.stream(ub).anyMatch(candidate -> candidate == r);
+    }
+
+    //NOTE: REX bits are 1's-complemented in VEX Bit form.
+    /** Chooses the VEX X bit for {@code r}: {@code X_HIGH} if {@link #isUB64} accepts it, otherwise {@code X_LOW}. */
+    static private VEXXBit xBit(Register r) {
+        if (isUB64(r)) {
+            return X_HIGH;
+        }
+        return X_LOW;
+    }
+
+    /** Chooses the VEX R bit for {@code r}: {@code R_HIGH} if {@link #isUB64} accepts it, otherwise {@code R_LOW}. */
+    static private VEXRBit rBit(Register r) {
+        if (isUB64(r)) {
+            return R_HIGH;
+        }
+        return R_LOW;
+    }
+
+    /** Chooses the VEX B bit for {@code r}: {@code B_HIGH} if {@link #isUB64} accepts it, otherwise {@code B_LOW}. */
+    static private VEXBBit bBit(Register r) {
+        if (isUB64(r)) {
+            return B_HIGH;
+        }
+        return B_LOW;
+    }
+
+    /**
+     * Reports whether a snippet that needs {@code cap} can be used on this machine.
+     * The CPUID query is only issued once the platform is known to be x64.
+     */
+    static boolean requires(CPUID.Capabilities cap) {
+        if (!isX64()) {
+            return false;
+        }
+        return CPUID.has(cap);
+    }
+
+    static int sibByte(Register index, Register base, int scale) {
+        return ((scale & 0x3) << 6) | ((index.encoding() & 0x7) << 3) | (base.encoding() & 0x7);
+    }
+
+    static int vsibByte(Register index, Register base, int scale) {
+        if (!(cpuRegisters[base.encoding()] == base))
+            throw new IllegalArgumentException("VSIB Base register must be a GPR");
+        if (!(xmmRegistersSSE[index.encoding()] == index))
+            throw new IllegalArgumentException("VSIB Index register must be an XMM register");
+        return ((scale & 0x3) << 6) | ((index.encoding() & 0x7) << 3) | (base.encoding() & 0x7);
+    }
+
+    static int modRM_SIB_NODISP(Register reg) {
+        return 0b00000100 | ((reg.encoding() & 0x7) << 3);
+    }
+
+    static int modRM_SIB_DISP8(Register reg) {
+        return 0b01000100 | ((reg.encoding() & 0x7) << 3);
+    }
+
+    static int modRM_SIB_DISP32(Register reg) {
+        return 0b10000100 | ((reg.encoding() & 0x7) << 3);
+    }
+
+    //Register indirect addressing
+    /* REG, R/M , resp.*/
+    static int modRM_regInd(Register reg, Register r_m) {
+        assert (reg.encoding() & 0x7) == reg.encoding();
+        assert (r_m.encoding() & 0x7) == r_m.encoding();
+        return /*0x00 |*/ ((reg.encoding() & 0x7) << 3) | (r_m.encoding() & 0x7);
+    }
+
+    //Register base+index+disp8 addressing
+    static int modRM_reg_bid8(Register reg, Register r_m) {
+        assert (reg.encoding() & 0x7) == reg.encoding();
+        assert (r_m.encoding() & 0x7) == r_m.encoding();
+        return 0x40 | ((reg.encoding() & 0x7) << 3) | (r_m.encoding() & 0x7);
+    }
+
+    static int modRM(Register reg, Register r_m) {
+        assert (reg.encoding() & 0x7) == reg.encoding();
+        assert (r_m.encoding() & 0x7) == r_m.encoding();
+        return 0xC0 | ((reg.encoding() & 0x7) << 3) | (r_m.encoding() & 0x7);
+    }
+
+    // Vex encoding for instructions with nds registers
+    static int[] vex_prefix(VEXRBit r, VEXXBit x, VEXBBit b, VEXOpcode m,
+                            VEXWBit w, Register nds, VEXLength len, SIMDPrefix pp) {
+        assert ((nds.encoding() & 0x7) == nds.encoding());
+        if (b.isHigh() || x.isHigh() || w.isHigh() ||
+                (m == M_0F38) || (m == M_0F3A)) {
+            int rxb = (~((r.encoding() << 7) | (x.encoding() << 6) | (b.encoding() << 5))) & 0b11100000;
+            return new int[]{
+                    0xC4,
+                    rxb | m.encoding() & 0x3,
+                    (w.encoding() << 7) | ((~nds.encoding() & 0xF) << 3) | (len.encoding() << 2) | pp.encoding()};
+        } else {
+            return new int[]{
+                    0xC5,
+                    ((~r.encoding() << 7) & 0b10000000) | ((~nds.encoding() & 0xF) << 3) | (len.encoding() << 2) | pp.encoding()};
+        }
+    }
+
+    // Vex encoding for instructions in 2 address form
+    static int[] vex_prefix_nonds(VEXRBit r, VEXXBit x, VEXBBit b, VEXOpcode m,
+                                  VEXWBit w, int noreg, VEXLength len, SIMDPrefix pp) {
+        if (b.isHigh() || x.isHigh() || w.isHigh() ||
+                (m == M_0F38) || (m == M_0F3A)) {
+            int rxb = (~((r.encoding() << 7) | (x.encoding() << 6) | (b.encoding() << 5))) & 0b11100000;
+            return new int[]{
+                    0xC4,
+                    rxb | m.encoding() & 0x3,
+                    (w.encoding() << 7) | (noreg << 3) | (len.encoding() << 2) | pp.encoding()};
+        } else {
+            return new int[]{
+                    0xC5,
+                    ((~r.encoding() << 7) & 0b10000000) | (noreg << 3) | (len.encoding() << 2) | pp.encoding()};
+        }
+    }
+
+    static int[] vex_emit(int vex[], int... bytes) {
+        int[] result = new int[vex.length + bytes.length];
+        System.arraycopy(vex, 0, result, 0, vex.length);
+        System.arraycopy(bytes, 0, result, vex.length, result.length - vex.length);
+        return result;
+    }
+
+    static int[] join(int[]... arys) {
+        int len = 0;
+        int offset = 0;
+        for (int[] ary : arys) {
+            len += ary.length;
+        }
+        int[] res = new int[len];
+        for (int[] ary : arys) {
+            int l = ary.length;
+            System.arraycopy(ary, 0, res, offset, l);
+            offset += l;
+        }
+        return res;
+    }
+
+    /* ========================================================================================*/
+    // Vex emit mem of encoded values
+    static int[] vex_emit_mem(int vex[], int opcode, int modRM, int sib, int disp) {
+        if (vex.length == 2) {
+            return new int[]{
+                    vex[0], vex[1],
+                    opcode,
+                    modRM,
+                    sib,
+                    disp
+            };
+        } else if (vex.length == 3) {
+            return new int[]{
+                    vex[0], vex[1], vex[2],
+                    opcode,
+                    modRM,
+                    sib,
+                    disp
+            };
+        }
+        throw new UnsupportedOperationException("vex_emit_mem only supports arrays of length 2 or 3.");
+    }
+    /* ========================================================================================*/
+    // Snippets of type Long2 -> Long2 and Long4 -> Long4 created without any machine-code bytes
+    // and with the supported flag hard-wired to true; they back the hbox(...) methods.
+    // NOTE(review): presumably these exist to force the vector value to be boxed - confirm
+    // against MachineCodeSnippet.
+    static final MethodHandle HBOX_L2_MH = MachineCodeSnippet.make("hboxL2", MethodType.methodType(Long2.class, Long2.class), true);
+    static final MethodHandle HBOX_L4_MH = MachineCodeSnippet.make("hboxL4", MethodType.methodType(Long4.class, Long4.class), true);
+
+    @ForceInline
+    public static Long2 hbox(Long2 v) {
+        try {
+            return (Long2)HBOX_L2_MH.invokeExact(v);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    @ForceInline
+    public static Long4 hbox(Long4 v) {
+        try {
+            return (Long4)HBOX_L4_MH.invokeExact(v);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+
+    /* ========================================================================================*/
+    /**
+     * Encodes the register-register form of VPMULLD (66 prefix, 0F38 map, opcode 0x40) at
+     * vector length {@code l}: {@code out} goes in ModRM.reg, {@code in2} in ModRM.r/m and
+     * {@code in1} is the VEX nds operand.
+     */
+    private static final int[] vpmulld(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F38, W_LOW, in1, l, PP_66),
+                0x40,
+                modRM(out, in2));
+    }
+
+    // VPMULLD xmm1, xmm2, xmm3/m128
+    // 128-bit snippet, marked supported only when requires(AVX) holds. The three register
+    // tables list the candidates for out, in1 and in2; the lambda emits the encoding for
+    // whichever registers it is handed.
+    public static final MethodHandle MHm128_vpmulld = MachineCodeSnippet.make(
+            "mm128_vpmulld", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE /*out*/, xmmRegistersSSE /*in1*/, xmmRegistersSSE /*in2*/},
+            (Register[] regs) ->
+                //VEX.NDS.128.66.0F38.WIG 40 /r
+                vpmulld(regs[0],regs[1],regs[2],L_128)
+            );
+
+
+    private static Long2 vpmulld_naive(Long2 a, Long2 b) {
+        long l1 = pack(
+                getInt(a, 0) * getInt(b, 0),
+                getInt(a, 1) * getInt(b, 1));
+        long l2 = pack(
+                getInt(a, 2) * getInt(b, 2),
+                getInt(a, 3) * getInt(b, 3));
+        return Long2.make(l1, l2);
+    }
+
+    public static Long2 vpmulld(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpmulld.invokeExact(a, b);
+            assert assertEquals(res, vpmulld_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VPMULLD ymm1, ymm2, ymm3/m256
+    // 256-bit snippet, marked supported only when requires(AVX2) holds. The three register
+    // tables list the candidates for out, in1 and in2; the lambda emits the encoding for
+    // whichever registers it is handed.
+    public static final MethodHandle MHm256_vpmulld = MachineCodeSnippet.make(
+            "mm256_vpmulld", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE /*out*/, xmmRegistersSSE /*in1*/, xmmRegistersSSE /*in2*/},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F38.WIG 40 /r
+                vpmulld(regs[0],regs[1],regs[2],L_256)
+            );
+
+    private static Long4 vpmulld_naive(Long4 a, Long4 b) {
+        int[] res = new int[8];
+        for (int i = 0; i < 8; i++) {
+            res[i] = getInt(a, i) * getInt(b, i);
+        }
+        return Long4.make(
+                pack(res[0], res[1]),
+                pack(res[2], res[3]),
+                pack(res[4], res[5]),
+                pack(res[6], res[7]));
+    }
+
+    public static Long4 vpmulld(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpmulld.invokeExact(a, b);
+            assert assertEquals(res, vpmulld_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes the register-register form of VPXOR (66 prefix, 0F map, opcode 0xEF) at vector
+     * length {@code l}: {@code out} goes in ModRM.reg, {@code in2} in ModRM.r/m and
+     * {@code in1} is the VEX nds operand.
+     */
+    private static int[] vpxor(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66),
+                0xEF,
+                modRM(out, in2));
+    }
+    // VPXOR xmm1, xmm2, xmm3/m128
+    // 128-bit snippet, marked supported only when requires(AVX) holds. The three register
+    // tables list the candidates for out, in1 and in2; the lambda emits the encoding for
+    // whichever registers it is handed.
+    public static final MethodHandle MHm128_vpxor = MachineCodeSnippet.make(
+            "mm128_vpxor", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE /*out*/, xmmRegistersSSE /*in1*/, xmmRegistersSSE /*in2*/},
+            (Register[] regs) ->
+                // VEX.NDS.128.66.0F.WIG EF /r
+                vpxor(regs[0],regs[1],regs[2],L_128)
+            );
+
+    private static Long2 vpxor_naive(Long2 a, Long2 b) {
+        long la0, la1, lb0, lb1;
+        la0 = a.extract(0);
+        la1 = a.extract(1);
+        lb0 = b.extract(0);
+        lb1 = b.extract(1);
+        return Long2.make(la0 ^ lb0, la1 ^ lb1);
+    }
+
+    public static Long2 vpxor(Long2 v1, Long2 v2) {
+        try {
+            Long2 res = (Long2) MHm128_vpxor.invokeExact(v1, v2);
+            assert assertEquals(res, vpxor_naive(v1, v2));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VPXOR ymm1, ymm2, ymm3/m256
+    public static final MethodHandle MHm256_vpxor = MachineCodeSnippet.make(
+            "mm256_vpxor", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE /*out*/, xmmRegistersSSE /*in1*/, xmmRegistersSSE /*in2*/},
+            (Register[] regs) ->
+                    vpxor(regs[0],regs[1],regs[2],L_256)
+            );
+
+    private static Long4 vpxor_naive(Long4 a, Long4 b) {
+        int[] res = new int[8];
+        for (int i = 0; i < 8; i++) {
+            int a_i = getInt(a, i);
+            int b_i = getInt(b, i);
+            res[i] = a_i ^ b_i;
+        }
+        return Long4.make(
+                pack(res[0], res[1]),
+                pack(res[2], res[3]),
+                pack(res[4], res[5]),
+                pack(res[6], res[7])
+        );
+    }
+
+    public static Long4 vpxor(Long4 v1, Long4 v2) {
+        try {
+            Long4 res = (Long4) MHm256_vpxor.invokeExact(v1, v2);
+            assert assertEquals(res, vpxor_naive(v1, v2));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Builds the bytes for a VEX-encoded VPAND (opcode 0xDB, 0F map, 66 prefix).
+     * {@code in1} is handed to the VEX prefix; {@code out} and {@code in2} form the ModRM byte.
+     *
+     * @param l VEX vector length to encode
+     */
+    private static int[] vpand(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66),
+                0xDB,
+                modRM(out, in2));
+    }
+    // VPAND xmm1, xmm2, xmm3/m128
+    public static final MethodHandle MHm128_vpand = MachineCodeSnippet.make(
+            "mm128_vpand", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE /*out*/, xmmRegistersSSE /*in1*/, xmmRegistersSSE /*in2*/},
+            (Register[] regs) ->
+                // VEX.NDS.128.66.0F.WIG DB /r
+                vpand(regs[0],regs[1],regs[2],L_128)
+            );
+
+    private static Long2 vpand_naive(Long2 a, Long2 b) {
+        long la0, la1, lb0, lb1;
+        la0 = a.extract(0);
+        la1 = a.extract(1);
+        lb0 = b.extract(0);
+        lb1 = b.extract(1);
+        return Long2.make(la0 & lb0, la1 & lb1);
+    }
+
+    public static Long2 vpand(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpand.invokeExact(a, b);
+            assert assertEquals(res, vpand_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    // VPAND ymm1, ymm2, ymm3/m256
+    /**
+     * Handle for the 256-bit VPAND snippet (VEX.NDS.256.66.0F.WIG DB /r), registered as
+     * "mm256_vpand" with type {@code MT_L4_BINARY}.
+     * The ymm form of this integer instruction is an AVX2 instruction, so the snippet
+     * requires AVX2 (as the 256-bit VPXOR snippet already does) rather than plain AVX.
+     */
+    public static final MethodHandle MHm256_vpand = MachineCodeSnippet.make(
+            "mm256_vpand", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE /*out*/, xmmRegistersSSE /*in1*/, xmmRegistersSSE /*in2*/},
+            (Register[] regs) ->
+                // VEX.NDS.256.66.0F.WIG DB /r
+                vpand(regs[0],regs[1],regs[2],L_256)
+            );
+
+    private static Long4 vpand_naive(Long4 a, Long4 b) {
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long4 vpand(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpand.invokeExact(a, b);
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VPOR xmm1, xmm2, xmm3/m128
+    /**
+     * Builds the bytes for a VEX-encoded VPOR (opcode 0xEB, 0F map, 66 prefix).
+     * {@code in1} is handed to the VEX prefix; {@code out} and {@code in2} form the ModRM byte.
+     *
+     * @param l VEX vector length to encode
+     */
+    public static int[] vpor(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66),
+                0xEB,
+                modRM(out, in2));
+    }
+    public static final MethodHandle MHm128_vpor = MachineCodeSnippet.make(
+            "mm128_vpor", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE /*out*/, xmmRegistersSSE /*in1*/, xmmRegistersSSE /*in2*/},
+            (Register[] regs) ->
+                // VEX.NDS.128.66.0F.WIG EB /r
+                vpor(regs[0],regs[1],regs[2],L_128)
+            );
+
+    /**
+     * Handle for the 256-bit VPOR snippet (VEX.NDS.256.66.0F.WIG EB /r), registered as
+     * "mm256_vpor" with type {@code MT_L4_BINARY}.
+     * The ymm form of this integer instruction is an AVX2 instruction, so the snippet
+     * requires AVX2 rather than plain AVX.
+     */
+    public static final MethodHandle MHm256_vpor = MachineCodeSnippet.make(
+            "mm256_vpor", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE /*out*/, xmmRegistersSSE /*in1*/, xmmRegistersSSE /*in2*/},
+            (Register[] regs) ->
+                    // VEX.NDS.256.66.0F.WIG EB /r
+                    vpor(regs[0],regs[1],regs[2],L_256)
+    );
+
+    private static Long2 vpor_naive(Long2 a, Long2 b) {
+        long la0, la1, lb0, lb1;
+        la0 = a.extract(0);
+        la1 = a.extract(1);
+        lb0 = b.extract(0);
+        lb1 = b.extract(1);
+        return Long2.make(la0 | lb0, la1 | lb1);
+    }
+
+    public static Long2 vpor(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpor.invokeExact(a, b);
+            assert assertEquals(res, vpor_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Builds the bytes for a VEX-encoded VPSUBD (opcode 0xFA, 0F map, 66 prefix).
+     * {@code in1} is handed to the VEX prefix; {@code out} and {@code in2} form the ModRM byte.
+     *
+     * @param l VEX vector length to encode
+     */
+    private static int[] vpsubd(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66),
+                0xFA,
+                modRM(out, in2));
+    }
+    // VPSUBD xmm1, xmm2, xmm3/m128
+    public static final MethodHandle MHm128_vpsubd = MachineCodeSnippet.make(
+            "mm128_vpsubd", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                // VEX.NDS.128.66.0F.WIG FA /r
+                vpsubd(regs[0],regs[1],regs[2],L_128)
+            );
+
+    private static Long2 vpsubd_naive(Long2 a, Long2 b) {
+        long l1 = pack(
+                getInt(a, 0) - getInt(b, 0),
+                getInt(a, 1) - getInt(b, 1));
+        long l2 = pack(
+                getInt(a, 2) - getInt(b, 2),
+                getInt(a, 3) - getInt(b, 3));
+        return Long2.make(l1, l2);
+    }
+
+    public static Long2 vpsubd(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpsubd.invokeExact(a, b);
+            assert assertEquals(res, vpsubd_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    // VPSUBD ymm1, ymm2, ymm3/m256
+    /**
+     * Handle for the 256-bit VPSUBD snippet (VEX.NDS.256.66.0F.WIG FA /r), registered as
+     * "mm256_vpsubd" with type {@code MT_L4_BINARY}.
+     * The ymm form of this integer instruction is an AVX2 instruction, so the snippet
+     * requires AVX2 rather than plain AVX.
+     */
+    public static final MethodHandle MHm256_vpsubd = MachineCodeSnippet.make(
+            "mm256_vpsubd", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                // VEX.NDS.256.66.0F.WIG FA /r
+                vpsubd(regs[0],regs[1],regs[2],L_256)
+            );
+
+    private static Long4 vpsubd_naive(Long4 a, Long4 b) {
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long4 vpsubd(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpsubd.invokeExact(a, b);
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Builds the bytes for a VEX-encoded VPSIGND (opcode 0x0A, 0F38 map, 66 prefix).
+     * {@code in1} is handed to the VEX prefix; {@code out} and {@code in2} form the ModRM byte.
+     *
+     * @param l VEX vector length to encode
+     */
+    private static int[] vpsignd(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F38, W_LOW, in1, l, PP_66),
+                0x0A,
+                modRM(out, in2));
+    }
+    // VPSIGND xmm1, xmm2, xmm3/m128
+    public static final MethodHandle MHm128_vpsign = MachineCodeSnippet.make(
+            "mm128_vpsignd", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.66.0F38.WIG 0A /r
+                vpsignd(regs[0],regs[1],regs[2],L_128)
+            );
+
+    private static Long2 vpsignd_naive(Long2 a, Long2 b) {
+        int val;
+        int[] res = new int[4];
+        for (int i = 0; i < 4; i++) {
+            val = getInt(b, i);
+            if (val < 0) {
+                res[i] = -1 * (getInt(a, i));
+            } else if (val == 0) {
+                res[i] = 0;
+            } else {
+                res[i] = getInt(a, i);
+            }
+
+        }
+        return Long2.make(pack(res[0], res[1]), pack(res[2], res[3]));
+    }
+
+    public static Long2 vpsignd(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpsign.invokeExact(a, b);
+            assert assertEquals(res, vpsignd_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VPSIGND ymm1, ymm2, ymm3/m256
+    public static final MethodHandle MHm256_vpsign = MachineCodeSnippet.make(
+            "mm256_vpsignd", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F38.WIG 0A /r
+                vpsignd(regs[0],regs[1],regs[2],L_256)
+            );
+
+    private static Long4 vpsignd_naive(Long4 a, Long4 b) {
+        int val;
+        int[] res = new int[8];
+        for (int i = 0; i < 8; i++) {
+            val = getInt(b, i);
+            if (val < 0) {
+                res[i] = -1 * (getInt(a, i));
+            } else if (val == 0) {
+                res[i] = 0;
+            } else {
+                res[i] = getInt(a, i);
+            }
+
+        }
+        return Long4.make(pack(res[0], res[1]), pack(res[2], res[3]), pack(res[4], res[5]), pack(res[6], res[7]));
+    }
+
+    public static Long4 vpsignd(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpsign.invokeExact(a, b);
+            assert assertEquals(res, vpsignd_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Builds the bytes for a VEX-encoded VPCMPEQD (opcode 0x76, 0F map, 66 prefix).
+     * {@code in1} is handed to the VEX prefix; {@code out} and {@code in2} form the ModRM byte.
+     *
+     * @param l VEX vector length to encode
+     */
+    public static int[] vcmpeqd(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66),
+                0x76,
+                modRM(out, in2));
+    }
+
+    // VPCMPEQD xmm1, xmm2, xmm3/m128
+    public static final MethodHandle MHm128_vpcmpeqd = MachineCodeSnippet.make(
+            "mm128_vpcmpeqd", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.66.0F.WIG 76 /r
+                vcmpeqd(regs[0],regs[1],regs[2],L_128)
+            );
+
+    static Long2 vpcmpeqd_naive(Long2 a, Long2 b) {
+        int[] res = new int[4];
+        for (int i = 0; i < 4; i++) {
+            int a_i = getInt(a, i);
+            int b_i = getInt(b, i);
+            if (a_i == b_i) {
+                res[i] = 0xFFFFFFFF;
+            }
+        }
+        return Long2.make(
+                pack(res[0], res[1]),
+                pack(res[2], res[3])
+        );
+    }
+
+    public static Long2 vpcmpeqd(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpcmpeqd.invokeExact(a, b);
+            assert assertEquals(res, vpcmpeqd_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VPCMPEQD ymm1, ymm2, ymm3/m256
+    public static final MethodHandle MHm256_vpcmpeqd = MachineCodeSnippet.make(
+            "mm256_vpcmpeqd", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 76 /r
+                vcmpeqd(regs[0],regs[1],regs[2],L_256)
+            );
+
+    static Long4 vpcmpeqd_naive(Long4 a, Long4 b) {
+       /* double[] res = new double[4];
+        for (int i = 0; i < 4; i++) {
+            int a_i = getDouble(a, i);
+            int b_i = getDouble(b, i);
+            if (a_i == b_i) {
+                res[i] = 0xFFFFFFFF;
+            }
+        }
+        return Long2.make(
+                pack(res[0], res[1]),
+                pack(res[2], res[3])
+        );*/
+        return null;
+    }
+
+    public static Long4 vpcmpeqd(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpcmpeqd.invokeExact(a, b);
+            //assert assertEquals(res, vpcmpeqd_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Builds the bytes for a VEX-encoded VPCMPGTD (opcode 0x66, 0F map, 66 prefix).
+     * {@code in1} is handed to the VEX prefix; {@code out} and {@code in2} form the ModRM byte.
+     *
+     * @param l VEX vector length to encode
+     */
+    private static int[] vcmpgtd(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66),
+                0x66,
+                modRM(out, in2));
+    }
+    // VPCMPGTD xmm1, xmm2, xmm3/m128
+    public static final MethodHandle MHm128_vpcmpgtd = MachineCodeSnippet.make(
+            "mm128_vpcmpgtd", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.66.0F.WIG 66 /r
+                vcmpgtd(regs[0],regs[1],regs[2],L_128)
+            );
+
+    static Long2 vpcmpgtd_naive(Long2 a, Long2 b) {
+        /*int[] res = new int[4];
+        for (int i = 0; i < 4; i++) {
+            int a_i = getInt(a, i);
+            int b_i = getInt(b, i);
+            if (a_i == b_i) {
+                res[i] = 0xFFFFFFFF;
+            }
+        }
+        return Long2.make(
+                pack(res[0], res[1]),
+                pack(res[2], res[3])
+        );*/
+        return null;
+    }
+
+    public static Long2 vpcmpgtd(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpcmpgtd.invokeExact(a, b);
+            //assert assertEquals(res, vpcmpgtd_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    // VPCMPGTD ymm1, ymm2, ymm3/m256
+    public static final MethodHandle MHm256_vpcmpgtd = MachineCodeSnippet.make(
+            "mm256_vpcmpgtd", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 66 /r
+                vcmpgtd(regs[0],regs[1],regs[2],L_256)
+            );
+
+    static Long4 vpcmpgtd_naive(Long4 a, Long4 b) {
+       /* double[] res = new double[4];
+        for (int i = 0; i < 4; i++) {
+            int a_i = getDouble(a, i);
+            int b_i = getDouble(b, i);
+            if (a_i == b_i) {
+                res[i] = 0xFFFFFFFF;
+            }
+        }
+        return Long2.make(
+                pack(res[0], res[1]),
+                pack(res[2], res[3])
+        );*/
+        return null;
+    }
+
+    public static Long4 vpcmpgtd(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpcmpgtd.invokeExact(a, b);
+            //assert assertEquals(res, vpcmpgtd_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Builds the bytes for a VEX-encoded VXORPS (opcode 0x57, 0F map, no SIMD prefix).
+     * {@code in1} is handed to the VEX prefix; {@code out} and {@code in2} form the ModRM byte.
+     *
+     * @param l VEX vector length to encode
+     */
+    private static int[] vxorps(Register out, Register in1, Register in2, VEXLength l){
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_NONE),
+                0x57,
+                modRM(out, in2));
+    }
+    // VXORPS xmm1, xmm2, xmm3/m128
+    public static final MethodHandle MHm128_vxorps = MachineCodeSnippet.make(
+            "mm128_vxorps", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.0F.WIG 57 /r
+                vxorps(regs[0],regs[1],regs[2],L_128)
+            );
+
+    static Long2 vxorps_naive(Long2 a, Long2 b) {
+        int[] res = new int[4];
+        for (int i = 0; i < 4; i++) {
+            int a_i = getInt(a, i);
+            int b_i = getInt(b, i);
+            res[i] = a_i ^ b_i;
+
+        }
+        return Long2.make(
+                pack(res[0], res[1]),
+                pack(res[2], res[3])
+        );
+    }
+
+    public static Long2 vxorps(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vxorps.invokeExact(a, b);
+            assert assertEquals(res, vxorps_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VXORPS ymm1, ymm2, ymm3/m256
+    public static final MethodHandle MHm256_vxorps = MachineCodeSnippet.make(
+            "mm256_vxorps", MT_L4_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.0F.WIG 57 /r
+                vxorps(regs[0],regs[1],regs[2],L_256)
+            );
+
+    static Long4 vxorps_naive(Long4 a, Long4 b) {
+        int[] res = new int[8];
+        for (int i = 0; i < 8; i++) {
+            int a_i = getInt(a, i);
+            int b_i = getInt(b, i);
+            res[i] = a_i ^ b_i;
+
+        }
+        return Long4.make(
+                pack(res[0], res[1]),
+                pack(res[2], res[3]),
+                pack(res[4], res[5]),
+                pack(res[6], res[7])
+        );
+    }
+
+    public static Long4 vxorps(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vxorps.invokeExact(a, b);
+            assert assertEquals(res, vxorps_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Builds the bytes for a VEX-encoded VXORPD (opcode 0x57, 0F map, 66 prefix).
+     * {@code in1} is handed to the VEX prefix; {@code out} and {@code in2} form the ModRM byte.
+     *
+     * @param l VEX vector length to encode
+     */
+    private static int[] vxorpd(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66),
+                0x57,
+                modRM(out, in2));
+    }
+    // VXORPD xmm1, xmm2, xmm3/m128
+    public static final MethodHandle MHm128_vxorpd = MachineCodeSnippet.make(
+            "mm128_vxorpd", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.66.0F.WIG 57 /r
+                vxorpd(regs[0],regs[1],regs[2],L_128)
+
+            );
+
+    static Long2 vxorpd_naive(Long2 a, Long2 b) {
+        long[] res = new long[4];
+        for (int i = 0; i < 2; i++) {
+            long a_i = getLong(a, i);
+            long b_i = getLong(b, i);
+            res[i] = a_i ^ b_i;
+
+        }
+        return Long2.make(
+                res[0], res[1]
+        );
+    }
+
+    public static Long2 vxorpd(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vxorpd.invokeExact(a, b);
+            assert assertEquals(res, vxorpd_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VXORPD ymm1, ymm2, ymm3/m256
+    public static final MethodHandle MHm256_vxorpd = MachineCodeSnippet.make(
+            "mm256_vxorpd", MT_L4_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 57 /r
+                vxorpd(regs[0],regs[1],regs[2],L_128)
+            );
+
+    static Long4 vxorpd_naive(Long4 a, Long4 b) {
+        /*long[] res = new long[4];
+        for (int i = 0; i < 4; i++) {
+            long a_i = getLong(a, i);
+            long b_i = getLong(b, i);
+            res[i] = a_i ^ b_i;
+
+        }
+        return Long2.make(
+                res[0], res[1]
+        );*/
+        return null;
+    }
+
+    public static Long4 vxorpd(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vxorpd.invokeExact(a, b);
+            //assert assertEquals(res, vxorpd_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    //Reg-to-reg vmovdqu
+    private static int[] xmmMov(Register dst, Register src, VEXLength l) {
+        if (Arrays.binarySearch(xmmRegistersSSE, dst) < 0 || Arrays.binarySearch(xmmRegistersSSE, src) < 0)
+            throw new UnsupportedOperationException("xmmMov needs xmm registers");
+        int[] vex = vex_prefix_nonds(rBit(dst), X_LOW, bBit(src), M_0F, W_LOW, 0b1111, l, PP_F3);
+        return vex_emit(vex, 0x6F, modRM(dst, src));
+    }
+
+    public static float dot_prod(float partial, Long4 left, Long4 right){
+        try {
+           return (float) sumprod_float_L4.invokeExact(partial,left,right);
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+
+    //private static final MethodType MT_FLOAT__FLOAT__L4_L4 = MethodType.methodType(Float.class,Float.class,Long4.class,Long4.class);
+    //(float,Long4,Long4)float
+    // Snippet behind dot_prod. regs[0] is the output, regs[1] the incoming partial sum,
+    // regs[2] and regs[3] the two vector operands; all four are picked from xmmRegistersSSE.
+    // The emitted sequence multiplies inp1 by inp2, folds the partial sum into the products,
+    // then reduces step by step into out (see the per-instruction notes below).
+    // Both inp1 and psum are overwritten along the way.
+    // NOTE(review): the two-operand vaddss helper emits a non-VEX encoding -- confirm that
+    // mixing it with the VEX instructions here is intended.
+    public static final MethodHandle sumprod_float_L4 = MachineCodeSnippet.make(
+        "sumprod_float_L4", MT_FLOAT_FLOAT_L4_L4,
+            new MachineCodeSnippet.Effect[]{},
+            requires(AVX),
+            new Register[][]{xmmRegistersSSE,xmmRegistersSSE,xmmRegistersSSE,xmmRegistersSSE},
+            new Register[]{},
+            new int[]{},
+            (Register[] regs) -> {
+               Register out  = regs[0],
+                        psum = regs[1],
+                        inp1 = regs[2],
+                        inp2 = regs[3];
+                return join(
+                              vmulps(inp1,inp1,inp2,L_256)  //get products
+                        ,     vaddss(psum,inp1)             //add partial sum
+                        ,     vextractf128(psum,inp1,0x1)   //upper 128-bits of inp1
+                        ,     vaddps(psum,inp1,psum,L_128)  //fold inp1 in half into psum
+                        ,     vmovshdup(inp1,psum,L_128)    //expand (inp1's) upper qwords into inp1
+                        ,     vaddps(psum,psum,inp1,L_128)  //and add to psum
+                        ,     vmovhlps(inp1,inp1,psum)      //expand the lower qwords
+                        ,     vaddss(out, psum, inp1)       //and add the lowest position to get result
+                );
+            }
+    );
+
+    //(Long4)float
+    // Snippet that reduces the input vector to a single float in the output register.
+    // The register choices are pinned: output {xmm0}, input {xmm1}; xmm2 is used as the
+    // scratch accumulator and is listed, with xmm0 and xmm1, in the following Register[] argument.
+    public static final MethodHandle sum_float_L4 = MachineCodeSnippet.make(
+            "sum_float_L4", MT_FLOAT_L4,
+            new MachineCodeSnippet.Effect[]{},
+            requires(AVX),
+            new Register[][]{{xmm0},{xmm1}},
+            new Register[]{xmm0,xmm1,xmm2},
+            new int[]{8,8,8},
+            (Register[] regs) -> {
+                Register out = regs[0],
+                         inp = regs[1];
+
+                // Fixed working registers, independent of what was assigned to regs.
+                Register psum = xmm2;
+                Register inp1 = xmm1;
+
+                // NOTE(review): with the single-register choices above this condition
+                // presumably always holds -- confirm whether the else branch is reachable.
+                if(out == xmm0 && inp == xmm1){
+                    return join (
+                          vextractf128(psum,inp1,0x1)   //upper 128-bits of inp1
+                    ,     vaddps(psum,inp1,psum,L_128)  //fold inp1 in half into psum
+                    ,     vmovshdup(inp1,psum,L_128)    //expand (inp1's) upper qwords into inp1
+                    ,     vaddps(psum,psum,inp1,L_128)  //and add to psum
+                    ,     vmovhlps(inp1,inp1,psum)      //expand the lower qwords
+                    ,     vaddss(out, psum, inp1)       //and add the lowest position to get result
+                    );
+                } else {
+                    //We have to patch movs in.
+                    // Same sequence, preceded by a copy of the assigned input into xmm1.
+                    return join (
+                          xmmMov(inp1,inp,L_256)
+                    ,     vextractf128(psum,inp1,0x1)   //upper 128-bits of inp1
+                    ,     vaddps(psum,inp1,psum,L_128)  //fold inp1 in half into psum
+                    ,     vmovshdup(inp1,psum,L_128)    //expand (inp1's) upper qwords into inp1
+                    ,     vaddps(psum,psum,inp1,L_128)  //and add to psum
+                    ,     vmovhlps(inp1,inp1,psum)      //expand the lower qwords
+                    ,     vaddss(out, psum, inp1)       //and add the lowest position to get result
+                    );
+                }
+
+            }
+    );
+
+    @ForceInline
+    public static float sum_float_L4(Long4 val){
+        try {
+          return (float) sum_float_L4.invokeExact(val);
+        } catch (Throwable e) {
+          throw new Error(e);
+        }
+    }
+
+    // Variant of the sum-of-products snippet whose second multiplicand is read from memory
+    // through base/index addressing (see vmulps_sunk) instead of from a vector register.
+    // regs[0] is the output, regs[1] the incoming partial sum, regs[2] the vector operand,
+    // regs[3]/regs[4] the base and index registers picked from cpuRegisters.
+    // NOTE(review): signature is presumably (float, Long4, float[], int)float, judging by
+    // the MT_FLOAT_FLOAT_L4_FLOATARY_INT name -- confirm.
+    // NOTE(review): the snippet name string repeats "sumprod_float_L4"; confirm whether
+    // MachineCodeSnippet names need to be unique.
+    public static final MethodHandle sumprod_float_L4_sunk = MachineCodeSnippet.make(
+            "sumprod_float_L4", MT_FLOAT_FLOAT_L4_FLOATARY_INT,
+            new MachineCodeSnippet.Effect[]{},
+            requires(AVX),
+            new Register[][]{xmmRegistersSSE,xmmRegistersSSE,xmmRegistersSSE,cpuRegisters,cpuRegisters},
+            new Register[]{},
+            new int[]{},
+            (Register[] regs) -> {
+                Register out  = regs[0],
+                        psum  = regs[1],
+                        inp1  = regs[2],
+                        base  = regs[3],
+                        index = regs[4];
+                return join(
+                         // Multiply first, then fold in the partial sum -- the same order as the
+                         // register-register snippet. Adding psum into inp1 before the multiply
+                         // would scale the partial sum by the first array element.
+                         vmulps_sunk(inp1,inp1,base,index,L_256) //get products
+                   ,     vaddss(psum,inp1)                       //add partial sum
+                   ,     vextractf128(psum,inp1,0x1)
+                   ,     vaddps(psum,inp1,psum,L_128)
+                   ,     vmovshdup(inp1,psum,L_128)
+                   ,     vaddps(psum,psum,inp1,L_128)
+                   ,     vmovhlps(inp1,inp1,psum)
+                   ,     vaddss(out, psum, inp1)
+                );
+            }
+    );
+
+    //(Long4)double
+    // Snippet that reduces the input vector of doubles to a single double in the output
+    // register. Register choices are pinned: output {xmm0}, input {xmm1}; xmm2 is scratch.
+    public static final MethodHandle sum_double_L4 = MachineCodeSnippet.make(
+            "sum_double_L4", MT_DOUBLE_L4,
+            new MachineCodeSnippet.Effect[]{},
+            requires(AVX),
+            new Register[][]{{xmm0},{xmm1}},
+            new Register[]{xmm0,xmm1,xmm2},
+            new int[]{8,8,8},
+            (Register[] regs) -> {
+                Register out = regs[0],
+                         inp = regs[1];
+
+                // Fixed working registers, independent of what was assigned to regs.
+                Register psum = xmm2;
+                Register inp1 = xmm1;
+
+                // NOTE(review): with the single-register choices above this condition
+                // presumably always holds -- confirm whether the else branch is reachable.
+                if(out == xmm0 && inp == xmm1){
+                    return join (
+                            vextractf128(psum,inp1,0x1)   //upper 128-bits of inp1
+                            ,     vaddpd(psum,inp1,psum,L_128)  //fold inp1 in half into psum
+                            ,     vhaddpd(out,psum,inp1,L_128)
+                    );
+                } else {
+                    //We have to patch movs in.
+                    // Same sequence, preceded by a copy of the assigned input into xmm1.
+                    return join (
+                            xmmMov(inp1,inp,L_256)
+                            ,     vextractf128(psum,inp1,0x1)   //upper 128-bits of inp1
+                            ,     vaddpd(psum,inp1,psum,L_128)  //fold inp1 in half into psum
+                            ,     vhaddpd(out,psum,inp1,L_128)
+                    );
+                }
+
+            }
+    );
+
+    public static double sum_double_l4(Long4 inp){
+        try {
+           return (double) sum_double_L4.invokeExact(inp);
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+
+    //VEX 256.F3.0F.WIG 16 /r
+    /**
+     * Builds the bytes for a VEX-encoded VMOVSHDUP (opcode 0x16, 0F map, F3 prefix, no NDS
+     * operand) of the requested vector length.
+     * The B bit is taken from {@code in}, as the other encoders in this file do for their
+     * ModRM r/m operand; the previous constant {@code B_LOW} ignored which register
+     * {@code in} actually is.
+     *
+     * @param out destination register
+     * @param in  source register
+     * @param l   VEX vector length to encode
+     */
+    public static int[] vmovshdup(Register out, Register in, VEXLength l){
+       int[] vex = vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F, W_LOW, 0b1111, l, PP_F3);
+       return vex_emit(vex, 0x16, modRM(out, in));
+    }
+
+    //VEX.NDS.128.0F.WIG 12 /r
+    /**
+     * Builds the bytes for a 128-bit VEX-encoded VMOVHLPS (opcode 0x12, 0F map, no SIMD prefix).
+     * {@code left} is handed to the VEX prefix; {@code out} and {@code right} form the ModRM byte.
+     */
+    public static int[] vmovhlps(Register out, Register left, Register right){
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(right), M_0F, W_LOW, left, L_128, PP_NONE),
+                0x12,
+                modRM(out, right));
+    }
+
+    private static int[] vaddss(Register out, Register left, Register right){
+        return vaddss(out,left,right,L_128);
+    }
+
+    private static int[] vaddss(Register left, Register right){
+        return new int[]{0xF3, 0x0F, 0x58, modRM(right, left)};
+    }
+
+    /**
+     * Builds a VEX-encoded multiply (opcode 0x59, 0F map, no SIMD prefix) whose second
+     * source is addressed through a SIB byte built from {@code index} and {@code base},
+     * scaled by {@code Unsafe.ARRAY_FLOAT_INDEX_SCALE}, with
+     * {@code Unsafe.ARRAY_FLOAT_BASE_OFFSET} as the trailing displacement value.
+     *
+     * @param out destination register
+     * @param inp register handed to the VEX prefix as the first source
+     * @param l   VEX vector length to encode
+     */
+    private static int[] vmulps_sunk(Register out, Register inp, Register base, Register index, VEXLength l){
+        return vex_emit(
+                vex_prefix(rBit(out), xBit(index), bBit(base), M_0F, W_LOW, inp, l, PP_NONE),
+                0x59,
+                modRM_SIB_DISP8(out),
+                sibByte(index, base, sibScale(Unsafe.ARRAY_FLOAT_INDEX_SCALE)),
+                Unsafe.ARRAY_FLOAT_BASE_OFFSET);
+    }
+
+    /**
+     * Builds a VEX.256-encoded VEXTRACTF128 (opcode 0x19, 0F3A map, 66 prefix, no NDS operand).
+     * Note the operand roles in the encoding: {@code in} supplies the R bit and the first
+     * ModRM argument, {@code out} supplies the B bit and the second.
+     *
+     * @param imm selector; only its lowest bit is emitted
+     */
+    private static int[] vextractf128(Register out, Register in, int imm){
+        return vex_emit(
+                vex_prefix_nonds(rBit(in), X_LOW, bBit(out), M_0F3A, W_LOW, 0b1111, L_256, PP_66),
+                0x19,
+                modRM(in, out),
+                imm & 0x1);
+    }
+
+
+    /**
+     * Builds the bytes for a VEX-encoded VHADDPD (opcode 0x7C, 0F map, 66 prefix).
+     * {@code left} is handed to the VEX prefix; {@code out} and {@code right} form the ModRM byte.
+     *
+     * @param l VEX vector length to encode
+     */
+    private static int[] vhaddpd(Register out, Register left, Register right, VEXLength l){
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(right), M_0F, W_LOW, left, l, PP_66),
+                0x7C,
+                modRM(out, right));
+    }
+
+    /**
+     * Builds a VEX-encoded broadcast (opcode 0x19, 0F38 map, 66 prefix, no NDS operand).
+     * The length is always encoded as {@code L_256}; the {@code l} argument is not used.
+     *
+     * @param out destination register
+     * @param in  source register
+     * @param l   ignored
+     */
+    private static int[] broadcastD(Register out, Register in, VEXLength l){
+        return vex_emit(
+                vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F38, W_LOW, 0b1111, L_256, PP_66),
+                0x19,
+                modRM(out, in));
+    }
+
+    /**
+     * Builds a VEX.128.66.0F.W1 6E /r move from {@code in} to {@code out}
+     * (W bit set, no NDS operand).
+     */
+    private static int[] movQ(Register out, Register in) {
+        return vex_emit(
+                vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F, W_HIGH, 0b1111, L_128, PP_66),
+                0x6E,
+                modRM(out, in));
+    }
+
+    private static int[] loadImm64(Register out, long val){
+        //REX.W + B8 + rd io
+        //MOV r64, imm64
+        int rbit = (0b1000 & out.encoding) >> 3;
+        int rex = (0b01001000 | rbit);
+        int opcode = 0xB8 | (0b111 & out.encoding()); //lower 3 bits of opcode are the register encoding.
+        byte[] bytes = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN).putLong(0,val).array();
+        return new int[]{rex,opcode,(bytes[0] & 0xFF),(bytes[1] & 0xFF),(bytes[2] & 0xFF),(bytes[3] & 0xFF),(bytes[4] & 0xFF),(bytes[5] & 0xFF),(bytes[6] & 0xFF),(bytes[7] & 0xFF)};
+    }
+
+
+
+    /**
+     * Snippet that evaluates a truncated Taylor series of e^x on each double lane:
+     * it starts from {@code 1 + x} and then adds {@code x^k / k!} for k = 2 .. UNROLLINGS + 1,
+     * with the per-term update emitted UNROLLINGS times back to back (no branch).
+     * <p>
+     * The output is pinned to xmm0 and the input to xmm1. xmm0-xmm6 and rbx are listed in the
+     * additional register array passed to {@code make} (presumably the clobber list - confirm
+     * against MachineCodeSnippet).
+     * <p>
+     * AVX2 is required because {@code broadcastD} emits opcode 0x19 (VBROADCASTSD) with a register
+     * source, a form that only exists in AVX2.
+     */
+    public static final MethodHandle MHm256_exp_double = MachineCodeSnippet.make(
+           "MHm256_exp_double",MT_L4_UNARY,new MachineCodeSnippet.Effect[0],
+            requires(AVX2),new Register[][]{{xmm0},{xmm1}},new Register[]{xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,rbx},new int[]{8,8,8,8,8,8,8,2},
+            (Register[] regs) -> {
+                Register out = regs[0];
+                Register in  = regs[1]; //Low position double value
+
+                Register accum = xmm0;      // running sum
+                Register dinp = xmm1;       // current power of the input
+                Register fact = xmm2;       // current factorial
+                Register inx = xmm3;        // next factor to multiply into fact
+                Register ONES = xmm4;       // constant 1.0 in every lane
+                Register scratch = xmm5;    // holds dinp / fact for the current term
+                Register inpscratch = xmm6; // copy of the original input
+                VEXLength l = L_256;
+                final int UNROLLINGS = 14;
+
+                //init
+                //Long4 accum = PatchableVecUtils.vaddpd(arg,PatchableVecUtils.broadcastDoubleL4(1.0d));
+                //Long4 dinp = PatchableVecUtils.vmulpd(arg,arg);
+                //Long4 fact = PatchableVecUtils.broadcastDoubleL4(2.0d);
+                //Long4 inx = PatchableVecUtils.broadcastDoubleL4(3.0d);
+
+                int[] init = join(
+                          xmmMov(inpscratch,in,l) //preserve the input value: dinp (xmm1) is overwritten below
+                        , loadImm64(rbx,Double.doubleToLongBits(1.0d))
+                        , movQ(accum,rbx)
+                        , broadcastD(accum,accum,l) //accum is all ones
+                        , xmmMov(ONES,accum,l)
+                        , vaddpd(fact,accum,accum,l) //fact is all twos
+                        , vaddpd(inx,accum,fact,l) //inx is all threes
+                        , vaddpd(accum,inpscratch,accum,l) //accum = x + 1
+                        , vmulpd(dinp,inpscratch,inpscratch,l) //dinp = x * x
+
+                );
+
+
+                //Initialize
+                //xmm0 = accum
+                //xmm1 = dinp
+                //xmm2 = fact
+                //xmm3 = inx
+                //xmm4 = <ONES>
+                //End init
+
+                /*
+
+        accum = PatchableVecUtils.vaddpd(accum,PatchableVecUtils.vdivpd(dinp,fact));
+        dinp = PatchableVecUtils.vmulpd(dinp,arg);
+        fact = PatchableVecUtils.vmulpd(fact,inx);
+        inx = PatchableVecUtils.vaddpd(inx,ONE);
+                 */
+
+                int[] loopbody = join(
+                          vdivpd(scratch,dinp,fact,l)
+                        , vaddpd(accum,accum,scratch,l)
+                        , vmulpd(dinp,dinp,inpscratch,l)
+                        , vmulpd(fact,fact,inx,l)
+                        , vaddpd(inx,inx,ONES,l)
+                );
+
+
+                int[] finalizer = xmmMov(out,accum,l);
+
+                //Repeat the term update UNROLLINGS times by copying its bytes end to end.
+                int[] expandedBody = new int[UNROLLINGS*loopbody.length];
+                for(int i = 0; i < UNROLLINGS; i++){
+                    System.arraycopy(loopbody,0,expandedBody,i*loopbody.length,loopbody.length);
+                }
+
+                return join(init,expandedBody,finalizer);
+
+            }
+
+    );
+
+    @ForceInline public static Long4 expDouble(Long4 arg){
+        try {
+          Long4 res = (Long4) MHm256_exp_double.invokeExact(arg);
+          return res;
+        } catch (Throwable e){
+          throw new Error(e);
+        }
+    }
+
+
+
+
+    /* ========================================================================================*/
+    // broadcastDwords xmm1, r32
+    public static Long2 broadcastDwords(int a) {
+        try {
+            Long2 r1 = (Long2) MHm32_vmovd_gpr2xmm.invokeExact(a);
+            Long2 res = (Long2) MHm256_vpbroadcastd.invokeExact(r1);
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VMOVSS xmm1, xmm2, xmm3/m32
+    private static int[] vmovss(Register out, Register in1, Register in2, VEXLength l){
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_F3);
+        return vex_emit(vex, 0x10, modRM(out, in2));
+    }
+    public static final MethodHandle MHm32_vmovss = MachineCodeSnippet.make(
+            "mm32_vmovss", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.LIG.F3.0F.WIG 10 /r
+                vmovss(regs[0],regs[1],regs[2],L_128)
+            );
+
+    static Long2 vmovss_naive(Long2 a, Long2 b) {
+        float[] res = new float[4];
+        for (int i = 0; i < 4; i++) {
+            float a_i = getFloat(a, i);
+            if (i == 0) {
+                res[i] = getFloat(b, i);
+            } else {
+                res[i] = a_i;
+            }
+        }
+        return Long2.make(
+                pack(res[0], res[1]),
+                pack(res[2], res[3])
+        );
+    }
+
+    public static Long2 vmovss(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm32_vmovss.invokeExact(a, b);
+            assert assertEquals(res, vmovss_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes F3.0F opcode 0x58 with {@code in1} in VEX.vvvv and {@code out}/{@code in2} in ModRM.
+     */
+    private static int[] vaddss(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_F3),
+                0x58,
+                modRM(out, in2));
+    }
+    // VADDSS xmm1, xmm2, xmm3/m32
+    public static final MethodHandle MHm32_vaddss = MachineCodeSnippet.make(
+            "mm32_vaddss", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.LIG.F3.0F.WIG 58 /r
+                vaddss(regs[0],regs[1],regs[2],L_128)
+            );
+
+    static Long2 vaddss_naive(Long2 a, Long2 b) {
+        float[] res = new float[4];
+        for (int i = 0; i < 4; i++) {
+            float a_i = getFloat(a, i);
+            if (i == 0) {
+                float b_i = getFloat(b, i);
+                res[i] = a_i + b_i;
+            } else {
+                res[i] = a_i;
+            }
+        }
+        return Long2.make(
+                pack(res[0], res[1]),
+                pack(res[2], res[3])
+        );
+    }
+
+    public static Long2 vaddss(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm32_vaddss.invokeExact(a, b);
+            assert assertEquals(res, vaddss_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VMULSS xmm1, xmm2, xmm3/m32
+    /**
+     * Encodes F3.0F opcode 0x59 with {@code in1} in VEX.vvvv and {@code out}/{@code in2} in ModRM.
+     */
+    private static final int[] vmulss(Register out, Register in1, Register in2, VEXLength l){
+        final int[] prefix = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_F3);
+        return vex_emit(prefix, 0x59, modRM(out, in2));
+    }
+    public static final MethodHandle MHm32_vmulss = MachineCodeSnippet.make(
+            "mm32_vmulss", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.LIG.F3.0F.WIG 59 /r
+                vmulss(regs[0],regs[1],regs[2],L_128)
+            );
+
+    static Long2 vmulss_naive(Long2 a, Long2 b) {
+        float[] res = new float[4];
+        for (int i = 0; i < 4; i++) {
+            float a_i = getFloat(a, i);
+            if (i == 0) {
+                float b_i = getFloat(b, i);
+                res[i] = a_i * b_i;
+            } else {
+                res[i] = a_i;
+            }
+        }
+        return Long2.make(
+                pack(res[0], res[1]),
+                pack(res[2], res[3])
+        );
+    }
+
+    public static Long2 vmulss(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm32_vmulss.invokeExact(a, b);
+            assert assertEquals(res, vmulss_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes 66.0F opcode 0xFE with {@code in1} in VEX.vvvv and {@code out}/{@code in2} in ModRM,
+     * at vector length {@code l}.
+     */
+    private static int[] vpaddd(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66),
+                0xFE,
+                modRM(out, in2));
+    }
+    // VPADDD xmm1, xmm2, xmm3/m128
+    public static final MethodHandle MHm128_vpaddd = MachineCodeSnippet.make(
+            "m128_vpaddd", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE /*out*/, xmmRegistersSSE /*in1*/, xmmRegistersSSE /*in2*/},
+            (Register[] regs) ->
+                // VEX.NDS.128.66.0F.WIG FE /r
+                vpaddd(regs[0],regs[1],regs[2],L_128)
+            );
+
+    private static Long2 vpaddd_naive(Long2 a, Long2 b) {
+        long l1 = pack(
+                getInt(a, 0) + getInt(b, 0),
+                getInt(a, 1) + getInt(b, 1));
+        long l2 = pack(
+                getInt(a, 2) + getInt(b, 2),
+                getInt(a, 3) + getInt(b, 3));
+        return Long2.make(l1, l2);
+    }
+
+    public static Long2 vpaddd(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpaddd.invokeExact(a, b);
+            assert assertEquals(res, vpaddd_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VPADDD ymm1, ymm2, ymm3/m256
+    public static final MethodHandle MHm256_vpaddd = MachineCodeSnippet.make(
+            "m256_vpaddd", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                // VEX.NDS.256.66.0F.WIG FE /r
+                vpaddd(regs[0],regs[1],regs[2],L_256)
+            );
+
+    private static Long4 vpaddd_naive(Long4 left, Long4 right) {
+        int[] res = new int[8];
+        for (int i = 0; i < 8; i++) {
+            res[i] = getInt(left, i) + getInt(right, i);
+        }
+        return Long4.make(pack(res[0], res[1]),
+                pack(res[2], res[3]),
+                pack(res[4], res[5]),
+                pack(res[6], res[7]));
+    }
+
+    public static Long4 vpaddd(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpaddd.invokeExact(a, b);
+            assert assertEquals(res, vpaddd_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes VBLENDPS (VEX.NDS.66.0F3A.WIG 0C /r ib).
+     *
+     * @param out destination register (ModRM.reg)
+     * @param in1 first source, carried in VEX.vvvv
+     * @param in2 second source (ModRM.rm)
+     * @param l   vector length
+     * @param imm blend mask emitted as the trailing immediate byte
+     */
+    private static int[] vblendps(Register out, Register in1, Register in2, VEXLength l, int imm) {
+        // Opcode 0x0C is VBLENDPS only in the 0F3A map behind a 66 prefix, the same map/prefix
+        // pair the vpblendd and vinsertps encoders in this file use.
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F3A, W_LOW, in1, l, PP_66);
+        return vex_emit(vex, 0x0C, modRM(out, in2), imm);
+    }
+    // VBLENDPS ymm1, ymm2, ymm3/m256, imm8
+    private static MethodHandle MHm256_vblendps_gen(int imm) {
+        return MachineCodeSnippet.make(
+                "mm256_vblendps", MT_L4_BINARY, requires(AVX),
+                new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+                (Register[] regs) ->
+                    //VEX.NDS.256.0F.WIG 0C /r
+                    vblendps(regs[0],regs[1],regs[2],L_256,imm)
+                );
+    }
+
+    private static Long4 vblendps_naive(Long4 a, Long4 b, int imm) {
+        float[] res = new float[8];
+        for (int i = 0; i < 8; i++) {
+            if (((1 << i) & imm) != 0) {
+                res[i] = getFloat(b, i);
+            } else {
+                res[i] = getFloat(a, i);
+            }
+        }
+        return Long4.make(pack(res[0], res[1]),
+                pack(res[2], res[3]),
+                pack(res[4], res[5]),
+                pack(res[6], res[7]));
+    }
+
+    /**
+     * One 256-bit blend handle per immediate, indexed by the immediate value.
+     * Only immediates 0..7 are generated, so larger values cannot be looked up here.
+     */
+    @Stable public static final MethodHandle[] MHm256_vblendps_handles = IntStream
+            .range(0, 8)
+            .mapToObj(PatchableVecUtils::MHm256_vblendps_gen)
+            .toArray(MethodHandle[]::new);
+
+    public static Long4 vblendps(Long4 a, Long4 b, int imm) {
+        try {
+            Long4 res = (Long4) MHm256_vblendps_handles[imm].invokeExact(a, b);
+            assert assertEquals(res, vblendps_naive(a, b, imm));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VBLENDPS xmm1, xmm2, xmm3/m128, imm8
+    /**
+     * Builds a 128-bit blend snippet with {@code imm} baked into the emitted code as the
+     * immediate operand.
+     * <p>
+     * The snippet is typed {@code MT_L2_BINARY}, so the instruction is encoded at {@code L_128}
+     * and named accordingly.
+     */
+    private static MethodHandle MHm128_vblendps_gen(int imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vblendps", MT_L2_BINARY, requires(AVX),
+                new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+                (Register[] regs) ->
+                    // 0C /r ib at 128 bits
+                    vblendps(regs[0],regs[1],regs[2],L_128,imm)
+                );
+    }
+
+    private static Long2 vblendps_naive(Long2 a, Long2 b, int imm) {
+        float[] res = new float[4];
+        for (int i = 0; i < 4; i++) {
+            if (((1 << i) & imm) != 0) {
+                res[i] = getFloat(b, i);
+            } else {
+                res[i] = getFloat(a, i);
+            }
+        }
+        return Long2.make(pack(res[0], res[1]),
+                pack(res[2], res[3]));
+    }
+
+    /**
+     * One 128-bit blend handle per immediate, indexed by the immediate value.
+     * Only immediates 0..3 are generated, so larger values cannot be looked up here.
+     */
+    @Stable public static final MethodHandle[] MHm128_vblendps_handles = IntStream
+            .range(0, 4)
+            .mapToObj(PatchableVecUtils::MHm128_vblendps_gen)
+            .toArray(MethodHandle[]::new);
+
+    public static Long2 vblendps(Long2 a, Long2 b, int imm) {
+        try {
+            Long2 res = (Long2) MHm128_vblendps_handles[imm].invokeExact(a, b);
+            assert assertEquals(res, vblendps_naive(a, b, imm));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    private static int[] vblendvpd(Register out, Register in1, Register in2, Register maskr, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out),X_LOW,bBit(in2),M_0F3A,W_LOW,in1,l,PP_66);
+        int maskenc = (maskr.encoding() << 4) & 0xF0;
+        return vex_emit(vex,0x4B,modRM(out,in2),maskenc);
+
+    }
+    // VBLENDVPD ymm1, ymm2, ymm3, ymm4
+    public static final MethodHandle MHm256_vblendvpd = MachineCodeSnippet.make(
+            "mm256_vblendvpd", MT_L4_L4_L4_L4, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F3A.W0 4B /r /is4
+                vblendvpd(regs[0],regs[1],regs[2],regs[3],L_256)
+            );
+
+    public static Long4 vblendvpd(Long4 a, Long4 b, Long4 c){
+        try {
+           Long4 res = (Long4) MHm256_vblendvpd.invokeExact(a,b,c);
+           return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    private static int[] vblendvps(Register out, Register in1, Register in2, Register maskr, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out),X_LOW,bBit(in2),M_0F3A,W_LOW,in1,l,PP_66);
+        int maskenc = (maskr.encoding() << 4) & 0xF0;
+        return vex_emit(vex,0x4A,modRM(out,in2),maskenc);
+    }
+    // VBLENDVPS ymm1, ymm2, ymm3, ymm4
+    public static final MethodHandle MHm256_vblendvps = MachineCodeSnippet.make(
+            "mm256_vblendvps", MT_L4_L4_L4_L4, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F3A.W0 4A /r /is4
+                vblendvps(regs[0],regs[1],regs[2],regs[3],L_256)
+            );
+
+    /* ========================================================================================*/
+    /**
+     * Encodes 0F opcode 0x59 (no SIMD prefix) with {@code in1} in VEX.vvvv and
+     * {@code out}/{@code in2} in ModRM, at vector length {@code l}.
+     */
+    private static int[] vmulps(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_NONE),
+                0x59,
+                modRM(out, in2));
+    }
+
+    // VMULPS ymm1, ymm2, ymm3/m256
+    public static final MethodHandle MHm256_vmulps = MachineCodeSnippet.make(
+            "mm256_vmulps", MT_L4_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.0F.WIG 59 /r
+                vmulps(regs[0],regs[1],regs[2],L_256)
+            );
+
+    private static Long4 vmulps_naive(Long4 a, Long4 b) {
+        float[] res = new float[8];
+        for (int i = 0; i < 8; i++) {
+            res[i] = getFloat(a, i) * getFloat(b, i);
+        }
+        return Long4.make(pack(res[0], res[1]),
+                pack(res[2], res[3]),
+                pack(res[4], res[5]),
+                pack(res[6], res[7]));
+    }
+
+    @ForceInline public static Long4 vmulps(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vmulps.invokeExact(a, b);
+            assert assertEquals(res, vmulps_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    private static int[] vmulpsSunk(Register out, Register vec, Register base, Register index, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out), xBit(index), bBit(base), M_0F, W_LOW, vec, l, PP_NONE);
+        return vex_emit(vex, 0x59, modRM_SIB_DISP8(out), sibByte(index, base, sibScale(Unsafe.ARRAY_FLOAT_INDEX_SCALE)), Unsafe.ARRAY_FLOAT_BASE_OFFSET);
+    }
+
+    // VMULPS ymm1, ymm2, m256
+    public static final MethodHandle MHm256_vmulps_sunk = MachineCodeSnippet.make(
+            "mm256_vmulps_sunk", MT_L4_L4_FLOATARY_INT, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, cpuRegisters, cpuRegisters},
+            (Register[] regs) ->
+                //VEX.256.0F.WIG 59 /r
+                vmulpsSunk(regs[0],regs[1],regs[2],regs[3],L_256)
+            );
+
+    public static Long4 vmulps(Long4 a, float[] ary, int i) {
+        try {
+            Long4 res = (Long4) MHm256_vmulps_sunk.invokeExact(a, ary, i);
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes 0F opcode 0x58 (no SIMD prefix) with {@code in1} in VEX.vvvv and
+     * {@code out}/{@code in2} in ModRM, at vector length {@code l}.
+     */
+    public static int[] vaddps(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_NONE),
+                0x58,
+                modRM(out, in2));
+    }
+    // VADDPS ymm1, ymm2, ymm3
+    public static final MethodHandle MHm256_vaddps = MachineCodeSnippet.make(
+            "mm256_vaddps", MT_L4_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.256.0F.WIG 58 /r
+                vaddps(regs[0],regs[1],regs[2],L_256)
+            );
+
+    private static Long4 vaddps_naive(Long4 a, Long4 b) {
+        float[] res = new float[8];
+        for (int i = 0; i < 8; i++) {
+            res[i] = getFloat(a, i) + getFloat(b, i);
+        }
+        return long4FromFloatArray(res, 0);
+    }
+
+
+    @ForceInline public static Long4 vaddps(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vaddps.invokeExact(a, b);
+            //assert assertEquals(res, vaddps_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /**
+     * Encodes 0F opcode 0x58 with a memory operand addressed as base + index * scale + disp8,
+     * where the displacement is {@code Unsafe.ARRAY_FLOAT_BASE_OFFSET}. {@code vec} is carried in
+     * VEX.vvvv.
+     *
+     * @param out   destination register (ModRM.reg)
+     * @param vec   first source register
+     * @param base  register used as the SIB base
+     * @param index register used as the SIB index
+     * @param l     vector length
+     */
+    private static int[] vaddpsSunk(Register out, Register vec, Register base, Register index, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out), xBit(index), bBit(base), M_0F, W_LOW, vec, l, PP_NONE);
+        // The element stride goes through sibScale first, exactly as the vmulpsSunk encoder does;
+        // handing the raw stride to sibByte would not yield the intended SIB scale field.
+        return vex_emit(vex, 0x58, modRM_SIB_DISP8(out), sibByte(index, base, sibScale(Unsafe.ARRAY_FLOAT_INDEX_SCALE)), Unsafe.ARRAY_FLOAT_BASE_OFFSET);
+    }
+    // VADDPS ymm1, ymm2, m256
+    public static final MethodHandle MHm256_vaddps_sunk = MachineCodeSnippet.make(
+            "mm256_vaddps_sunk", MT_L4_L4_FLOATARY_INT, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, cpuRegisters, cpuRegisters},
+            (Register[] regs) ->
+                //VEX.256.0F.WIG 58 /r
+                vaddpsSunk(regs[0],regs[1],regs[2],regs[3],L_256)
+            );
+
+    public static Long4 vaddps(Long4 a, float[] ary, int i) {
+        try {
+            Long4 res = (Long4) MHm256_vaddps_sunk.invokeExact(a, ary, i);
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes 66.0F opcode 0x58 with {@code in1} in VEX.vvvv and {@code out}/{@code in2} in ModRM,
+     * at vector length {@code l}.
+     */
+    private static int[] vaddpd(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66),
+                0x58,
+                modRM(out, in2));
+    }
+
+    //VADDPD xmm1, xmm2, xmm3/128
+    public static final MethodHandle MHm128_vaddpd = MachineCodeSnippet.make(
+            "mm128_vaddpd", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.66.0F.WIG 58 /r
+                vaddpd(regs[0],regs[1],regs[2],L_128)
+            );
+
+    private static Long2 vaddpd_naive(Long2 a, Long2 b) {
+        //TODO: Write this reference implementation. Until then this stub ignores its arguments and returns null, so it cannot be used as an expected value.
+        return null;
+    }
+
+    public static Long2 vaddpd(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vaddpd.invokeExact(a, b);
+            //assert assertEquals(res, VADDPD_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    //VADDPD ymm1, ymm2, ymm3/256
+    public static final MethodHandle MHm256_vaddpd = MachineCodeSnippet.make(
+            "mm256_vaddpd", MT_L4_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 58 /r
+                vaddpd(regs[0],regs[1],regs[2],L_256)
+            );
+
+    private static Long4 vaddpd_naive(Long4 a, Long4 b) {
+        //TODO: Write this reference implementation. Until then this stub ignores its arguments and returns null, so it cannot be used as an expected value.
+        return null;
+    }
+
+    @ForceInline public static Long4 vaddpd(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vaddpd.invokeExact(a, b);
+            //assert assertEquals(res, VADDPD_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes 66.0F opcode 0x59 with {@code in1} in VEX.vvvv and {@code out}/{@code in2} in ModRM,
+     * at vector length {@code l}.
+     */
+    private static int[] vmulpd(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66),
+                0x59,
+                modRM(out, in2));
+    }
+    //VMULPD xmm1, xmm2, xmm3/m128
+    public static final MethodHandle MHm128_vmulpd = MachineCodeSnippet.make(
+            "mm128_vmulpd", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.66.0F.WIG 59 /r
+                vmulpd(regs[0],regs[1],regs[2],L_128)
+            );
+
+    private static Long2 vmulpd_naive(Long2 a, Long2 b) {
+        //TODO: Write this reference implementation. Until then this stub ignores its arguments and returns null, so it cannot be used as an expected value.
+        return null;
+    }
+
+    public static Long2 vmulpd(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vmulpd.invokeExact(a, b);
+            //assert assertEquals(res, VMULPD_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    //VMULPD ymm1, ymm2, ymm3
+    public static final MethodHandle MHm256_vmulpd = MachineCodeSnippet.make(
+            "mm256_vmulpd", MT_L4_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 59 /r
+                vmulpd(regs[0],regs[1],regs[2],L_256)
+            );
+
+    private static Long4 vmulpd_naive(Long4 a, Long4 b) {
+        //TODO: Write this reference implementation. Until then this stub ignores its arguments and returns null, so it cannot be used as an expected value.
+        return null;
+    }
+
+    public static Long4 vmulpd(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vmulpd.invokeExact(a, b);
+            //assert assertEquals(res, VMULPD_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    /**
+     * Encodes 66.0F opcode 0x59 with a memory operand addressed through {@code base} alone
+     * (ModRM mod = 00, no SIB byte, no displacement). {@code vec2} is carried in VEX.vvvv.
+     * <p>
+     * NOTE(review): with mod = 00 the rm values 100b and 101b do not mean plain register-indirect
+     * addressing, so a {@code base} whose low three encoding bits are 4 or 5 is not handled here.
+     *
+     * @param out  destination register (ModRM.reg)
+     * @param vec2 first source register
+     * @param base register holding the address of the second source
+     * @param l    vector length
+     */
+    private static int[] vmulpdSunkOffheap(Register out, Register vec2, Register base, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(base), M_0F, W_LOW, vec2, l, PP_66);
+        // ModRM.reg and ModRM.rm are three bits wide each; the fourth register bit travels in the
+        // VEX prefix via rBit/bBit. Masking with 3 would mis-encode registers 4-7.
+        int modrm = ((0b111 & out.encoding()) << 3) | (0b111 & base.encoding());
+        return vex_emit(vex, 0x59, modrm);
+    }
+    //VMULPD ymm1, ymm2, m256
+    public static final MethodHandle MHm256_vmulpd_sunk_offheap = MachineCodeSnippet.make(
+            "mm256_vmulpd_sunk_offheap", MT_L4_L4_LONG, requires(AVX),
+            //new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, cpuRegisters},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 59 /r
+                vmulpdSunkOffheap(regs[0],regs[1],regs[2],L_256)
+            );
+
+    /**
+     * Encodes 66.0F opcode 0x59 with a memory operand addressed as base + index * scale + offset.
+     * {@code vec2} is carried in VEX.vvvv.
+     * <p>
+     * The displacement is emitted in the smallest form that holds it: none when {@code offset}
+     * is 0, one byte (disp8) when it fits a signed byte, four little-endian bytes (disp32)
+     * otherwise. Every value handed to {@code vex_emit} elsewhere in this file stands for a single
+     * emitted byte, so a disp32 has to be supplied as four separate byte values.
+     * <p>
+     * NOTE(review): in the zero-offset form {@code scale} is passed to {@code sibByte} unchanged,
+     * while the other forms convert it with {@code sibScale}; the one zero-offset caller in this
+     * file passes 0. Confirm which convention is intended before adding new callers.
+     *
+     * @param out    destination register (ModRM.reg)
+     * @param vec2   first source register
+     * @param base   register used as the SIB base
+     * @param index  register used as the SIB index
+     * @param scale  scale argument, see the note above
+     * @param offset displacement added to the address
+     * @param l      vector length
+     */
+    private static int[] vmulpdSunkOffheapScaledOffset(Register out, Register vec2, Register base, Register index, int scale, int offset, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out), xBit(index), bBit(base), M_0F, W_LOW, vec2, l, PP_66);
+        if(offset == 0) {
+            return vex_emit(vex, 0x59, modRM_SIB_NODISP(out), sibByte(index,base,scale));
+        }
+        if(offset >= Byte.MIN_VALUE && offset <= Byte.MAX_VALUE) {
+            // Same disp8 shape as the vmulpsSunk encoder uses for the float array base offset.
+            return vex_emit(vex, 0x59, modRM_SIB_DISP8(out), sibByte(index, base, sibScale(scale)), offset & 0xFF);
+        }
+        int[] disp32 = {
+                offset & 0xFF,
+                (offset >>> 8) & 0xFF,
+                (offset >>> 16) & 0xFF,
+                (offset >>> 24) & 0xFF
+        };
+        return join(
+                vex_emit(vex, 0x59, modRM_SIB_DISP32(out), sibByte(index, base, sibScale(scale))),
+                disp32);
+    }
+
+    //VMULPD ymm1, ymm2, m256
+    public static final MethodHandle MHm256_vmulpd_sunk_offheap_scale1 = MachineCodeSnippet.make(
+            "mm256_vmulpd_sunk_offheap", MT_L4_L4_LONG_INT, requires(AVX),
+            //new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, cpuRegisters, cpuRegisters},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 59 /r
+                vmulpdSunkOffheapScaledOffset(regs[0],regs[1],regs[2],regs[3],0,0,L_256)
+            );
+
+
+    //VMULPD ymm1, ymm2, m256
+    public static final MethodHandle MHm256_vmulpd_sunk = MachineCodeSnippet.make(
+            "mm256_vmulpd_sunk", MT_L4_L4_DOUBLEARY_INT, requires(AVX),
+            //new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, cpuRegisters, cpuRegisters},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 59 /r
+                vmulpdSunkOffheapScaledOffset(regs[0],regs[1],regs[2],regs[3],Unsafe.ARRAY_DOUBLE_INDEX_SCALE,Unsafe.ARRAY_DOUBLE_BASE_OFFSET,L_256)
+            );
+
+    private static Long4 vmulpd_naive(Long4 a, double[] ary, int index) {
+        //TODO: Write this reference implementation. Until then this stub ignores its arguments and returns null, so it cannot be used as an expected value.
+        return null;
+    }
+
+    @ForceInline public static Long4 vmulpd(Long4 a, double[] ary, int index) {
+        try {
+            Long4 res = (Long4) MHm256_vmulpd_sunk.invokeExact(a, ary, index);
+            //assert assertEquals(res, VMULPD_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    public static Long4 vmulpd(Long4 a, long base, int index) {
+        try {
+            Long4 res = (Long4) MHm256_vmulpd_sunk_offheap_scale1.invokeExact(a, base,index);
+            //assert assertEquals(res, VMULPD_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long4 vmulpd(Long4 a, long base) {
+        try {
+            Long4 res = (Long4) MHm256_vmulpd_sunk_offheap.invokeExact(a, base);
+            //assert assertEquals(res, VMULPD_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes VPBROADCASTD (VEX.66.0F38.W0 58 /r) with a memory source addressed through
+     * {@code in}.
+     *
+     * @param out destination vector register (ModRM.reg)
+     * @param in  register passed to {@code modRM_regInd} as the source operand
+     * @param l   vector length
+     */
+    private static int[] vbroadcastd(Register out, Register in, VEXLength l) {
+        // This instruction has no second source operand, so VEX.vvvv must be 1111b; the
+        // non-NDS prefix builder is used for that, as in broadcastD and movQ.
+        int[] vex = vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F38, W_LOW, 0b1111, l, PP_66);
+        return vex_emit(vex, 0x58, modRM_regInd(out, in));
+    }
+    // VPBROADCASTD ymm1, m32
+    /**
+     * Snippet handle wrapping the {@code vbroadcastd} encoder at {@code L_256}: one vector
+     * register as output and one general-purpose register as input.
+     * <p>
+     * AVX2 is required because VPBROADCASTD is an AVX2 instruction.
+     */
+    public static final MethodHandle MHm256_vpbroadcastd = MachineCodeSnippet.make(
+            "mm256_vpbroadcastd", MT_L4_LONG, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, cpuRegisters},
+            (Register[] regs) ->
+                //VEX.256.66.0F38.W0 58 /r
+                vbroadcastd(regs[0],regs[1],L_256)
+            );
+
+    public static Long4 vpbroadcastd(Long4 a) {
+        try {
+            Long4 res = (Long4) MHm256_vpbroadcastd.invokeExact(a);
+            //assert assertEquals(res, vbroadcastd_naive(a));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes VPSHUFD (VEX.66.0F.WIG 70 /r ib).
+     *
+     * @param out destination register (ModRM.reg)
+     * @param in  source register (ModRM.rm)
+     * @param l   vector length
+     * @param imm shuffle control emitted as the trailing immediate byte
+     */
+    private static int[] vpshufd(Register out, Register in, VEXLength l, int imm) {
+        // This instruction has a single source operand, so VEX.vvvv must be 1111b; the
+        // non-NDS prefix builder is used for that, as in broadcastD and movQ.
+        int[] vex = vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F, W_LOW, 0b1111, l, PP_66);
+        return vex_emit(vex, 0x70, modRM(out, in), imm);
+    }
+    // VPSHUFD xmm1, xmm2/m128, imm8
+    private static MethodHandle MHm128_vpshufd(Integer imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vpshufd", MT_L2_UNARY, requires(AVX),
+                new Register[][]{xmmRegistersSSE, xmmRegistersSSE},
+                (Register[] regs) ->
+                    //VEX.128.66.0F.WIG 70 /r ib
+                    vpshufd(regs[0],regs[1],L_128,imm)
+                );
+    }
+
+    /**
+     * Reference result for the 128-bit shuffle: int lane {@code i} of the result is the lane of
+     * {@code a} selected by bits {@code 2i+1 .. 2i} of {@code imm}.
+     */
+    static public Long2 vpshufd_naive(Long2 a, int imm) {
+        final int[] picked = new int[4];
+        for (int lane = 0; lane < picked.length; lane++) {
+            final int selector = (imm >> (lane * 2)) & 0x3;
+            picked[lane] = getInt(a, selector);
+        }
+        return Long2.make(pack(picked[0], picked[1]), pack(picked[2], picked[3]));
+    }
+
+    /**
+     * One 128-bit shuffle handle per immediate, indexed by the immediate value.
+     * Only immediates 0..3 are generated, so larger values cannot be looked up here.
+     */
+    @Stable public static final MethodHandle[] MHm128_vpshufd_handles = IntStream
+            .range(0, 4)
+            .mapToObj(PatchableVecUtils::MHm128_vpshufd)
+            .toArray(MethodHandle[]::new);
+
+    public static Long2 vpshufd(Long2 a, int imm) {
+        try {
+            Long2 res = (Long2) MHm128_vpshufd_handles[imm].invokeExact(a);
+            assert assertEquals(res, vpshufd_naive(a, imm));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes 66.0F3A.W0 opcode 0x02 with {@code in1} in VEX.vvvv, {@code out}/{@code in2} in
+     * ModRM and {@code imm} as the trailing immediate byte.
+     */
+    private static int[] vpblendd(Register out, Register in1, Register in2, VEXLength l, int imm) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F3A, W_LOW, in1, l, PP_66),
+                0x02,
+                modRM(out, in2),
+                imm);
+    }
+    // VPBLENDD xmm1, xmm2, xmm3/m128, imm8
+    private static MethodHandle MHm128_vpblendd(Integer imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vpblendd", MT_L2_BINARY, requires(AVX2),
+                new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+                (Register[] regs) ->
+                    //VEX.NDS.128.66.0F3A.W0 02 /r ib
+                    vpblendd(regs[0],regs[1],regs[2],L_128,imm)
+                );
+    }
+
+    private static Long2 vpblendd_naive(Long2 a, Long2 b, int imm) {
+        int[] res = new int[4];
+        for (int i = 0; i < res.length; i++) {
+            if (((1 << i) & imm) != 0) {
+                res[i] = getInt(b, i);
+            } else {
+                res[i] = getInt(a, i);
+            }
+        }
+        return Long2.make(pack(res[0], res[1]), pack(res[2], res[3]));
+    }
+
+    // One pre-built snippet per blend mask. The 128-bit form uses imm8[3:0] (one bit per
+    // dword lane, 16 combinations) and vpblendd(Long2, Long2, int) indexes this table with
+    // the raw immediate, so the table must cover 0..15 rather than only 0..3.
+    @Stable public static final MethodHandle[] MHm128_vpblendd_handles = IntStream
+            .range(0, 16)
+            .mapToObj(PatchableVecUtils::MHm128_vpblendd)
+            .toArray(MethodHandle[]::new);
+
+    public static Long2 vpblendd(Long2 a, Long2 b, int imm) {
+        try {
+            Long2 res = (Long2) MHm128_vpblendd_handles[imm].invokeExact(a, b);
+            assert assertEquals(res, vpblendd_naive(a, b, imm));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vinsertps(Register out, Register in1, Register in2, VEXLength l, int imm) {
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F3A, W_LOW, in1, l, PP_66);
+        return vex_emit(vex, 0x21, modRM(out, in2), imm);
+    }
+    // VINSERTPS xmm1, xmm2, xmm3/m32, imm8
+    private static MethodHandle MHm128_vinsertps(Integer imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vinsertps", MT_L2_BINARY, requires(AVX),
+                new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+                (Register[] regs) ->
+                    //VEX.NDS.128.66.0F3A.WIG 21 /r ib
+                    vinsertps(regs[0],regs[1],regs[2],L_128,imm)
+                );
+    }
+
+    private static Long2 vinsertps_naive(Long2 a, Long2 b, int imm) {
+        int src = (imm & 0b11000000) >>> 6,
+                dst = (imm & 0b00110000) >>> 4;
+
+        float[] src_a = new float[4],
+                src_b = new float[4];
+        for (int i = 0; i < 4; i++) {
+            src_a[i] = getFloat(a, i);
+            src_b[i] = getFloat(b, i);
+        }
+
+        src_a[dst] = src_b[src];
+
+        for (int i = 0; i < 4; i++) {
+            if (((1 << i) & imm) != 0) {
+                src_a[i] = 0f;
+            }
+        }
+        return Long2.make(pack(src_a[0], src_a[1]), pack(src_a[2], src_a[3]));
+    }
+
+    // One pre-built snippet per imm8 value. VINSERTPS uses all eight immediate bits
+    // (source lane, destination lane, zero mask) and vinsertps(Long2, Long2, int) indexes
+    // this table with the raw immediate, so the table must cover 0..255 rather than 0..3.
+    @Stable public static final MethodHandle[] MHm128_vinsertps_handles = IntStream
+            .range(0, 256)
+            .mapToObj(PatchableVecUtils::MHm128_vinsertps)
+            .toArray(MethodHandle[]::new);
+
+    public static Long2 vinsertps(Long2 a, Long2 b, int imm) {
+        try {
+            Long2 res = (Long2) MHm128_vinsertps_handles[imm].invokeExact(a, b);
+            assert assertEquals(res, vinsertps_naive(a, b, imm));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+
+    private static int[] vpinsrb(Register out, Register in1, Register in2, int imm){
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F3A, W_LOW, in1, L_128, PP_66);
+        return vex_emit(vex, 0x20, modRM(out, in2), imm);
+    }
+
+    // VPINSRB xmm1, xmm2, r/m8, imm8
+    private static MethodHandle MHm128_vpinsrb(Integer imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vpinsrb", MT_L2_L2_BYTE, requires(AVX),
+                new Register[][]{xmmRegistersSSE, xmmRegistersSSE, cpuRegisters}, //{rdi, rsi, rdx, rcx, r8, r9}},
+                (Register[] regs) ->
+                    //VEX.NDS.128.66.0F3A.W0 20 /r ib
+                    vpinsrb(regs[0],regs[1],regs[2],imm)
+                );
+    }
+
+    /**
+     * Pure-Java reference for VPINSRB: returns {@code a} with the byte at position
+     * {@code imm & 0xF} replaced by {@code val}; all other bytes are unchanged.
+     * (The previous version overwrote an entire 32-bit lane, which is the dword insert.)
+     * Assumes {@code getLong(a, 0)} is the low quadword, as vpinsrq_naive does.
+     */
+    private static Long2 vpinsrb_naive(Long2 a, byte val, int imm) {
+        long[] halves = {getLong(a, 0), getLong(a, 1)};
+        int slot = imm & 0b1111;
+        int half = slot >>> 3;                    // which quadword holds the byte
+        int shift = (slot & 0b111) * Byte.SIZE;   // bit offset of the byte inside it
+        long cleared = halves[half] & ~(0xFFL << shift);
+        halves[half] = cleared | ((val & 0xFFL) << shift);
+        return Long2.make(halves[0], halves[1]);
+    }
+
+    // One pre-built snippet per byte position. vpinsrb(Long2, byte, int) indexes this table
+    // with (imm & 0b1111), so it needs 16 entries; with only 4, positions 4..15 failed
+    // with an out-of-bounds index.
+    @Stable public static final MethodHandle[] MHm128_vpinsrb_handles = IntStream
+            .range(0, 16)
+            .mapToObj(PatchableVecUtils::MHm128_vpinsrb)
+            .toArray(MethodHandle[]::new);
+
+    public static Long2 vpinsrb(Long2 a, byte val, int imm) {
+        try {
+            Long2 res = (Long2) MHm128_vpinsrb_handles[imm & 0b1111].invokeExact(a, val);
+            //assertEquals(res, vpinsrb_naive(a, val, imm));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vpinsrw(Register out, Register in1, Register in2, int imm){
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, L_128, PP_66);
+        return vex_emit(vex, 0xC4, modRM(out, in2), imm);
+    }
+    // VPINSRW xmm1, xmm2, r32/m16, imm8
+    private static MethodHandle MHm128_vpinsrw(Integer imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vpinsrw", MT_L2_L2_SHORT, requires(AVX),
+                new Register[][]{xmmRegistersSSE, xmmRegistersSSE, cpuRegisters}, //{rdi, rsi, rdx, rcx, r8, r9}},
+                (Register[] regs) ->
+                    //VEX.NDS.128.66.0F.W0 C4 /r ib
+                    vpinsrw(regs[0],regs[1],regs[2],imm)
+                );
+    }
+
+    /**
+     * Pure-Java reference for VPINSRW: returns {@code a} with the 16-bit word at position
+     * {@code imm & 0b111} replaced by {@code val}; all other words are unchanged.
+     * (The previous version was a stub that returned {@code null}.)
+     * Assumes {@code getLong(a, 0)} is the low quadword, as vpinsrq_naive does.
+     */
+    private static Long2 vpinsrw_naive(Long2 a, short val, int imm) {
+        long[] halves = {getLong(a, 0), getLong(a, 1)};
+        int slot = imm & 0b111;
+        int half = slot >>> 2;                    // which quadword holds the word
+        int shift = (slot & 0b11) * Short.SIZE;   // bit offset of the word inside it
+        long cleared = halves[half] & ~(0xFFFFL << shift);
+        halves[half] = cleared | ((val & 0xFFFFL) << shift);
+        return Long2.make(halves[0], halves[1]);
+    }
+
+    // One pre-built VPINSRW snippet per word position (0..7); indexed with (imm & 0b111).
+    @Stable public static final MethodHandle[] MHm128_vpinsrw_handles = IntStream
+            .range(0, 8)
+            .mapToObj(PatchableVecUtils::MHm128_vpinsrw)
+            .toArray(MethodHandle[]::new);
+
+    public static Long2 vpinsrw(Long2 a, short val, int imm) {
+        try {
+            Long2 res = (Long2) MHm128_vpinsrw_handles[imm & 0b111].invokeExact(a, val);
+            //assertEquals(res, vpinsrb_naive(a, val, imm));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vpinsrd(Register out, Register in1, Register in2, int imm){
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F3A, W_LOW, in1, L_128, PP_66);
+        return vex_emit(vex, 0x22, modRM(out, in2), imm);
+    }
+
+    // VPINSRD xmm1, xmm2, r/m32, imm8
+    private static MethodHandle MHm128_vpinsrd(Integer imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vpinsrd", MT_L2_L2_INT, requires(AVX),
+                new Register[][]{xmmRegistersSSE, xmmRegistersSSE, cpuRegisters}, //{rdi, rsi, rdx, rcx, r8, r9}},
+                (Register[] regs) ->
+                    //VEX.NDS.128.66.0F3A.W0 22 /r ib
+                    vpinsrd(regs[0],regs[1],regs[2],imm)
+                );
+    }
+
+    private static Long2 vpinsrd_naive(Long2 a, int val, int imm) {
+        int[] vals = new int[4];
+        for (int i = 0; i < 4; i++) {
+            vals[i] = getInt(a, i);
+        }
+        vals[imm & 0b11] = val;
+        return Long2.make(pack(vals[0], vals[1]), pack(vals[2], vals[3]));
+    }
+
+    // One pre-built VPINSRD snippet per dword position (0..3); indexed with (imm & 0b11).
+    @Stable public static final MethodHandle[] MHm128_vpinsrd_handles = IntStream
+            .range(0, 4)
+            .mapToObj(PatchableVecUtils::MHm128_vpinsrd)
+            .toArray(MethodHandle[]::new);
+
+    public static Long2 vpinsrd(Long2 a, int val, int imm) {
+        try {
+            Long2 res = (Long2) MHm128_vpinsrd_handles[imm & 0b11].invokeExact(a, val);
+            assertEquals(res, vpinsrd_naive(a, val, imm));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vpinsrq(Register out, Register in1, Register in2, int imm){
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F3A, W_HIGH, in1, L_128, PP_66);
+        return vex_emit(vex, 0x22, modRM(out, in2), imm);
+    }
+    // VPINSRQ xmm1, xmm2, r/m64, imm8
+    private static MethodHandle MHm128_vpinsrq(Integer imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vpinsrq", MT_L2_LONG_BINARY, requires(AVX),
+                new Register[][]{xmmRegistersSSE, xmmRegistersSSE, cpuRegisters}, //{rdi, rsi, rdx, rcx, r8, r9}},
+                (Register[] regs) ->
+                    //VEX.NDS.128.66.0F3A.W1 22 /r ib
+                    vpinsrq(regs[0],regs[1],regs[2],imm)
+                );
+    }
+
+    private static Long2 vpinsrq_naive(Long2 a, long val, int imm) {
+        long[] vals = new long[2];
+        for (int i = 0; i < 2; i++) {
+            vals[i] = getLong(a, i);
+        }
+        vals[imm & 0b1] = val;
+        return Long2.make(vals[0], vals[1]);
+    }
+
+    // One pre-built VPINSRQ snippet per qword position (0..1); indexed with (imm & 0b1).
+    @Stable public static final MethodHandle[] MHm128_vpinsrq_handles = IntStream
+            .range(0, 2)
+            .mapToObj(PatchableVecUtils::MHm128_vpinsrq)
+            .toArray(MethodHandle[]::new);
+
+    public static Long2 vpinsrq(Long2 a, long val, int imm) {
+        try {
+            Long2 res = (Long2) MHm128_vpinsrq_handles[imm & 0b1].invokeExact(a, val);
+            assertEquals(res, vpinsrq_naive(a, val, imm));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    // MOV r32,r32
+//    private static MethodHandle MHm32_mov = MachineCodeSnippet.make(
+//            "mm32_mov", INT_BINARY, true,
+//            new Register[][]{cpuRegisters,cpuRegisters},
+//            (Register[] regs) -> {
+//                Register out = regs[0];
+//                Register in = regs[1];
+//                return new int[] {
+//                        0x8B, //mov out, in
+//                        modRM(out,in)
+//                };
+//            });
+//
+//    private static int mov_naive(int a) {
+//        return a;
+//    }
+//
+//    public static int mov(int a){
+//        try {
+//            int res = (int) MHm32_mov.invokeExact(a);
+//            assertEquals(res,mov_naive(a));
+//            return res;
+//        } catch (Throwable e){
+//            throw new Error(e);
+//        }
+//    }
+
+    /* ========================================================================================*/
+    private static int[] vinsertf128(Register out, Register in1, Register in2, VEXLength l, int imm) {
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F3A, W_LOW, in1, l, PP_66);
+        return vex_emit(vex, 0x18, modRM(out, in2), imm);
+    }
+    // VINSERTF128 ymm1, ymm2, xmm3/m128, imm8
+    private static MethodHandle MHm256_vinsertf128(int imm) {
+        return MachineCodeSnippet.make(
+                "mm256_vinsertf128" + imm, MT_L4_L2, requires(AVX),
+                new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+                (Register[] regs) ->
+                    //VEX.NDS.256.66.0F3A.W0 18 /r ib
+                    vinsertf128(regs[0],regs[1],regs[2],L_256,imm)
+                );
+    }
+
+    // One pre-built VINSERTF128 snippet per 128-bit half (0 = first two longs, 1 = last two).
+    @Stable public static final MethodHandle[] MHm256_vinsertf128_handles = IntStream
+            .range(0, 2)
+            .mapToObj(PatchableVecUtils::MHm256_vinsertf128)
+            .toArray(MethodHandle[]::new);
+
+    /**
+     * Pure-Java reference for VINSERTF128: returns {@code dst} with one pair of longs
+     * replaced by the two longs of {@code src}. When the low bit of {@code imm} is 0 the
+     * pair at indices 0..1 is replaced, otherwise the pair at indices 2..3.
+     */
+    private static Long4 vinsertf128_naive(Long4 dst, Long2 src, int imm) {
+        if ((imm & 0x1) == 0) {
+            return Long4.make(src.extract(0), src.extract(1), dst.extract(2), dst.extract(3));
+        }
+        return Long4.make(dst.extract(0), dst.extract(1), src.extract(0), src.extract(1));
+    }
+
+    /**
+     * Invokes the pre-built VINSERTF128 snippet for the half selected by {@code imm} and
+     * cross-checks the result against {@code vinsertf128_naive}.
+     * Only the low bit of {@code imm} selects the half (the reference already masks it),
+     * so the table index is masked the same way; previously any {@code imm} outside 0..1
+     * ran off the end of the two-entry table.
+     *
+     * @param dst 256-bit vector to insert into
+     * @param src 128-bit vector to insert
+     * @param imm half selector; only {@code imm & 0x1} is used
+     * @return the snippet's result
+     * @throws Error wrapping anything thrown while invoking or checking the snippet
+     */
+    public static Long4 vinsertf128(Long4 dst, Long2 src, int imm) {
+        try {
+            Long4 res = (Long4) MHm256_vinsertf128_handles[imm & 0x1].invokeExact(dst, src);
+            assertEquals(res, vinsertf128_naive(dst, src, imm));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    //TODO: START HERE
+    /* ========================================================================================*/
+    private static int[] vpextrb(Register out, Register in, VEXLength l, int imm) {
+        int[] vex = vex_prefix_nonds(rBit(in), X_LOW, bBit(out), M_0F3A, W_LOW, 0b1111, l, PP_66);
+        return vex_emit(vex, 0x14, modRM(in, out), imm);
+    }
+
+    // VPEXTRB r/m8, xmm2, imm8
+    private static MethodHandle MHm128_vpextrb(int imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vpextrb", MT_BYTE_L2, requires(AVX),
+                new Register[][]{cpuRegisters, xmmRegistersSSE},
+                //VEX.128.66.0F3A.W0 14 /r ib
+                (Register[] regs) ->
+                    vpextrb(regs[0],regs[1],L_128,imm)
+                );
+    }
+
+    // One pre-built VPEXTRB snippet per byte position (0..15); indexed with (i & 0b1111).
+    @Stable public static final MethodHandle[] MHm128_vpextrb_handles = IntStream
+            .range(0, 16)
+            .mapToObj(PatchableVecUtils::MHm128_vpextrb)
+            .toArray(MethodHandle[]::new);
+
+    /* public static int vextractps_naive(int i, Long2 val) {
+        return getInt(val, i & 0b11);
+    }*/
+
+    public static byte vpextrb(int i, Long2 val) {
+        try {
+            byte res = (byte) MHm128_vpextrb_handles[i & 0b1111].invokeExact(val);
+            //assertEquals(Integer.toHexString(Float.floatToIntBits(res)),Integer.toHexString(Float.floatToIntBits(vextractps_naive(i,val))));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vpextrw(Register out, Register in, VEXLength l, int imm) {
+        int[] vex = vex_prefix_nonds(rBit(in), X_LOW, bBit(out), M_0F3A, W_LOW, 0b1111, l, PP_66);
+        return vex_emit(vex, 0x15, modRM(in, out), imm);
+    }
+    // VPEXTRW r/m16, xmm2, imm8
+    private static MethodHandle MHm128_vpextrw(int imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vpextrw", MT_SHORT_L2, requires(AVX),
+                new Register[][]{cpuRegisters, xmmRegistersSSE},
+                //VEX.128.66.0F.W0 C5 /r ib
+                (Register[] regs) ->
+                        vpextrw(regs[0],regs[1],L_128,imm)
+                );
+    }
+
+    // One pre-built VPEXTRW snippet per word position (0..7); indexed with (i & 0b111).
+    @Stable public static final MethodHandle[] MHm128_vpextrw_handles = IntStream
+            .range(0, 8)
+            .mapToObj(PatchableVecUtils::MHm128_vpextrw)
+            .toArray(MethodHandle[]::new);
+
+    /* public static int vextractps_naive(int i, Long2 val) {
+        return getInt(val, i & 0b11);
+    }*/
+
+    public static short vpextrw(int i, Long2 val) {
+        try {
+            short res = (short) MHm128_vpextrw_handles[i & 0b111].invokeExact(val);
+            //assertEquals(Integer.toHexString(Float.floatToIntBits(res)),Integer.toHexString(Float.floatToIntBits(vextractps_naive(i,val))));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vextractps(Register out, Register in, VEXLength l, int imm) {
+        int[] vex = vex_prefix_nonds(rBit(in), X_LOW, bBit(out), M_0F3A, W_LOW, 0b1111, L_128, PP_66);
+        return vex_emit(vex, 0x17, modRM(in, out), imm);
+    }
+    // VEXTRACTPS r/m32, xmm1, imm8
+    private static MethodHandle MHm128_vextractps(int imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vextractps", MT_INT_L2, requires(AVX),
+                new Register[][]{cpuRegisters, xmmRegistersSSE},
+                //VEX.128.66.0F3A.WIG 17 /r ib
+                (Register[] regs) ->
+                        vextractps(regs[0],regs[1],L_128,imm)
+                );
+    }
+
+    // One pre-built VEXTRACTPS snippet per float lane (0..3); indexed with (i & 0b11).
+    @Stable public static final MethodHandle[] MHm128_vextractps_handles = IntStream
+            .range(0, 4)
+            .mapToObj(PatchableVecUtils::MHm128_vextractps)
+            .toArray(MethodHandle[]::new);
+
+    /**
+     * Pure-Java reference for VEXTRACTPS: returns 32-bit lane {@code i & 0b11} of
+     * {@code val} as an {@code int}.
+     */
+    public static int vextractps_naive(int i, Long2 val) {
+        final int lane = i & 0b11;
+        return getInt(val, lane);
+    }
+
+    public static float vextractps(int i, Long2 val) {
+        try {
+            int res = (int) MHm128_vextractps_handles[i & 0b11].invokeExact(val);
+            //assertEquals(Integer.toHexString(Float.floatToIntBits(res)),Integer.toHexString(Float.floatToIntBits(vextractps_naive(i,val))));
+            return Float.intBitsToFloat(res);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VPEXTRD r/m32, xmm1, imm8
+    /**
+     * Builds the VPEXTRD snippet with {@code imm} baked in as its immediate. The first
+     * operand is drawn from {@code cpuRegisters}, the second from {@code xmmRegistersSSE};
+     * the snippet requires AVX.
+     * The VEX R and B bits are derived from the registers actually handed to the encoder,
+     * exactly as the sibling vpextrb/vpextrw/vpextrq encoders do. The previous hard-coded
+     * R_LOW/B_LOW ignored the registers, which would presumably mis-encode extended
+     * registers such as r8/r9.
+     */
+    private static MethodHandle MHm128_vpextrd(int imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vpextrd", MT_INT_L2, requires(AVX),
+                new Register[][]{cpuRegisters, xmmRegistersSSE},
+                //VEX.128.66.0F3A.W0 16 /r ib
+                (Register[] regs) -> {
+                    Register out = regs[0],
+                            in = regs[1];
+                    int[] vex = vex_prefix_nonds(rBit(in), X_LOW, bBit(out), M_0F3A, W_LOW, 0b1111, L_128, PP_66);
+                    return vex_emit(vex, 0x16, modRM(in, out), imm);
+                });
+    }
+
+    // One pre-built VPEXTRD snippet per dword lane (0..3); indexed with (i & 0b11).
+    @Stable public static final MethodHandle[] MHm128_vpextrd_handles = IntStream
+            .range(0, 4)
+            .mapToObj(PatchableVecUtils::MHm128_vpextrd)
+            .toArray(MethodHandle[]::new);
+
+    /**
+     * Pure-Java reference for VPEXTRD: returns 32-bit lane {@code i & 0b11} of {@code val}.
+     */
+    public static int vpextrd_naive(int i, Long2 val) {
+        final int lane = i & 0b11;
+        return getInt(val, lane);
+    }
+
+    public static int vpextrd(int i, Long2 val) {
+        try {
+            int res = (int) MHm128_vpextrd_handles[i & 0b11].invokeExact(val);
+            //assertEquals(Integer.toHexString(Float.floatToIntBits(res)),Integer.toHexString(Float.floatToIntBits(vextractps_naive(i,val))));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vpextrq(Register out, Register in, VEXLength l, int imm) {
+        int[] vex = vex_prefix_nonds(rBit(in), X_LOW, bBit(out), M_0F3A, W_HIGH, 0b1111, l, PP_66);
+        return vex_emit(vex, 0x16, modRM(in, out), imm);
+    }
+
+    // VPEXTRQ r/m64, xmm1, imm8
+    private static MethodHandle MHm128_vpextrq(int imm) {
+        return MachineCodeSnippet.make(
+                "mm128_vpextrq", MT_LONG_L2, requires(AVX),
+                new Register[][]{cpuRegisters, xmmRegistersSSE},
+                //VEX.128.66.0F3A.W1 16 /r ib
+                (Register[] regs) ->
+                        vpextrq(regs[0],regs[1],L_128,imm)
+                );
+    }
+
+    // One pre-built VPEXTRQ snippet per qword lane (0..1); indexed with (i & 0b01).
+    @Stable public static final MethodHandle[] MHm128_vpextrq_handles = IntStream
+            .range(0, 2)
+            .mapToObj(PatchableVecUtils::MHm128_vpextrq)
+            .toArray(MethodHandle[]::new);
+
+    /**
+     * Pure-Java reference for VPEXTRQ: returns 64-bit lane {@code i & 1} of {@code val}.
+     * A Long2 holds two quadwords, so only the low bit of {@code i} is meaningful; this
+     * matches the mask used by {@link #vpextrq(int, Long2)}. The previous {@code 0b11}
+     * mask let lane indices 2 and 3 through.
+     */
+    public static long vpextrq_naive(int i, Long2 val) {
+        return getLong(val, i & 0b1);
+    }
+
+    public static long vpextrq(int i, Long2 val) {
+        try {
+            long res = (long) MHm128_vpextrq_handles[i & 0b01].invokeExact(val);
+            //long res = vpextrq_naive(i, val);
+            //assertEquals(Integer.toHexString(Float.floatToIntBits(res)),Integer.toHexString(Float.floatToIntBits(vextractps_naive(i,val))));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VEXTRACTF128 xmm/m128, ymm2, imm8
+    private static MethodHandle MHm256_vextractf128(int imm) {
+        return MachineCodeSnippet.make(
+                "mm256_vextractf128" + imm, MT_L4_L2_, requires(AVX),
+                new Register[][]{xmmRegistersSSE, xmmRegistersSSE},
+                //VEX.256.66.0F3A.W0 19 /r ib
+                (Register[] regs) ->
+                    vextractf128(regs[0],regs[1],imm)
+                );
+    }
+
+    // One pre-built VEXTRACTF128 snippet per 128-bit half (0..1).
+    @Stable public static final MethodHandle[] MHm256_vextractf128_handles = IntStream
+            .range(0, 2)
+            .mapToObj(PatchableVecUtils::MHm256_vextractf128)
+            .toArray(MethodHandle[]::new);
+
+    private static Long2 vextractf128_naive(int i, Long4 val) {
+        switch (i & 0b1) {
+            case 1:
+                return Long2.make(val.extract(2), val.extract(3));
+            default:
+                return Long2.make(val.extract(0), val.extract(1));
+        }
+    }
+
+    public static Long2 vextractf128(int i, Long4 val) {
+        try {
+            Long2 res = (Long2) MHm256_vextractf128_handles[i & 0b11].invokeExact(val);
+            assertEquals(res, vextractf128_naive(i, val));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vmovupsStore(Register base, Register index, Register vector, VEXLength l) {
+        int[] vex = vex_prefix_nonds(rBit(vector), xBit(index), bBit(base), M_0F, W_LOW, 0b1111, l, PP_NONE);
+        return vex_emit(vex, 0x11, modRM_SIB_NODISP(vector), sibByte(index, base, 0b00));
+
+    }
+    // VMOVUPS ymm2/m256, ymm1
+    // 256-bit unaligned store snippet. Four register classes are declared but the encoder
+    // only receives regs[1] (base), regs[2] (index) and regs[3] (vector); regs[0] is unused
+    // here -- NOTE(review): presumably a slot required by MachineCodeSnippet, confirm.
+    public static final MethodHandle MHm256_vmovups_store = MachineCodeSnippet.make(
+            "mm256_vmovups_store", MT_VOID_OBJ_LONG_L4, requires(AVX),
+            new Register[][]{cpuRegisters, cpuRegisters, cpuRegisters, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.256.0F.WIG 11 /r
+                vmovupsStore(regs[1],regs[2],regs[3],L_256)
+            );
+
+    /**
+     * Pure-Java reference for the 256-bit store: writes the eight float lanes of
+     * {@code vec} into {@code ary[offset .. offset + 7]}.
+     */
+    private static void vmovups_store_naive(float[] ary, int offset, Long4 vec) {
+        for (int lane = 0, slot = offset; lane < 8; lane++, slot++) {
+            ary[slot] = getFloat(vec, lane);
+        }
+    }
+
+    public static void vmovups(Object base, long offset, Long4 vec) {
+        try {
+            MHm256_vmovups_store.invokeExact(base, offset, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vmovupsLoad(Register vector, Register base, Register index, VEXLength l) {
+        int[] vex = vex_prefix_nonds(rBit(vector), xBit(index), bBit(base), M_0F, W_LOW, 0b1111, l, PP_NONE);
+        return vex_emit(vex, 0x10, modRM_SIB_NODISP(vector), sibByte(index, base, 0b00));
+    }
+    // VMOVUPS ymm1, ymm2/m256
+    // 256-bit unaligned load snippet: regs[0] is the destination vector register,
+    // regs[1] and regs[2] are passed to the encoder as base and index.
+    public static final MethodHandle MHm256_vmovups_load = MachineCodeSnippet.make(
+            "mm256_vmovups_load", MT_L4_OBJ_LONG, requires(AVX),
+            new Register[][]{xmmRegistersSSE, cpuRegisters, cpuRegisters},
+            (Register[] regs) ->
+                //VEX.256.0F.WIG 10 /r
+                vmovupsLoad(regs[0],regs[1],regs[2],L_256)
+            );
+
+    private static Long4 vmovups_load_naive(float[] ary, int offset) {
+        float[] res = new float[8];
+        System.arraycopy(ary, offset, res, 0, 8);
+        return Long4.make(pack(res[0], res[1]), pack(res[2], res[3]), pack(res[4], res[5]), pack(res[6], res[7]));
+    }
+
+    public static Long4 vmovups(Object base, long index) {
+        try {
+            return (Long4) MHm256_vmovups_load.invokeExact(base, index);
+            //assertEquals(res,vmovups_load_naive(addr,0));
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes a VMOVDQU store (opcode 0x7F, 0F map, F3 prefix) of {@code vector} to the
+     * SIB-addressed operand {@code [base + index * 2^scale + disp]}.
+     * The displacement form is chosen as follows:
+     * no displacement byte when {@code disp == 0}; an 8-bit displacement when
+     * {@code disp} fits a signed byte (-128..127), since the CPU sign-extends disp8; a
+     * 32-bit displacement otherwise. Previously negative displacements were silently
+     * dropped and 128..255 were emitted as disp8, which the CPU would read as negative.
+     * NOTE(review): the disp32 path relies on i2iBytes handling negative values -- confirm.
+     */
+    private static int[] vmovdquStore(Register base, Register index, Register vector, VEXLength l, int scale, int disp) {
+        int modRM;
+        int[] vex = vex_prefix_nonds(rBit(vector), xBit(index), bBit(base), M_0F, W_LOW, 0b1111, l, PP_F3);
+        if (disp == 0) {
+            modRM = modRM_SIB_NODISP(vector);
+            return vex_emit(vex, 0x7F, modRM, sibByte(index, base, scale));
+        } else if (disp >= Byte.MIN_VALUE && disp <= Byte.MAX_VALUE) {
+            modRM = modRM_SIB_DISP8(vector);
+            return vex_emit(vex, 0x7F, modRM, sibByte(index, base, scale), disp & 0xFF); //1-byte displacement
+        } else {
+            modRM = modRM_SIB_DISP32(vector);
+            int[] dbs = i2iBytes(disp);
+            return vex_emit(vex, 0x7F, modRM, sibByte(index, base, scale), dbs[0],dbs[1],dbs[2],dbs[3]); //4-byte displacement
+        }
+    }
+    //Constructing vmovdqu stores with or without a one-byte displacement
+    private static MethodHandle vmovdqu_store_make(VEXLength len, MethodType type, int disp, int scale) {
+        if (scale < 0 || scale > 3) throw new UnsupportedOperationException("SIB scale can only be 0-3 inclusive.");
+
+        String pref = len == L_128 ? "mm128" : "mm256";
+        String suff = "_scale_" + scale + "_disp_" + disp;
+        String name = pref + "_vmovdqu_store" + suff;
+
+        return MachineCodeSnippet.make(
+                name, type, requires(AVX),
+                new Register[][]{cpuRegisters, cpuRegisters, cpuRegisters, xmmRegistersSSE},
+                (Register[] regs) ->
+                    vmovdquStore(regs[1],regs[2],regs[3],len,scale,disp)
+                );
+    }
+
+    private static MethodHandle vmovdqu_store_make(VEXLength len, int disp, int scale) {
+        MethodType type = len == L_128 ? MT_VOID_OBJ_LONG_L2 : MT_VOID_OBJ_LONG_L4;
+        return vmovdqu_store_make(len, type, disp, scale);
+    }
+
+    // Method types for typed-array stores: (array, int index, vector) -> void.
+    // One pair (Long2 / Long4) per primitive element type.
+    private static final MethodType MT_VOID_INTARY_INT_L2 = MethodType.methodType(void.class, int[].class, int.class, Long2.class);
+    private static final MethodType MT_VOID_INTARY_INT_L4 = MethodType.methodType(void.class, int[].class, int.class, Long4.class);
+
+    private static final MethodType MT_VOID_FLOATARY_INT_L2 = MethodType.methodType(void.class, float[].class, int.class, Long2.class);
+    private static final MethodType MT_VOID_FLOATARY_INT_L4 = MethodType.methodType(void.class, float[].class, int.class, Long4.class);
+
+    private static final MethodType MT_VOID_DOUBLEARY_INT_L2 = MethodType.methodType(void.class, double[].class, int.class, Long2.class);
+    private static final MethodType MT_VOID_DOUBLEARY_INT_L4 = MethodType.methodType(void.class, double[].class, int.class, Long4.class);
+
+    private static final MethodType MT_VOID_LONGARY_INT_L2 = MethodType.methodType(void.class, long[].class, int.class, Long2.class);
+    private static final MethodType MT_VOID_LONGARY_INT_L4 = MethodType.methodType(void.class, long[].class, int.class, Long4.class);
+
+    // NOTE: the byte/short constants spell "ARRAY" in full, unlike the "ARY" names above.
+    private static final MethodType MT_VOID_BYTEARRAY_INT_L2 = MethodType.methodType(void.class, byte[].class, int.class, Long2.class);
+    private static final MethodType MT_VOID_BYTEARRAY_INT_L4 = MethodType.methodType(void.class, byte[].class, int.class, Long4.class);
+
+    private static final MethodType MT_VOID_SHORTARRAY_INT_L2 = MethodType.methodType(void.class, short[].class, int.class, Long2.class);
+    private static final MethodType MT_VOID_SHORTARRAY_INT_L4 = MethodType.methodType(void.class, short[].class, int.class, Long4.class);
+
+
+    /**
+     * Encodes a VMOVDQU load (opcode 0x6F, 0F map, F3 prefix) into {@code vector} from the
+     * SIB-addressed operand {@code [base + index * 2^scale + disp]}.
+     * The displacement form is chosen as follows:
+     * no displacement byte when {@code disp == 0}; an 8-bit displacement when
+     * {@code disp} fits a signed byte (-128..127), since the CPU sign-extends disp8; a
+     * 32-bit displacement otherwise. Previously negative displacements were silently
+     * dropped and 128..255 were emitted as disp8, which the CPU would read as negative.
+     * NOTE(review): the disp32 path relies on i2iBytes handling negative values -- confirm.
+     */
+    private static int[] vmovdquLoad(Register vector, Register base, Register index, VEXLength l, int scale, int disp){
+        int modRM;
+        int[] vex = vex_prefix_nonds(rBit(vector), xBit(index), bBit(base), M_0F, W_LOW, 0b1111, l, PP_F3);
+        if (disp == 0) {
+            modRM = modRM_SIB_NODISP(vector);
+            return vex_emit(vex, 0x6F, modRM, sibByte(index, base, scale));
+        } else if (disp >= Byte.MIN_VALUE && disp <= Byte.MAX_VALUE) {
+            modRM = modRM_SIB_DISP8(vector);
+            return vex_emit(vex, 0x6F, modRM, sibByte(index, base, scale), disp & 0xFF); //1-byte displacement
+        } else {
+            modRM = modRM_SIB_DISP32(vector);
+            int[] dbs = i2iBytes(disp);
+            return vex_emit(vex, 0x6F, modRM, sibByte(index, base, scale), dbs[0],dbs[1],dbs[2],dbs[3]); //4-byte displacement
+        }
+    }
+
+    private static MethodHandle vmovdqu_load_make(VEXLength len, MethodType type, int disp, int scale) {
+        if (scale < 0 || scale > 3) throw new UnsupportedOperationException("SIB scale can only be 0-3 inclusive.");
+
+        String pref = len == L_128 ? "mm128" : "mm256";
+        String suff = "_scale_" + scale + "_disp_" + disp;
+        String name = pref + "_vmovdqu_load" + suff;
+
+        return MachineCodeSnippet.make(
+                name, type, requires(AVX),
+                new Register[][]{xmmRegistersSSE, cpuRegisters, cpuRegisters},
+                (Register[] regs) ->
+                    vmovdquLoad(regs[0],regs[1],regs[2],len,scale,disp)
+                );
+    }
+
+    private static MethodHandle vmovdqu_load_make(VEXLength len, int disp, int scale) {
+        MethodType type = len == L_128 ? MT_L2_OBJ_LONG : MT_L4_OBJ_LONG;
+        return vmovdqu_load_make(len, type, disp, scale);
+    }
+
+    // Method types for typed-array loads: (array, int index) -> vector.
+    // One pair (Long2 / Long4) per primitive element type.
+    private static final MethodType MT_L2_INTARY_INT = MethodType.methodType(Long2.class, int[].class, int.class);
+    private static final MethodType MT_L4_INTARY_INT = MethodType.methodType(Long4.class, int[].class, int.class);
+
+    private static final MethodType MT_L2_FLOATARY_INT = MethodType.methodType(Long2.class, float[].class, int.class);
+    private static final MethodType MT_L4_FLOATARY_INT = MethodType.methodType(Long4.class, float[].class, int.class);
+
+    private static final MethodType MT_L2_DOUBLEARY_INT = MethodType.methodType(Long2.class, double[].class, int.class);
+    private static final MethodType MT_L4_DOUBLEARY_INT = MethodType.methodType(Long4.class, double[].class, int.class);
+
+    private static final MethodType MT_L2_LONGARY_INT = MethodType.methodType(Long2.class, long[].class, int.class);
+    private static final MethodType MT_L4_LONGARY_INT = MethodType.methodType(Long4.class, long[].class, int.class);
+
+    private static final MethodType MT_L2_BYTEARY_INT = MethodType.methodType(Long2.class, byte[].class, int.class);
+    private static final MethodType MT_L4_BYTEARY_INT = MethodType.methodType(Long4.class, byte[].class, int.class);
+
+    private static final MethodType MT_L2_SHORTARY_INT = MethodType.methodType(Long2.class, short[].class, int.class);
+    private static final MethodType MT_L4_SHORTARY_INT = MethodType.methodType(Long4.class, short[].class, int.class);
+    /* ========================================================================================*/
+    // VMOVDQU xmm2/m128, xmm1
+    // Generic (Object, long, vector) stores: no displacement, scale bits 0b00.
+    public static final MethodHandle MHm128_vmovdqu_store = vmovdqu_store_make(L_128, 0, 0b00);
+    public static final MethodHandle MHm256_vmovdqu_store = vmovdqu_store_make(L_256, 0, 0b00);
+
+    // Typed-array stores: the displacement is the array's Unsafe base offset and the SIB
+    // scale is derived from the array's Unsafe index scale via sibScale(...).
+    public static final MethodHandle MHm128_vmovdqu_store_intarray = vmovdqu_store_make(L_128, MT_VOID_INTARY_INT_L2, Unsafe.ARRAY_INT_BASE_OFFSET, sibScale(Unsafe.ARRAY_INT_INDEX_SCALE));
+    public static final MethodHandle MHm256_vmovdqu_store_intarray = vmovdqu_store_make(L_256, MT_VOID_INTARY_INT_L4, Unsafe.ARRAY_INT_BASE_OFFSET, sibScale(Unsafe.ARRAY_INT_INDEX_SCALE));
+
+    public static final MethodHandle MHm128_vmovdqu_store_floatarray = vmovdqu_store_make(L_128, MT_VOID_FLOATARY_INT_L2, Unsafe.ARRAY_FLOAT_BASE_OFFSET, sibScale(Unsafe.ARRAY_FLOAT_INDEX_SCALE));
+    public static final MethodHandle MHm256_vmovdqu_store_floatarray = vmovdqu_store_make(L_256, MT_VOID_FLOATARY_INT_L4, Unsafe.ARRAY_FLOAT_BASE_OFFSET, sibScale(Unsafe.ARRAY_FLOAT_INDEX_SCALE));
+
+    public static final MethodHandle MHm128_vmovdqu_store_doublearray = vmovdqu_store_make(L_128, MT_VOID_DOUBLEARY_INT_L2, Unsafe.ARRAY_DOUBLE_BASE_OFFSET, sibScale(Unsafe.ARRAY_DOUBLE_INDEX_SCALE));
+    public static final MethodHandle MHm256_vmovdqu_store_doublearray = vmovdqu_store_make(L_256, MT_VOID_DOUBLEARY_INT_L4, Unsafe.ARRAY_DOUBLE_BASE_OFFSET, sibScale(Unsafe.ARRAY_DOUBLE_INDEX_SCALE));
+
+    public static final MethodHandle MHm128_vmovdqu_store_longarray = vmovdqu_store_make(L_128, MT_VOID_LONGARY_INT_L2, Unsafe.ARRAY_LONG_BASE_OFFSET, sibScale(Unsafe.ARRAY_LONG_INDEX_SCALE));
+    public static final MethodHandle MHm256_vmovdqu_store_longarray = vmovdqu_store_make(L_256, MT_VOID_LONGARY_INT_L4, Unsafe.ARRAY_LONG_BASE_OFFSET, sibScale(Unsafe.ARRAY_LONG_INDEX_SCALE));
+
+    public static final MethodHandle MHm128_vmovdqu_store_bytearray = vmovdqu_store_make(L_128, MT_VOID_BYTEARRAY_INT_L2, Unsafe.ARRAY_BYTE_BASE_OFFSET, sibScale(Unsafe.ARRAY_BYTE_INDEX_SCALE));
+    public static final MethodHandle MHm256_vmovdqu_store_bytearray = vmovdqu_store_make(L_256, MT_VOID_BYTEARRAY_INT_L4, Unsafe.ARRAY_BYTE_BASE_OFFSET, sibScale(Unsafe.ARRAY_BYTE_INDEX_SCALE));
+
+    public static final MethodHandle MHm128_vmovdqu_store_shortarray = vmovdqu_store_make(L_128, MT_VOID_SHORTARRAY_INT_L2, Unsafe.ARRAY_SHORT_BASE_OFFSET, sibScale(Unsafe.ARRAY_SHORT_INDEX_SCALE));
+    public static final MethodHandle MHm256_vmovdqu_store_shortarray = vmovdqu_store_make(L_256, MT_VOID_SHORTARRAY_INT_L4, Unsafe.ARRAY_SHORT_BASE_OFFSET, sibScale(Unsafe.ARRAY_SHORT_INDEX_SCALE));
+
+
+    public static void vmovdqu(Object base, long offset, Long2 vec) {
+        try {
+            MHm128_vmovdqu_store.invokeExact(base, offset, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void vmovdqu(Object base, long offset, Long4 vec) {
+        try {
+            MHm256_vmovdqu_store.invokeExact(base, offset, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void long2ToIntArray(int[] base, int index, Long2 vec) {
+        try {
+            MHm128_vmovdqu_store_intarray.invokeExact(base, index, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void long4ToIntArray(int[] base, int index, Long4 vec) {
+        try {
+            MHm256_vmovdqu_store_intarray.invokeExact(base, index, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void long2ToFloatArray(float[] base, int index, Long2 vec) {
+        try {
+            MHm128_vmovdqu_store_floatarray.invokeExact(base, index, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void long4ToFloatArray(float[] base, int index, Long4 vec) {
+        try {
+            MHm256_vmovdqu_store_floatarray.invokeExact(base, index, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void long2ToDoubleArray(double[] base, int index, Long2 vec) {
+        try {
+            MHm128_vmovdqu_store_doublearray.invokeExact(base, index, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void long4ToDoubleArray(double[] base, int index, Long4 vec) {
+        try {
+            MHm256_vmovdqu_store_doublearray.invokeExact(base, index, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void long2ToLongArray(long[] base, int index, Long2 vec) {
+        try {
+            MHm128_vmovdqu_store_longarray.invokeExact(base, index, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void long4ToLongArray(long[] base, int index, Long4 vec) {
+        try {
+            MHm256_vmovdqu_store_longarray.invokeExact(base, index, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void long2ToByteArray(byte[] base, int index, Long2 vec) {
+        try {
+            MHm128_vmovdqu_store_bytearray.invokeExact(base, index, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void long4ToByteArray(byte[] base, int index, Long4 vec) {
+        try {
+            MHm256_vmovdqu_store_bytearray.invokeExact(base, index, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void long2ToShortArray(short[] base, int index, Long2 vec) {
+        try {
+            MHm128_vmovdqu_store_shortarray.invokeExact(base, index, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static void long4ToShortArray(short[] base, int index, Long4 vec) {
+        try {
+            MHm256_vmovdqu_store_shortarray.invokeExact(base, index, vec);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+
+    /* ========================================================================================*/
+    // VMOVDQU xmm1, xmm2/m128   (load form: register destination, memory source)
+    // Load handles. The first pair is built with displacement 0 and scale 0b00; each typed-array variant
+    // passes an explicit method type, the array's Unsafe base offset, and sibScale(...) of its index scale.
+    public static final MethodHandle MHm128_vmovdqu_load = vmovdqu_load_make(L_128, 0, 0b00);
+    public static final MethodHandle MHm256_vmovdqu_load = vmovdqu_load_make(L_256, 0, 0b00);
+
+    // int[] sources
+    public static final MethodHandle MHm128_vmovdqu_load_intarray = vmovdqu_load_make(L_128, MT_L2_INTARY_INT, Unsafe.ARRAY_INT_BASE_OFFSET, sibScale(Unsafe.ARRAY_INT_INDEX_SCALE));
+    public static final MethodHandle MHm256_vmovdqu_load_intarray = vmovdqu_load_make(L_256, MT_L4_INTARY_INT, Unsafe.ARRAY_INT_BASE_OFFSET, sibScale(Unsafe.ARRAY_INT_INDEX_SCALE));
+
+    // float[] sources
+    public static final MethodHandle MHm128_vmovdqu_load_floatarray = vmovdqu_load_make(L_128, MT_L2_FLOATARY_INT, Unsafe.ARRAY_FLOAT_BASE_OFFSET, sibScale(Unsafe.ARRAY_FLOAT_INDEX_SCALE));
+    public static final MethodHandle MHm256_vmovdqu_load_floatarray = vmovdqu_load_make(L_256, MT_L4_FLOATARY_INT, Unsafe.ARRAY_FLOAT_BASE_OFFSET, sibScale(Unsafe.ARRAY_FLOAT_INDEX_SCALE));
+
+    // double[] sources
+    public static final MethodHandle MHm128_vmovdqu_load_doublearray = vmovdqu_load_make(L_128, MT_L2_DOUBLEARY_INT, Unsafe.ARRAY_DOUBLE_BASE_OFFSET, sibScale(Unsafe.ARRAY_DOUBLE_INDEX_SCALE));
+    public static final MethodHandle MHm256_vmovdqu_load_doublearray = vmovdqu_load_make(L_256, MT_L4_DOUBLEARY_INT, Unsafe.ARRAY_DOUBLE_BASE_OFFSET, sibScale(Unsafe.ARRAY_DOUBLE_INDEX_SCALE));
+
+    // long[] sources
+    public static final MethodHandle MHm128_vmovdqu_load_longarray = vmovdqu_load_make(L_128, MT_L2_LONGARY_INT, Unsafe.ARRAY_LONG_BASE_OFFSET, sibScale(Unsafe.ARRAY_LONG_INDEX_SCALE));
+    public static final MethodHandle MHm256_vmovdqu_load_longarray = vmovdqu_load_make(L_256, MT_L4_LONGARY_INT, Unsafe.ARRAY_LONG_BASE_OFFSET, sibScale(Unsafe.ARRAY_LONG_INDEX_SCALE));
+
+    // byte[] sources
+    public static final MethodHandle MHm128_vmovdqu_load_bytearray = vmovdqu_load_make(L_128, MT_L2_BYTEARY_INT, Unsafe.ARRAY_BYTE_BASE_OFFSET, sibScale(Unsafe.ARRAY_BYTE_INDEX_SCALE));
+    public static final MethodHandle MHm256_vmovdqu_load_bytearray = vmovdqu_load_make(L_256, MT_L4_BYTEARY_INT, Unsafe.ARRAY_BYTE_BASE_OFFSET, sibScale(Unsafe.ARRAY_BYTE_INDEX_SCALE));
+
+    // short[] sources
+    public static final MethodHandle MHm128_vmovdqu_load_shortarray = vmovdqu_load_make(L_128, MT_L2_SHORTARY_INT, Unsafe.ARRAY_SHORT_BASE_OFFSET, sibScale(Unsafe.ARRAY_SHORT_INDEX_SCALE));
+    public static final MethodHandle MHm256_vmovdqu_load_shortarray = vmovdqu_load_make(L_256, MT_L4_SHORTARY_INT, Unsafe.ARRAY_SHORT_BASE_OFFSET, sibScale(Unsafe.ARRAY_SHORT_INDEX_SCALE));
+
+    public static Long2 vmovdqu(Object base, long index) {
+        try {
+            return (Long2) MHm128_vmovdqu_load.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long4 vmovdqu_256(Object base, long index) {
+        try {
+            return (Long4) MHm256_vmovdqu_load.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    // 256-bit load handle taking (long base, int index); built with displacement 0 and scale argument 0.
+    private static final MethodHandle MHm256_vmovdqu_load_offheap = vmovdqu_load_make(L_256,MethodType.methodType(Long4.class,long.class,int.class),0,0);
+
+    // 256-bit load handle taking (ByteBuffer base, int index); built with displacement 0 and scale argument 0.
+    // NOTE(review): the ByteBuffer reference itself is the base operand here — confirm the snippet addresses the
+    // buffer's contents rather than the ByteBuffer object's own memory.
+    private static final MethodHandle MHm256_vmovdqu_load_offheap_bb = vmovdqu_load_make(L_256,MethodType.methodType(Long4.class, ByteBuffer.class,int.class),0,0);
+
+    public static Long4 vmovdqu_256(long base, int index) {
+        try {
+            return (Long4) MHm256_vmovdqu_load_offheap.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long4 vmovdqu_256(ByteBuffer base, int index) {
+        try {
+            return (Long4) MHm256_vmovdqu_load_offheap_bb.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long4 long4FromIntArray(int[] base, int index) {
+        try {
+            return (Long4) MHm256_vmovdqu_load_intarray.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long2 long2FromIntArray(int[] base, int index) {
+        try {
+            return (Long2) MHm128_vmovdqu_load_intarray.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long4 long4FromFloatArray(float[] base, int index) {
+        try {
+            return (Long4) MHm256_vmovdqu_load_floatarray.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long2 long2FromFloatArray(float[] base, int index) {
+        try {
+            return (Long2) MHm128_vmovdqu_load_floatarray.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long4 long4FromDoubleArray(double[] base, int index) {
+        try {
+            return (Long4) MHm256_vmovdqu_load_doublearray.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long2 long2FromDoubleArray(double[] base, int index) {
+        try {
+            return (Long2) MHm128_vmovdqu_load_doublearray.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long4 long4FromLongArray(long[] base, int index) {
+        try {
+            return (Long4) MHm256_vmovdqu_load_longarray.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long2 long2FromLongArray(long[] base, int index) {
+        try {
+            return (Long2) MHm128_vmovdqu_load_longarray.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long4 long4FromByteArray(byte[] base, int index) {
+        try {
+            return (Long4) MHm256_vmovdqu_load_bytearray.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long2 long2FromByteArray(byte[] base, int index) {
+        try {
+            return (Long2) MHm128_vmovdqu_load_bytearray.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long4 long4FromShortArray(short[] base, int index) {
+        try {
+            return (Long4) MHm256_vmovdqu_load_shortarray.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    public static Long2 long2FromShortArray(short[] base, int index) {
+        try {
+            return (Long2) MHm128_vmovdqu_load_shortarray.invokeExact(base, index);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Emits the VGATHERDPS encoding (opcode 0x92, map 0F38, W low, 66 prefix) for the given registers.
+     * The VSIB scale is fixed at 0b10 and the trailing displacement is {@code Unsafe.ARRAY_FLOAT_BASE_OFFSET};
+     * {@code masks} is handed to {@code vex_prefix} and {@code indexes}/{@code base} to {@code vsibByte}.
+     */
+    private static int[] vgatherdps(Register out, Register base, Register indexes, Register masks, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), xBit(indexes), bBit(base), M_0F38, W_LOW, masks, l, PP_66),
+                0x92,
+                modRM_SIB_DISP8(out),
+                vsibByte(indexes, base, 0b10),
+                Unsafe.ARRAY_FLOAT_BASE_OFFSET);
+    }
+    // VGATHERDPS ymm1, vm32y, ymm2
+    // Signature: (float[] base, Long4 indexes, Long4 masks) -> Long4. Requires AVX2.
+    // NOTE(review): nothing in this definition keeps the allocator from choosing the same XMM register for
+    // regs[0], regs[2] and regs[3]; per the Intel SDM a gather with overlapping destination/index/mask
+    // registers raises #UD — confirm the allocator guarantees distinct registers.
+    private static final MethodType MT_L4_FLOATARY_L4_L4 = MethodType.methodType(Long4.class, float[].class, Long4.class, Long4.class);
+    public static final MethodHandle MHm256_vgatherdps = MachineCodeSnippet.make(
+            "mm256_vgatherdps", MT_L4_FLOATARY_L4_L4, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, cpuRegisters, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.DDS.256.66.0F38.W0 92 /r
+                vgatherdps(regs[0],regs[1],regs[2],regs[3],L_256)
+            );
+
+    // Stub: intended reference implementation for testing vgatherdps (see TODO); currently always returns null.
+    private static Long4 vgatherdps_naive(float[] base, Long4 indexes, Long4 masks) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long4 vgatherdps(float[] base, Long4 indexes, Long4 masks) {
+        try {
+            Long4 res = (Long4) MHm256_vgatherdps.invokeExact(base, indexes, masks);
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    /**
+     * Emits a gather encoding with opcode 0x93 (map 0F38, W high, 66 prefix) for the given registers,
+     * using {@code scale} in the VSIB byte and {@code offset} as the trailing displacement value.
+     * NOTE(review): the Intel SDM lists 0x93 with W1 as VGATHERQPD (qword indices); VGATHERDPD is 0x92 —
+     * confirm whether the name or the opcode reflects the intent.
+     */
+    private static int[] vgatherdpd(Register out, Register base, Register indexes, Register masks, VEXLength l, int scale, int offset) {
+        return vex_emit(
+                vex_prefix(rBit(out), xBit(indexes), bBit(base), M_0F38, W_HIGH, masks, l, PP_66),
+                0x93,
+                modRM_SIB_DISP8(out),
+                vsibByte(indexes, base, scale),
+                offset);
+    }
+    // VGATHERDPD ymm1, vm64y, ymm2
+    // NOTE(review): the encoding noted in the lambda below (VEX.DDS.256.66.0F38.W1 93 /r, vm64y) is what the
+    // Intel SDM lists as VGATHERQPD; VGATHERDPD is opcode 92 with vm32x — confirm the intended instruction/name.
+    // Signature: (double[] base, Long4 indexes, Long4 masks) -> Long4. Requires AVX2.
+    private static final MethodType MT_L4_DOUBLEARY_L4_L4 = MethodType.methodType(Long4.class, double[].class, Long4.class, Long4.class);
+    // Registers passed to MachineCodeSnippet.make as the kill list (five XMM registers plus rsi).
+    private static final Register[] gatherkilled = {xmm0,rsi,xmm1,xmm2,xmm3,xmm4};
+    public static final MethodHandle MHm256_vgatherdpd = MachineCodeSnippet.make(
+            "mm256_vgatherdpd", MT_L4_DOUBLEARY_L4_L4,
+            new MachineCodeSnippet.Effect[]{},
+            requires(AVX2),
+            new Register[][]{{xmm0}, {rsi}, {xmm1}, {xmm2}}, //pinned
+            // presumably one size entry per gatherkilled register — TODO confirm against MachineCodeSnippet.make
+            gatherkilled,new int[]{8,2,8,8,8,8},
+            (Register[] regs) -> {
+                Register out = regs[0];
+                Register base = regs[1];
+                Register indexes = regs[2];
+                Register masks = regs[3];
+                //VEX.DDS.256.66.0F38.W1 93 /r
+
+                // Distinct-register check: the set has three elements only if out, indexes and masks all differ.
+                HashSet<Register> allocRegs = new HashSet<>(Arrays.asList(out,indexes,masks));
+                if (Arrays.equals(new Object[]{out, base, indexes, masks}, new Object[]{xmm0, rsi, xmm1, xmm2})) {
+                    //If the pinned registers are as we expect
+                    return vgatherdpd(xmm0,rsi,xmm1,xmm2,L_256,0b11,Unsafe.ARRAY_DOUBLE_BASE_OFFSET);
+                } else if (allocRegs.size() == 3) {
+                    //If the pinned registers aren't what we expect (?!), but are all unique.
+                    return vgatherdpd(out,base,indexes,masks,L_256,0b11,Unsafe.ARRAY_DOUBLE_BASE_OFFSET);
+                } else {
+                    /*
+                    In the event that the allocator gives us some registers in a configuration that we don't assume
+                    by the pinned registers (see above), we use the list of killed registers (gatherkilled) and the
+                    incoming list of registers from the allocator (regs) to form two sets of registers.  There are five
+                    total named killed XMM registers.  The register mask specifies three incoming/outgoing registers.
+                    Sometimes we will see as few as two unique registers in this configuration.  Regardless, five killed
+                    registers less three allocated registers leaves at least two extra killed registers that we can
+                    treat as tmps without clobbering values.  Taking the difference of these two sets gives us an array
+                    of valid tmp registers.
+
+
+                    All of this bookkeeping is to prevent an allocation configuration where gather has the same
+                    target and src registers.  This causes a UD.
+                     */
+                    // NOTE(review): only out/indexes/masks are removed from the set, so xmm1 or xmm2 can remain as a
+                    // tmp, and HashSet iteration order is unspecified; a tmp that aliases xmm1/xmm2 can be overwritten
+                    // by the later moves. The gather below also writes xmm0 rather than `out`. Confirm this fallback
+                    // is correct if it can ever be reached with the pinned registers above.
+                    HashSet<Register> regset     = new HashSet<>();
+                    regset.addAll(Arrays.asList(xmm0,xmm1,xmm2,xmm3,xmm4));
+                    regset.removeAll(Arrays.asList(out,indexes,masks));
+
+                    Register[] tmps = new Register[regset.size()];
+                    tmps = regset.toArray(tmps);
+
+                    //If the registers aren't as we expect them, use tmp kills to reorg.
+                    return join(
+                            xmmMov(tmps[0],indexes,L_256),
+                            xmmMov(tmps[1],masks,L_256),
+                            xmmMov(xmm1,tmps[0],L_256),
+                            xmmMov(xmm2,tmps[1],L_256),
+                            vgatherdpd(xmm0,base,xmm1,xmm2,L_256,0b11,Unsafe.ARRAY_DOUBLE_BASE_OFFSET)
+                    );
+                }
+            });
+
+
+    // Stub: intended reference implementation for testing vgatherdpd (see TODO); currently always returns null.
+    private static Long4 vgatherdpd_naive(double[] base, Long4 indexes, Long4 masks){
+        //TODO: Write this test
+        return null;
+    }
+
+
+    public static Long4 vgatherdpd(double[] base, Long4 indexes, Long4 masks){
+        try {
+            return (Long4) MHm256_vgatherdpd.invokeExact(base,indexes,masks);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+
+
+    /* ========================================================================================*/
+    //VADDPS xmm1, xmm2, xmm3/m128
+    // 128-bit snippet handle (requires AVX); regs[0..2] are forwarded in order to the vaddps encoder,
+    // presumably as destination and two sources — the encoder is defined outside this section.
+    public static final MethodHandle MHm128_vaddps = MachineCodeSnippet.make(
+            "mm128_vaddps", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.0F.WIG 58 /r
+                vaddps(regs[0],regs[1],regs[2],L_128)
+            );
+
+    // Stub: intended reference implementation for testing vaddps (see TODO); currently always returns null.
+    private static Long2 vaddps_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long2 vaddps(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vaddps.invokeExact(a, b);
+            //assert assertEquals(res, VADDPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Emits the VSUBPS encoding: a VEX prefix (map 0F, W low, no SIMD prefix) built from {@code out},
+     * {@code in1}, {@code in2} and length {@code l}, then opcode 0x5C and a ModRM byte from {@code out}/{@code in2}.
+     */
+    private static int[] vsubps(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_NONE),
+                0x5C,
+                modRM(out, in2));
+    }
+    //VSUBPS xmm1, xmm2, xmm3/m128
+    // 128-bit snippet handle (requires AVX); regs[0..2] are forwarded to the vsubps encoder as out, in1, in2.
+    public static final MethodHandle MHm128_vsubps = MachineCodeSnippet.make(
+            "mm128_vsubps", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.0F.WIG 5C /r
+                vsubps(regs[0],regs[1],regs[2],L_128)
+            );
+
+    // Stub: intended reference implementation for testing the 128-bit vsubps (see TODO); currently always returns null.
+    private static Long2 vsubps_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long2 vsubps(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vsubps.invokeExact(a, b);
+            //assert assertEquals(res, VSUBPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    //VSUBPS ymm1, ymm2, ymm3/m256
+    // 256-bit snippet handle (requires AVX); regs[0..2] are forwarded to the vsubps encoder as out, in1, in2.
+    public static final MethodHandle MHm256_vsubps = MachineCodeSnippet.make(
+            "mm256_vsubps", MT_L4_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.0F.WIG 5C /r
+                vsubps(regs[0],regs[1],regs[2],L_256)
+            );
+
+    // Stub: intended reference implementation for testing the 256-bit vsubps (see TODO); currently always returns null.
+    private static Long4 vsubps_naive(Long4 a, Long4 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long4 vsubps(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vsubps.invokeExact(a, b);
+            //assert assertEquals(res, VSUBPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    /**
+     * Emits the VSUBPD encoding: a VEX prefix (map 0F, W low, 66 prefix) built from {@code out},
+     * {@code in1}, {@code in2} and length {@code l}, then opcode 0x5C and a ModRM byte from {@code out}/{@code in2}.
+     */
+    private static int[] vsubpd(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66),
+                0x5C,
+                modRM(out, in2));
+    }
+    //VSUBPD xmm1, xmm2, xmm3/m128
+    // 128-bit snippet handle (requires AVX); regs[0..2] are forwarded to the vsubpd encoder as out, in1, in2.
+    public static final MethodHandle MHm128_vsubpd = MachineCodeSnippet.make(
+            "mm128_vsubpd", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.66.0F.WIG 5C /r
+                vsubpd(regs[0],regs[1],regs[2],L_128)
+            );
+
+    // Stub: intended reference implementation for testing the 128-bit vsubpd (see TODO); currently always returns null.
+    private static Long2 vsubpd_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long2 vsubpd(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vsubpd.invokeExact(a, b);
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    //VSUBPD ymm1, ymm2, ymm3/m256
+    // 256-bit snippet handle (requires AVX); regs[0..2] are forwarded to the vsubpd encoder as out, in1, in2.
+    public static final MethodHandle MHm256_vsubpd = MachineCodeSnippet.make(
+            "mm256_vsubpd", MT_L4_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 5C /r
+                vsubpd(regs[0],regs[1],regs[2],L_256)
+            );
+
+    // Stub: intended reference implementation for testing the 256-bit vsubpd (see TODO); currently always returns null.
+    private static Long4 vsubpd_naive(Long4 a, Long4 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    @ForceInline public static Long4 vsubpd(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vsubpd.invokeExact(a, b);
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    //VMULPS xmm1, xmm2, xmm3/m128
+    // 128-bit snippet handle (requires AVX); regs[0..2] are forwarded in order to the vmulps encoder,
+    // presumably as destination and two sources — the encoder is defined outside this section.
+    public static final MethodHandle MHm128_vmulps = MachineCodeSnippet.make(
+            "mm128_vmulps", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.0F.WIG 59 /r
+                vmulps(regs[0],regs[1],regs[2],L_128)
+            );
+
+    // Stub: intended reference implementation for testing vmulps (see TODO); currently always returns null.
+    private static Long2 vmulps_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long2 vmulps(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vmulps.invokeExact(a, b);
+            //assert assertEquals(res, VMULPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+
+    /* ========================================================================================*/
+    /**
+     * Emits the VDIVPS encoding: a VEX prefix (map 0F, W low, no SIMD prefix) built from {@code out},
+     * {@code in1}, {@code in2} and length {@code l}, then opcode 0x5E and a ModRM byte from {@code out}/{@code in2}.
+     */
+    private static int[] vdivps(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_NONE),
+                0x5E,
+                modRM(out, in2));
+    }
+    //VDIVPS xmm1, xmm2, xmm3/m128
+    // 128-bit snippet handle (requires AVX); regs[0..2] are forwarded to the vdivps encoder as out, in1, in2.
+    public static final MethodHandle MHm128_vdivps = MachineCodeSnippet.make(
+            "mm128_vdivps", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.0F.WIG 5E /r
+                vdivps(regs[0],regs[1],regs[2],L_128)
+            );
+
+    // Stub: intended reference implementation for testing the 128-bit vdivps (see TODO); currently always returns null.
+    private static Long2 vdivps_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long2 vdivps(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vdivps.invokeExact(a, b);
+            //assert assertEquals(res, VDIVPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    //VDIVPS ymm1, ymm2, ymm3/m256
+    // 256-bit snippet handle (requires AVX); regs[0..2] are forwarded to the vdivps encoder as out, in1, in2.
+    public static final MethodHandle MHm256_vdivps = MachineCodeSnippet.make(
+            "mm256_vdivps", MT_L4_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.0F.WIG 5E /r
+                vdivps(regs[0],regs[1],regs[2],L_256)
+            );
+
+    // Stub: intended reference implementation for testing the 256-bit vdivps (see TODO); currently always returns null.
+    private static Long4 vdivps_naive(Long4 a, Long4 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long4 vdivps(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vdivps.invokeExact(a, b);
+            //assert assertEquals(res, VDIVPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Emits the VDIVPD encoding: a VEX prefix (map 0F, W low, 66 prefix) built from {@code out},
+     * {@code left}, {@code right} and length {@code l}, then opcode 0x5E and a ModRM byte from {@code out}/{@code right}.
+     */
+    private static int[] vdivpd(Register out, Register left, Register right, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(right), M_0F, W_LOW, left, l, PP_66),
+                0x5E,
+                modRM(out, right));
+    }
+    //VDIVPD ymm1, ymm2, ymm3/m256
+    // 256-bit snippet handle (requires AVX); regs[0..2] are forwarded to the vdivpd encoder as out, left, right.
+    public static final MethodHandle MHm256_vdivpd = MachineCodeSnippet.make(
+            "mm256_vdivpd", MT_L4_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 5E /r
+                vdivpd(regs[0],regs[1],regs[2],L_256)
+            );
+
+    // Stub: intended reference implementation for testing vdivpd (see TODO); currently always returns null.
+    private static Long4 vdivpd_naive(Long4 a, Long4 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long4 vdivpd(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vdivpd.invokeExact(a, b);
+            //assert assertEquals(res, VDIVPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Emits the VANDPS encoding: a VEX prefix (map 0F, W low, no SIMD prefix) built from {@code out},
+     * {@code in1}, {@code in2} and length {@code l}, then opcode 0x54 and a ModRM byte from {@code out}/{@code in2}.
+     */
+    private static int[] vandps(Register out, Register in1, Register in2, VEXLength l ) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_NONE),
+                0x54,
+                modRM(out, in2));
+    }
+    //VANDPS xmm1, xmm2, xmm3/m128
+    // 128-bit snippet handle (requires AVX); regs[0..2] are forwarded to the vandps encoder as out, in1, in2.
+    public static final MethodHandle MHm128_vandps = MachineCodeSnippet.make(
+            "mm128_vandps", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.0F.WIG 54 /r
+                vandps(regs[0],regs[1],regs[2],L_128)
+            );
+
+    // Stub: intended reference implementation for testing the 128-bit vandps (see TODO); currently always returns null.
+    private static Long2 vandps_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long2 vandps(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vandps.invokeExact(a, b);
+            //assert assertEquals(res, VANDPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    //VANDPS ymm1, ymm2, ymm3/m256
+    // 256-bit snippet handle (requires AVX); regs[0..2] are forwarded to the vandps encoder as out, in1, in2.
+    public static final MethodHandle MHm256_vandps = MachineCodeSnippet.make(
+            "mm256_vandps", MT_L4_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.0F.WIG 54 /r
+                vandps(regs[0],regs[1],regs[2],L_256)
+            );
+
+    // Stub: intended reference implementation for testing the 256-bit vandps (see TODO); currently always returns null.
+    private static Long4 vandps_naive(Long4 a, Long4 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long4 vandps(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vandps.invokeExact(a, b);
+            //assert assertEquals(res, VANDPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    /**
+     * Emits the VANDPD encoding: a VEX prefix (map 0F, W low, 66 prefix) built from {@code out},
+     * {@code in1}, {@code in2} and length {@code l}, then opcode 0x54 and a ModRM byte from {@code out}/{@code in2}.
+     */
+    private static int[] vandpd(Register out, Register in1, Register in2, VEXLength l) {
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66),
+                0x54,
+                modRM(out, in2));
+    }
+    //VANDPD xmm1, xmm2, xmm3/m128
+    // 128-bit snippet handle (requires AVX); regs[0..2] are forwarded to the vandpd encoder as out, in1, in2.
+    public static final MethodHandle MHm128_vandpd = MachineCodeSnippet.make(
+            "mm128_vandpd", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.66.0F.WIG 54 /r
+                vandpd(regs[0],regs[1],regs[2],L_128)
+            );
+
+    // Stub: intended reference implementation for testing the 128-bit vandpd (see TODO); currently always returns null.
+    private static Long2 vandpd_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long2 vandpd(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vandpd.invokeExact(a, b);
+            //assert assertEquals(res, VANDPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    //VANDPD ymm1, ymm2, ymm3/m256
+    // 256-bit snippet handle (requires AVX); regs[0..2] are forwarded to the vandpd encoder as out, in1, in2.
+    public static final MethodHandle MHm256_vandpd = MachineCodeSnippet.make(
+            "mm256_vandpd", MT_L4_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 54 /r
+                vandpd(regs[0],regs[1],regs[2],L_256)
+            );
+
+    // Stub: intended reference implementation for testing the 256-bit vandpd (see TODO); currently always returns null.
+    private static Long4 vandpd_naive(Long4 a, Long4 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long4 vandpd(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vandpd.invokeExact(a, b);
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    /**
+     * Emits the VORPS encoding: a VEX prefix (map 0F, W low, no SIMD prefix) built from {@code out},
+     * {@code in1}, {@code in2} and length {@code l}, then opcode 0x56 and a ModRM byte from {@code out}/{@code in2}.
+     */
+    private static int[] vorps(Register out, Register in1, Register in2, VEXLength l){
+        return vex_emit(
+                vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_NONE),
+                0x56,
+                modRM(out, in2));
+    }
+    // VORPS xmm1, xmm2, xmm3/m128  (VEX.NDS.128.0F.WIG 56 /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vorps = MachineCodeSnippet.make(
+            "mm128_vorps",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vorps(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vorps(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vorps_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long2 vorps(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vorps.invokeExact(a, b);
+            //assert assertEquals(res, VORPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+
+    /* ========================================================================================*/
+    private static int[] vpaddb(Register out, Register in1, Register in2, VEXLength l){
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66);
+        return vex_emit(vex, 0xFC, modRM(out, in2));
+    }
+
+    // VPADDB xmm1, xmm2, xmm3/m128  (VEX.NDS.128.66.0F.WIG FC /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vpaddb = MachineCodeSnippet.make(
+            "mm128_vpaddb",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vpaddb(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpaddb(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vpaddb_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long2 vpaddb(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpaddb.invokeExact(a, b);
+            //assert assertEquals(res, VADDPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    // VPADDB ymm1, ymm2, ymm3/m256  (VEX.NDS.256.66.0F.WIG FC /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm256_vpaddb = MachineCodeSnippet.make(
+            "mm256_vpaddb",
+            MT_L4_BINARY,
+            requires(AVX2),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vpaddb(r[0], r[1], r[2], L_256));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpaddb(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vpaddb_naive(Long4 a, Long4 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long4 vpaddb(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpaddb.invokeExact(a, b);
+            //assert assertEquals(res, VADDPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vpsubb(Register out, Register in1, Register in2, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66);
+        return vex_emit(vex, 0xF8, modRM(out, in2));
+    }
+
+    // VPSUBB xmm1, xmm2, xmm3/m128
+    // Snippet operands: regs[0] is the output register, regs[1] and regs[2] are the inputs.
+    public static final MethodHandle MHm128_vpsubb = MachineCodeSnippet.make(
+            "mm128_vpsubb", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.128.66.0F.WIG F8 /r
+                // The second source must be regs[2]; passing regs[0] again would encode the
+                // output register as the subtrahend and ignore the second argument.
+                vpsubb(regs[0],regs[1],regs[2],L_128)
+            );
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpsubb(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vpsubb_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long2 vpsubb(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpsubb.invokeExact(a, b);
+            //assert assertEquals(res, VSUBPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VPSUBB ymm1, ymm2, ymm3/m256
+    // Snippet operands: regs[0] is the output register, regs[1] and regs[2] are the inputs.
+    public static final MethodHandle MHm256_vpsubb = MachineCodeSnippet.make(
+            "mm256_vpsubb", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG F8 /r
+                // The second source must be regs[2]; passing regs[0] again would encode the
+                // output register as the subtrahend and ignore the second argument.
+                vpsubb(regs[0],regs[1],regs[2],L_256)
+            );
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpsubb(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vpsubb_naive(Long4 a, Long4 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long4 vpsubb(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpsubb.invokeExact(a, b);
+            //assert assertEquals(res, VSUBPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    private static int[] vcmpeqb(Register out, Register in1, Register in2, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out),X_LOW,bBit(in2),M_0F,W_LOW,in1,l,PP_66);
+        return vex_emit(vex, 0x74, modRM(out,in2));
+
+    }
+
+    // VPCMPEQB xmm1, xmm2, xmm3/m128  (VEX.NDS.128.66.0F.WIG 74 /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vpcmpeqb = MachineCodeSnippet.make(
+            "mm128_vpcmpeqb",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpeqb(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpcmpeqb(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vpcmpeqb_naive(Long2 a, Long2 b) {
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long2 vpcmpeqb(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpcmpeqb.invokeExact(a, b);
+            //assert assertEquals(res,vpcmpeqps_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    // VPCMPEQB ymm1, ymm2, ymm3/m256  (VEX.NDS.256.66.0F.WIG 74 /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm256_vpcmpeqb = MachineCodeSnippet.make(
+            "mm256_vpcmpeqb",
+            MT_L4_BINARY,
+            requires(AVX2),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpeqb(r[0], r[1], r[2], L_256));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpcmpeqb(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vpcmpeqb_naive(Long4 a, Long4 b) {
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long4 vpcmpeqb(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpcmpeqb.invokeExact(a, b);
+            //assert assertEquals(res,vpcmpeqps_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /**
+     * Assembles VPCMPGTB (VEX-encoded, 0F map, 66 prefix, opcode 0x64).
+     * {@code out} is the destination, {@code in1} goes into the VEX prefix and
+     * {@code in2} into ModRM alongside {@code out}; {@code l} selects the vector length.
+     */
+    private static int[] vcmpgtb(Register out, Register in1, Register in2, VEXLength l) {
+        // Honour the requested length: with L_128 hard-coded here, the 256-bit snippet
+        // (which passes L_256) was emitted with the 128-bit encoding.
+        int[] vex = vex_prefix(rBit(out),X_LOW,bBit(in2),M_0F,W_LOW,in1,l,PP_66);
+        return vex_emit(vex, 0x64, modRM(out,in2));
+    }
+
+    // VPCMPGTB xmm1, xmm2, xmm3/m128  (VEX.NDS.128.66.0F.WIG 64 /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vpcmpgtb = MachineCodeSnippet.make(
+            "mm128_vpcmpgtb",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpgtb(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpcmpgtb(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vpcmpgtb_naive(Long2 a, Long2 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long2 vpcmpgtb(Long2 a, Long2 b){
+        try {
+            Long2 res = (Long2) MHm128_vpcmpgtb.invokeExact(a,b);
+            //assert assertEquals(res,vcmpgtps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+
+    // VPCMPGTB ymm1, ymm2, ymm3/m256
+    // Snippet operands: regs[0] is the output register, regs[1] and regs[2] are the inputs.
+    // The 256-bit integer form is an AVX2 instruction, so the capability check is AVX2,
+    // matching the other 256-bit integer snippets here (e.g. MHm256_vpcmpeqb).
+    public static final MethodHandle MHm256_vpcmpgtb = MachineCodeSnippet.make(
+            "mm256_vpcmpgtb",MT_L4_BINARY,requires(AVX2),
+            new Register[][]{xmmRegistersSSE,xmmRegistersSSE,xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 64 /r
+                vcmpgtb(regs[0],regs[1],regs[2],L_256)
+            );
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpcmpgtb(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vpcmpgtb_naive(Long4 a, Long4 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long4 vpcmpgtb(Long4 a, Long4 b){
+        try {
+            Long4 res = (Long4) MHm256_vpcmpgtb.invokeExact(a,b);
+            //assert assertEquals(res,vcmpgtps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+
+    private static int[] vpaddw(Register out, Register in1, Register in2, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66);
+        return vex_emit(vex, 0xFD, modRM(out, in2));
+    }
+
+    // VPADDW xmm1, xmm2, xmm3/m128  (VEX.NDS.128.66.0F.WIG FD /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vpaddw = MachineCodeSnippet.make(
+            "mm128_vpaddw",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vpaddw(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpaddw(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vpaddw_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long2 vpaddw(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpaddw.invokeExact(a, b);
+            //assert assertEquals(res, VADDPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    // VPADDW ymm1, ymm2, ymm3/m256  (VEX.NDS.256.66.0F.WIG FD /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm256_vpaddw = MachineCodeSnippet.make(
+            "mm256_vpaddw",
+            MT_L4_BINARY,
+            requires(AVX2),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vpaddw(r[0], r[1], r[2], L_256));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpaddw(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vpaddw_naive(Long4 a, Long4 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long4 vpaddw(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpaddw.invokeExact(a, b);
+            //assert assertEquals(res, VADDPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vpsubw(Register out, Register in1, Register in2, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_66);
+        return vex_emit(vex, 0xF9, modRM(out, in2));
+    }
+    // VPSUBW xmm1, xmm2, xmm3/m128  (VEX.NDS.128.66.0F.WIG F9 /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vpsubw = MachineCodeSnippet.make(
+            "mm128_vpsubw",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vpsubw(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpsubw(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vpsubw_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long2 vpsubw(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpsubw.invokeExact(a, b);
+            //assert assertEquals(res, VSUBPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VPSUBW ymm1, ymm2, ymm3/m256  (VEX.NDS.256.66.0F.WIG F9 /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm256_vpsubw = MachineCodeSnippet.make(
+            "mm256_vpsubw",
+            MT_L4_BINARY,
+            requires(AVX2),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vpsubw(r[0], r[1], r[2], L_256));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpsubw(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vpsubw_naive(Long4 a, Long4 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    public static Long4 vpsubw(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpsubw.invokeExact(a, b);
+            //assert assertEquals(res, VSUBPS_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    private static int[] vpcmpeqw(Register out, Register in1, Register in2, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out),X_LOW,bBit(in2),M_0F,W_LOW,in1,l,PP_66);
+        return vex_emit(vex, 0x75, modRM(out,in2));
+    }
+
+    // VPCMPEQW xmm1, xmm2, xmm3/m128  (VEX.NDS.128.66.0F.WIG 75 /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vpcmpeqw = MachineCodeSnippet.make(
+            "mm128_vpcmpeqw",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vpcmpeqw(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpcmpeqw(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vpcmpeqw_naive(Long2 a, Long2 b) {
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long2 vpcmpeqw(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vpcmpeqw.invokeExact(a, b);
+            //assert assertEquals(res,vpcmpeqps_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    // VPCMPEQW ymm1, ymm2, ymm3/m256  (VEX.NDS.256.66.0F.WIG 75 /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm256_vpcmpeqw = MachineCodeSnippet.make(
+            "mm256_vpcmpeqw",
+            MT_L4_BINARY,
+            requires(AVX2),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vpcmpeqw(r[0], r[1], r[2], L_256));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpcmpeqw(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vpcmpeqw_naive(Long4 a, Long4 b) {
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long4 vpcmpeqw(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vpcmpeqw.invokeExact(a, b);
+            //assert assertEquals(res,vpcmpeqps_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    private static int[] vpcmpgtw(Register out, Register in1, Register in2, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out),X_LOW,bBit(in2),M_0F,W_LOW,in1,l,PP_66);
+        return vex_emit(vex, 0x65, modRM(out,in2));
+    }
+
+    // VPCMPGTW xmm1, xmm2, xmm3/m128  (VEX.NDS.128.66.0F.WIG 65 /r)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vpcmpgtw = MachineCodeSnippet.make(
+            "mm128_vpcmpgtw",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vpcmpgtw(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpcmpgtw(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vpcmpgtw_naive(Long2 a, Long2 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long2 vpcmpgtw(Long2 a, Long2 b){
+        try {
+            Long2 res = (Long2) MHm128_vpcmpgtw.invokeExact(a,b);
+            //assert assertEquals(res,vcmpgtps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+
+    // VPCMPGTW ymm1, ymm2, ymm3/m256
+    // Snippet operands: regs[0] is the output register, regs[1] and regs[2] are the inputs.
+    // The 256-bit integer form is an AVX2 instruction, so the capability check is AVX2,
+    // matching the other 256-bit integer snippets here (e.g. MHm256_vpcmpeqw).
+    public static final MethodHandle MHm256_vpcmpgtw = MachineCodeSnippet.make(
+            "mm256_vpcmpgtw",MT_L4_BINARY,requires(AVX2),
+            new Register[][]{xmmRegistersSSE,xmmRegistersSSE,xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.NDS.256.66.0F.WIG 65 /r
+                vpcmpgtw(regs[0],regs[1],regs[2],L_256)
+            );
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vpcmpgtw(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vpcmpgtw_naive(Long4 a, Long4 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long4 vpcmpgtw(Long4 a, Long4 b){
+        try {
+            Long4 res = (Long4) MHm256_vpcmpgtw.invokeExact(a,b);
+            //assert assertEquals(res,vcmpgtps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vcmpeqps(Register out, Register in1, Register in2, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_NONE);
+        return vex_emit(vex, 0xC2, modRM(out, in2), 0x0); //0x0 is the imm that encodes compare equal
+    }
+    // VCMPPS xmm1, xmm2, xmm3/m128, 0x0 -- VCMPEQPS  (VEX.NDS.128.0F.WIG C2 /r ib)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vcmpeqps = MachineCodeSnippet.make(
+            "mm128_vcmpeqps",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpeqps(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vcmpeqps(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vcmpeqps_naive(Long2 a, Long2 b) {
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long2 vcmpeqps(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vcmpeqps.invokeExact(a, b);
+            //assert assertEquals(res,vcmpeqps_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VCMPPS ymm1, ymm2, ymm3/m256, 0x0 -- VCMPEQPS  (VEX.NDS.256.0F.WIG C2 /r ib)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm256_vcmpeqps = MachineCodeSnippet.make(
+            "mm256_vcmpeqps",
+            MT_L4_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpeqps(r[0], r[1], r[2], L_256));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vcmpeqps(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vcmpeqps_naive(Long4 a, Long4 b) {
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long4 vcmpeqps(Long4 a, Long4 b) {
+        try {
+            Long4 res = (Long4) MHm256_vcmpeqps.invokeExact(a, b);
+            //assert assertEquals(res,vcmpeqps_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    private static int[] vcmpltps(Register out, Register in1, Register in2, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_NONE);
+        return vex_emit(vex, 0xC2, modRM(out, in2), 0x1);
+    }
+    // VCMPPS xmm1, xmm2, xmm3/m128, 0x1 -- VCMPLTPS  (VEX.NDS.128.0F.WIG C2 /r ib)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vcmpltps = MachineCodeSnippet.make(
+            "mm128_vcmpltps",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpltps(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vcmpltps(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vcmpltps_naive(Long2 a, Long2 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long2 vcmpltps(Long2 a, Long2 b){
+        try {
+            Long2 res = (Long2) MHm128_vcmpltps.invokeExact(a,b);
+            //assert assertEquals(res,vcmpltps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    // VCMPPS ymm1, ymm2, ymm3/m256, 0x1 -- VCMPLTPS  (VEX.NDS.256.0F.WIG C2 /r ib)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm256_vcmpltps = MachineCodeSnippet.make(
+            "mm256_vcmpltps",
+            MT_L4_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpltps(r[0], r[1], r[2], L_256));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vcmpltps(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vcmpltps_naive(Long4 a, Long4 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long4 vcmpltps(Long4 a, Long4 b){
+        try {
+            Long4 res = (Long4) MHm256_vcmpltps.invokeExact(a,b);
+            //assert assertEquals(res,vcmpeqps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    private static int[] vcmpgtps(Register out, Register in1, Register in2, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out), X_LOW, bBit(in2), M_0F, W_LOW, in1, l, PP_NONE);
+        return vex_emit(vex, 0xC2, modRM(out, in2), 0xE);
+    }
+    // VCMPPS xmm1, xmm2, xmm3/m128, 0xE -- VCMPGTPS  (VEX.NDS.128.0F.WIG C2 /r ib)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vcmpgtps = MachineCodeSnippet.make(
+            "mm128_vcmpgtps",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpgtps(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vcmpgtps(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vcmpgtps_naive(Long2 a, Long2 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long2 vcmpgtps(Long2 a, Long2 b){
+        try {
+            Long2 res = (Long2) MHm128_vcmpgtps.invokeExact(a,b);
+            //assert assertEquals(res,vcmpgtps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    // VCMPPS ymm1, ymm2, ymm3/m256, 0xE -- VCMPGTPS  (VEX.NDS.256.0F.WIG C2 /r ib)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm256_vcmpgtps = MachineCodeSnippet.make(
+            "mm256_vcmpgtps",
+            MT_L4_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpgtps(r[0], r[1], r[2], L_256));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vcmpgtps(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vcmpgtps_naive(Long4 a, Long4 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long4 vcmpgtps(Long4 a, Long4 b){
+        try {
+            Long4 res = (Long4) MHm256_vcmpgtps.invokeExact(a,b);
+            //assert assertEquals(res,vcmpgtps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    private static int[] vcmpeqpd(Register out, Register in1, Register in2, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out),X_LOW,bBit(in2),M_0F,W_LOW,in1,l,PP_66);
+        return vex_emit(vex, 0xC2, modRM(out,in2), 0x0); //0x0 is the imm that encodes compare equal
+    }
+    // VCMPPD xmm1, xmm2, xmm3/m128, 0x0 -- VCMPEQPD  (VEX.NDS.128.66.0F.WIG C2 /r ib)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vcmpeqpd = MachineCodeSnippet.make(
+            "mm128_vcmpeqpd",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpeqpd(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vcmpeqpd(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vcmpeqpd_naive(Long2 a, Long2 b) {
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long2 vcmpeqpd(Long2 a, Long2 b) {
+        try {
+            Long2 res = (Long2) MHm128_vcmpeqpd.invokeExact(a, b);
+            //assert assertEquals(res,vcmpeqps_naive(a, b));
+            return res;
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VCMPPD ymm1, ymm2, ymm3/m256, 0x0 -- VCMPEQPD  (VEX.NDS.256.66.0F.WIG C2 /r ib)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm256_vcmpeqpd = MachineCodeSnippet.make(
+            "mm256_vcmpeqpd",
+            MT_L4_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpeqpd(r[0], r[1], r[2], L_256));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vcmpeqpd(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vcmpeqpd_naive(Long4 a, Long4 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long4 vcmpeqpd(Long4 a, Long4 b){
+        try {
+            Long4 res = (Long4) MHm256_vcmpeqpd.invokeExact(a,b);
+            //assert assertEquals(res,vcmpeqps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    private static int[] vcmpltpd(Register out, Register in1, Register in2, VEXLength l) {
+        int[] vex = vex_prefix(rBit(out),X_LOW,bBit(in2),M_0F,W_LOW,in1,l,PP_66);
+        return vex_emit(vex, 0xC2, modRM(out,in2), 0x1); //0x0 is the imm that encodes compare equal
+    }
+    // VCMPPD xmm1, xmm2, xmm3/m128, 0x1 -- VCMPLTPD  (VEX.NDS.128.66.0F.WIG C2 /r ib)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vcmpltpd = MachineCodeSnippet.make(
+            "mm128_vcmpltpd",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpltpd(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vcmpltpd(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vcmpltpd_naive(Long2 a, Long2 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long2 vcmpltpd(Long2 a, Long2 b){
+        try {
+            Long2 res = (Long2) MHm128_vcmpltpd.invokeExact(a,b);
+            //assert assertEquals(res,vcmpltps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    // VCMPPD ymm1, ymm2, ymm3/m256, 0x1 -- VCMPLTPD  (VEX.NDS.256.66.0F.WIG C2 /r ib)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm256_vcmpltpd = MachineCodeSnippet.make(
+            "mm256_vcmpltpd",
+            MT_L4_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpltpd(r[0], r[1], r[2], L_256));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vcmpltpd(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vcmpltpd_naive(Long4 a, Long4 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long4 vcmpltpd(Long4 a, Long4 b){
+        try {
+            Long4 res = (Long4) MHm256_vcmpltpd.invokeExact(a,b);
+            //assert assertEquals(res,vcmpltps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    /**
+     * Assembles VCMPPD with immediate 0xE, i.e. VCMPGTPD (VEX-encoded, 0F map,
+     * 66 prefix, opcode 0xC2). {@code out} is the destination, {@code in1} goes
+     * into the VEX prefix and {@code in2} into ModRM; {@code l} selects the vector length.
+     */
+    private static int[] vcmpgtpd(Register out, Register in1, Register in2, VEXLength l) {
+        // Honour the requested length: with L_128 hard-coded here, the 256-bit snippet
+        // (which passes L_256) was emitted with the 128-bit encoding.
+        int[] vex = vex_prefix(rBit(out),X_LOW,bBit(in2),M_0F,W_LOW,in1,l,PP_66);
+        return vex_emit(vex, 0xC2, modRM(out,in2), 0xE); //0xE is the imm that encodes compare greaterThan
+    }
+    // VCMPPD xmm1, xmm2, xmm3/m128, 0xE -- VCMPGTPD  (VEX.NDS.128.66.0F.WIG C2 /r ib)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm128_vcmpgtpd = MachineCodeSnippet.make(
+            "mm128_vcmpgtpd",
+            MT_L2_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpgtpd(r[0], r[1], r[2], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vcmpgtpd(Long2, Long2) -- TODO confirm when the test is written.
+    private static Long2 vcmpgtpd_naive(Long2 a, Long2 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long2 vcmpgtpd(Long2 a, Long2 b){
+        try {
+            Long2 res = (Long2) MHm128_vcmpgtpd.invokeExact(a,b);
+            //assert assertEquals(res,vcmpgtps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    // VCMPPD ymm1, ymm2, ymm3/m256, 0xE -- VCMPGTPD  (VEX.NDS.256.66.0F.WIG C2 /r ib)
+    // Snippet operands: r[0] is the output register, r[1] and r[2] are the inputs.
+    public static final MethodHandle MHm256_vcmpgtpd = MachineCodeSnippet.make(
+            "mm256_vcmpgtpd",
+            MT_L4_BINARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vcmpgtpd(r[0], r[1], r[2], L_256));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vcmpgtpd(Long4, Long4) -- TODO confirm when the test is written.
+    private static Long4 vcmpgtpd_naive(Long4 a, Long4 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    @ForceInline
+    public static Long4 vcmpgtpd(Long4 a, Long4 b){
+        try {
+            Long4 res = (Long4) MHm256_vcmpgtpd.invokeExact(a,b);
+            //assert assertEquals(res,vcmpgtps_naive(a, b));
+            return res;
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    private static int[] vsqrtps(Register out, Register in, VEXLength l) {
+        int[] vex = vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F, W_LOW, 0b1111, l, PP_NONE);
+        return vex_emit(vex, 0x51, modRM(out,in));
+    }
+    // VSQRTPS ymm1, ymm2  (VEX.256.0F.WIG 51 /r)
+    // Snippet operands: r[0] is the output register, r[1] is the input.
+    public static final MethodHandle MHm256_vsqrtps = MachineCodeSnippet.make(
+            "m256_vsqrtps",
+            MT_L4_UNARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vsqrtps(r[0], r[1], L_256));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vsqrtps(Long4) -- TODO confirm when the test is written.
+    private static Long4 vsqrtps_naive(Long4 a){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long4 vsqrtps(Long4 a){
+        try {
+            return (Long4) MHm256_vsqrtps.invokeExact(a);
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    // VSQRTPS xmm1, xmm2  (VEX.128.0F.WIG 51 /r)
+    // Snippet operands: r[0] is the output register, r[1] is the input.
+    public static final MethodHandle MHm128_vsqrtps = MachineCodeSnippet.make(
+            "m128_vsqrtps",
+            MT_L2_UNARY,
+            requires(AVX),
+            new Register[][] {xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] r) -> vsqrtps(r[0], r[1], L_128));
+
+    // Unimplemented stub: always returns null. Presumably meant to become the plain-Java
+    // reference for vsqrtps(Long2) -- TODO confirm when the test is written.
+    private static Long2 vsqrtps_naive(Long2 a){
+        //TODO: Write this test
+        return null;
+    }
+
+    public static Long2 vsqrtps(Long2 a){
+        try {
+            return (Long2) MHm128_vsqrtps.invokeExact(a);
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+    }
+    /* ========================================================================================*/
+    /**
+     * Encodes VSQRTPD: a non-NDS VEX prefix (map 0F, SIMD prefix 66, vvvv = 1111)
+     * followed by opcode 0x51 and the ModRM byte built from {@code out} and {@code in}.
+     *
+     * @param out destination register
+     * @param in  source register
+     * @param l   VEX vector length
+     * @return whatever vex_emit produces for that prefix, opcode and ModRM byte
+     */
+    private static int[] vsqrtpd(Register out, Register in, VEXLength l) {
+        return vex_emit(
+                vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F, W_LOW, 0b1111, l, PP_66),
+                0x51,
+                modRM(out, in));
+    }
+    // VSQRTPD ymm1, ymm2  (VEX.256.66.0F.WIG 51 /r)
+    public static final MethodHandle MHm256_vsqrtpd = MachineCodeSnippet.make(
+            "m256_vsqrtpd", MT_L4_UNARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0];
+                Register src = regs[1];
+                return vsqrtpd(dst, src, L_256);
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vsqrtpd(Long4) against.
+    // Not implemented yet: it ignores its argument and returns null.
+    private static Long4 vsqrtpd_naive(Long4 a){
+        //TODO: Write this test
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm256_vsqrtpd} snippet on {@code a}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long4 vsqrtpd(Long4 a){
+        try {
+            Long4 result = (Long4) MHm256_vsqrtpd.invokeExact(a);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+    /* ========================================================================================*/
+    // VSQRTPD xmm1, xmm2  (VEX.128.66.0F.WIG 51 /r)
+    public static final MethodHandle MHm128_vsqrtpd = MachineCodeSnippet.make(
+            "m128_vsqrtpd", MT_L2_UNARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0];
+                Register src = regs[1];
+                return vsqrtpd(dst, src, L_128);
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vsqrtpd(Long2) against.
+    // Not implemented yet: it ignores its argument and returns null.
+    private static Long2 vsqrtpd_naive(Long2 a){
+        //TODO: Write this test
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm128_vsqrtpd} snippet on {@code a}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 vsqrtpd(Long2 a){
+        try {
+            Long2 result = (Long2) MHm128_vsqrtpd.invokeExact(a);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+    /* ========================================================================================*/
+    /**
+     * Encodes VCVTPS2DQ: a non-NDS VEX prefix (map 0F, SIMD prefix 66, vvvv = 1111)
+     * followed by opcode 0x5B and the ModRM byte built from {@code out} and {@code in}.
+     *
+     * @param out destination register
+     * @param in  source register
+     * @param l   VEX vector length
+     * @return whatever vex_emit produces for that prefix, opcode and ModRM byte
+     */
+    private static int[] vcvtps2dq(Register out, Register in, VEXLength l) {
+        return vex_emit(
+                vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F, W_LOW, 0b1111, l, PP_66),
+                0x5B,
+                modRM(out, in));
+    }
+    // VCVTPS2DQ xmm1, xmm2  (VEX.128.66.0F.WIG 5B /r)
+    public static final MethodHandle MHm128_vcvtps2dq = MachineCodeSnippet.make(
+            "m128_vcvtps2dq", MT_L2_UNARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0];
+                Register src = regs[1];
+                return vcvtps2dq(dst, src, L_128);
+            });
+    // Placeholder, presumably for a plain-Java reference to check vcvtps2dq(Long2) against.
+    // Not implemented yet: it ignores its argument and returns null.
+    private static Long2 vcvtps2dq_naive(Long2 a){
+        //TODO: Write this test
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm128_vcvtps2dq} snippet on {@code a}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 vcvtps2dq(Long2 a){
+        try {
+            Long2 result = (Long2) MHm128_vcvtps2dq.invokeExact(a);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+    /* ========================================================================================*/
+    // VCVTPS2DQ ymm1, ymm2  (VEX.256.66.0F.WIG 5B /r)
+    public static final MethodHandle MHm256_vcvtps2dq = MachineCodeSnippet.make(
+            "m256_vcvtps2dq", MT_L4_UNARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0];
+                Register src = regs[1];
+                return vcvtps2dq(dst, src, L_256);
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vcvtps2dq(Long4) against.
+    // Not implemented yet: it ignores its argument and returns null.
+    private static Long4 vcvtps2dq_naive(Long4 a){
+        //TODO: Write this test
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm256_vcvtps2dq} snippet on {@code a}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    @ForceInline public static Long4 vcvtps2dq(Long4 a){
+        try {
+            Long4 result = (Long4) MHm256_vcvtps2dq.invokeExact(a);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+    /* ========================================================================================*/
+    /**
+     * Encodes VCVTDQ2PS: a non-NDS VEX prefix (map 0F, no SIMD prefix, vvvv = 1111)
+     * followed by opcode 0x5B and the ModRM byte built from {@code out} and {@code in}.
+     *
+     * @param out destination register
+     * @param in  source register
+     * @param l   VEX vector length
+     * @return whatever vex_emit produces for that prefix, opcode and ModRM byte
+     */
+    private static int[] vcvtdq2ps(Register out, Register in, VEXLength l) {
+        return vex_emit(
+                vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F, W_LOW, 0b1111, l, PP_NONE),
+                0x5B,
+                modRM(out, in));
+    }
+    // VCVTDQ2PS ymm1, ymm2  (VEX.256.0F.WIG 5B /r)
+    public static final MethodHandle MHm256_vcvtdq2ps = MachineCodeSnippet.make(
+            "m256_vcvtdq2ps", MT_L4_UNARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0];
+                Register src = regs[1];
+                return vcvtdq2ps(dst, src, L_256);
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vcvtdq2ps(Long4) against.
+    // Not implemented yet: it ignores its argument and returns null.
+    private static Long4 vcvtdq2ps_naive(Long4 a) {
+        //TODO: Write this test
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm256_vcvtdq2ps} snippet on {@code a}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    @ForceInline public static Long4 vcvtdq2ps(Long4 a){
+        try {
+            Long4 result = (Long4) MHm256_vcvtdq2ps.invokeExact(a);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+    /* ========================================================================================*/
+    // VCVTDQ2PS xmm1, xmm2  (VEX.128.0F.WIG 5B /r)
+    public static final MethodHandle MHm128_vcvtdq2ps = MachineCodeSnippet.make(
+            "m128_vcvtdq2ps", MT_L2_UNARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0];
+                Register src = regs[1];
+                return vcvtdq2ps(dst, src, L_128);
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vcvtdq2ps(Long2) against.
+    // Not implemented yet: it ignores its argument and returns null.
+    private static Long2 vcvtdq2ps_naive(Long2 a) {
+        //TODO: Write this test
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm128_vcvtdq2ps} snippet on {@code a}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    @ForceInline public static Long2 vcvtdq2ps(Long2 a){
+        try {
+            Long2 result = (Long2) MHm128_vcvtdq2ps.invokeExact(a);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes the GPR-to-XMM VMOVD: a non-NDS VEX prefix (map 0F, SIMD prefix 66, W0,
+     * vvvv = 1111) followed by opcode 0x6E and the byte from {@code modRM_regInd(out, in)}.
+     *
+     * @param out destination register
+     * @param in  source register
+     * @param l   VEX vector length
+     * @return whatever vex_emit produces for that prefix, opcode and ModRM byte
+     */
+    private static int[] vmovdgpr2xmm(Register out, Register in, VEXLength l) {
+        return vex_emit(
+                vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F, W_LOW, 0b1111, l, PP_66),
+                0x6E,
+                modRM_regInd(out, in));
+    }
+    // VMOVD xmm1, r32/m32  (VEX.128.66.0F.W0 6E /r)
+    public static final MethodHandle MHm32_vmovd_gpr2xmm = MachineCodeSnippet.make(
+            "mm32_vmovd", MT_INT_L2, requires(AVX),
+            new Register[][]{xmmRegistersSSE, cpuRegisters},
+            (Register[] regs) -> {
+                Register vec = regs[0];
+                Register gpr = regs[1];
+                return vmovdgpr2xmm(vec, gpr, L_128);
+            });
+
+    /**
+     * Runs the {@link #MHm32_vmovd_gpr2xmm} snippet on {@code i}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 vmovd_gpr2xmm(int i) {
+        try {
+            // TODO: compare against a naive reference once one exists.
+            return (Long2) MHm32_vmovd_gpr2xmm.invokeExact(i);
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    /* ========================================================================================*/
+    /**
+     * Encodes the XMM-to-GPR VMOVD: a non-NDS VEX prefix (map 0F, SIMD prefix 66, W0,
+     * vvvv = 1111) followed by opcode 0x7E and the byte from {@code modRM_regInd(out, in)}.
+     *
+     * NOTE(review): for opcode 7E the Intel encoding carries the xmm source in ModRM.reg
+     * and the r/m32 destination in ModRM.rm, yet {@code out} (the GPR) is passed first
+     * here, as in the 6E form. Confirm the argument order modRM_regInd expects.
+     *
+     * @param out destination register
+     * @param in  source register
+     * @param l   VEX vector length
+     * @return whatever vex_emit produces for that prefix, opcode and ModRM byte
+     */
+    private static int[] vmovdxmm2gpr(Register out, Register in, VEXLength l) {
+        return vex_emit(
+                vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F, W_LOW, 0b1111, l, PP_66),
+                0x7E,
+                modRM_regInd(out, in));
+    }
+    // VMOVD r32/m32, xmm1  (VEX.128.66.0F.W0 7E /r)
+    // The snippet name used to be "vmovups", copied from an unrelated instruction; it now
+    // names the instruction that is actually emitted.
+    // NOTE(review): MT_L2_INT is used here for a (Long2)int call site, while the int
+    // broadcast snippets use the same constant for (int)Long2 call sites, and the
+    // GPR-to-XMM VMOVD uses MT_INT_L2. Confirm the constant definitions; one side
+    // cannot match its invokeExact call.
+    public static final MethodHandle MHm32_vmovd_xmm2gpr = MachineCodeSnippet.make(
+            "mm32_vmovd_xmm2gpr", MT_L2_INT, requires(AVX),
+            new Register[][]{cpuRegisters, xmmRegistersSSE},
+            (Register[] regs) ->
+                //VEX.128.66.0F.W0 7E /r
+                vmovdxmm2gpr(regs[0],regs[1],L_128)
+            );
+
+    /**
+     * Runs the {@link #MHm32_vmovd_xmm2gpr} snippet on {@code a}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static int vmovd_xmm2gpr(Long2 a) {
+        try {
+            // TODO: compare against a naive reference once one exists.
+            return (int) MHm32_vmovd_xmm2gpr.invokeExact(a);
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+
+
+    //TODO: START AGAIN HERE
+    /* ========================================================================================*/
+    // VPADDQ xmm1, xmm2, xmm3/m128  (VEX.NDS.128.66.0F.WIG D4 /r)
+    public static final MethodHandle MHm128_vpaddq = MachineCodeSnippet.make(
+            "mm128_vpaddq", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0], lhs = regs[1], rhs = regs[2];
+                // lhs is passed to vex_prefix as the NDS operand; rhs goes into the ModRM byte.
+                int[] prefix = vex_prefix(rBit(dst), X_LOW, bBit(rhs), M_0F, W_LOW, lhs, L_128, PP_66);
+                return vex_emit(prefix, 0xD4, modRM(dst, rhs));
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vpaddq(Long2, Long2) against.
+    // Not implemented yet: it ignores its arguments and returns null.
+    private static Long2 vpaddq_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm128_vpaddq} snippet on {@code a} and {@code b}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 vpaddq(Long2 a, Long2 b) {
+        try {
+            // TODO: compare against vpaddq_naive once it is implemented.
+            return (Long2) MHm128_vpaddq.invokeExact(a, b);
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // VPADDQ ymm1, ymm2, ymm3/m256  (VEX.NDS.256.66.0F.WIG D4 /r)
+    public static final MethodHandle MHm256_vpaddq = MachineCodeSnippet.make(
+            "mm256_vpaddq", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0], lhs = regs[1], rhs = regs[2];
+                // lhs is passed to vex_prefix as the NDS operand; rhs goes into the ModRM byte.
+                int[] prefix = vex_prefix(rBit(dst), X_LOW, bBit(rhs), M_0F, W_LOW, lhs, L_256, PP_66);
+                return vex_emit(prefix, 0xD4, modRM(dst, rhs));
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vpaddq(Long4, Long4) against.
+    // Not implemented yet: it ignores its arguments and returns null.
+    private static Long4 vpaddq_naive(Long4 a, Long4 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm256_vpaddq} snippet on {@code a} and {@code b}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long4 vpaddq(Long4 a, Long4 b) {
+        try {
+            // TODO: compare against vpaddq_naive once it is implemented.
+            return (Long4) MHm256_vpaddq.invokeExact(a, b);
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // VPSUBQ xmm1, xmm2, xmm3/m128  (VEX.NDS.128.66.0F.WIG FB /r)
+    public static final MethodHandle MHm128_vpsubq = MachineCodeSnippet.make(
+            "mm128_vpsubq", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0], lhs = regs[1], rhs = regs[2];
+                // lhs is passed to vex_prefix as the NDS operand; rhs goes into the ModRM byte.
+                int[] prefix = vex_prefix(rBit(dst), X_LOW, bBit(rhs), M_0F, W_LOW, lhs, L_128, PP_66);
+                return vex_emit(prefix, 0xFB, modRM(dst, rhs));
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vpsubq(Long2, Long2) against.
+    // Not implemented yet: it ignores its arguments and returns null.
+    private static Long2 vpsubq_naive(Long2 a, Long2 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm128_vpsubq} snippet on {@code a} and {@code b}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 vpsubq(Long2 a, Long2 b) {
+        try {
+            // TODO: compare against vpsubq_naive once it is implemented.
+            return (Long2) MHm128_vpsubq.invokeExact(a, b);
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VPSUBQ ymm1, ymm2, ymm3/m256  (VEX.NDS.256.66.0F.WIG FB /r)
+    public static final MethodHandle MHm256_vpsubq = MachineCodeSnippet.make(
+            "mm256_vpsubq", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0], lhs = regs[1], rhs = regs[2];
+                // lhs is passed to vex_prefix as the NDS operand; rhs goes into the ModRM byte.
+                int[] prefix = vex_prefix(rBit(dst), X_LOW, bBit(rhs), M_0F, W_LOW, lhs, L_256, PP_66);
+                return vex_emit(prefix, 0xFB, modRM(dst, rhs));
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vpsubq(Long4, Long4) against.
+    // Not implemented yet: it ignores its arguments and returns null.
+    private static Long4 vpsubq_naive(Long4 a, Long4 b) {
+        //TODO: Write this test.
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm256_vpsubq} snippet on {@code a} and {@code b}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long4 vpsubq(Long4 a, Long4 b) {
+        try {
+            // TODO: compare against vpsubq_naive once it is implemented.
+            return (Long4) MHm256_vpsubq.invokeExact(a, b);
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // VPCMPEQQ xmm1, xmm2, xmm3/m128  (VEX.NDS.128.66.0F38.WIG 29 /r)
+    public static final MethodHandle MHm128_vpcmpeqq = MachineCodeSnippet.make(
+            "mm128_vpcmpeqq", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0];
+                Register lhs = regs[1];
+                Register rhs = regs[2];
+                // lhs is passed to vex_prefix as the NDS operand; rhs goes into the ModRM byte.
+                int[] prefix = vex_prefix(rBit(dst), X_LOW, bBit(rhs), M_0F38, W_LOW, lhs, L_128, PP_66);
+                return vex_emit(prefix, 0x29, modRM(dst, rhs));
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vpcmpeqq(Long2, Long2) against.
+    // Not implemented yet: it ignores its arguments and returns null.
+    private static Long2 vpcmpeqq_naive(Long2 a, Long2 b) {
+        //TODO: Write this test
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm128_vpcmpeqq} snippet on {@code a} and {@code b}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 vpcmpeqq(Long2 a, Long2 b) {
+        try {
+            // TODO: compare against vpcmpeqq_naive once it is implemented.
+            return (Long2) MHm128_vpcmpeqq.invokeExact(a, b);
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // VPCMPEQQ ymm1, ymm2, ymm3/m256  (VEX.NDS.256.66.0F38.WIG 29 /r)
+    public static final MethodHandle MHm256_vpcmpeqq = MachineCodeSnippet.make(
+            "mm256_vpcmpeqq", MT_L4_BINARY, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0];
+                Register lhs = regs[1];
+                Register rhs = regs[2];
+                // lhs is passed to vex_prefix as the NDS operand; rhs goes into the ModRM byte.
+                int[] prefix = vex_prefix(rBit(dst), X_LOW, bBit(rhs), M_0F38, W_LOW, lhs, L_256, PP_66);
+                return vex_emit(prefix, 0x29, modRM(dst, rhs));
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vpcmpeqq(Long4, Long4) against.
+    // Not implemented yet: it ignores its arguments and returns null.
+    private static Long4 vpcmpeqq_naive(Long4 a, Long4 b) {
+        //TODO: Write this test
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm256_vpcmpeqq} snippet on {@code a} and {@code b}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long4 vpcmpeqq(Long4 a, Long4 b) {
+        try {
+            // TODO: compare against vpcmpeqq_naive once it is implemented.
+            return (Long4) MHm256_vpcmpeqq.invokeExact(a, b);
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // VPCMPGTQ xmm1, xmm2, xmm3/m128  (VEX.NDS.128.66.0F38.WIG 37 /r)
+    public static final MethodHandle MHm128_vpcmpgtq = MachineCodeSnippet.make(
+            "mm128_vpcmpgtq", MT_L2_BINARY, requires(AVX),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0];
+                Register lhs = regs[1];
+                Register rhs = regs[2];
+                // lhs is passed to vex_prefix as the NDS operand; rhs goes into the ModRM byte.
+                int[] prefix = vex_prefix(rBit(dst), X_LOW, bBit(rhs), M_0F38, W_LOW, lhs, L_128, PP_66);
+                return vex_emit(prefix, 0x37, modRM(dst, rhs));
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vpcmpgtq(Long2, Long2) against.
+    // Not implemented yet: it ignores its arguments and returns null.
+    private static Long2 vpcmpgtq_naive(Long2 a, Long2 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm128_vpcmpgtq} snippet on {@code a} and {@code b}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 vpcmpgtq(Long2 a, Long2 b){
+        try {
+            // TODO: compare against vpcmpgtq_naive once it is implemented.
+            return (Long2) MHm128_vpcmpgtq.invokeExact(a, b);
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // VPCMPGTQ ymm1, ymm2, ymm3/m256  (VEX.NDS.256.66.0F38.WIG 37 /r)
+    // The 256-bit integer form is an AVX2 instruction, so the snippet requires AVX2
+    // like the other 256-bit integer snippets in this file (it previously declared AVX).
+    public static final MethodHandle MHm256_vpcmpgtq = MachineCodeSnippet.make(
+            "mm256_vpcmpgtq",MT_L4_BINARY,requires(AVX2),
+            new Register[][]{xmmRegistersSSE,xmmRegistersSSE,xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register out = regs[0],
+                        in1 = regs[1],
+                        in2 = regs[2];
+                //VEX.NDS.256.66.0F38.WIG 37 /r
+                int[] vex = vex_prefix(rBit(out),X_LOW,bBit(in2),M_0F38,W_LOW,in1,L_256,PP_66);
+                return vex_emit(vex, 0x37, modRM(out,in2));
+            });
+
+    // Placeholder, presumably for a plain-Java reference to check vpcmpgtq(Long4, Long4) against.
+    // Not implemented yet: it ignores its arguments and returns null.
+    private static Long4 vpcmpgtq_naive(Long4 a, Long4 b){
+        //TODO: Write this test
+        return null;
+    }
+
+    /**
+     * Runs the {@link #MHm256_vpcmpgtq} snippet on {@code a} and {@code b}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long4 vpcmpgtq(Long4 a, Long4 b){
+        try {
+            // TODO: compare against vpcmpgtq_naive once it is implemented.
+            return (Long4) MHm256_vpcmpgtq.invokeExact(a, b);
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    /* ========================================================================================*/
+    // VBROADCASTSS ymm1, xmm2  (VEX.256.66.0F38.W0 18 /r)
+    public static final MethodHandle MHm256_vbroadcastss = MachineCodeSnippet.make(
+            "vbroadcastss256", MT_L4_FLOAT, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0];
+                Register src = regs[1];
+                return vex_emit(
+                        vex_prefix_nonds(rBit(dst), X_LOW, bBit(src), M_0F38, W_LOW, 0b1111, L_256, PP_66),
+                        0x18,
+                        modRM(dst, src));
+            });
+
+    /**
+     * Runs the {@link #MHm256_vbroadcastss} snippet on {@code f}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    @ForceInline public static Long4 broadcastFloatL4(float f){
+        try {
+            Long4 result = (Long4) MHm256_vbroadcastss.invokeExact(f);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // VBROADCASTSS xmm1, xmm2  (VEX.128.66.0F38.W0 18 /r)
+    // The vector length is L_128 to match the documented encoding and the Long2 result;
+    // this snippet previously emitted the 256-bit (ymm) form.
+    public static final MethodHandle MHm128_vbroadcastss = MachineCodeSnippet.make(
+            "vbroadcastss128", MT_L2_FLOAT, requires(AVX2),
+            new Register[][]{xmmRegistersSSE,xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register out = regs[0];
+                Register in  = regs[1];
+                //VEX.128.66.0F38.W0 18 /r
+                int[] vex = vex_prefix_nonds(rBit(out),X_LOW, bBit(in),M_0F38,W_LOW,0b1111,L_128,PP_66);
+                return vex_emit(vex,0x18,modRM(out,in));
+            });
+
+    /**
+     * Runs the {@link #MHm128_vbroadcastss} snippet on {@code f}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 broadcastFloatL2(float f){
+        try {
+            Long2 result = (Long2) MHm128_vbroadcastss.invokeExact(f);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // VBROADCASTSD ymm1, xmm2  (VEX.256.66.0F38.W0 19 /r)
+    public static final MethodHandle MHm256_vbroadcastsd = MachineCodeSnippet.make(
+            "vbroadcastsd256", MT_L4_DOUBLE, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register dst = regs[0];
+                Register src = regs[1];
+                return vex_emit(
+                        vex_prefix_nonds(rBit(dst), X_LOW, bBit(src), M_0F38, W_LOW, 0b1111, L_256, PP_66),
+                        0x19,
+                        modRM(dst, src));
+            });
+
+    /**
+     * Runs the {@link #MHm256_vbroadcastsd} snippet on {@code d}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long4 broadcastDoubleL4(double d){
+        try {
+            Long4 result = (Long4) MHm256_vbroadcastsd.invokeExact(d);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // Broadcast of a double to both 64-bit lanes of an xmm register.
+    // VBROADCASTSD has no 128-bit form, so this emits VMOVDDUP xmm1, xmm2, which
+    // duplicates the low quadword of the source into both quadwords of the destination.
+    // Fixes relative to the previous version:
+    //  - the handle type is (double)Long2, matching broadcastDoubleL2's invokeExact call
+    //    (it was MT_L2_FLOAT, which the float broadcast uses for (float)Long2);
+    //  - the opcode was 0x18 (VBROADCASTSS) with L_256, which replicates 32 bits only.
+    // VMOVDDUP itself needs only AVX; the AVX2 requirement is kept unchanged.
+    public static final MethodHandle MHm128_vbroadcastsd = MachineCodeSnippet.make(
+            "vbroadcastsd128",
+            java.lang.invoke.MethodType.methodType(Long2.class, double.class),
+            requires(AVX2),
+            new Register[][]{xmmRegistersSSE,xmmRegistersSSE},
+            (Register[] regs) -> {
+                Register out = regs[0];
+                Register in  = regs[1];
+                //VEX.128.F2.0F.WIG 12 /r
+                int[] vex = vex_prefix_nonds(rBit(out),X_LOW, bBit(in),M_0F,W_LOW,0b1111,L_128,PP_F2);
+                return vex_emit(vex,0x12,modRM(out,in));
+            });
+
+    /**
+     * Runs the {@link #MHm128_vbroadcastsd} snippet on {@code d}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 broadcastDoubleL2(double d){
+        try {
+            Long2 result = (Long2) MHm128_vbroadcastsd.invokeExact(d);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+
+    // int broadcast into an xmm register: the code from vpinsrd(vec, vec, gpr, 0)
+    // joined with the code from vpbroadcastD(vec, vec, L_128).
+    public static final MethodHandle MHm128_intBroadcast = MachineCodeSnippet.make(
+            "intbroadcast128", MT_L2_INT, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, cpuRegisters},
+            (Register[] regs) -> {
+                Register vec = regs[0];
+                Register gpr = regs[1];
+                return join(vpinsrd(vec, vec, gpr, 0x0), vpbroadcastD(vec, vec, L_128));
+            });
+
+    /**
+     * Runs the {@link #MHm128_intBroadcast} snippet on {@code i}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 broadcastIntL2(int i){
+        try {
+            Long2 result = (Long2) MHm128_intBroadcast.invokeExact(i);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // int broadcast into a ymm register: the code from vpinsrd(vec, vec, gpr, 0)
+    // joined with the code from vpbroadcastD(vec, vec, L_256).
+    public static final MethodHandle MHm256_intBroadcast = MachineCodeSnippet.make(
+            "intbroadcast256", MT_L4_INT, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, cpuRegisters},
+            (Register[] regs) -> {
+                Register vec = regs[0];
+                Register gpr = regs[1];
+                return join(vpinsrd(vec, vec, gpr, 0x0), vpbroadcastD(vec, vec, L_256));
+            });
+
+    /**
+     * Runs the {@link #MHm256_intBroadcast} snippet on {@code i}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long4 broadcastIntL4(int i){
+        try {
+            Long4 result = (Long4) MHm256_intBroadcast.invokeExact(i);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // byte broadcast into an xmm register: the code from vpinsrb(vec, vec, gpr, 0)
+    // joined with the code from vpbroadcastB(vec, vec, L_128).
+    public static final MethodHandle MHm128_byteBroadcast = MachineCodeSnippet.make(
+            "bytebroadcast128", MT_L2_BYTE, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, cpuRegisters},
+            (Register[] regs) -> {
+                Register vec = regs[0];
+                Register gpr = regs[1];
+                return join(vpinsrb(vec, vec, gpr, 0x0), vpbroadcastB(vec, vec, L_128));
+            });
+
+    /**
+     * Runs the {@link #MHm128_byteBroadcast} snippet on {@code i}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 broadcastByteL2(byte i){
+        try {
+            Long2 result = (Long2) MHm128_byteBroadcast.invokeExact(i);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // byte broadcast into a ymm register: the code from vpinsrb(vec, vec, gpr, 0)
+    // joined with the code from vpbroadcastB(vec, vec, L_256).
+    public static final MethodHandle MHm256_byteBroadcast = MachineCodeSnippet.make(
+            "bytebroadcast256", MT_L4_BYTE, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, cpuRegisters},
+            (Register[] regs) -> {
+                Register vec = regs[0];
+                Register gpr = regs[1];
+                return join(vpinsrb(vec, vec, gpr, 0x0), vpbroadcastB(vec, vec, L_256));
+            });
+
+    /**
+     * Runs the {@link #MHm256_byteBroadcast} snippet on {@code i}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long4 broadcastByteL4(byte i){
+        try {
+            Long4 result = (Long4) MHm256_byteBroadcast.invokeExact(i);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // short broadcast into an xmm register: the code from vpinsrw(vec, vec, gpr, 0)
+    // joined with the code from vpbroadcastW(vec, vec, L_128).
+    public static final MethodHandle MHm128_shortBroadcast = MachineCodeSnippet.make(
+            "shortbroadcast128", MT_L2_SHORT, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, cpuRegisters},
+            (Register[] regs) -> {
+                Register vec = regs[0];
+                Register gpr = regs[1];
+                return join(vpinsrw(vec, vec, gpr, 0x0), vpbroadcastW(vec, vec, L_128));
+            });
+
+    /**
+     * Runs the {@link #MHm128_shortBroadcast} snippet on {@code i}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 broadcastShortL2(short i){
+        try {
+            Long2 result = (Long2) MHm128_shortBroadcast.invokeExact(i);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // short broadcast into a ymm register: the code from vpinsrw(vec, vec, gpr, 0)
+    // joined with the code from vpbroadcastW(vec, vec, L_256).
+    public static final MethodHandle MHm256_shortBroadcast = MachineCodeSnippet.make(
+            "shortbroadcast256", MT_L4_SHORT, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, cpuRegisters},
+            (Register[] regs) -> {
+                Register vec = regs[0];
+                Register gpr = regs[1];
+                return join(vpinsrw(vec, vec, gpr, 0x0), vpbroadcastW(vec, vec, L_256));
+            });
+
+    /**
+     * Runs the {@link #MHm256_shortBroadcast} snippet on {@code i}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long4 broadcastShortL4(short i){
+        try {
+            Long4 result = (Long4) MHm256_shortBroadcast.invokeExact(i);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // long broadcast into an xmm register: the code from vpinsrq(vec, vec, gpr, 0)
+    // joined with the code from vpbroadcastQ(vec, vec, L_128).
+    public static final MethodHandle MHm128_longBroadcast = MachineCodeSnippet.make(
+            "longbroadcast128", MT_L2_LONG, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, cpuRegisters},
+            (Register[] regs) -> {
+                Register vec = regs[0];
+                Register gpr = regs[1];
+                return join(vpinsrq(vec, vec, gpr, 0x0), vpbroadcastQ(vec, vec, L_128));
+            });
+
+    /**
+     * Runs the {@link #MHm128_longBroadcast} snippet on {@code i}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long2 broadcastLongL2(long i){
+        try {
+            Long2 result = (Long2) MHm128_longBroadcast.invokeExact(i);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // long broadcast into a ymm register: the code from vpinsrq(vec, vec, gpr, 0)
+    // joined with the code from vpbroadcastQ(vec, vec, L_256).
+    public static final MethodHandle MHm256_longBroadcast = MachineCodeSnippet.make(
+            "longbroadcast256", MT_L4_LONG, requires(AVX2),
+            new Register[][]{xmmRegistersSSE, cpuRegisters},
+            (Register[] regs) -> {
+                Register vec = regs[0];
+                Register gpr = regs[1];
+                return join(vpinsrq(vec, vec, gpr, 0x0), vpbroadcastQ(vec, vec, L_256));
+            });
+
+    /**
+     * Runs the {@link #MHm256_longBroadcast} snippet on {@code i}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static Long4 broadcastLongL4(long i){
+        try {
+            Long4 result = (Long4) MHm256_longBroadcast.invokeExact(i);
+            return result;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    /**
+     * Encodes VPBROADCASTD: a non-NDS VEX prefix (map 0F38, SIMD prefix 66, W0,
+     * vvvv = 1111) followed by opcode 0x58 and the ModRM byte from {@code out} and {@code in}.
+     */
+    private static int[] vpbroadcastD(Register out, Register in, VEXLength l) {
+        return vex_emit(
+                vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F38, W_LOW, 0b1111, l, PP_66),
+                0x58,
+                modRM(out, in));
+    }
+
+    /**
+     * Encodes VPBROADCASTB: a non-NDS VEX prefix (map 0F38, SIMD prefix 66, W0,
+     * vvvv = 1111) followed by opcode 0x78 and the ModRM byte from {@code out} and {@code in}.
+     */
+    private static int[] vpbroadcastB(Register out, Register in, VEXLength l) {
+        return vex_emit(
+                vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F38, W_LOW, 0b1111, l, PP_66),
+                0x78,
+                modRM(out, in));
+    }
+
+    /**
+     * Encodes VPBROADCASTW: a non-NDS VEX prefix (map 0F38, SIMD prefix 66, W0,
+     * vvvv = 1111) followed by opcode 0x79 and the ModRM byte from {@code out} and {@code in}.
+     */
+    private static int[] vpbroadcastW(Register out, Register in, VEXLength l) {
+        return vex_emit(
+                vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F38, W_LOW, 0b1111, l, PP_66),
+                0x79,
+                modRM(out, in));
+    }
+
+    /**
+     * Encodes VPBROADCASTQ: a non-NDS VEX prefix (map 0F38, SIMD prefix 66, W0,
+     * vvvv = 1111) followed by opcode 0x59 and the ModRM byte from {@code out} and {@code in}.
+     */
+    private static int[] vpbroadcastQ(Register out, Register in, VEXLength l) {
+        return vex_emit(
+                vex_prefix_nonds(rBit(out), X_LOW, bBit(in), M_0F38, W_LOW, 0b1111, l, PP_66),
+                0x59,
+                modRM(out, in));
+    }
+
+    /* ========================================================================================*/
+    // PDEP r32a, r32b, r32(mask)  (VEX.NDS.LZ.F2.0F38.W0 F5 /r)
+    public static final MethodHandle MHm32_pdep = MachineCodeSnippet.make(
+            "pdep32", MT_INT_BINARY, requires(BMI2),
+            new Register[][]{cpuRegisters, cpuRegisters, cpuRegisters},
+            (Register[] regs) -> {
+                Register dst = regs[0], val = regs[1], mask = regs[2];
+                // L_128 supplies the zero length bit that the LZ encoding calls for.
+                int[] prefix = vex_prefix(rBit(dst), X_LOW, bBit(mask), M_0F38, W_LOW, val, L_128, PP_F2);
+                return vex_emit(prefix, 0xF5, modRM(dst, mask));
+            });
+
+    // PDEP r64a, r64b, r64(mask)  (VEX.NDS.LZ.F2.0F38.W1 F5 /r)
+    public static final MethodHandle MHm64_pdep = MachineCodeSnippet.make(
+            "pdep64", MT_LONG_BINARY, requires(BMI2),
+            new Register[][]{cpuRegisters, cpuRegisters, cpuRegisters},
+            (Register[] regs) -> {
+                Register dst = regs[0], val = regs[1], mask = regs[2];
+                // L_128 supplies the zero length bit that the LZ encoding calls for.
+                int[] prefix = vex_prefix(rBit(dst), X_LOW, bBit(mask), M_0F38, W_HIGH, val, L_128, PP_F2);
+                return vex_emit(prefix, 0xF5, modRM(dst, mask));
+            });
+
+    // PEXT r32a, r32b, r32(mask)  (VEX.NDS.LZ.F3.0F38.W0 F5 /r)
+    public static final MethodHandle MHm32_pext = MachineCodeSnippet.make(
+            "pext32", MT_INT_BINARY, requires(BMI2),
+            new Register[][]{cpuRegisters, cpuRegisters, cpuRegisters},
+            (Register[] regs) -> {
+                Register dst = regs[0], val = regs[1], mask = regs[2];
+                // L_128 supplies the zero length bit that the LZ encoding calls for.
+                int[] prefix = vex_prefix(rBit(dst), X_LOW, bBit(mask), M_0F38, W_LOW, val, L_128, PP_F3);
+                return vex_emit(prefix, 0xF5, modRM(dst, mask));
+            });
+
+    /**
+     * Runs the {@link #MHm32_pext} snippet on {@code val} and {@code mask}.
+     *
+     * @throws Error wrapping any Throwable raised by the invocation
+     */
+    public static int pextInt(int val, int mask){
+        try {
+            int extracted = (int) MHm32_pext.invokeExact(val, mask);
+            return extracted;
+        } catch (Throwable t) {
+            throw new Error(t);
+        }
+    }
+
+    // PEXT r64a, r64b, r64(mask)  (VEX.NDS.LZ.F3.0F38.W1 F5 /r)
+    public static final MethodHandle MHm64_pext = MachineCodeSnippet.make(
+            "pext64", MT_LONG_BINARY, requires(BMI2),
+            new Register[][]{cpuRegisters, cpuRegisters, cpuRegisters},
+            (Register[] regs) -> {
+                Register dst = regs[0], val = regs[1], mask = regs[2];
+                // L_128 supplies the zero length bit that the LZ encoding calls for.
+                int[] prefix = vex_prefix(rBit(dst), X_LOW, bBit(mask), M_0F38, W_HIGH, val, L_128, PP_F3);
+                return vex_emit(prefix, 0xF5, modRM(dst, mask));
+            });
+
+    /* ========================================================================================*/
+    /* Utility code for internals of assembler                                                   */
+    /* ========================================================================================*/
+    /**
+     * Splits an int into its four bytes, least-significant first.
+     *
+     * @param inp value to split
+     * @return four ints, each in the range 0..255
+     */
+    private static int[] i2iBytes(int inp) {
+        return new int[]{
+                inp & 0xFF,
+                (inp >> 8) & 0xFF,
+                (inp >> 16) & 0xFF,
+                (inp >> 24) & 0xFF
+        };
+    }
+    /**
+     * Reads the {@code i}-th 32-bit element of {@code l}: the low or high half of
+     * the long returned by {@code l.extract(i / 2)}.
+     *
+     * @param i element index, 0..3
+     * @throws IllegalArgumentException if {@code i} is outside 0..3
+     */
+    public static int getInt(Long2 l, int i) {
+        if (i < 0 || i >= 4) {
+            throw new IllegalArgumentException("" + i);
+        }
+        long lane = l.extract(i / 2);
+        int shift = (i % 2) * 32;
+        return (int) (lane >> shift);
+    }
+
+    /**
+     * Reads the {@code i}-th 32-bit element of {@code l}: the low or high half of
+     * the long returned by {@code l.extract(i / 2)}.
+     *
+     * @param i element index, 0..7
+     * @throws IllegalArgumentException if {@code i} is outside 0..7
+     */
+    public static int getInt(Long4 l, int i) {
+        if (i < 0 || i >= 8) {
+            throw new IllegalArgumentException("" + i);
+        }
+        long lane = l.extract(i / 2);
+        int shift = (i % 2) * 32;
+        return (int) (lane >> shift);
+    }
+
+    /** Reads the {@code i}-th 32-bit element of {@code l} via getInt and reinterprets its bits as a float. */
+    private static float getFloat(Long2 l, int i) {
+        int raw = getInt(l, i);
+        return Float.intBitsToFloat(raw);
+    }
+
+    /**
+     * Reads the {@code i}-th 32-bit element of {@code v} and reinterprets its bits as a float.
+     *
+     * @param i element index, 0..7
+     * @throws IllegalArgumentException if {@code i} is outside 0..7
+     */
+    public static float getFloat(Long4 v, int i) {
+        if (i < 0 || i > 7) {
+            throw new IllegalArgumentException("getFloat argument must be 0-7 inclusive");
+        }
+        long lane = v.extract(i / 2);
+        int raw = (int) (lane >> (32 * (i % 2)));
+        return Float.intBitsToFloat(raw);
+    }
+
+    /** Reads the {@code i}-th long of {@code l} via getLong and reinterprets its bits as a double. */
+    private static double getDouble(Long2 l, int i) {
+        long raw = getLong(l, i);
+        return Double.longBitsToDouble(raw);
+    }
+
+    /**
+     * Reads the {@code i}-th 64-bit element of {@code l}.
+     *
+     * @param i element index, 0 or 1
+     * @return the long at that index
+     * @throws IllegalArgumentException if {@code i} is outside 0..1
+     */
+    public static long getLong(Long2 l, int i) {
+        if (i < 0 || i >= 2) throw new IllegalArgumentException("" + i);
+        // Each element is one whole long, so the index is i itself. The previous
+        // i / 2 (carried over from getInt) always selected element 0.
+        return l.extract(i);
+    }
+
+    /**
+     * Packs two ints into one long: {@code hi} in bits 63..32, {@code lo} (zero-extended)
+     * in bits 31..0.
+     */
+    public static long pack(int lo, int hi) {
+        long upper = ((long) hi) << 32;
+        long lower = Integer.toUnsignedLong(lo);
+        return upper | lower;
+    }
+
+    /** Packs the bit patterns (Float.floatToIntBits) of two floats into one long via pack(int, int). */
+    public static long pack(float lo, float hi) {
+        int loBits = Float.floatToIntBits(lo);
+        int hiBits = Float.floatToIntBits(hi);
+        return pack(loBits, hiBits);
+    }
+
+    /**
+     * Returns true when both arguments are null or {@code o1.equals(o2)} holds.
+     * It returns a boolean so it can be used as the condition of an assert statement.
+     *
+     * @throws AssertionError naming both values when they differ
+     */
+    private static boolean assertEquals(Object o1, Object o2) {
+        boolean same = (o1 == null) ? (o2 == null) : o1.equals(o2);
+        if (!same) {
+            throw new AssertionError(o1 + " vs " + o2);
+        }
+        return true;
+    }
+
+    /**
+     * Maps a scale factor of 1, 2, 4 or 8 to 0, 1, 2 or 3 (its base-2 logarithm).
+     *
+     * @throws UnsupportedOperationException for any other factor
+     */
+    private static int sibScale(int i) {
+        if (i == 1 || i == 2 || i == 4 || i == 8) {
+            return Integer.numberOfTrailingZeros(i);
+        }
+        throw new UnsupportedOperationException("sibScale rescales 1,2,4, or 8");
+    }
+
+    // Common view of the single-bit VEX field enums below (R, X, B, W).
+    interface Bitty {
+        // True for the *_HIGH constant of the implementing enum.
+        boolean isHigh();
+    }
+
+    /** The two values of the VEX R bit, each carrying its one-bit encoding. */
+    enum VEXRBit implements Bitty {
+        R_LOW(0b0),
+        R_HIGH(0b1);
+
+        private final int bit;
+
+        VEXRBit(int bit) {
+            this.bit = bit;
+        }
+
+        int encoding() {
+            return bit;
+        }
+
+        @Override
+        public boolean isHigh() {
+            return bit == 0b1;
+        }
+
+        public boolean isLow() {
+            return bit == 0b0;
+        }
+    }
+
+    /** The two values of the VEX X bit, each carrying its one-bit encoding. */
+    enum VEXXBit implements Bitty {
+        X_LOW(0b0),
+        X_HIGH(0b1);
+
+        private final int bit;
+
+        VEXXBit(int bit) {
+            this.bit = bit;
+        }
+
+        int encoding() {
+            return bit;
+        }
+
+        @Override
+        public boolean isHigh() {
+            return bit == 0b1;
+        }
+
+        public boolean isLow() {
+            return bit == 0b0;
+        }
+    }
+
+    /** The two values of the VEX B bit, each carrying its one-bit encoding. */
+    public enum VEXBBit implements Bitty {
+        B_LOW(0b0),
+        B_HIGH(0b1);
+
+        private final int bit;
+
+        VEXBBit(int bit) {
+            this.bit = bit;
+        }
+
+        int encoding() {
+            return bit;
+        }
+
+        @Override
+        public boolean isHigh() {
+            return bit == 0b1;
+        }
+
+        public boolean isLow() {
+            return bit == 0b0;
+        }
+    }
+
+    /** The two values of the VEX W bit, each carrying its one-bit encoding. */
+    public enum VEXWBit implements Bitty {
+        W_LOW(0b0),
+        W_HIGH(0b1);
+
+        private final int bit;
+
+        VEXWBit(int bit) {
+            this.bit = bit;
+        }
+
+        int encoding() {
+            return bit;
+        }
+
+        @Override
+        public boolean isHigh() {
+            return bit == 0b1;
+        }
+
+        public boolean isLow() {
+            return bit == 0b0;
+        }
+    }
+
+    /** VEX vector-length selector: 0 for the 128-bit form, 1 for the 256-bit form. */
+    public enum VEXLength {
+        L_128(0b0),
+        L_256(0b1);
+
+        private final int bits;
+
+        VEXLength(int bits) {
+            this.bits = bits;
+        }
+
+        int encoding() {
+            return bits;
+        }
+    }
+
+    /** VEX opcode-map selector values (0F, 0F38, 0F3A) plus a reserved zero entry. */
+    public enum VEXOpcode {
+        M_Reserved(0b0),
+        M_0F(0b00001),
+        M_0F38(0b00010),
+        M_0F3A(0b00011);
+
+        private final int bits;
+
+        VEXOpcode(int bits) {
+            this.bits = bits;
+        }
+
+        int encoding() {
+            return bits;
+        }
+    }
+
+    /** Two-bit VEX "pp" values standing for no prefix, 66, F3 and F2. */
+    public enum SIMDPrefix {
+        PP_NONE(0b00),
+        PP_66(0b01),
+        PP_F3(0b10),
+        PP_F2(0b11);
+
+        private final int bits;
+
+        SIMDPrefix(int bits) {
+            this.bits = bits;
+        }
+
+        int encoding() {
+            return bits;
+        }
+    }
+
+
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/Ops.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el;
+
+/** Operation identifiers of the expression language, e.g. as passed to OpProvider.getOp. */
+public enum Ops {
+    MUL, DIV, MOD,
+    ADD, SUB,
+    SLL, SRA, SRL,
+    LT, GT, LTE, GTE,
+    EQ, NEQ,
+    BAND, BXOR, BOR,
+    AND, OR,
+    SUM,
+    PROD,
+    ARY_LOAD,
+    ARY_STORE,
+    SELECT, BCAST
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/Shape.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el;
+
+/** A shape exposes a single integer length. */
+@FunctionalInterface
+public interface Shape {
+    int length();
+    /** Returns {@code elementLength} multiplied by {@link #length()}. */
+    default int bitLength(int elementLength) { return length() * elementLength; }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/Shapes.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el;
+
+/**
+ * Fixed-length {@link Shape} implementations, one nested class per length, together with a shared
+ * instance of each.
+ */
+public final class Shapes {
+
+   public static final LENGTH1 L1 = new LENGTH1();
+   public static final LENGTH2 L2 = new LENGTH2();
+   public static final LENGTH4 L4 = new LENGTH4();
+   public static final LENGTH8 L8 = new LENGTH8();
+   public static final LENGTH16 L16 = new LENGTH16();
+   // Shared instances for the two largest shapes, matching the constants above.
+   public static final LENGTH32 L32 = new LENGTH32();
+   public static final LENGTH64 L64 = new LENGTH64();
+
+   public static final class LENGTH1 implements Shape {
+      @Override
+      public int length() { return 1; }
+   }
+   public static final class LENGTH2 implements Shape {
+      @Override
+      public int length() { return 2; }
+   }
+   public static final class LENGTH4 implements Shape {
+      @Override
+      public int length() { return 4; }
+   }
+   public static final class LENGTH8 implements Shape {
+      @Override
+      public int length() { return 8; }
+   }
+   public static final class LENGTH16 implements Shape {
+      @Override
+      public int length() { return 16; }
+   }
+   public static final class LENGTH32 implements Shape {
+      @Override
+      public int length() { return 32; }
+   }
+   public static final class LENGTH64 implements Shape {
+      @Override
+      public int length() { return 64; }
+   }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/Val.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el;
+
+import com.oracle.vector.el.expression.Expression;
+import com.oracle.vector.el.expression.Expressions;
+import com.oracle.vector.el.expression.VConst;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/** An expression node that holds only a shape and an element type class. */
+public class Val<E,S extends Shape> implements Expression<E,S>{
+    final S shape;
+    final Class<E> etype;
+
+    public Val(S shape, Class<E> etype){
+        this.etype = etype;
+        this.shape = shape;
+    }
+
+    /** Hands this object to the evaluator's {@code visit} method and returns its result. */
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+
+    @Override
+    public Class<E> elementType(){
+        return this.etype;
+    }
+
+    /** Always returns an empty Optional. */
+    @Override
+    public Optional<VConst<E, S>> toVConst() {
+        return Optional.empty();
+    }
+
+    @Override
+    public S shape() {
+       return this.shape;
+    }
+
+    /**
+     * Maps the boxed element type to the corresponding primitive class.
+     *
+     * @throws UnsupportedOperationException if the element type is none of the seven wrappers checked below
+     */
+    public Class<?> getValueLevelElementType(){
+        if (etype.equals(Byte.class))      { return byte.class; }
+        if (etype.equals(Short.class))     { return short.class; }
+        if (etype.equals(Float.class))     { return float.class; }
+        if (etype.equals(Integer.class))   { return int.class; }
+        if (etype.equals(Long.class))      { return long.class; }
+        if (etype.equals(Double.class))    { return double.class; }
+        if (etype.equals(Character.class)) { return char.class; }
+        throw new UnsupportedOperationException("Invalid lane element type.");
+    }
+
+    /** Delegates to {@code Expressions.packedType} with this value's element type and shape. */
+    public Class<?> getPackedType() {
+        return Expressions.packedType(this.elementType(),this.shape());
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/builder/MHMeta.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.builder;
+
+import com.oracle.vector.el.Val;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+import java.util.*;
+import java.util.stream.Collectors;
+
+/*
+    Transformations on MethodHandles to form expressions from Expression tree compositions.
+ */
+public class MHMeta {
+    private final Val binder;
+    private final List<Val> inputs;
+    private final MethodHandle body;
+
+    public MHMeta(Val binder, List<Val> inputs, MethodHandle body){
+        this.binder  = binder;
+        this.inputs  = inputs;
+        this.body    = body;
+    }
+
+    public Val getBinder() {
+        return binder;
+    }
+
+    public List<Val> getInputs() {
+        return inputs;
+    }
+
+    public MethodHandle getBody() {
+        return body;
+    }
+
+    public MHMeta substitute(MHMeta incoming){
+        return substitute(this,incoming);
+    }
+
+    public MHMeta normalize(){
+        return normalize(this);
+    }
+
+
+    //Here we normalize our MethodHandles so that all bound variables are presented as arguments only once.
+    //All duplicate variables appearing after the first instance are dropped, types are changed to reflect this,
+    //and MethodHandles permuted to map the data flow correctly.
+    //The distinct variables keep the order of their first occurrence in the input list.
+    public static MHMeta normalize(MHMeta mhm){
+        Val binder = mhm.getBinder();
+        List<Val> vals = new ArrayList<>(mhm.getInputs());
+        MethodHandle mh = mhm.getBody();
+
+        //LinkedHashSet de-duplicates while preserving first-occurrence order; a plain HashSet would make the
+        //resulting parameter order depend on hash codes.
+        List<Val> valsp = new ArrayList<>(new LinkedHashSet<>(vals));
+
+        //m: distinct variable -> its position in the normalized parameter list.
+        Map<Val,Integer> m = new HashMap<>();
+        for (int ij = 0; ij < valsp.size(); ij++) {
+            m.put(valsp.get(ij), ij);
+        }
+
+        MethodType newTy = MethodType.methodType(mh.type().returnType(),valsp.stream()
+                                                                             .map(v -> v.getPackedType())
+                                                                             .toArray(Class<?>[]::new));
+
+        //remap[i] is the position in newTy that supplies the i-th parameter of the original handle.
+        int[] remap = vals.stream().mapToInt(m::get).toArray();
+
+        mh = MethodHandles.permuteArguments(mh,newTy,remap);
+        return new MHMeta(binder,valsp,mh);
+    }
+
+    //True when no variable appears more than once among the inputs.
+    public static boolean isNormal(MHMeta mhm){
+        return new HashSet<>(mhm.getInputs()).size() == mhm.getInputs().size();
+    }
+
+
+    //mhm1[var] <= mhm2
+    //Every occurrence of mhm2's binder among mhm1's inputs is replaced by mhm2's inputs, and mhm2's body is
+    //collected into the matching parameter of mhm1's body. Returns mhm1 itself if the binder does not occur.
+    public static MHMeta substitute(MHMeta mhm1, MHMeta mhm2){
+        List<Val> vars1 = mhm1.getInputs();
+        List<Val> vars2 = mhm2.getInputs();
+        Val target = mhm2.getBinder();
+        MethodHandle filter = mhm2.getBody();
+
+        MethodHandle mh = mhm1.getBody();
+        List<Val> b = new ArrayList<Val>();
+        //collectArguments replaces ONE parameter of mh with all parameters of the filter, so every
+        //substitution moves the parameters behind it by (filter arity - 1) positions. Advancing by the
+        //full arity instead would overshoot by one for each earlier substitution.
+        int shift = filter.type().parameterCount() - 1;
+        int offset = 0;
+        boolean substituted = false;
+        for (int i = 0; i < vars1.size(); i++) {
+            Val val = vars1.get(i);
+            if(val.equals(target)){
+                //Substitute vars
+                b.addAll(vars2);
+                //Connect MH's
+                mh = MethodHandles.collectArguments(mh,i+offset,filter);
+                offset += shift;
+                substituted = true;
+            } else {
+                b.add(val);
+            }
+        }
+
+        //No substitutions available
+        if(!substituted){
+            return mhm1;
+        }
+        return new MHMeta(mhm1.getBinder(),b,mh);
+    }
+
+
+    //This Takes an MHMeta object and rebinds its parameters given a list of in-order parameters
+    //The return type is not affected.
+    //Under the hood, this is simply a permute arguments on method handles.
+    public static MHMeta rebind(MHMeta mhm, List<Val> vals){
+        List<Val> inputs = mhm.getInputs();
+        MethodHandle body = mhm.getBody();
+
+        Set<Val> varsh = new HashSet<>(vals);
+        Set<Val> inputsh = new HashSet<>(inputs);
+        //Sanity checks:
+        //1. The new rebound vals don't have any repeat vals.
+        if(vals.size() != varsh.size()){
+            throw new UnsupportedOperationException("Rebound var list can't have any repeated binders.");
+        }
+        //2. The target must be a subset of the rebound var list.
+        for(Val s : varsh){
+            inputsh.remove(s);
+        }
+        if(inputsh.size() > 0){
+            throw new UnsupportedOperationException("Target binders must be a subset of the rebound binders.");
+        }
+
+        //Notes:
+        //1. Rebound vals can be a superset of the target, but non-matching args will be dropped on invocation.
+        //ixs[i] is the position in vals of the i-th input of the original body.
+        int[] ixs = inputs.stream().mapToInt(vals::indexOf).toArray();
+        Class<?>[] ixtypes = vals.stream().map(Val::getPackedType).toArray(Class<?>[]::new);
+        MethodType newtype = MethodType.methodType(body.type().returnType(),ixtypes);
+        MethodHandle newbody = MethodHandles.permuteArguments(body,newtype,ixs);
+
+        return new MHMeta(mhm.binder, vals,newbody);
+
+    }
+
+    //NOTE(review): deriveType (and bitLenToType below) are not referenced anywhere else in this class.
+    private static Class<?> deriveType(Val v){
+        int len       = v.length();
+        Class<?> elem = v.getValueLevelElementType();
+        int bitLen;
+        if(elem.equals(int.class) || elem.equals(Integer.class)
+                || elem.equals(float.class) || elem.equals(Float.class)){
+           bitLen = 32;
+        } else if(elem.equals(short.class) || elem.equals(Short.class)
+                || elem.equals(char.class) || elem.equals(Character.class)) {
+           bitLen = 16;
+        } else if(elem.equals(Byte.class) || elem.equals(byte.class)){
+           bitLen = 8;
+        } else {
+           //Double, Long
+           bitLen = 64;
+        }
+
+        if(len == 1) {
+            return elem;
+        } else {
+            return bitLenToType(bitLen * len);
+        }
+    }
+
+    private static Class<?> bitLenToType(int len){
+        switch(len){
+            case 64:
+                return Long.class;
+            case 128:
+                return Long2.class;
+            case 256:
+                return Long4.class;
+            case 512:
+                return Long8.class;
+            default:
+                throw new UnsupportedOperationException("Invalid bitLen Received.");
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/comp/ExpComp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.comp;
+
+import com.oracle.vector.el.Ops;
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.Val;
+import com.oracle.vector.el.expression.*;
+import com.oracle.vector.el.expression.bexp.VBinBExp;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+import com.oracle.vector.ops.OpProvider;
+import static com.oracle.vector.el.Ops.*;
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+import java.util.List;
+import java.util.Optional;
+
+/** Visitor that builds a MethodHandle per expression node from operators supplied by an OpProvider. */
+public class ExpComp implements ExpressionEvaluator<Optional<MethodHandle>> {
+
+    private final OpProvider op;
+
+    public ExpComp(OpProvider op) {
+        this.op = op;
+    }
+
+    @Override
+    public <E,T extends Shape> Optional<MethodHandle> visit(VAdd<E,T> v) {
+        return procMethods(op.getOp(v.elementType(),ADD,v.length()),v.getLeft(),v.getRight());
+    }
+
+    @Override
+    public <E,T extends Shape> Optional<MethodHandle> visit(VDiv<E,T> v) {
+        return procMethods(op.getOp(v.elementType(),DIV,v.length()),v.getLeft(),v.getRight());
+    }
+
+    @Override
+    public <E,T extends Shape> Optional<MethodHandle> visit(VMul<E,T> v) {
+        return procMethods(op.getOp(v.elementType(),MUL,v.length()),v.getLeft(),v.getRight());
+    }
+
+    @Override
+    public <E> Optional<MethodHandle> visit(VProd<E> v) {
+        throw new UnsupportedOperationException("Prod not implemented yet.");
+    }
+
+    @Override
+    public <E,T extends Shape> Optional<MethodHandle> visit(VSub<E,T> v) {
+        return procMethods(op.getOp(v.elementType(),SUB,v.length()),v.getLeft(),v.getRight());
+    }
+
+    @Override
+    public <E> Optional<MethodHandle> visit(VSum<E> v) {
+        return procMethods(op.getOp(v.elementType(),SUM,v.length()),v.getAddends());
+    }
+
+    @Override
+    public <E,T extends Shape> Optional<MethodHandle> visit(Val<E, T> v) {
+        return Optional.empty();
+    }
+
+    @Override
+    public <E,T extends Shape> Optional<MethodHandle> visit(ITE<E, T> v) {
+       MethodHandle test, then_, else_;
+       test  = v.getTest().accept(this).orElse(MethodHandles.identity(boolean.class));
+       then_ = v.getThen().accept(this).orElse(MethodHandles.identity(v.elementType()));
+       else_ = v.getElse().accept(this).orElse(MethodHandles.identity(v.elementType()));
+
+       //Each handle contributes its OWN parameter list; the combined signature is (t..., thn..., els...).
+       List<Class<?>> t   = test.type().parameterList();
+       List<Class<?>> thn = then_.type().parameterList();
+       List<Class<?>> els = else_.type().parameterList();
+
+       //Rebuild each MethodHandle so it has the same parameter typing, but each MethodHandle drops arguments it doesn't
+       //use (ie the other MethodHandle parameters).
+       MethodHandle then_p, else_p;
+       then_p = MethodHandles.dropArguments(then_,0,t);                                 //(t..., thn...)
+       then_p = MethodHandles.dropArguments(then_p,then_p.type().parameterCount(),els); //(t..., thn..., els...)
+
+       else_p = MethodHandles.dropArguments(else_,0,t);                                 //(t..., els...)
+       else_p = MethodHandles.dropArguments(else_p,t.size(),thn);                       //(t..., thn..., els...)
+
+       //guardWithTest allows the test to take fewer (leading) arguments than the two branches.
+       return Optional.of(MethodHandles.guardWithTest(test,then_p,else_p));
+    }
+
+    @Override
+    public <E,T extends Shape> Optional<MethodHandle> visit(VConst<E, T> v) {
+        return Optional.of(v.packedValue());
+    }
+
+    @Override
+    public Optional<MethodHandle> visit(FloatScalarOp v) {
+        throw new UnsupportedOperationException("FloatScalar Not Implemented");
+    }
+
+    @Override
+    public Optional<MethodHandle> visit(DoubleScalarOp v) {
+        throw new UnsupportedOperationException("DoubleScalar Not Implemented");
+    }
+
+    @Override
+    public Optional<MethodHandle> visit(IntScalarOp v) {
+        throw new UnsupportedOperationException("IntScalar Not Implemented");
+    }
+
+    @Override
+    public Optional<MethodHandle> visit(LongScalarOp v) {
+        throw new UnsupportedOperationException("LongScalar Not Implemented");
+    }
+
+    @Override
+    public <E, T extends Shape> Optional<MethodHandle> visit(VBroadcast<E, T> v) {
+        MethodHandle id = MethodHandles.identity(Expressions.packedType(v.getChild().elementType(),1));
+        MethodHandle c = v.getChild().accept(this).orElse(id);
+        MethodHandle bc = op.getOp(v.elementType(),BCAST,v.length());
+
+        return Optional.of(MethodHandles.filterReturnValue(c,bc));
+    }
+
+    @Override
+    public <E, T extends Shape> Optional<MethodHandle> visit(VMask<E, T> v) {
+        Class<?> eTy = v.getThn().elementType();
+        Class<?> pTy = Expressions.packedType(eTy,v.length());
+        MethodHandle mask, thn, els;
+        MethodHandle id = MethodHandles.identity(pTy);
+
+        //TODO: MASKING NEEDS CONTEXT WRT EXPRESSION SHAPE
+        mask = v.getMask().accept(this).orElse(id); //(args..)VectorB
+        thn  = v.getThn().accept(this).orElse(id);  //(args2..)VectorA
+        els  = v.getEls().accept(this).orElse(id);  //(args3..)VectorA
+
+        MethodHandle blender = op.getOp(v.elementType(),SELECT,v.length()); //(VectorA, VectorA, VectorB)VectorA
+
+        int thnParams = thn.type().parameterCount();
+        int elsParams = els.type().parameterCount();
+        MethodHandle r = MethodHandles.collectArguments(blender,0,els);
+        r = MethodHandles.collectArguments(r,elsParams,thn);
+        r = MethodHandles.collectArguments(r,thnParams+elsParams,mask); //(args3..,args2..,args..)VectorA (blended)
+        return Optional.of(r);
+
+    }
+
+    @Override
+    public <E, S extends Shape> Optional<MethodHandle> visit(VBinBExp<E, S> v) {
+        return procMethods(op.getOp(v.getRealElementType(),v.getOp(),v.length()),v.getLeft(),v.getRight());
+    }
+
+    @Override
+    public <E, S extends Shape> Optional<MethodHandle> visit(IndexedVal<E, S> v) {
+        MethodHandle id = MethodHandles.identity(int.class);
+        MethodHandle ix = v.getIxExp().accept(this).orElse(id);
+
+        MethodHandle loader = op.getOp(v.elementType(),Ops.ARY_LOAD,v.length());
+        MethodType mt = loader.type();
+        MethodType mtn = MethodType.methodType(mt.returnType(),mt.parameterArray()[1],mt.parameterArray()[0]); //flip
+        loader = MethodHandles.permuteArguments(loader,mtn,1,0);
+        loader = MethodHandles.collectArguments(loader,0,ix);
+
+        return Optional.of(loader);
+    }
+
+    @Override
+    public Optional<MethodHandle> visit(FloatScalarBinOp v) {
+        throw new UnsupportedOperationException("FloatScalar Not Implemented");
+    }
+
+    @Override
+    public Optional<MethodHandle> visit(DoubleScalarBinOp v) {
+        throw new UnsupportedOperationException("DoubleScalar Not Implemented");
+    }
+
+    @Override
+    public Optional<MethodHandle> visit(IntScalarBinOp v) {
+        throw new UnsupportedOperationException("IntScalar Not Implemented");
+    }
+
+    @Override
+    public Optional<MethodHandle> visit(LongScalarBinOp v) {
+        throw new UnsupportedOperationException("LongScalar Not Implemented");
+    }
+
+    //Combines a binary operator with its two operand nodes. Operands that report a constant via toVConst()
+    //are folded or bound; the remaining operands are visited and collected into the operator's parameters.
+    private <E,T extends Shape> Optional<MethodHandle> procMethods(MethodHandle operator, Expression<E,T> leftNode, Expression<E,T> rightNode){
+
+        Optional<VConst<E,T>> lVal = leftNode.toVConst();
+        Optional<VConst<E,T>> rVal = rightNode.toVConst();
+        try {
+            if(lVal.isPresent()){
+                if(rVal.isPresent()) { //Both Values
+                    //Both arguments to this operator are constant values, so we fold them by eagerly evaluating now.
+
+                    //Reflect the type of the packed value
+                    Class<?> lClass = lVal.get().packedClass();
+                    Class<?> rClass = rVal.get().packedClass();
+
+                    //Take the supplied operator and apply it to the downcasted packed values
+                    Object res = operator.invokeExact(lClass.cast(lVal.get().packedValue()),rClass.cast(rVal.get().packedValue()));
+
+                    //Reflect the return type from the operator to downcast the constant result
+                    Class<?> retType = operator.type().returnType();
+
+                    //Our result is final because our arguments are constants.  Bundle this into a constant MethodHandle
+                    return Optional.of(MethodHandles.constant(retType,retType.cast(res)));
+                } else { //Left-only value
+                    Optional<MethodHandle> rightOp = rightNode.accept(this);
+
+                    //Reflect the type of the packed value
+                    Class<?> lClass = lVal.get().packedClass();
+
+                    MethodHandle lmh = operator.bindTo(lClass.cast(lVal.get().packedValue()));
+
+                    Optional<MethodHandle> ret = rightOp.map(right -> MethodHandles.collectArguments(lmh,0,right))
+                        .or(() -> Optional.of(lmh));
+
+                    return ret;
+                }
+
+            } else {
+                if(rVal.isPresent()) { //Right-only value
+                    Optional<MethodHandle> leftOp = leftNode.accept(this);
+                    Class<?> rClass = rVal.get().packedClass();
+
+                    MethodHandle val = rVal.get().packedValue();
+
+                    Optional<MethodHandle> mh = leftOp.flatMap(left -> {
+                        MethodHandle lmh = MethodHandles.collectArguments(operator,0,left);
+                        int leftArgs = left.type().parameterCount();
+                        return Optional.of(MethodHandles.collectArguments(lmh,leftArgs,MethodHandles.constant(rClass,rClass.cast(val)))); //Left is a methodhandle
+                   // }).or(() -> Optional.of(MethodHandles.collectArguments(operator,1,MethodHandles.constant(rClass,rClass.cast(val))))); //Left is a var
+                    }).or(() -> Optional.of(MethodHandles.collectArguments(operator,1,val)));//MethodHandles.constant(rClass,rClass.cast(val))))); //Left is a var
+
+                    return mh;
+
+
+                } else { //Neither Value
+                    Optional<MethodHandle> leftOp  = leftNode.accept(this);
+                    Optional<MethodHandle> rightOp = rightNode.accept(this);
+
+                    Optional<MethodHandle> mh = leftOp.flatMap(left -> {
+                        MethodHandle lmh = MethodHandles.collectArguments(operator,0,left);
+                        int leftArgs = left.type().parameterCount();
+                        Optional<MethodHandle> ret = rightOp.map(right -> MethodHandles.collectArguments(lmh,leftArgs,right))
+                                .or(() -> Optional.of(lmh));
+                        return ret;
+                    })
+                            .or(() -> rightOp.map(right -> MethodHandles.collectArguments(operator,1,right)))
+                            .or(() -> Optional.of(operator));
+
+                    return mh;
+                }
+
+            }
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+
+
+    }
+
+    //Unary variant: feeds the visited child into parameter 0, or returns the bare operator if the child yields nothing.
+    private Optional<MethodHandle> procMethods(MethodHandle operator, Expression<?,?> childNode) {
+        Optional<MethodHandle> childOp = childNode.accept(this);
+
+        Optional<MethodHandle> mh = childOp.flatMap(child ->
+                Optional.of(MethodHandles.collectArguments(operator,0,child))
+        ).or(() -> Optional.of(operator));
+
+        return mh;
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/comp/ExpVarOrder.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.comp;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.Val;
+import com.oracle.vector.el.expression.*;
+import com.oracle.vector.el.expression.bexp.VBinBExp;
+import com.oracle.vector.el.expression.IndexedVal;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Evaluator that collects the {@link Val} leaves of an expression tree in visiting order.
+ * Operands are walked left to right, repeats are kept, and every returned list is mutable.
+ */
+public class ExpVarOrder implements ExpressionEvaluator<List<Val>> {
+
+    public static final ExpVarOrder instance = new ExpVarOrder();
+
+    @Override
+    public <E, T extends Shape> List<Val> visit(VAdd<E, T> v) {
+        return append(v.getLeft().accept(this), v.getRight().accept(this));
+    }
+
+    @Override
+    public <E, T extends Shape> List<Val> visit(VDiv<E, T> v) {
+        return append(v.getLeft().accept(this), v.getRight().accept(this));
+    }
+
+    @Override
+    public <E, T extends Shape> List<Val> visit(VMul<E, T> v) {
+        return append(v.getLeft().accept(this), v.getRight().accept(this));
+    }
+
+    @Override
+    public <E> List<Val> visit(VProd<E> v) {
+        return v.getFactors().accept(this); // forwards the operand's list as-is
+    }
+
+    @Override
+    public <E, T extends Shape> List<Val> visit(VSub<E, T> v) {
+        return append(v.getLeft().accept(this), v.getRight().accept(this));
+    }
+
+    @Override
+    public <E> List<Val> visit(VSum<E> v) {
+        return v.getAddends().accept(this); // forwards the operand's list as-is
+    }
+
+    @Override
+    public <E, T extends Shape> List<Val> visit(Val<E, T> v) {
+        List<Val> single = new ArrayList<>(); // a variable is its own one-element result
+        single.add(v);
+        return single;
+    }
+
+    @Override
+    public <E, T extends Shape> List<Val> visit(VConst<E,T> v) {
+        return new ArrayList<>(); // no variables; mutable like every other result
+    }
+
+    @Override
+    public List<Val> visit(FloatScalarOp v) {
+        return v.getChild().accept(this);
+    }
+
+    @Override
+    public List<Val> visit(DoubleScalarOp v) {
+        return v.getChild().accept(this);
+    }
+
+    @Override
+    public List<Val> visit(IntScalarOp v) {
+        return v.getChild().accept(this);
+    }
+
+    @Override
+    public List<Val> visit(LongScalarOp v) {
+        return v.getChild().accept(this);
+    }
+
+    @Override
+    public List<Val> visit(FloatScalarBinOp v) {
+        return append(v.getLeft().accept(this), v.getRight().accept(this));
+    }
+
+    @Override
+    public List<Val> visit(DoubleScalarBinOp v) {
+        return append(v.getLeft().accept(this), v.getRight().accept(this));
+    }
+
+    @Override
+    public List<Val> visit(IntScalarBinOp v) {
+        return append(v.getLeft().accept(this), v.getRight().accept(this));
+    }
+
+    @Override
+    public List<Val> visit(LongScalarBinOp v) {
+        return append(v.getLeft().accept(this), v.getRight().accept(this));
+    }
+
+    @Override
+    public <E, T extends Shape> List<Val> visit(VBroadcast<E, T> v) {
+        return v.getChild().accept(this);
+    }
+
+    @Override
+    public <E, T extends Shape> List<Val> visit(VMask<E, T> v) {
+        return append(append(v.getMask().accept(this), v.getThn().accept(this)), v.getEls().accept(this));
+    }
+
+    @Override
+    public <E, S extends Shape> List<Val> visit(VBinBExp<E, S> v) {
+        return append(v.getLeft().accept(this), v.getRight().accept(this));
+    }
+
+    @Override
+    public <E, S extends Shape> List<Val> visit(IndexedVal<E, S> v) {
+        return append(v.getIxExp().accept(this), v.getExp().accept(this)); // index variables come first
+    }
+
+    @Override
+    public <E, T extends Shape> List<Val> visit(ITE<E, T> v) {
+        List<Val> fromTest = v.getTest().accept(this);
+        List<Val> fromThen = v.getThen().accept(this);
+        List<Val> fromElse = v.getElse().accept(this);
+        return append(fromTest, append(fromThen, fromElse));
+    }
+
+    /** Concatenates two lists into a new mutable list; neither argument is modified. */
+    private <R> List<R> append(List<R> left, List<R> right){
+        List<R> joined = new ArrayList<>(left);
+        joined.addAll(right);
+        return joined;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/DoubleScalarBinOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.expression.scalars.DoubleBinOp;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/** Length-1 double expression that combines two scalar operands with a {@link DoubleBinOp}. */
+public class DoubleScalarBinOp implements Expression<Double,Shapes.LENGTH1>{
+    final Expression<Double,Shapes.LENGTH1> left, right;
+    final DoubleBinOp op;
+
+    DoubleScalarBinOp(Expression<Double,Shapes.LENGTH1> l, Expression<Double,Shapes.LENGTH1> r, DoubleBinOp op){
+        this.op = op;
+        this.left = l;
+        this.right = r;
+    }
+
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+
+    @Override
+    public Shapes.LENGTH1 shape() {
+        return Shapes.L1;
+    }
+
+    @Override
+    public Class<Double> elementType() {
+        return Double.class;
+    }
+
+    /** Folds to a constant only when both operands fold; the right one is queried after the left. */
+    @Override
+    public Optional<VConst<Double, Shapes.LENGTH1>> toVConst() {
+        Optional<VConst<Double, Shapes.LENGTH1>> lhs = left.toVConst();
+        if (!lhs.isPresent()) return Optional.empty();
+        Optional<VConst<Double, Shapes.LENGTH1>> rhs = right.toVConst();
+        if (!rhs.isPresent()) return Optional.empty();
+        return Optional.of(new VConst<>(Shapes.L1, op.apply(lhs.get().val, rhs.get().val)));
+    }
+
+    /** @return the operand passed first to the operator */
+    public Expression<Double,Shapes.LENGTH1> getLeft(){ return left; }
+
+    /** @return the operand passed second to the operator */
+    public Expression<Double,Shapes.LENGTH1> getRight(){ return right; }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/DoubleScalarOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.expression.scalars.DoubleOp;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/** Length-1 double expression that applies a unary {@link DoubleOp} to a scalar operand. */
+public class DoubleScalarOp implements Expression<Double,Shapes.LENGTH1> {
+
+    final Expression<Double,Shapes.LENGTH1> child;
+    final DoubleOp op;
+
+    DoubleScalarOp(Expression<Double,Shapes.LENGTH1> e, DoubleOp op){
+        this.op = op;
+        this.child = e;
+    }
+
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+
+    @Override
+    public Shapes.LENGTH1 shape() {
+        return Shapes.L1;
+    }
+
+    @Override
+    public Class<Double> elementType() {
+        return Double.class;
+    }
+
+    /** Yields a folded constant when the operand is constant, otherwise an empty result. */
+    @Override
+    public Optional<VConst<Double, Shapes.LENGTH1>> toVConst() {
+        Optional<VConst<Double, Shapes.LENGTH1>> operand = child.toVConst();
+        if (!operand.isPresent()) return Optional.empty();
+        Double applied = op.apply(operand.get().val);
+        return Optional.of(new VConst<>(Shapes.L1, applied));
+    }
+
+    public Expression<Double,Shapes.LENGTH1> getChild(){ return child; }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/Expression.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Ops;
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.expression.bexp.VBinBExp;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+import java.util.function.Supplier;
+
+/** Node of the expression language with element type {@code E} and shape {@code S}. */
+public interface Expression<E,S extends Shape> {
+    // Element-wise arithmetic: each call wraps this expression and v in a new node.
+    default Expression<E,S> add(Expression<E,S> v) { return new VAdd<E,S>(this, v); }
+    default Expression<E,S> sub(Expression<E,S> v) { return new VSub<E,S>(this, v); }
+    default Expression<E,S> mul(Expression<E,S> v) { return new VMul<E,S>(this, v); }
+    default Expression<E,S> div(Expression<E,S> v) { return new VDiv<E,S>(this, v); }
+
+    // The three factories below build their node from the arguments only; the receiver is unused.
+    default Expression<E,S> ite(Expression<Boolean,Shapes.LENGTH1> t, Expression<E,S> thn, Expression<E,S> els) {
+        return new ITE<>(t, thn, els);
+    }
+    default Expression<Integer,Shapes.LENGTH1> pack(int val) { return new VConst<>(Shapes.L1, val); }
+    default Expression<Integer,Shapes.LENGTH4> broadcast4(int val) { return new VConst<>(Shapes.L4, val); }
+
+    // Horizontal reductions: wrap this expression in a node whose shape is LENGTH1.
+    default Expression<E,Shapes.LENGTH1> sum() { return new VSum<E>(this); }
+    default Expression<E,Shapes.LENGTH1> prod() { return new VProd<E>(this); }
+
+    /** Visitor entry point; implementations are expected to hand themselves to {@code v.visit}. */
+    <R> R accept(ExpressionEvaluator<R> v);
+
+    /** @return the length reported by {@link #shape()} */
+    default int length() { return shape().length(); }
+
+    S shape();
+    Class<E> elementType();
+
+    /** @return this expression folded to a constant, or empty when it does not fold */
+    Optional<VConst<E,S>> toVConst();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/Expressions.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Ops;
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.expression.bexp.VBinBExp;
+import com.oracle.vector.el.expression.scalars.*;
+
+import java.util.function.Supplier;
+
+/** Static factories for {@link Expression} nodes, plus helpers for element and packed sizes. */
+public class Expressions {
+    // Direct broadcast4(int), broadcast4(float), broadcast8(float) and broadcast4(double)
+    // constant factories were sketched here at one point and are currently disabled.
+
+    /** Wraps a number in a length-1 constant node. */
+    public static <Z extends Number> Expression<Z,Shapes.LENGTH1> constant(Z n){
+        return new VConst<>(Shapes.L1, n);
+    }
+
+    /** Wraps a length-1 expression in a broadcast node of shape {@code Shapes.L2}. */
+    public static <E extends Number> Expression<E,Shapes.LENGTH2> broadcast2(Expression<E,Shapes.LENGTH1> e) {
+        return new VBroadcast<>(e, Shapes.L2);
+    }
+
+    /** Wraps a length-1 expression in a broadcast node of shape {@code Shapes.L4}. */
+    public static <E extends Number> Expression<E,Shapes.LENGTH4> broadcast4(Expression<E,Shapes.LENGTH1> e) {
+        return new VBroadcast<>(e, Shapes.L4);
+    }
+
+    /** Wraps a length-1 expression in a broadcast node of shape {@code Shapes.L8}. */
+    public static <E extends Number> Expression<E,Shapes.LENGTH8> broadcast8(Expression<E,Shapes.LENGTH1> e) {
+        return new VBroadcast<>(e, Shapes.L8);
+    }
+
+    // Unary scalar operators: one overload per supported element type.
+    public static Expression<Double,Shapes.LENGTH1>  scalar(Expression<Double,Shapes.LENGTH1> e, DoubleOp f) { return new DoubleScalarOp(e, f); }
+    public static Expression<Float,Shapes.LENGTH1>   scalar(Expression<Float,Shapes.LENGTH1> e, FloatOp f) { return new FloatScalarOp(e, f); }
+    public static Expression<Integer,Shapes.LENGTH1> scalar(Expression<Integer,Shapes.LENGTH1> e, IntOp f) { return new IntScalarOp(e, f); }
+    public static Expression<Long,Shapes.LENGTH1>    scalar(Expression<Long,Shapes.LENGTH1> e, LongOp f) { return new LongScalarOp(e, f); }
+
+    // Binary scalar operators: one overload per supported element type.
+    public static Expression<Double,Shapes.LENGTH1>  scalar(Expression<Double,Shapes.LENGTH1> left, Expression<Double,Shapes.LENGTH1> right, DoubleBinOp f) { return new DoubleScalarBinOp(left, right, f); }
+    public static Expression<Float,Shapes.LENGTH1>   scalar(Expression<Float,Shapes.LENGTH1> left, Expression<Float,Shapes.LENGTH1> right, FloatBinOp f) { return new FloatScalarBinOp(left, right, f); }
+    public static Expression<Integer,Shapes.LENGTH1> scalar(Expression<Integer,Shapes.LENGTH1> left, Expression<Integer,Shapes.LENGTH1> right, IntBinOp f) { return new IntScalarBinOp(left, right, f); }
+    public static Expression<Long,Shapes.LENGTH1>    scalar(Expression<Long,Shapes.LENGTH1> left, Expression<Long,Shapes.LENGTH1> right, LongBinOp f) { return new LongScalarBinOp(left, right, f); }
+
+    /** Builds a mask node from a boolean mask expression and its "then" and "else" operands. */
+    public static <E,S extends Shape> Expression<E,S> mask(Expression<Boolean,S> mask, Expression<E,S> thn, Expression<E,S> els) {
+        return new VMask<>(mask, thn, els);
+    }
+
+    /** Supplier-based variant: each supplier is asked once, in mask, then, else order. */
+    public static <E,S extends Shape> Expression<E,S> mask(Supplier<Expression<Boolean,S>> mask, Supplier<Expression<E,S>> thn, Supplier<Expression<E,S>> els) {
+        return new VMask<>(mask.get(), thn.get(), els.get());
+    }
+
+    /** Boolean comparison node over two operands, built with {@code Ops.EQ}. */
+    public static <E,S extends Shape> Expression<Boolean,S> eq(Expression<E,S> left, Expression<E,S> right) {
+        return new VBinBExp<>(left, right, Ops.EQ);
+    }
+
+    /** Boolean comparison node over two operands, built with {@code Ops.LT}. */
+    public static <E,S extends Shape> Expression<Boolean,S> lt(Expression<E,S> left, Expression<E,S> right){
+        return new VBinBExp<>(left, right, Ops.LT);
+    }
+
+    /** Boolean comparison node over two operands, built with {@code Ops.LTE}. */
+    public static <E,S extends Shape> Expression<Boolean,S> lte(Expression<E,S> left, Expression<E,S> right){
+        return new VBinBExp<>(left, right, Ops.LTE);
+    }
+
+    /** Pairs an indexable value with a length-1 integer index expression. */
+    public static <E,S extends Shape> Expression<E,S> get(IndexableVal<E,S> iv, Expression<Integer,Shapes.LENGTH1> ix) {
+        return new IndexedVal<>(iv, ix);
+    }
+
+    /**
+     * Width (8, 16, 32 or 64) of a primitive numeric/char type or of its wrapper class.
+     * @throws UnsupportedOperationException for every other class, including {@code null}
+     */
+    public static int elementWidth(Class<?> elem){
+        if (elem == Byte.class || elem == byte.class) return 8;
+        if (elem == Short.class || elem == short.class || elem == Character.class || elem == char.class) return 16;
+        if (elem == Integer.class || elem == int.class || elem == Float.class || elem == float.class) return 32;
+        if (elem == Double.class || elem == double.class || elem == Long.class || elem == long.class) return 64;
+        throw new UnsupportedOperationException("elementWidth encountered unsupported type");
+    }
+
+    /** Convenience overload that takes the element count from the shape's length. */
+    public static Class<?> packedType(Class<?> elem, Shape s){
+        return packedType(elem, s.length());
+    }
+
+    /**
+     * Class used to carry {@code l} elements of type {@code elem}.
+     * For a single element the Integer/Float/Double/Long wrappers map to their primitive and any
+     * other class is returned unchanged; otherwise width times count selects Long2/Long4/Long8.
+     * @throws UnsupportedOperationException when that product is not 128, 256 or 512, or elem is unsupported
+     */
+    public static Class<?> packedType(Class<?> elem, int l){
+        if (l != 1) {
+            // Totals of 16/32/64 (short/int/long carriers) were sketched earlier but are not enabled.
+            switch (elementWidth(elem) * l) {
+                case 128: return Long2.class;
+                case 256: return Long4.class;
+                case 512: return Long8.class;
+                default:
+                    throw new UnsupportedOperationException("Unsupported Size for packed elements (Not 128-512 bits).");
+            }
+        }
+        if (elem == Integer.class) return int.class;
+        if (elem == Float.class) return float.class;
+        if (elem == Double.class) return double.class;
+        if (elem == Long.class) return long.class;
+        return elem;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/FloatScalarBinOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.expression.scalars.FloatBinOp;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/** Length-1 float expression that combines two scalar operands with a {@link FloatBinOp}. */
+public class FloatScalarBinOp implements Expression<Float,Shapes.LENGTH1>{
+    final Expression<Float,Shapes.LENGTH1> left, right;
+    final FloatBinOp op;
+
+    FloatScalarBinOp(Expression<Float,Shapes.LENGTH1> l, Expression<Float,Shapes.LENGTH1> r, FloatBinOp op){
+        this.op = op;
+        this.left = l;
+        this.right = r;
+    }
+
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+
+    @Override
+    public Shapes.LENGTH1 shape() {
+        return Shapes.L1;
+    }
+
+    @Override
+    public Class<Float> elementType() {
+        return Float.class;
+    }
+
+    /** Folds to a constant only when both operands fold; the right one is queried after the left. */
+    @Override
+    public Optional<VConst<Float, Shapes.LENGTH1>> toVConst() {
+        Optional<VConst<Float, Shapes.LENGTH1>> lhs = left.toVConst();
+        if (!lhs.isPresent()) return Optional.empty();
+        Optional<VConst<Float, Shapes.LENGTH1>> rhs = right.toVConst();
+        if (!rhs.isPresent()) return Optional.empty();
+        return Optional.of(new VConst<>(Shapes.L1, op.apply(lhs.get().val, rhs.get().val)));
+    }
+
+    /** @return the operand passed first to the operator */
+    public Expression<Float,Shapes.LENGTH1> getLeft(){ return left; }
+
+    /** @return the operand passed second to the operator */
+    public Expression<Float,Shapes.LENGTH1> getRight(){ return right; }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/FloatScalarOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.expression.scalars.FloatOp;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/** Length-1 float expression that applies a unary {@link FloatOp} to a scalar operand. */
+public class FloatScalarOp implements Expression<Float,Shapes.LENGTH1> {
+
+    final Expression<Float, Shapes.LENGTH1> child;
+    final FloatOp op;
+
+    FloatScalarOp(Expression<Float, Shapes.LENGTH1> e, FloatOp op) {
+        this.op = op;
+        this.child = e;
+    }
+
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+
+    @Override
+    public Shapes.LENGTH1 shape() {
+        return Shapes.L1;
+    }
+
+    @Override
+    public Class<Float> elementType() {
+        return Float.class;
+    }
+
+    /** Yields a folded constant when the operand is constant, otherwise an empty result. */
+    @Override
+    public Optional<VConst<Float, Shapes.LENGTH1>> toVConst() {
+        Optional<VConst<Float, Shapes.LENGTH1>> operand = child.toVConst();
+        if (!operand.isPresent()) return Optional.empty();
+        Float applied = op.apply(operand.get().val);
+        return Optional.of(new VConst<>(Shapes.L1, applied));
+    }
+
+    public Expression<Float, Shapes.LENGTH1> getChild() { return child; }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/ITE.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/**
+ * If-then-else node holding a length-1 boolean test and two branches of one element type and shape.
+ * The shape is read from the "then" branch and the element type from the "else" branch.
+ */
+public class ITE<E,S extends Shape> implements Expression<E,S> {
+
+    final Expression<Boolean,Shapes.LENGTH1> test;
+    final Expression<E,S> thn, els;
+
+    public ITE(Expression<Boolean,Shapes.LENGTH1> test, Expression<E,S> thn, Expression<E,S> els) {
+        this.els  = els;
+        this.thn  = thn;
+        this.test = test;
+    }
+
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+
+    @Override
+    public S shape() {
+        return thn.shape();
+    }
+
+    @Override
+    public Class<E> elementType() {
+        return els.elementType();
+    }
+
+    /** Always empty: this node never reports itself as a constant, whatever its operands are. */
+    @Override
+    public Optional<VConst<E, S>> toVConst() {
+        return Optional.empty();
+    }
+
+    // Accessors for the three operands, in test / then / else order.
+    public Expression<Boolean,Shapes.LENGTH1> getTest(){ return test; }
+
+    public Expression<E,S> getThen(){ return thn; }
+
+    public Expression<E,S> getElse(){ return els; }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/IndexableVal.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.Val;
+
+/** A {@link Val} that can be indexed; its packed type is the class stored in {@code indexedTy}. */
+public class IndexableVal<E,S extends Shape> extends Val<E,S> implements Expression<E,S>{
+    Class<?> indexedTy;
+    public IndexableVal(S shape, Class<E> etype, Class<?> ixTy) {
+        super(shape, etype);
+        indexedTy = ixTy;
+    }
+
+    /** @return the current value of {@code indexedTy}, initially the {@code ixTy} constructor argument */
+    @Override
+    public Class<?> getPackedType(){ return indexedTy; }
+
+    /** Pairs this value with a length-1 integer index expression in a new {@link IndexedVal}. */
+    public Expression<E, S> get(Expression<Integer,Shapes.LENGTH1> li) {
+        return new IndexedVal<>(this, li);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/IndexedVal.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/**
+ * Expression node for an {@link IndexableVal} paired with the length-1 integer
+ * expression used as its index.  Shape and element type are taken from the
+ * indexed value; the node never reports itself as a constant.
+ */
+public class IndexedVal<E,S extends Shape> implements Expression<E,S> {
+    IndexableVal<E,S> iVal;                  // the value being indexed
+    Expression<Integer,Shapes.LENGTH1> iExp; // the index expression
+
+    IndexedVal(IndexableVal<E,S> iv, Expression<Integer,Shapes.LENGTH1> ix) {
+        iVal = iv;
+        iExp = ix;
+    }
+
+    /** @return the indexed value, viewed as a plain expression */
+    public Expression<E,S> getExp(){ return iVal; }
+
+    /** @return the index expression */
+    public Expression<Integer,Shapes.LENGTH1> getIxExp(){ return iExp; }
+
+    @Override
+    public S shape() { return iVal.shape(); }
+
+    @Override
+    public Class<E> elementType() { return iVal.elementType(); }
+
+    /** Always empty: no constant folding is attempted for indexed values. */
+    @Override
+    public Optional<VConst<E, S>> toVConst() {
+        return Optional.empty();
+    }
+
+    /** Visitor entry point; dispatches to {@code v.visit(this)}. */
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/IntScalarBinOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.expression.scalars.IntBinOp;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/**
+ * Binary operation over two length-1 {@code Integer} expressions, combined by
+ * an {@link IntBinOp}.  The result always has shape {@link Shapes#L1}.
+ */
+public class IntScalarBinOp implements Expression<Integer,Shapes.LENGTH1>{
+    final IntBinOp op;
+    final Expression<Integer,Shapes.LENGTH1> left, right;
+
+    IntScalarBinOp(Expression<Integer,Shapes.LENGTH1> l, Expression<Integer,Shapes.LENGTH1> r, IntBinOp op){
+        this.op = op;
+        left = l;
+        right = r;
+    }
+
+    /** @return the left-hand operand */
+    public Expression<Integer,Shapes.LENGTH1> getLeft(){ return left; }
+
+    /** @return the right-hand operand */
+    public Expression<Integer,Shapes.LENGTH1> getRight(){ return right; }
+
+    @Override
+    public Class<Integer> elementType() { return Integer.class; }
+
+    @Override
+    public Shapes.LENGTH1 shape() { return Shapes.L1; }
+
+    /**
+     * Folds to a constant when both operands do: the left operand is queried
+     * first and the right one only if the left is constant; {@code op} is then
+     * applied to the two constant values.
+     */
+    @Override
+    public Optional<VConst<Integer, Shapes.LENGTH1>> toVConst() {
+        return left.toVConst().flatMap(lhs ->
+                right.toVConst().map(rhs ->
+                        new VConst<Integer, Shapes.LENGTH1>(Shapes.L1, op.apply(lhs.val, rhs.val))));
+    }
+
+    /** Visitor entry point. */
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) { return v.visit(this); }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/IntScalarOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.expression.scalars.IntOp;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/**
+ * Unary operation over a length-1 {@code Integer} expression, computed by an
+ * {@link IntOp}.  The result always has shape {@link Shapes#L1}.
+ */
+public class IntScalarOp implements Expression<Integer,Shapes.LENGTH1> {
+    final IntOp op;
+    final Expression<Integer,Shapes.LENGTH1> child;
+
+    IntScalarOp(Expression<Integer,Shapes.LENGTH1> e, IntOp op){
+        this.op = op;
+        child = e;
+    }
+
+    /** @return the operand expression */
+    public Expression<Integer, Shapes.LENGTH1> getChild() { return child; }
+
+    @Override
+    public Class<Integer> elementType() { return Integer.class; }
+
+    @Override
+    public Shapes.LENGTH1 shape() { return Shapes.L1; }
+
+    /** Folds to a constant when the operand does, by applying {@code op} to its value. */
+    @Override
+    public Optional<VConst<Integer, Shapes.LENGTH1>> toVConst() {
+        return child.toVConst().map(operand -> {
+            Integer folded = op.apply(operand.val);
+            return new VConst<Integer, Shapes.LENGTH1>(Shapes.L1, folded);
+        });
+    }
+
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/LongScalarBinOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.expression.scalars.LongBinOp;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/**
+ * Binary operation over two length-1 {@code Long} expressions, combined by a
+ * {@link LongBinOp}.  The result always has shape {@link Shapes#L1}.
+ */
+public class LongScalarBinOp implements Expression<Long,Shapes.LENGTH1>{
+    final LongBinOp op;
+    final Expression<Long,Shapes.LENGTH1> left, right;
+
+    LongScalarBinOp(Expression<Long,Shapes.LENGTH1> l, Expression<Long,Shapes.LENGTH1> r, LongBinOp op){
+        this.op = op;
+        left = l;
+        right = r;
+    }
+
+    /** @return the left-hand operand */
+    public Expression<Long,Shapes.LENGTH1> getLeft(){ return left; }
+
+    /** @return the right-hand operand */
+    public Expression<Long,Shapes.LENGTH1> getRight(){ return right; }
+
+    @Override
+    public Class<Long> elementType() { return Long.class; }
+
+    @Override
+    public Shapes.LENGTH1 shape() { return Shapes.L1; }
+
+    /**
+     * Folds to a constant when both operands do: the left operand is queried
+     * first and the right one only if the left is constant; {@code op} is then
+     * applied to the two constant values.
+     */
+    @Override
+    public Optional<VConst<Long, Shapes.LENGTH1>> toVConst() {
+        return left.toVConst().flatMap(lhs ->
+                right.toVConst().map(rhs ->
+                        new VConst<Long, Shapes.LENGTH1>(Shapes.L1, op.apply(lhs.val, rhs.val))));
+    }
+
+    /** Visitor entry point. */
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) { return v.visit(this); }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/LongScalarOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.expression.scalars.LongOp;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/**
+ * Unary operation over a length-1 {@code Long} expression, computed by a
+ * {@link LongOp}.  The result always has shape {@link Shapes#L1}.
+ */
+public class LongScalarOp implements Expression<Long,Shapes.LENGTH1> {
+    final LongOp op;
+    final Expression<Long,Shapes.LENGTH1> child;
+
+    LongScalarOp(Expression<Long,Shapes.LENGTH1> e, LongOp op){
+        this.op = op;
+        child = e;
+    }
+
+    /** @return the operand expression */
+    public Expression<Long, Shapes.LENGTH1> getChild() { return child; }
+
+    @Override
+    public Class<Long> elementType() { return Long.class; }
+
+    @Override
+    public Shapes.LENGTH1 shape() { return Shapes.L1; }
+
+    /** Folds to a constant when the operand does, by applying {@code op} to its value. */
+    @Override
+    public Optional<VConst<Long, Shapes.LENGTH1>> toVConst() {
+        return child.toVConst().map(operand -> {
+            Long folded = op.apply(operand.val);
+            return new VConst<Long, Shapes.LENGTH1>(Shapes.L1, folded);
+        });
+    }
+
+    /** Visitor entry point. */
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VAdd.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/**
+ * Binary {@code VAdd} expression node holding a left and a right operand of
+ * the same element type.  Shape and element type come from the left operand.
+ */
+public class VAdd<E,S extends Shape> implements Expression<E,S>{
+
+    private final Expression<E,S> left, right;
+
+    /**
+     * @throws UnsupportedOperationException if the operands' element types are not equal
+     */
+    VAdd(Expression<E,S> left, Expression<E,S> right){
+        Class<E> leftType = left.elementType();
+        boolean sameType = leftType.equals(right.elementType());
+        if(!sameType){
+            throw new UnsupportedOperationException("Binary Expressions must have the same typed arguments.");
+        }
+        this.left = left;
+        this.right = right;
+    }
+
+    public Expression<E, S> getLeft() { return left; }
+
+    public Expression<E, S> getRight() { return right; }
+
+    @Override
+    public S shape() { return left.shape(); }
+
+    @Override
+    public Class<E> elementType(){ return left.elementType(); }
+
+    /** Always empty: this node does not fold constants. */
+    @Override
+    public Optional<VConst<E, S>> toVConst() {
+        return Optional.empty();
+    }
+
+    /** Visitor entry point. */
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VBroadcast.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/**
+ * Expression node that presents a length-1 expression under a caller-supplied
+ * shape {@code s}.  The element type is that of the wrapped child.
+ */
+public class VBroadcast<E,S extends Shape> implements Expression<E,S>{
+
+    final S s;
+    final Expression<E,Shapes.LENGTH1> child;
+
+    VBroadcast(Expression<E,Shapes.LENGTH1> e, S s){
+        this.s = s;
+        child = e;
+    }
+
+    /** @return the wrapped length-1 expression */
+    public Expression<E,Shapes.LENGTH1> getChild(){ return child; }
+
+    @Override
+    public S shape() { return s; }
+
+    @Override
+    public Class<E> elementType() { return child.elementType(); }
+
+    /** Currently always empty, even when the child is itself a constant. */
+    @Override
+    public Optional<VConst<E, S>> toVConst() {
+        //TODO: If the child is a vconst, then pack it and be a vconst, too.
+        return Optional.empty();
+    }
+
+    /** Visitor entry point. */
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VConst.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.PatchableVecUtils;
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.reflect.InvocationTargetException;
+import java.util.Optional;
+
+/**
+ * Constant expression: one value {@code val} of element type {@code E} plus a
+ * shape.  {@link #packedValue()} yields a MethodHandle returning the constant,
+ * with {@code val} copied into every lane for 128- and 256-bit shapes.
+ */
+public class VConst<E,S extends Shape> implements Expression<E,S>{
+
+    final S shape;
+    /** {@code val}'s class, with the four boxed lane types mapped to their primitives. */
+    final Class<E> vClass;
+    final E val;
+
+    @SuppressWarnings("unchecked")
+    VConst(S shape, E val){
+       this.val = val;
+       this.shape = shape;
+       this.vClass = (Class<E>) normalizeClass(val.getClass());
+    }
+
+    /**
+     * Maps a boxed lane type to its primitive class; other classes pass through.
+     * Long and Double must be mapped too: laneWidth() and packedValue() test
+     * vClass against long.class/double.class, which a boxed class never equals.
+     */
+    private static Class<?> normalizeClass(Class<?> c) {
+        if(c == Float.class){
+            return float.class;
+        } else if (c == Integer.class) {
+            return int.class;
+        } else if (c == Long.class) {
+            return long.class;
+        } else if (c == Double.class) {
+            return double.class;
+        }
+        return c;
+    }
+
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+
+    @Override
+    public int length() { return shape.length(); }
+
+    @Override
+    public S shape() { return shape; }
+
+    /** @return the runtime (boxed) class of {@code val}, not the normalized {@code vClass} */
+    @Override
+    @SuppressWarnings("unchecked")
+    public Class<E> elementType() { return (Class<E>) val.getClass(); }
+
+    @Override
+    public Optional<VConst<E, S>> toVConst() {
+        return Optional.of(this);
+    }
+
+    /** Bits per lane: 32 for int/float, 64 for long/double; any other lane type is rejected. */
+    private int laneWidth() {
+        if(vClass == int.class || vClass == float.class) {
+            return 32;
+        } else if(vClass == long.class || vClass == double.class) {
+            return  64;
+        } else {
+            throw new UnsupportedOperationException("Invalid lane type: " + vClass);
+        }
+    }
+
+    /**
+     * Builds a MethodHandle that returns this constant in packed form.  A single
+     * 32- or 64-bit lane is returned as a primitive of type {@code vClass}; wider
+     * values are built by PatchableVecUtils from an array holding {@code val} in
+     * every lane, and the handle is typed Long2 (or Long4 for 256 bits) to match.
+     * @throws UnsupportedOperationException for an unsupported lane type or total width
+     */
+    public MethodHandle packedValue(){
+        int laneWidth = laneWidth();
+        int lanes = shape.length();
+        int bitLength = laneWidth * lanes;
+
+        switch(bitLength) {
+            case 32:
+                if(vClass == int.class) {
+                   return MethodHandles.constant(vClass,((Number)val).intValue());
+                } else if(vClass == float.class) {
+                   return MethodHandles.constant(vClass,((Number)val).floatValue());
+                }
+            case 64:
+                if(vClass == double.class) {
+                    return MethodHandles.constant(vClass,((Number)val).doubleValue());
+                } else if(vClass == long.class){
+                    return MethodHandles.constant(vClass,((Number)val).longValue());
+                }
+                // Two 32-bit lanes fall through to the vector path (original behavior, kept). NOTE(review): confirm long2From*Array accepts 2 elements.
+            case 128:
+            case 256:
+                boolean wide = bitLength == 256;
+                // constant() converts the value to the requested type; a Long2/Long4 cannot become a lane type, so request the vector class.
+                Class<?> packed = wide ? Long4.class : Long2.class;
+                if(vClass == int.class) {
+                    int[] ibuffer = new int[lanes];
+                    java.util.Arrays.fill(ibuffer, (int) val);
+                    return MethodHandles.constant(packed,wide ? PatchableVecUtils.long4FromIntArray(ibuffer,0) : PatchableVecUtils.long2FromIntArray(ibuffer,0));
+                } else if(vClass == float.class) {
+                    float[] fbuffer = new float[lanes];
+                    java.util.Arrays.fill(fbuffer, (float) val);
+                    return MethodHandles.constant(packed,wide ? PatchableVecUtils.long4FromFloatArray(fbuffer,0) : PatchableVecUtils.long2FromFloatArray(fbuffer,0));
+                } else if(vClass == double.class) {
+                    double[] dbuffer = new double[lanes];
+                    java.util.Arrays.fill(dbuffer, (double) val);
+                    return MethodHandles.constant(packed,wide ? PatchableVecUtils.long4FromDoubleArray(dbuffer,0) : PatchableVecUtils.long2FromDoubleArray(dbuffer,0));
+                } else if(vClass == long.class) {
+                    long[] lbuffer = new long[lanes];
+                    java.util.Arrays.fill(lbuffer, (long) val);
+                    return MethodHandles.constant(packed,wide ? PatchableVecUtils.long4FromLongArray(lbuffer,0) : PatchableVecUtils.long2FromLongArray(lbuffer,0));
+                }
+                throw new UnsupportedOperationException("Bad element/size configuration.");
+            default:
+                throw new UnsupportedOperationException("Invalid lane width.");
+        }
+    }
+
+    /**
+     * Packed class for an explicit lane width and shape: {@code val}'s runtime
+     * class for 32/64 bits in total, Long2 for 128 and Long4 for 256.
+     */
+    public Class<?> packedClass(int laneWidth, Shape shape){
+        int bitLength = laneWidth * shape.length();
+
+        switch(bitLength) {
+            case 32:
+            case 64:
+                return val.getClass();
+            case 128:
+                return Long2.class;
+            case 256:
+                return Long4.class;
+            default:
+                throw new UnsupportedOperationException("Cannot retrive class for invalid lanewidth/shape combo.");
+        }
+    }
+
+    /** Packed class for this constant's own lane width and shape; {@code vClass} for 32/64 bits. */
+    public Class<?> packedClass(){
+        int bitLength = laneWidth() * shape.length();
+
+        switch(bitLength) {
+            case 32:
+            case 64:
+                return vClass;
+            case 128:
+                return Long2.class;
+            case 256:
+                return Long4.class;
+            default:
+                throw new UnsupportedOperationException("Cannot retrive class for invalid lanewidth/shape combo.");
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VDiv.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/**
+ * Binary {@code VDiv} expression node holding a left and a right operand of
+ * the same element type.  Shape and element type come from the left operand.
+ */
+public class VDiv<E,S extends Shape> implements Expression<E,S> {
+
+    private final Expression<E,S> left, right;
+
+    /** @throws UnsupportedOperationException if the operands' element types are not equal */
+    VDiv(Expression<E,S> left, Expression<E,S> right){
+        Class<E> leftType = left.elementType();
+        boolean sameType = leftType.equals(right.elementType());
+        if(!sameType){
+            throw new UnsupportedOperationException("Binary Expressions must have the same typed arguments.");
+        }
+        this.left = left;
+        this.right = right;
+    }
+
+    public Expression<E, S> getLeft() { return left; }
+
+    public Expression<E, S> getRight() { return right; }
+
+    @Override
+    public S shape() { return left.shape(); }
+
+    @Override
+    public Class<E> elementType(){ return left.elementType(); }
+
+    /** Always empty: this node does not fold constants. */
+    @Override
+    public Optional<VConst<E, S>> toVConst() {
+        return Optional.empty();
+    }
+
+    /** Visitor entry point. */
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VMask.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+/**
+ * Expression node holding a Boolean mask expression and two value expressions,
+ * {@code thn} and {@code els} (presumably the "then"/"else" choices selected by
+ * the mask).  Shape and element type are reported from {@code thn}.
+ */
+public class VMask<E,S extends Shape> implements Expression<E,S>{
+
+    Expression<Boolean,S> mask;
+    Expression<E,S> thn;
+    Expression<E,S> els;
+
+    VMask(Expression<Boolean,S> b, Expression<E,S> t, Expression<E,S> e){
+        mask = b;
+        thn = t;
+        els = e;
+    }
+
+    /** @return the Boolean mask expression */
+    public Expression<Boolean, S> getMask() { return mask; }
+
+    /** @return the {@code thn} operand */
+    public Expression<E, S> getThn() { return thn; }
+
+    /** @return the {@code els} operand */
+    public Expression<E, S> getEls() { return els; }
+
+    @Override
+    public S shape() { return thn.shape(); }
+
+    @Override
+    public Class<E> elementType() { return thn.elementType(); }
+
+    /** Always empty: this node does not fold constants. */
+    @Override
+    public Optional<VConst<E, S>> toVConst() {
+        return Optional.empty();
+    }
+
+    /** Visitor entry point. */
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VMul.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+public class VMul<E,S extends Shape> implements Expression<E,S> {
+    // Binary node holding two operands of element type E and shape S (a multiply, going by the class name).
+    private final Expression<E,S> left, right;
+    // Package-private constructor: rejects operands whose elementType() values are not equal.
+    VMul(Expression<E,S> left, Expression<E,S> right){
+        if(!left.elementType().equals(right.elementType())){
+            throw new UnsupportedOperationException("Binary Expressions must have the same typed arguments.");
+        }
+        this.left = left;
+        this.right = right;
+
+    }
+    // Element type is read from the left operand; the constructor has checked it equals the right operand's.
+    @Override
+    public Class<E> elementType(){
+        return left.elementType();
+    }
+    // Always returns an empty Optional: this node never reports a constant form.
+    @Override
+    public Optional<VConst<E, S>> toVConst() {
+        return Optional.empty();
+    }
+    // Visitor entry point: returns the result of v.visit(this).
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+    // Shape is read from the left operand only.
+    @Override
+    public S shape() {
+        return left.shape();
+    }
+    // Accessors for the two operands.
+    public Expression<E, S> getLeft() {
+        return left;
+    }
+
+    public Expression<E, S> getRight() {
+        return right;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VProd.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+public class VProd<E> implements Expression<E,Shapes.LENGTH1> {
+    // Node of fixed shape LENGTH1 wrapping one expression of any shape (presumably a product over its lanes -- confirm in the evaluators).
+    private final Expression<E,?> factors;
+    // Package-private constructor: stores the wrapped expression as passed.
+    VProd(Expression<E,?> factors){
+        this.factors = factors;
+    }
+
+    // Element type is read from the wrapped expression on every call.
+    @Override
+    public Class<E> elementType(){
+        return factors.elementType();
+    }
+    // Always returns an empty Optional: this node never reports a constant form.
+    @Override
+    public Optional<VConst<E, Shapes.LENGTH1>> toVConst() {
+        return Optional.empty();
+    }
+    // Visitor entry point: returns the result of v.visit(this).
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+    // Shape is always the Shapes.L1 constant, regardless of the wrapped expression's shape.
+    @Override
+    public Shapes.LENGTH1 shape() {
+        return Shapes.L1;
+    }
+
+    // Accessor for the wrapped expression.
+    public Expression<E, ?> getFactors() {
+        return factors;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VSub.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+public class VSub<E,S extends Shape> implements Expression<E,S> {
+    // Binary node holding two operands of element type E and shape S (a subtract, going by the class name).
+    private final Expression<E,S> left, right; // NOTE(review): presumably evaluated as left minus right -- confirm in the evaluators
+    // Package-private constructor: rejects operands whose elementType() values are not equal.
+    VSub(Expression<E,S> left, Expression<E,S> right){
+        if(!left.elementType().equals(right.elementType())){
+            throw new UnsupportedOperationException("Binary Expressions must have the same typed arguments.");
+        }
+        this.left = left;
+        this.right = right;
+
+    }
+    // Element type is read from the left operand; the constructor has checked it equals the right operand's.
+    @Override
+    public Class<E> elementType(){
+        return left.elementType();
+    }
+    // Always returns an empty Optional: this node never reports a constant form.
+    @Override
+    public Optional<VConst<E, S>> toVConst() {
+        return Optional.empty();
+    }
+    // Visitor entry point: returns the result of v.visit(this).
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+    // Shape is read from the left operand only.
+    @Override
+    public S shape() {
+        return left.shape();
+    }
+    // Accessors for the two operands.
+    public Expression<E, S> getLeft() {
+        return left;
+    }
+
+    public Expression<E, S> getRight() {
+        return right;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/VSum.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression;
+
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+public class VSum<E> implements Expression<E,Shapes.LENGTH1> {
+    // Node of fixed shape LENGTH1 wrapping one expression of any shape (presumably a sum over its lanes -- confirm in the evaluators).
+    private final Class<E> elementType;
+    private final Expression<E,?> addends;
+    // Package-private constructor: stores the wrapped expression and captures its element type once.
+    VSum(Expression<E,?> addends){
+        this.elementType = addends.elementType();
+        this.addends = addends;
+    }
+    // Returns the element type captured at construction time.
+    @Override
+    public Class<E> elementType(){
+        return elementType;
+    }
+    // Always returns an empty Optional: this node never reports a constant form.
+    @Override
+    public Optional<VConst<E, Shapes.LENGTH1>> toVConst() {
+        return Optional.empty();
+    }
+
+    // Visitor entry point: returns the result of v.visit(this).
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+    // Shape is always the Shapes.L1 constant, regardless of the wrapped expression's shape.
+    @Override
+    public Shapes.LENGTH1 shape() {
+        return Shapes.L1;
+    }
+    // Accessor for the wrapped expression.
+    public Expression<E, ?> getAddends() {
+        return addends;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/bexp/BExp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.bexp;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.expression.Expression;
+
+public abstract class BExp<S extends Shape> implements Expression<Boolean,S> {
+    // Abstract base for Boolean-typed expressions of shape S; records a lane type and the shape.
+    Class<?> laneType; // VBinBExp supplies its left operand's elementType() for this value
+    S shape;
+    // Package-private constructor: stores both values as passed.
+    BExp(Class<?> laneType, S shape){
+        this.laneType = laneType;
+        this.shape    = shape;
+    }
+    // Accessor for the stored lane type.
+    public Class<?> getLaneType() {
+        return laneType;
+    }
+    // Returns the shape given at construction time.
+    @Override
+    public S shape() {
+        return shape;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/bexp/BOpExp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.bexp;
+
+import com.oracle.vector.el.Ops;
+import com.oracle.vector.el.Shape;
+
+public abstract class BOpExp<S extends Shape> extends BExp<S> {
+    // Abstract Boolean expression that additionally carries an Ops value.
+    final Ops op;
+    // Package-private constructor: forwards ety and shape to BExp and stores op.
+    BOpExp(Ops op, Class<?> ety, S shape){
+        super(ety,shape);
+        this.op = op;
+    }
+    // Accessor for the stored Ops value.
+    public Ops getOp() {
+        return op;
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/bexp/VBinBExp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.bexp;
+
+import com.oracle.vector.el.Ops;
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.expression.Expression;
+import com.oracle.vector.el.expression.VConst;
+import com.oracle.vector.el.visitor.ExpressionEvaluator;
+
+import java.util.Optional;
+
+public class VBinBExp<E,S extends Shape> extends BOpExp<S> {
+    // Boolean-typed node over two operands of element type E and shape S, tagged with an Ops value (presumably a comparison -- confirm).
+    private final Expression<E,S> left;
+    private final Expression<E,S> right;
+    // Lane type and shape are taken from left; right is stored without being checked against left.
+    public VBinBExp(Expression<E,S> left, Expression<E,S> right, Ops op) {
+       super(op,left.elementType(),left.shape());
+       this.left  = left;
+       this.right = right;
+    }
+    // Accessors for the two operands.
+    public Expression<E,S> getLeft() {
+        return left;
+    }
+
+    public Expression<E,S> getRight() {
+        return right;
+    }
+    // Visitor entry point: returns the result of v.visit(this).
+    @Override
+    public <R> R accept(ExpressionEvaluator<R> v) {
+        return v.visit(this);
+    }
+    // Delegates to the stored shape's length().
+    @Override
+    public int length() {
+        return this.shape.length();
+    }
+    // The node's own element type is Boolean; the operands' element type is exposed by getRealElementType().
+    @Override
+    public Class<Boolean> elementType() {
+        return Boolean.class;
+    }
+    // Element type of the operands, read from the left operand.
+    public Class<E> getRealElementType() { return left.elementType(); }
+    // Always returns an empty Optional: this node never reports a constant form.
+    @Override
+    public Optional<VConst<Boolean, S>> toVConst() {
+        return Optional.empty();
+    }
+
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/bexp/VUnBExp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.bexp;
+
+import com.oracle.vector.el.Ops;
+import com.oracle.vector.el.Shape;
+
+public abstract class VUnBExp<S extends Shape> extends BOpExp<S> {
+    // Abstract Boolean expression with a single BExp child and an Ops value.
+    BExp<S> child; // package-private; no accessor is defined in this class
+    // Package-private constructor: forwards op, eTy and shape to BOpExp and stores the child.
+    VUnBExp(BExp<S> c, Ops op, Class<?> eTy, S shape){
+       super(op, eTy,shape);
+       this.child = c;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/DoubleBinOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.scalars;
+
+@FunctionalInterface
+public interface DoubleBinOp { // two-argument operation on primitive doubles; single abstract method, so usable as a lambda target
+    double apply(double l, double r); // takes two double operands (l, r) and returns a double result
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/DoubleOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.scalars;
+
+@FunctionalInterface
+public interface DoubleOp { // one-argument operation on a primitive double; single abstract method, so usable as a lambda target
+   double apply(double d); // takes one double operand and returns a double result
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/FloatBinOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.scalars;
+
+@FunctionalInterface
+public interface FloatBinOp { // two-argument operation on primitive floats; single abstract method, so usable as a lambda target
+    float apply(float l, float r); // takes two float operands (l, r) and returns a float result
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/FloatOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.scalars;
+
+@FunctionalInterface
+public interface FloatOp { // one-argument operation on a primitive float; single abstract method, so usable as a lambda target
+    float apply(float f); // takes one float operand and returns a float result
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/IntBinOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.scalars;
+
+@FunctionalInterface
+public interface IntBinOp { // two-argument operation on primitive ints; single abstract method, so usable as a lambda target
+    int apply(int l, int r); // takes two int operands (l, r) and returns an int result
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/IntOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.scalars;
+
+@FunctionalInterface
+public interface IntOp { // one-argument operation on a primitive int; single abstract method, so usable as a lambda target
+    int apply(int i); // takes one int operand and returns an int result
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/LongBinOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.scalars;
+
+@FunctionalInterface
+public interface LongBinOp { // two-argument operation on primitive longs; single abstract method, so usable as a lambda target
+    long apply(long l, long r); // takes two long operands (l, r) and returns a long result
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/scalars/LongOp.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.scalars;
+
+@FunctionalInterface
+public interface LongOp { // one-argument operation on a primitive long; single abstract method, so usable as a lambda target
+    long apply(long l); // takes one long operand and returns a long result
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/types/Float256.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.types;
+
+import com.oracle.vector.el.Shapes;
+
+class Float256 implements VectorClass<Float,Shapes.LENGTH8>{ // package-private VectorClass for Float elements with shape LENGTH8
+    public Shapes.LENGTH8 getShape(){ return Shapes.L8;} // always the Shapes.L8 constant
+    public Class<Float> getElementType(){ return Float.class;} // always Float.class
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/types/VectorClass.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.types;
+
+import com.oracle.vector.el.Shape;
+
+public interface VectorClass<E, S extends Shape> { // describes a vector type by element class and shape
+    /** Returns the shape instance of type {@code S}. */ S getShape();
+    /** Returns the {@code Class} object of the element type {@code E}. */ Class<E> getElementType();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/expression/types/Vectors.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.expression.types;
+
+import com.oracle.vector.el.Shapes;
+
+/** Static registry of the {@link VectorClass} descriptors known to the expression language. */
+public class Vectors {
+
+    /** Descriptor for {@code Float} lanes with shape {@code Shapes.LENGTH8}; selected by width 256. */
+    public static final VectorClass<Float,Shapes.LENGTH8> float256 = new Float256();
+
+    /**
+     * Looks up the descriptor for a lane type and a width.
+     *
+     * @param lane  boxed element class of the requested vector
+     * @param width requested width; only 256 is handled, and only for {@code Float.class}
+     * @return the matching descriptor, or {@code null} when {@code lane} is not {@code Float.class}
+     *         (no other lane type has a descriptor yet)
+     * @throws UnsupportedOperationException if {@code lane} is {@code Float.class} and
+     *         {@code width} is not 256
+     */
+    @SuppressWarnings("unchecked")
+    public static <E> VectorClass<E,?> vectorClass(Class<E> lane, int width){
+        VectorClass<?,?> res = null;
+        if(lane == Float.class){
+            switch(width){
+                // widths 32, 64 and 128 are not wired up yet
+                case 256:
+                    res = float256;
+                    break; // without this, control falls into default and throws for every width
+                default:
+                    throw new UnsupportedOperationException("Type not supported");
+            }
+        }
+        // Integer, Short, Double, Long and Byte lanes have no descriptors yet and yield null.
+        return (VectorClass<E,?>) res;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/stmt/Assignment.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.stmt;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.Val;
+import com.oracle.vector.el.expression.Expression;
+import com.oracle.vector.el.visitor.StatementVisitor;
+
+/** Statement that binds the result of an {@link Expression} to a {@link Val} of the same element type and shape. */
+public class Assignment<E,S extends Shape> implements Statement{
+    final Val<E,S> val;        // target of the assignment
+    final Expression<E,S> exp; // expression supplying the value
+
+    /** Package-private constructor; stores both references without copying. */
+    Assignment(Val<E,S> v, Expression<E,S> e) {
+        this.val = v;
+        this.exp = e;
+    }
+
+    /** Hands this assignment to {@code sv.visit(this)}. */
+    @Override public void accept(StatementVisitor sv) { sv.visit(this); }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/stmt/Builder.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,460 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.stmt;
+
+import com.oracle.vector.el.Ops;
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.Shapes;
+import com.oracle.vector.el.Val;
+import com.oracle.vector.el.builder.MHMeta;
+//import com.oracle.vector.el.builder.Term;
+import com.oracle.vector.el.comp.ExpComp;
+import com.oracle.vector.el.comp.ExpVarOrder;
+import com.oracle.vector.el.expression.Expression;
+
+import java.io.Serializable;
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+import java.lang.invoke.SerializedLambda;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.oracle.vector.el.expression.IndexableVal;
+import com.oracle.vector.ops.OpProvider;
+import com.oracle.vector.ops.OpProviders;
+import com.oracle.vector.el.expression.types.VectorClass;
+
+
+public class Builder<E,S extends Shape> {
+
+    Class<E> retT; // result element class; set by the (Class, Shape) constructor
+    S retS;        // result shape; set by the (Class, Shape) constructor
+
+    private List<Val>  params = new ArrayList<>(); // parameters registered through addParams, in order
+    private List<Val> vals = new ArrayList<>(); // values registered through addVars, in order
+    private List<Statement>  stmts = new ArrayList<>(); // statements recorded by assign, in order
+
+
+    private Builder(Class<E> e, S shape){ // typed constructor behind builder(Class, S)
+        this.retT = e;
+        this.retS = shape;
+    }
+    //private Term body;
+    private Expression<E,S> retexp; // result expression; set by return_ and compiled by build
+
+    /** Creates a raw-typed builder through the no-argument constructor; retT and retS stay unset. */
+    @SuppressWarnings("rawtypes")
+    public static Builder builder(){ return new Builder(); }
+
+    public static <E, S extends Shape> Builder<E,S> builder(Class<E> e, S s) { return new Builder<E,S>(e, s); } // typed factory: e and s become retT and retS
+
+    public Builder(){ // records no result element class or shape
+        this.params = new ArrayList<>();
+    }
+
+    private Builder(List<Val> initialParams){ // adopts the given list instance as-is (no copy)
+        params = initialParams;
+    }
+
+    private Builder(Builder other){ // takes over only other's parameter list; nothing else is carried across
+        this(other.params);
+    }
+
+    /**
+     * Records an assignment of {@code e} to a freshly created value and returns that value.
+     * The {@code E} and {@code S} here are this method's own type parameters; they shadow
+     * the class-level ones.
+     *
+     * @param e expression whose result the new value stands for
+     * @return a new {@code Val} carrying the element type of {@code e} and the shape for its length
+     * @throws UnsupportedOperationException if {@code e.length()} is not 1, 2, 4, 8 or 16
+     */
+    @SuppressWarnings("unchecked")
+    public <E, S extends Shape> Val<E,S> assign(Expression<E,S> e){
+        final Class<E> elementClass = e.elementType();
+
+        // Map the expression length onto the matching Shapes constant.
+        final S shape;
+        switch(e.length()){
+            case 1:  shape = (S) Shapes.L1;  break;
+            case 2:  shape = (S) Shapes.L2;  break;
+            case 4:  shape = (S) Shapes.L4;  break;
+            case 8:  shape = (S) Shapes.L8;  break;
+            case 16: shape = (S) Shapes.L16; break;
+            default:
+                throw new UnsupportedOperationException("Shape length not supported: " + e.length());
+        }
+
+        final Val<E,S> target = new Val<>(shape, elementClass);
+        // Copy-and-replace: the list previously held in stmts is not modified.
+        List<Statement> extended = new ArrayList<>(stmts);
+        extended.add(new Assignment<E,S>(target, e));
+        stmts = extended;
+        return target;
+    }
+
+
+    public <T extends Shape> Builder<E,S> bindFloatIndexable(T s, Param1DBinder<E,Float,S,T> binder) { // registers a new float[]-backed IndexableVal, then runs binder
+        final IndexableVal<Float,T> indexable = new IndexableVal<>(s, Float.class, float[].class);
+        this.addParams(indexable);
+        return binder.apply(indexable, this);
+    }
+
+    public <T extends Shape> Builder<E,S> bindDoubleIndexable(T s, Param1DBinder<E,Double,S,T> binder) { // registers a new double[]-backed IndexableVal, then runs binder
+        final IndexableVal<Double,T> indexable = new IndexableVal<>(s, Double.class, double[].class);
+        this.addParams(indexable);
+        return binder.apply(indexable, this);
+    }
+
+
+
+    public <F,T extends Shape> Builder<E,S> bind(Class<F> v2e, T v2s, Param1Binder<E,F,S,T> binder){ // registers one fresh parameter, then runs binder
+        final Val<F,T> param = new Val<>(v2s, v2e);
+        this.addParams(param);
+        return binder.apply(param, this);
+    }
+
+    /*
+    public <E, S extends Shape> Builder<E,S> bind(Val<E,S> v, Param1Binder<E,S> binder){
+        addParams(v);
+        return binder.apply(v,this);
+    }
+    */
+
+    public <F,T extends Shape> Builder<E,S> bind(VectorClass<F,T> v2, Param1Binder<E,F,S,T> binder){ // unpacks v2 and delegates to the (Class, Shape) overload
+        return bind(v2.getElementType(), v2.getShape(), binder);
+    }
+
+    /** Creates two fresh parameters from the given element classes and shapes, registers them in order, then runs {@code binder}. */
+    public <F, G, T extends Shape, U extends Shape> Builder<E,S> bind(Class<F> v2e, T v2s, Class<G> v3e, U v3s, Param2Binder<E,F,G,S,T,U> binder){
+        final Val<F,T> first = new Val<>(v2s, v2e);
+        final Val<G,U> second = new Val<>(v3s, v3e);
+        this.addParams(first, second);
+        return binder.apply(first, second, this);
+    }
+
+    public <F, G, T extends Shape, U extends Shape> Builder<E,S> bind(Val<F,T> v2, Val<G,U> v3, Param2Binder<E,F,G,S,T,U> binder){ // registers the caller-supplied values as parameters, then runs binder
+        this.addParams(v2, v3);
+        return binder.apply(v2, v3, this);
+    }
+
+    /** Descriptor-based variant: unpacks both descriptors and delegates to the (Class, Shape, Class, Shape) overload. */
+    public <F, G, T extends Shape, U extends Shape> Builder<E,S> bind(VectorClass<F,T> v2, VectorClass<G,U> v3, Param2Binder<E,F,G,S,T,U> binder){
+        return bind(v2.getElementType(), v2.getShape(), v3.getElementType(), v3.getShape(), binder);
+    }
+
+    /** Creates three fresh parameters from the given element classes and shapes, registers them in order, then runs {@code binder}. */
+    public <F, G, H, T extends Shape, U extends Shape, V extends Shape> Builder<E,S> bind(Class<F> v2e,T v2s, Class<G> v3e, U v3s, Class<H> v4e, V v4s, Param3Binder<E,F,G,H,S,T,U,V> binder){
+        final Val<F,T> first = new Val<>(v2s, v2e);
+        final Val<G,U> second = new Val<>(v3s, v3e);
+        final Val<H,V> third = new Val<>(v4s, v4e);
+
+        this.addParams(first, second, third);
+        return binder.apply(first, second, third, this);
+    }
+
+    public <F, G, H, T extends Shape, U extends Shape, V extends Shape> Builder<E,S> bind(Val<E,S> v, Val<F,T> v2, Val<G,U> v3, Val<H,V> v4, Param3Binder<E,F,G,H,S,T,U,V> binder){ // note: v is accepted but never read
+        this.addParams(v2, v3, v4);
+        return binder.apply(v2, v3, v4, this);
+    }
+
+    public <F, G, H, T extends Shape, U extends Shape, V extends Shape> Builder<E,S> bind(VectorClass<F,T> v2, VectorClass<G,U> v3, VectorClass<H,V> v4, Param3Binder<E,F,G,H,S,T,U,V> binder){ // unpacks the descriptors and delegates to the (Class, Shape)x3 overload
+        return bind(v2.getElementType(), v2.getShape(), v3.getElementType(), v3.getShape(), v4.getElementType(), v4.getShape(), binder);
+    }
+
+    /** Creates four fresh parameters from the given element classes and shapes, registers them in order, then runs {@code binder}. */
+    public <F, G, H, I, T extends Shape, U extends Shape, V extends Shape, W extends Shape> Builder<E,S> bind(Class<F> v2e, T v2s, Class<G> v3e,U v3s, Class<H> v4e,V v4s, Class<I> v5e, W v5s, Param4Binder<E,F,G,H,I,S,T,U,V,W> binder){
+        final Val<F,T> first = new Val<>(v2s, v2e);
+        final Val<G,U> second = new Val<>(v3s, v3e);
+        final Val<H,V> third = new Val<>(v4s, v4e);
+        final Val<I,W> fourth = new Val<>(v5s, v5e);
+        this.addParams(first, second, third, fourth);
+        return binder.apply(first, second, third, fourth, this);
+    }
+
+    public <F, G, H, I, T extends Shape, U extends Shape, V extends Shape, W extends Shape> Builder<E,S> bind(Val<F,T> v2, Val<G,U> v3, Val<H,V> v4, Val<I,W> v5, Param4Binder<E,F,G,H,I,S,T,U,V,W> binder){ // registers the caller-supplied values as parameters, then runs binder
+        this.addParams(v2, v3, v4, v5);
+        return binder.apply(v2, v3, v4, v5, this);
+    }
+
+    public <F, G, H, I, T extends Shape, U extends Shape, V extends Shape, W extends Shape> Builder<E,S> bind(VectorClass<F,T> v2, VectorClass<G,U> v3, VectorClass<H,V> v4, VectorClass<I,W> v5, Param4Binder<E,F,G,H,I,S,T,U,V,W> binder){ // unpacks the descriptors and delegates to the (Class, Shape)x4 overload
+        return bind(v2.getElementType(), v2.getShape(), v3.getElementType(), v3.getShape(), v4.getElementType(), v4.getShape(), v5.getElementType(), v5.getShape(), binder);
+    }
+
+
+    public Builder<E,S> return_(Expression<E,S> e){ // stores the result expression that build() compiles; returns this for chaining
+        this.retexp = e;
+        return this;
+    }
+
+
+    /**
+     * Compiles the recorded assignments and the return expression into one method handle.
+     * Assignments are substituted into the result from the last recorded to the first.
+     *
+     * @param op operation provider handed to every {@code ExpComp} created here
+     * @return the body of the final {@code MHMeta} after rebinding to this builder's parameters
+     */
+    public MethodHandle build(OpProvider op){
+        final int count = stmts.size();
+        MHMeta[] compiled = new MHMeta[count];
+        for(int idx = 0; idx < count; idx++){
+            // Every recorded statement is treated as an Assignment (hence the cast).
+            Assignment assignment = (Assignment) stmts.get(idx);
+            Val target = assignment.val;
+            Expression<?,?> source = assignment.exp;
+            MethodHandle handle = source.accept(new ExpComp(op)).get();
+            List<Val> inputs = source.accept(ExpVarOrder.instance);
+            compiled[idx] = new MHMeta(target, inputs, handle);
+        }
+
+        // The return expression is compiled the same way, with null in place of a target value.
+        MethodHandle tail = retexp.accept(new ExpComp(op)).get();
+        MHMeta result = new MHMeta(null, retexp.accept(ExpVarOrder.instance), tail).normalize();
+
+        // Walk the assignments backwards, normalizing after each substitution.
+        for(int idx = compiled.length - 1; idx >= 0; idx--){
+            result = result.substitute(compiled[idx]).normalize();
+        }
+        return MHMeta.rebind(result, this.params).getBody();
+    }
+
+    /** Callback that receives one {@code IndexableVal} plus the builder and returns the builder to continue with. */
+    @FunctionalInterface public interface Param1DBinder<E,F,S extends Shape, T extends Shape> {
+        Builder<E,S> apply(IndexableVal<F,T> ival1, Builder<E,S> builder);
+    }
+
+    /** Callback that receives one {@code Val} plus the builder and returns the builder to continue with. */
+    @FunctionalInterface public interface Param1Binder<E,F,S extends Shape,T extends Shape> {
+        Builder<E,S> apply(Val<F,T> val1, Builder<E,S> builder);
+    }
+
+    /** Callback that receives two {@code Val}s plus the builder and returns the builder to continue with. */
+    @FunctionalInterface public interface Param2Binder<E,F,G,S extends Shape, T extends Shape, U extends Shape> {
+        Builder<E,S> apply(Val<F,T> val1, Val<G,U> val2, Builder<E,S> builder);
+    }
+
+    /** Callback that receives three {@code Val}s plus the builder and returns the builder to continue with. */
+    @FunctionalInterface public interface Param3Binder<E,F,G,H,S extends Shape, T extends Shape, U extends Shape, V extends Shape> {
+        Builder<E,S> apply(Val<F,T> val1, Val<G,U> val2, Val<H,V> val3, Builder<E,S> builder);
+    }
+
+    /** Callback that receives four {@code Val}s plus the builder and returns the builder to continue with. */
+    @FunctionalInterface public interface Param4Binder<E,F,G,H,I, S extends Shape, T extends Shape, U extends Shape, V extends Shape, W extends Shape> {
+        Builder<E,S> apply(Val<F,T> val1, Val<G,U> val2, Val<H,V> val3, Val<I,W> val4, Builder<E,S> builder);
+    }
+
+
+    //Utilities
+    /**
+     * Appends the given values to the builder's value list.
+     * The field is pointed at an extended copy; the list it held before is left unmodified.
+     * Nothing inside this class calls this method.
+     *
+     * @param vs values to append, in the given order
+     */
+    void addVars(Val...vs){
+        List<Val> extended = new ArrayList<>(vals);
+        for(int k = 0; k < vs.length; k++){
+            extended.add(vs[k]);
+        }
+        vals = extended;
+    }
+
+    /** Appends {@code ps} to a copy of {@code params} and makes that copy the new parameter list. */
+    void addParams(Val...ps){
+        List<Val> extended = new ArrayList<>(params);
+        for(int k = 0; k < ps.length; k++){
+            extended.add(ps[k]);
+        }
+        params = extended;
+    }
+
+
+    //Test Combinators
+    /**
+     * Test combinator: wraps the kernel described by {@code binder} in a counted loop over float arrays.
+     * Both kernel inputs come from the ARY_LOAD op and the kernel result goes to the ARY_STORE op;
+     * all three use one shared index, the loop counter multiplied by {@code shape.length()}.
+     * The iteration count is the length of the first array argument divided by {@code shape.length()}.
+     *
+     * @param shape  shape used for both kernel inputs and to select the ops
+     * @param binder receives the two input values and a fresh builder, and returns the builder to compile
+     * @return the handle produced by {@code MethodHandles.countedLoop}
+     * @throws Error wrapping any {@code Throwable} raised inside the final try block
+     */
+    public static <Z extends Shape> MethodHandle zip(Z shape, Param2Binder<Float,Float,Float,Z,Z,Z> binder) {
+        OpProvider ops = OpProviders.provider(Float.class,shape.length());
+        MethodHandle load = ops.getOp(Float.class,Ops.ARY_LOAD,shape.length());
+        MethodHandle store = ops.getOp(Float.class,Ops.ARY_STORE,shape.length());
+
+        Val<Float,Z> left  = new Val<>(shape,Float.class);
+        Val<Float,Z> right = new Val<>(shape,Float.class);
+        Builder b = new Builder();
+        b.addParams(left,right);
+        b = binder.apply(left,right,b); //(LongZ,LongZ)LongZ
+
+        MethodHandle kernel = b.build(ops);
+
+        //(float[],int,LongZ)LongZ
+        kernel = MethodHandles.collectArguments(kernel,0,load);
+
+        //(float[],int,float[],int)LongZ
+        kernel = MethodHandles.collectArguments(kernel,2,load);
+
+        // NOTE(review): presumably ARY_STORE is (float[],int,vector)void, making the first array the destination -- confirm
+        //(float[],int,float[],int,float[],int)void
+        kernel = MethodHandles.collectArguments(store,2,kernel);
+
+        MethodType loopBodyTy = MethodType.methodType(void.class,int.class,float[].class,float[].class,float[].class);
+        MethodType iterTy = MethodType.methodType(int.class,float[].class,float[].class,float[].class);
+        //(int,float[],float[],float[])void -- the single int feeds all three index positions
+        kernel = MethodHandles.permuteArguments(kernel, loopBodyTy,1,0,2,0,3,0);
+
+        //Build loop
+        MethodHandle iterations = MethodHandles.arrayLength(float[].class);
+        iterations = MethodHandles.permuteArguments(iterations,iterTy,0);
+
+        //Divide
+        try {
+            MethodHandle dscaler = MethodHandles.lookup().findStatic(Builder.class,"divide",MethodType.methodType(int.class,int.class,int.class));
+            dscaler = MethodHandles.collectArguments(dscaler,1,MethodHandles.constant(int.class,shape.length()));
+            // Scale iterations down by shape length
+            iterations = MethodHandles.filterReturnValue(iterations,dscaler);
+            MethodHandle mscaler = MethodHandles.lookup().findStatic(Builder.class,"multiply",MethodType.methodType(int.class,int.class,int.class));
+            mscaler = MethodHandles.collectArguments(mscaler,1,MethodHandles.constant(int.class,shape.length()));
+            // The loop counter is multiplied by shape.length() before it reaches the kernel as an index.
+            kernel = MethodHandles.collectArguments(kernel,0,mscaler);
+            return MethodHandles.countedLoop(iterations,null,kernel);
+        } catch (Throwable e) {
+            throw new Error(e);
+        }
+    }
+
+    /**
+     * Test combinator: builds a loop handle that folds a float array into a single {@code Long4} accumulator.
+     * The kernel described by {@code binder} takes the accumulator as its first input and the value
+     * produced by the ARY_LOAD op at the current index as its second input.
+     * Only {@code shape.length() == 8} is implemented.
+     *
+     * @param shape  shape used for both kernel inputs and to select the ops
+     * @param binder receives the two input values and a fresh builder, and returns the builder to compile
+     * @return the handle produced by {@code MethodHandles.loop}; its loop variables are
+     *         (int index, Long4 accumulator) and its loop argument is the float array
+     * @throws UnsupportedOperationException if {@code shape.length()} is not 8
+     * @throws Error wrapping any {@code Throwable} raised while the helper handles are looked up
+     */
+    public static <Z extends Shape> MethodHandle reduce(Z shape, Param2Binder<Float,Float,Float,Z,Z,Z> binder){
+
+        OpProvider ops = OpProviders.provider(Float.class,shape.length());
+
+        MethodHandle load = ops.getOp(Float.class,Ops.ARY_LOAD,shape.length());
+
+        Val<Float,Z> left  = new Val<>(shape,Float.class);
+        Val<Float,Z> right = new Val<>(shape,Float.class);
+        Builder b = new Builder();
+        b.addParams(left,right);
+        b = binder.apply(left,right,b); //(LongZ,LongZ)LongZ
+
+        MethodHandle kernel = b.build(ops);
+
+        //(LongZ,float[],int)LongZ
+        kernel = MethodHandles.collectArguments(kernel,1,load);
+
+        //(int,LongZ,float[])LongZ -- the (V...,A...) layout that the loop passes to step functions
+        kernel = MethodHandles.permuteArguments(kernel,
+                                                MethodType.methodType(kernel.type().returnType(),
+                                                                      int.class,
+                                                                      kernel.type().parameterArray()[0],
+                                                                      float[].class),
+                                                1,2,0);
+
+        if(shape.length() != 8){
+            // Report the actual length instead of a fixed number.
+            throw new UnsupportedOperationException(shape.length() + " not implemented yet.");
+        }
+
+        MethodHandle incr, pred;
+        try {
+            // i -> i + shape.length()
+            incr = MethodHandles.lookup().findStatic(Builder.class,"increment",MethodType.methodType(int.class,int.class,int.class));
+            incr = MethodHandles.collectArguments(incr,1,MethodHandles.constant(int.class,shape.length()));
+
+            // (i, ary) -> i < ary.length
+            pred = MethodHandles.lookup().findStatic(Builder.class,"intLT",MethodType.methodType(boolean.class,int.class,int.class));
+            pred = MethodHandles.collectArguments(pred,1,MethodHandles.arrayLength(float[].class));
+        } catch (Throwable e){
+            throw new Error(e);
+        }
+
+        // Widen every clause function to the common (int,Long4,float[]) parameter list.
+        incr = MethodHandles.permuteArguments(incr,MethodType.methodType(int.class,int.class,Long4.class,float[].class),0);
+        pred = MethodHandles.permuteArguments(pred,MethodType.methodType(boolean.class,int.class,Long4.class,float[].class),0,2);
+
+        // Finalizer shared by both clauses: returns the accumulator.
+        MethodHandle fin = MethodHandles.permuteArguments(MethodHandles.identity(Long4.class),MethodType.methodType(Long4.class,int.class,Long4.class,float[].class),1);
+
+        // The accumulator clause never ends the loop on its own.
+        MethodHandle truepred = MethodHandles.permuteArguments(MethodHandles.constant(boolean.class,true),MethodType.methodType(boolean.class,int.class,Long4.class,float[].class));
+
+        // Accumulator initializer; it takes only the loop argument (A...) = (float[]).
+        MethodHandle long4Zero = MethodHandles.dropArguments(MethodHandles.constant(Long4.class,Long4.ZERO),0,float[].class);
+
+        //Loop State: (int i ,Long4 acc) (counter,accumulator)
+        //Loop predicate (i < ary.length)
+        //Loop Steps i = i + 8; acc = acc + load_acc(float[i]);
+        //(V...,A...) = (int,Long4,float[])
+        //(V...) = (int,Long4)
+        //(A...) = (float[])
+        // NOTE(review): clauses run in declaration order within an iteration, so incr appears to
+        // advance i before kernel reads it, which would skip elements [0, shape.length()) -- confirm.
+        MethodHandle[][] clauses = {
+                new MethodHandle[]{null,incr,pred,fin}, //int i = 0 (default init); i+=len; continue while i < ary.length
+                new MethodHandle[]{long4Zero,kernel,truepred,fin} //Long4 = Long4.ZERO; kernel body
+        };
+
+        return MethodHandles.loop(clauses);
+    }
+
+    /** Integer division {@code num / den}; {@code zip} finds this method by name via
+     *  {@code findStatic}, so the name and the (int,int)int signature must stay as they are. */
+    private static int divide(int num, int den){ return num / den; }
+
+    /** Integer product {@code left * right}; {@code zip} finds this method by name via
+     *  {@code findStatic}, so the name and the (int,int)int signature must stay as they are. */
+    private static int multiply(int left, int right){ return left * right; }
+
+    /** Integer sum {@code left + right}; {@code reduce} finds this method by name via
+     *  {@code findStatic}, so the name and the (int,int)int signature must stay as they are. */
+    private static int increment(int left, int right){ return left + right; }
+
+    /** Strict comparison {@code left < right}; {@code reduce} finds this method by name via
+     *  {@code findStatic}, so the name and the (int,int)boolean signature must stay as they are. */
+    private static boolean intLT(int left, int right){ return left < right; }
+
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/stmt/For.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.stmt;
+
+import com.oracle.vector.el.visitor.StatementVisitor;
+
+/**
+ * Statement node without fields; {@code accept} forwards to {@code sv.visit(this)}.
+ */
+public class For implements Statement{
+    @Override public void accept(StatementVisitor sv){ sv.visit(this); }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/stmt/Statement.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.stmt;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.Val;
+import com.oracle.vector.el.expression.Expression;
+import com.oracle.vector.el.visitor.StatementVisitor;
+
+/** A node of the statement language, consumed through a {@link StatementVisitor}. */
+public interface Statement {
+    /** Creates an {@link Assignment} of {@code e} to {@code v}; the receiver itself is not used. */
+    default <E,S extends Shape> Statement assign(Val<E,S> v, Expression<E,S> e){ return new Assignment<E,S>(v, e); }
+
+    /** Entry point for a {@code StatementVisitor}. */ void accept(StatementVisitor sv);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/stmt/While.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.stmt;
+
+import com.oracle.vector.el.visitor.StatementVisitor;
+
+/**
+ * Statement node without fields; {@code accept} forwards to {@code sv.visit(this)}.
+ */
+public class While implements Statement{
+
+    @Override public void accept(StatementVisitor sv) { sv.visit(this); }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/visitor/ExpressionEvaluator.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.visitor;
+
+import com.oracle.vector.el.Shape;
+import com.oracle.vector.el.Val;
+import com.oracle.vector.el.expression.*;
+import com.oracle.vector.el.expression.bexp.VBinBExp;
+import com.oracle.vector.el.expression.IndexedVal;
+
+/** Visitor over expression nodes: one {@code visit} overload per node class, each yielding an {@code R}. */
+public interface ExpressionEvaluator<R> {
+    <E, S extends Shape> R visit(VAdd<E, S> v); // nodes generic in element type and shape
+    <E, S extends Shape> R visit(VSub<E, S> v);
+    <E, S extends Shape> R visit(VMul<E, S> v);
+    <E, S extends Shape> R visit(VDiv<E, S> v);
+    <E, S extends Shape> R visit(Val<E, S> v);
+    <E, S extends Shape> R visit(VConst<E, S> v);
+    <E, S extends Shape> R visit(VBroadcast<E, S> v);
+    <E, S extends Shape> R visit(IndexedVal<E, S> v);
+    <E, S extends Shape> R visit(ITE<E, S> v);
+    <E, S extends Shape> R visit(VMask<E, S> v);
+    <E, S extends Shape> R visit(VBinBExp<E, S> v);
+    <E> R visit(VSum<E> v); // nodes generic in element type only
+    <E> R visit(VProd<E> v);
+    R visit(FloatScalarOp v); // scalar nodes: no type parameters
+    R visit(DoubleScalarOp v);
+    R visit(IntScalarOp v);
+    R visit(LongScalarOp v);
+    R visit(FloatScalarBinOp v);
+    R visit(DoubleScalarBinOp v);
+    R visit(IntScalarBinOp v);
+    R visit(LongScalarBinOp v);
+    // No overload for IndexableVal: <E,S extends Shape> R visit(IndexableVal<E,S> v) is not declared.
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/el/visitor/StatementVisitor.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have
+ * questions.
+ */
+package com.oracle.vector.el.visitor;
+
+import com.oracle.vector.el.stmt.Assignment;
+import com.oracle.vector.el.stmt.For;
+import com.oracle.vector.el.stmt.While;
+
+public interface StatementVisitor {
+    // One visit overload per statement type imported from com.oracle.vector.el.stmt; all of them return void.
+    void visit(Assignment a);
+    void visit(For f);
+    void visit(While w);
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/panama/Expressions/src/main/java/com/oracle/vector/ops/OpProvider.java	Thu Dec 14 20:21:25 2017 -0800
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+