changeset 11508:bb39b1bce748

Merge.
author Christian Humer <christian.humer@gmail.com>
date Mon, 02 Sep 2013 15:27:20 +0200
parents d6a5ab791b0d fc3a6fb4cf3d
children dcaf879d4a7e
files
diffstat 49 files changed, 1489 insertions(+), 419 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.api.code/src/com/oracle/graal/api/code/Architecture.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.api.code/src/com/oracle/graal/api/code/Architecture.java	Mon Sep 02 15:27:20 2013 +0200
@@ -192,7 +192,7 @@
      *            null if no arithmetic needs to be performed on the vector
      * @return a supported vector size, but at most {@code maxLength}
      */
-    public int getSupportedVectorLength(Kind kind, int maxLength, ArithmeticOperation arithmetic) {
+    public int getSupportedVectorLength(Kind kind, int maxLength, Class<? extends ArithmeticOperation> arithmetic) {
         return 1;
     }
 
--- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Mon Sep 02 15:27:20 2013 +0200
@@ -272,59 +272,69 @@
     }
 
     public final void ld_global_b8(Register d, Register a, long immOff) {
-        emitString("ld.global.b8" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.b8" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_b16(Register d, Register a, long immOff) {
-        emitString("ld.global.b16" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.b16" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_b32(Register d, Register a, long immOff) {
-        emitString("ld.global.b32" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.b32" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_b64(Register d, Register a, long immOff) {
-        emitString("ld.global.b64" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.b64" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_u8(Register d, Register a, long immOff) {
-        emitString("ld.global.u8" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.u8" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_u16(Register d, Register a, long immOff) {
-        emitString("ld.global.u16" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.u16" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_u32(Register d, Register a, long immOff) {
-        emitString("ld.global.u32" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.u32" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_u64(Register d, Register a, long immOff) {
-        emitString("ld.global.u64" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.u64" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_s8(Register d, Register a, long immOff) {
-        emitString("ld.global.s8" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.s8" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_s16(Register d, Register a, long immOff) {
-        emitString("ld.global.s16" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.s16" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_s32(Register d, Register a, long immOff) {
-        emitString("ld.global.s32" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.s32" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_s64(Register d, Register a, long immOff) {
-        emitString("ld.global.s64" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.s64" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_f32(Register d, Register a, long immOff) {
-        emitString("ld.global.f32" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.f32" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_f64(Register d, Register a, long immOff) {
-        emitString("ld.global.f64" + " " + "%r" + d.encoding() + ", [%r" + a.encoding() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.f64" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+    }
+
+    // Load from state space to destination register
+    public final void ld_from_state_space(String s, Register d, Register a, long immOff) {
+        emitString("ld" + s + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+    }
+
+    // Load return address from return parameter which is in .param state space
+    public final void ld_return_address(String s, Register d, Register a, long immOff) {
+        emitString("ld.param." + s + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
     }
 
     public final void mov_b16(Register d, Register a) {
@@ -539,6 +549,18 @@
         emitString("or.b64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + b64 + ";" + "");
     }
 
+    public final void param_8_decl(Register d, boolean lastParam) {
+        emitString(".param" + " " + ".s8" + " " + d.toString() + (lastParam ? "" : ","));
+    }
+
+    public final void param_32_decl(Register d, boolean lastParam) {
+        emitString(".param" + " " + ".s32" + " " + d.toString() + (lastParam ? "" : ","));
+    }
+
+    public final void param_64_decl(Register d, boolean lastParam) {
+        emitString(".param" + " " + ".s64" + " " + d.toString() + (lastParam ? "" : ","));
+    }
+
     public final void popc_b32(Register d, Register a) {
         emitString("popc.b32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ";" + "");
     }
@@ -923,6 +945,8 @@
         emitString("shr.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u64 + ";" + "");
     }
 
+    // Store in global state space
+
     public final void st_global_b8(Register a, long immOff, Register b) {
         emitString("st.global.b8" + " " + "[%r" + a.encoding() + " + " + immOff + "], %r" + b.encoding() + ";" + "");
     }
@@ -979,6 +1003,37 @@
         emitString("st.global.f64" + " " + "[%r" + a.encoding() + " + " + immOff + "], %r" + b.encoding() + ";" + "");
     }
 
+    // Store return value
+    public final void st_global_return_value_s8(Register a, long immOff, Register b) {
+        emitString("st.global.s8" + " " + "[%r" + a.encoding() + " + " + immOff + "], %r" + b.encoding() + ";" + "");
+    }
+
+    public final void st_global_return_value_s32(Register a, long immOff, Register b) {
+        emitString("st.global.s32" + " " + "[%r" + a.encoding() + " + " + immOff + "], %r" + b.encoding() + ";" + "");
+    }
+
+    public final void st_global_return_value_s64(Register a, long immOff, Register b) {
+        emitString("st.global.s64" + " " + "[%r" + a.encoding() + " + " + immOff + "], %r" + b.encoding() + ";" + "");
+    }
+
+    public final void st_global_return_value_f32(Register a, long immOff, Register b) {
+        emitString("st.global.f32" + " " + "[%r" + a.encoding() + " + " + immOff + "], %r" + b.encoding() + ";" + "");
+    }
+
+    public final void st_global_return_value_f64(Register a, long immOff, Register b) {
+        emitString("st.global.f64" + " " + "[%r" + a.encoding() + " + " + immOff + "], %r" + b.encoding() + ";" + "");
+    }
+
+    public final void st_global_return_value_u32(Register a, long immOff, Register b) {
+        emitString("st.global.u32" + " " + "[%r" + a.encoding() + " + " + immOff + "], %r" + b.encoding() + ";" + "");
+    }
+
+    public final void st_global_return_value_u64(Register a, long immOff, Register b) {
+        emitString("st.global.u64" + " " + "[%r" + a.encoding() + " + " + immOff + "], %r" + b.encoding() + ";" + "");
+    }
+
+    // Subtract instruction
+
     public final void sub_f32(Register d, Register a, Register b) {
         emitString("sub.f32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/ArrayTest.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/ArrayTest.java	Mon Sep 02 15:27:20 2013 +0200
@@ -24,10 +24,11 @@
 
 import java.lang.reflect.Method;
 
-import org.junit.Test;
+import org.junit.*;
 
 public class ArrayTest extends PTXTestBase {
 
+    @Ignore
     @Test
     public void testArray() {
         compile("testArray1I");
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Mon Sep 02 15:27:20 2013 +0200
@@ -41,7 +41,7 @@
         invoke(compile("testConstI"));
     }
 
-    public int testConstI() {
+    public static int testConstI() {
         return 42;
     }
 
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/ControlTest.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/ControlTest.java	Mon Sep 02 15:27:20 2013 +0200
@@ -22,12 +22,13 @@
  */
 package com.oracle.graal.compiler.ptx.test;
 
-import org.junit.Test;
+import org.junit.*;
 
 import java.lang.reflect.Method;
 
 public class ControlTest extends PTXTestBase {
 
+    @Ignore
     @Test
     public void testControl() {
         compile("testSwitch1I");
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/FloatPTXTest.java	Mon Sep 02 15:27:20 2013 +0200
@@ -26,18 +26,38 @@
 
 import org.junit.*;
 
+import com.oracle.graal.api.code.CompilationResult;
 
 /* PTX ISA 3.1 - 8.7.3 Floating-Point Instructions */
 public class FloatPTXTest extends PTXTestBase {
 
     @Test
     public void testAdd() {
-        compile("testAdd2F");
-        compile("testAdd2D");
-        compile("testAddFConst");
-        compile("testAddConstF");
-        compile("testAddDConst");
-        compile("testAddConstD");
+        CompilationResult r = compile("testAdd2F");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testAdd2F FAILED");
+        }
+        r = compile("testAdd2D");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testAdd2D FAILED");
+        }
+
+        r = compile("testAddFConst");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testAddFConst FAILED");
+        }
+        r = compile("testAddConstF");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testConstF FAILED");
+        }
+        r = compile("testAddDConst");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testAddDConst FAILED");
+        }
+        r = compile("testAddConstD");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testConstD FAILED");
+        }
     }
 
     public static float testAdd2F(float a, float b) {
@@ -66,12 +86,35 @@
 
     @Test
     public void testSub() {
-        compile("testSub2F");
-        compile("testSub2D");
-        compile("testSubFConst");
-        compile("testSubConstF");
-        compile("testSubDConst");
-        compile("testSubConstD");
+        CompilationResult r = compile("testSub2F");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testSub2F FAILED");
+        }
+
+        r = compile("testSub2D");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testSub2D FAILED");
+        }
+
+        r = compile("testSubFConst");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testSubFConst FAILED");
+        }
+
+        r = compile("testSubConstF");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testSubConstF FAILED");
+        }
+
+        r = compile("testSubDConst");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testSubDconst FAILED");
+        }
+
+        r = compile("testSubConstD");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testConstD FAILED");
+        }
     }
 
     public static float testSub2F(float a, float b) {
@@ -100,12 +143,35 @@
 
     @Test
     public void testMul() {
-        compile("testMul2F");
-        compile("testMul2D");
-        compile("testMulFConst");
-        compile("testMulConstF");
-        compile("testMulDConst");
-        compile("testMulConstD");
+        CompilationResult r = compile("testMul2F");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testAdd2F FAILED");
+        }
+
+        r = compile("testMul2D");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testAdd2F FAILED");
+        }
+
+        r = compile("testMulFConst");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testAdd2F FAILED");
+        }
+
+        r = compile("testMulConstF");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testAdd2F FAILED");
+        }
+
+        r = compile("testMulDConst");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testAdd2F FAILED");
+        }
+
+        r = compile("testMulConstD");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testAdd2F FAILED");
+        }
     }
 
     public static float testMul2F(float a, float b) {
@@ -134,12 +200,35 @@
 
     @Test
     public void testDiv() {
-        compile("testDiv2F");
-        compile("testDiv2D");
-        compile("testDivFConst");
-        compile("testDivConstF");
-        compile("testDivDConst");
-        compile("testDivConstD");
+        CompilationResult r = compile("testDiv2F");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testDiv2F FAILED");
+        }
+
+        r = compile("testDiv2D");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testDiv2D FAILED");
+        }
+
+        r = compile("testDivFConst");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testDivFConst FAILED");
+        }
+
+        r = compile("testDivConstF");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testDivConstF FAILED");
+        }
+
+        r = compile("testDivDConst");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testDivDConst FAILED");
+        }
+
+        r = compile("testDivConstD");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testDivConstD FAILED");
+        }
     }
 
     public static float testDiv2F(float a, float b) {
@@ -168,8 +257,15 @@
 
     @Test
     public void testNeg() {
-        compile("testNeg2F");
-        compile("testNeg2D");
+        CompilationResult r = compile("testNeg2F");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testNeg2F FAILED");
+        }
+
+        r = compile("testNeg2D");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testNeg2D FAILED");
+        }
     }
 
     public static float testNeg2F(float a) {
@@ -195,14 +291,38 @@
         return a % b;
     }
 
+    @Ignore
     @Test
     public void testFloatConversion() {
-        compile("testF2I");
-        compile("testF2L");
-        compile("testF2D");
-        compile("testD2I");
-        compile("testD2L");
-        compile("testD2F");
+        CompilationResult r = compile("testF2I");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of tesF2I FAILED");
+        }
+
+        r = compile("testF2L");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testF2L FAILED");
+        }
+
+        r = compile("testF2D");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testF2D FAILED");
+        }
+
+        r = compile("testD2I");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testD2I FAILED");
+        }
+
+        r = compile("testD2L");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testD2L FAILED");
+        }
+
+        r = compile("testD2F");
+        if (r.getTargetCode() == null) {
+            printReport("Compilation of testD2F FAILED");
+        }
     }
 
     public static int testF2I(float a) {
@@ -229,6 +349,13 @@
         return (float) a;
     }
 
+    public static void printReport(String message) {
+        // CheckStyle: stop system..print check
+        System.out.println(message);
+        // CheckStyle: resume system..print check
+
+    }
+
     public static void main(String[] args) {
         FloatPTXTest test = new FloatPTXTest();
         for (Method m : FloatPTXTest.class.getMethods()) {
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java	Mon Sep 02 15:27:20 2013 +0200
@@ -31,11 +31,44 @@
 
     @Test
     public void testAdd() {
-        invoke(compile("testAdd2I"), 8, 4);
-        invoke(compile("testAdd2L"), 12, 6);
-        invoke(compile("testAdd2B"), 6, 4);
-        invoke(compile("testAddIConst"), 5);
-        invoke(compile("testAddConstI"), 7);
+
+        Long r2 = (Long) invoke(compile("testAdd2L"), (long) 12, (long) 6);
+        if (r2 == null) {
+            printReport("testAdd2L FAILED");
+        } else if (r2.longValue() == 18) {
+            printReport("testAdd2L PASSED");
+        } else {
+            printReport("testAdd2L FAILED");
+        }
+
+        //invoke(compile("testAdd2B"), (byte) 6, (byte) 4);
+
+        Integer r4 = (Integer) invoke(compile("testAddIConst"), 5);
+        if (r4 == null) {
+            printReport("testAddIConst FAILED");
+        } else if (r4.intValue() == 37) {
+            printReport("testAddIConst PASSED");
+        } else {
+            printReport("testAddIConst FAILED");
+        }
+
+        r4 = (Integer) invoke(compile("testAddConstI"), 7);
+        if (r4 == null) {
+            printReport("testAddConstI FAILED");
+        } else if (r4.intValue() == 39) {
+            printReport("testAddConstI PASSED");
+        } else {
+            printReport("testAddConstI FAILED");
+        }
+
+        r4 = (Integer) invoke(compile("testAdd2I"), 18, 24);
+        if (r4 == null) {
+            printReport("testAdd2I FAILED");
+        } else if (r4.intValue() == 42) {
+            printReport("testAdd2I PASSED");
+        } else {
+            printReport("testAdd2I FAILED");
+        }
     }
 
     public static int testAdd2I(int a, int b) {
@@ -60,10 +93,42 @@
 
     @Test
     public void testSub() {
-        invoke(compile("testSub2I"), 8, 4);
-        invoke(compile("testSub2L"), 12, 6);
-        invoke(compile("testSubIConst"), 35);
-        invoke(compile("testSubConstI"), 12);
+        Long r2 = (Long) invoke(compile("testSub2L"), (long) 12, (long) 6);
+        if (r2 == null) {
+            printReport("testSub2I FAILED (null return value)");
+        } else if (r2.longValue() == 6) {
+            printReport("testSub2I PASSED");
+        } else {
+            printReport("testSub2I FAILED");
+        }
+
+        Integer r1 = (Integer) invoke(compile("testSub2I"), 18, 4);
+
+        if (r1 == null) {
+            printReport("testSub2I FAILED");
+        } else if (r1.intValue() == 14) {
+            printReport("testSub2I PASSED");
+        } else {
+            printReport("testSub2I FAILED");
+        }
+
+        r1 = (Integer) invoke(compile("testSubIConst"), 35);
+        if (r1 == null) {
+            printReport("testSubIConst FAILED");
+        } else if (r1.intValue() == 3) {
+            printReport("testSubIConst PASSED");
+        } else {
+            printReport("testSubIConst FAILED");
+        }
+
+        r1 = (Integer) invoke(compile("testSubConstI"), 12);
+        if (r1 == null) {
+            printReport("testSubConstI FAILED");
+        } else if (r1.intValue() == 20) {
+            printReport("testSubConstI PASSED");
+        } else {
+            printReport("testSubConstI FAILED");
+        }
     }
 
     public static int testSub2I(int a, int b) {
@@ -85,7 +150,7 @@
     @Test
     public void testMul() {
         invoke(compile("testMul2I"), 8, 4);
-        invoke(compile("testMul2L"), 12, 6);
+        invoke(compile("testMul2L"), (long) 12, (long) 6);
         invoke(compile("testMulIConst"), 4);
         invoke(compile("testMulConstI"), 5);
     }
@@ -105,11 +170,10 @@
     public static int testMulConstI(int a) {
         return 32 * a;
     }
-
     @Test
     public void testDiv() {
         invoke(compile("testDiv2I"), 8, 4);
-        invoke(compile("testDiv2L"), 12, 6);
+        invoke(compile("testDiv2L"), (long) 12, (long) 6);
         invoke(compile("testDivIConst"), 64);
         invoke(compile("testDivConstI"), 8);
     }
@@ -133,7 +197,7 @@
     @Test
     public void testRem() {
         invoke(compile("testRem2I"), 8, 4);
-        invoke(compile("testRem2L"), 12, 6);
+        invoke(compile("testRem2L"), (long) 12, (long) 6);
     }
 
     public static int testRem2I(int a, int b) {
@@ -147,11 +211,11 @@
     @Test
     public void testIntConversion() {
         invoke(compile("testI2L"), 8);
-        invoke(compile("testL2I"), 12L);
-        invoke(compile("testI2C"), 65);
-        invoke(compile("testI2B"), 9);
-        invoke(compile("testI2F"), 17);
-        invoke(compile("testI2D"), 22);
+        invoke(compile("testL2I"), (long) 12);
+        // invoke(compile("testI2C"), 65);
+        // invoke(compile("testI2B"), 9);
+        // invoke(compile("testI2F"), 17);
+        // invoke(compile("testI2D"), 22);
     }
 
     public static long testI2L(int a) {
@@ -178,6 +242,13 @@
         return (int) a;
     }
 
+    public static void printReport(String message) {
+        // CheckStyle: stop system..print check
+        System.out.println(message);
+        // CheckStyle: resume system..print check
+
+    }
+
     public static void main(String[] args) {
         IntegerPTXTest test = new IntegerPTXTest();
         for (Method m : IntegerPTXTest.class.getMethods()) {
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java	Mon Sep 02 15:27:20 2013 +0200
@@ -82,11 +82,11 @@
         return sg;
     }
 
-    protected void invoke(CompilationResult result, Object... args) {
+    protected Object invoke(CompilationResult result, Object... args) {
         try {
             if (((ExternalCompilationResult) result).getEntryPoint() == 0) {
                 Debug.dump(result, "[CUDA] *** Null entry point - Not launching kernel");
-                return;
+                return null;
             }
 
             /* Check if the method compiled is static */
@@ -95,9 +95,11 @@
             Object[] executeArgs = argsWithReceiver((isStatic ? null : this), args);
             HotSpotRuntime hsr = (HotSpotRuntime) runtime;
             InstalledCode installedCode = hsr.addExternalMethod(sg.method(), result, sg);
-            installedCode.executeVarargs(executeArgs);
+            Object r = installedCode.executeVarargs(executeArgs);
+            return r;
         } catch (Throwable th) {
             th.printStackTrace();
+            return null;
         }
     }
 }
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java	Mon Sep 02 15:27:20 2013 +0200
@@ -22,6 +22,10 @@
  */
 package com.oracle.graal.compiler.ptx;
 
+import static com.oracle.graal.api.code.ValueUtil.*;
+
+import java.util.*;
+
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.asm.*;
@@ -32,6 +36,11 @@
 import com.oracle.graal.lir.asm.*;
 import com.oracle.graal.lir.ptx.*;
 import com.oracle.graal.nodes.*;
+import com.oracle.graal.nodes.cfg.Block;
+import com.oracle.graal.lir.LIRInstruction.OperandFlag;
+import com.oracle.graal.lir.LIRInstruction.OperandMode;
+import com.oracle.graal.lir.LIRInstruction.ValueProcedure;
+import com.oracle.graal.graph.GraalInternalError;
 
 /**
  * PTX specific backend.
@@ -84,67 +93,121 @@
         return tasm;
     }
 
-    @Override
-    public void emitCode(TargetMethodAssembler tasm, LIRGenerator lirGen, ResolvedJavaMethod codeCacheOwner) {
-        // Emit the prologue
+    private static void emitKernelEntry(TargetMethodAssembler tasm, LIRGenerator lirGen,
+                                        ResolvedJavaMethod codeCacheOwner) {
+        // Emit PTX kernel entry text based on PTXParameterOp
+        // instructions in the start block.  Remove the instructions
+        // once kernel entry text and directives are emitted to
+        // facilitate seemless PTX code generation subsequently.
         assert codeCacheOwner != null : lirGen.getGraph() + " is not associated with a method";
         final String name = codeCacheOwner.getName();
         Buffer codeBuffer = tasm.asm.codeBuffer;
+
+        // Emit initial boiler-plate directives.
         codeBuffer.emitString(".version 1.4");
         codeBuffer.emitString(".target sm_10");
         codeBuffer.emitString0(".entry " + name + " (");
         codeBuffer.emitString("");
 
-        Signature signature = codeCacheOwner.getSignature();
-        int paramCount = signature.getParameterCount(false);
-        // TODO - Revisit this.
-        // Bit-size of registers to be declared and used by the kernel.
-        int regSize = 32;
-        for (int i = 0; i < paramCount; i++) {
-            String param;
-            // No unsigned types in Java. So using .s specifier
-            switch (signature.getParameterKind(i)) {
-                case Boolean:
-                case Byte:
-                    param = ".param .s8 param" + i;
-                    regSize = 8;
-                    break;
-                case Char:
-                case Short:
-                    param = ".param .s16 param" + i;
-                    regSize = 16;
-                    break;
-                case Int:
-                    param = ".param .s32 param" + i;
-                    regSize = 32;
-                    break;
-                case Long:
-                case Float:
-                case Double:
-                case Void:
-                    param = ".param .s64 param" + i;
-                    regSize = 32;
-                    break;
-                default:
-                    // Not sure but specify 64-bit specifier??
-                    param = ".param .s64 param" + i;
-                    break;
+        // Get the start block
+        Block startBlock = lirGen.lir.cfg.getStartBlock();
+        // Keep a list of ParameterOp instructions to delete from the
+        // list of instructions in the block.
+        ArrayList<LIRInstruction> deleteOps = new ArrayList<>();
+
+        // Emit .param arguments to kernel entry based on ParameterOp
+        // instruction.
+        for (LIRInstruction op : lirGen.lir.lir(startBlock)) {
+            if (op instanceof PTXParameterOp) {
+                op.emitCode(tasm);
+                deleteOps.add(op);
             }
-            if (i != (paramCount - 1)) {
-                param += ",";
-            }
-            codeBuffer.emitString(param);
         }
 
+        // Delete ParameterOp instructions.
+        for (LIRInstruction op : deleteOps) {
+            lirGen.lir.lir(startBlock).remove(op);
+        }
+
+        // Start emiting body of the PTX kernel.
         codeBuffer.emitString0(") {");
         codeBuffer.emitString("");
 
-        // XXX For now declare one predicate and all registers
-        codeBuffer.emitString("  .reg .pred %p,%q;");
-        codeBuffer.emitString("  .reg .s" + regSize + " %r<16>;");
+        codeBuffer.emitString(".reg .u64" + " %rax;");
+    }
 
+    // Emit .reg space declarations
+    private static void emitRegisterDecl(TargetMethodAssembler tasm, LIRGenerator lirGen,
+                                         ResolvedJavaMethod codeCacheOwner) {
+        assert codeCacheOwner != null : lirGen.getGraph() + " is not associated with a method";
+        Buffer codeBuffer = tasm.asm.codeBuffer;
+
+        final SortedSet<Integer> signed32 = new TreeSet<>();
+        final SortedSet<Integer> signed64 = new TreeSet<>();
+
+        ValueProcedure trackRegisterKind = new ValueProcedure() {
+
+            @Override
+            public Value doValue(Value value, OperandMode mode, EnumSet<OperandFlag> flags) {
+                if (isRegister(value)) {
+                    RegisterValue regVal = (RegisterValue) value;
+                    Kind regKind = regVal.getKind();
+                    switch (regKind) {
+                       case Int:
+                           signed32.add(regVal.getRegister().encoding());
+                           break;
+                       case Long:
+                           signed64.add(regVal.getRegister().encoding());
+                           break;
+                       default :
+                           throw GraalInternalError.shouldNotReachHere("unhandled register type "  + value.toString());
+                    }
+                }
+                return value;
+            }
+        };
+
+        for (Block b : lirGen.lir.codeEmittingOrder()) {
+            for (LIRInstruction op : lirGen.lir.lir(b)) {
+                op.forEachOutput(trackRegisterKind);
+            }
+        }
+
+        for (Integer i : signed32) {
+            codeBuffer.emitString("  .reg .s32 %r" + i.intValue() + ";");
+        }
+        for (Integer i : signed64) {
+            codeBuffer.emitString(".reg .s64 %r" + i.intValue() + ";");
+        }
+    }
+
+    @Override
+    public void emitCode(TargetMethodAssembler tasm, LIRGenerator lirGen, ResolvedJavaMethod codeCacheOwner) {
+        assert codeCacheOwner != null : lirGen.getGraph() + " is not associated with a method";
+        Buffer codeBuffer = tasm.asm.codeBuffer;
+        // Emit the prologue
+        emitKernelEntry(tasm, lirGen, codeCacheOwner);
+
+        // Emit register declarations
+        try {
+            emitRegisterDecl(tasm, lirGen, codeCacheOwner);
+        } catch (GraalInternalError e) {
+            // TODO : Better error handling needs to be done once
+            //        all types of parameters are handled.
+            codeBuffer.setPosition(0);
+            codeBuffer.close(false);
+            return;
+        }
         // Emit code for the LIR
-        lirGen.lir.emitCode(tasm);
+        try {
+            lirGen.lir.emitCode(tasm);
+        } catch (GraalInternalError e) {
+            // TODO : Better error handling needs to be done once
+            //        all types of parameters are handled.
+            codeBuffer.setPosition(0);
+            codeBuffer.close(false);
+            return;
+        }
 
         // Emit the epilogue
         codeBuffer.emitString0("}");
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java	Mon Sep 02 15:27:20 2013 +0200
@@ -47,12 +47,16 @@
 import com.oracle.graal.lir.ptx.PTXControlFlow.CondMoveOp;
 import com.oracle.graal.lir.ptx.PTXControlFlow.FloatCondMoveOp;
 import com.oracle.graal.lir.ptx.PTXControlFlow.ReturnOp;
+import com.oracle.graal.lir.ptx.PTXControlFlow.ReturnNoValOp;
 import com.oracle.graal.lir.ptx.PTXControlFlow.SequentialSwitchOp;
 import com.oracle.graal.lir.ptx.PTXControlFlow.TableSwitchOp;
-import com.oracle.graal.lir.ptx.PTXMove.LoadOp;
 import com.oracle.graal.lir.ptx.PTXMove.MoveFromRegOp;
 import com.oracle.graal.lir.ptx.PTXMove.MoveToRegOp;
-import com.oracle.graal.lir.ptx.PTXMove.StoreOp;
+import com.oracle.graal.lir.ptx.PTXMemOp.LoadOp;
+import com.oracle.graal.lir.ptx.PTXMemOp.StoreOp;
+import com.oracle.graal.lir.ptx.PTXMemOp.LoadParamOp;
+import com.oracle.graal.lir.ptx.PTXMemOp.LoadReturnAddrOp;
+import com.oracle.graal.lir.ptx.PTXMemOp.StoreReturnValOp;
 import com.oracle.graal.nodes.*;
 import com.oracle.graal.nodes.calc.*;
 import com.oracle.graal.nodes.java.*;
@@ -96,6 +100,42 @@
         }
     }
 
+    protected static AllocatableValue toParamKind(AllocatableValue value) {
+        if (value.getKind().getStackKind() != value.getKind()) {
+            // We only have stack-kinds in the LIR, so convert the operand kind for values from the
+            // calling convention.
+            if (isRegister(value)) {
+                return asRegister(value).asValue(value.getKind().getStackKind());
+            } else if (isStackSlot(value)) {
+                return StackSlot.get(value.getKind().getStackKind(), asStackSlot(value).getRawOffset(), asStackSlot(value).getRawAddFrameSize());
+            } else {
+                throw GraalInternalError.shouldNotReachHere();
+            }
+        }
+        return value;
+    }
+
+    @Override
+    public void emitPrologue() {
+        // Need to emit .param directives based on incoming arguments and return value
+        CallingConvention incomingArguments = cc;
+        int argCount = incomingArguments.getArgumentCount();
+        // Additional argument for return value.
+        Value[] params = new Value[argCount + 1];
+        for (int i = 0; i < argCount; i++) {
+            params[i] = toParamKind(incomingArguments.getArgument(i));
+        }
+        // Add the return value as the last parameter.
+        params[argCount] =  incomingArguments.getReturn();
+
+        append(new PTXParameterOp(params));
+        for (LocalNode local : graph.getNodes(LocalNode.class)) {
+            Value param = params[local.index()];
+            assert param.getKind() == local.kind().getStackKind();
+            setResult(local, emitLoadParam(param.getKind(), param, null));
+        }
+    }
+
     @Override
     public Variable emitMove(Value input) {
         Variable result = newVariable(input.getKind());
@@ -250,7 +290,8 @@
     /**
      * This method emits the compare instruction, and may reorder the operands. It returns true if
      * it did so.
-     * 
+     *
+     *
      * @param a the left operand of the comparison
      * @param b the right operand of the comparison
      * @return true if the left and right operands were switched, false otherwise
@@ -705,6 +746,10 @@
         append(new ReturnOp(input));
     }
 
+    private void emitReturnNoVal() {
+        append(new ReturnNoValOp());
+    }
+
     @Override
     protected void emitSequentialSwitch(Constant[] keyConstants, LabelRef[] keyTargets, LabelRef defaultTarget, Value key) {
         // Making a copy of the switch value is necessary because jump table destroys the input
@@ -761,4 +806,38 @@
     public void visitInfopointNode(InfopointNode i) {
         throw new InternalError("NYI");
     }
+
+    public Variable emitLoadParam(Kind kind, Value address, DeoptimizingNode deopting) {
+        PTXAddressValue loadAddress = asAddress(address);
+        Variable result = newVariable(kind);
+        append(new LoadParamOp(kind, result, loadAddress, deopting != null ? state(deopting) : null));
+        return result;
+    }
+
+    public Variable emitLoadReturnAddress(Kind kind, Value address, DeoptimizingNode deopting) {
+        PTXAddressValue loadAddress = asAddress(address);
+        Variable result = newVariable(kind);
+        append(new LoadReturnAddrOp(kind, result, loadAddress, deopting != null ? state(deopting) : null));
+        return result;
+    }
+
+    public void emitStoreReturnValue(Kind kind, Value address, Value inputVal, DeoptimizingNode deopting) {
+        PTXAddressValue storeAddress = asAddress(address);
+        Variable input = load(inputVal);
+        append(new StoreReturnValOp(kind, storeAddress, input, deopting != null ? state(deopting) : null));
+    }
+
+
+    @Override
+    public void visitReturn(ReturnNode x) {
+        AllocatableValue operand = Value.ILLEGAL;
+        if (x.result() != null) {
+            operand = resultOperandFor(x.result().kind());
+            // Load the global memory address from return parameter
+            Variable loadVar = emitLoadReturnAddress(operand.getKind(), operand, null);
+            // Store result in global memory whose location is loadVar
+            emitStoreReturnValue(operand.getKind(), loadVar, operand(x.result()), null);
+        }
+        emitReturnNoVal();
+    }
 }
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java	Mon Sep 02 15:27:20 2013 +0200
@@ -50,7 +50,7 @@
         ExternalCompilationResult graalCompile = (ExternalCompilationResult) super.finishTargetMethod(graph);
 
         try {
-            if (validDevice) {
+            if ((validDevice) && (graalCompile.getTargetCode() != null)) {
                 long kernel = toGPU.generateKernel(graalCompile.getTargetCode(), method.getName());
                 graalCompile.setEntryPoint(kernel);
             }
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalCompiler.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalCompiler.java	Mon Sep 02 15:27:20 2013 +0200
@@ -22,6 +22,7 @@
  */
 package com.oracle.graal.compiler;
 
+import static com.oracle.graal.compiler.MethodFilter.*;
 import static com.oracle.graal.phases.GraalOptions.*;
 
 import java.util.*;
@@ -34,6 +35,7 @@
 import com.oracle.graal.compiler.gen.*;
 import com.oracle.graal.compiler.target.*;
 import com.oracle.graal.debug.*;
+import com.oracle.graal.debug.internal.*;
 import com.oracle.graal.lir.*;
 import com.oracle.graal.lir.asm.*;
 import com.oracle.graal.nodes.*;
@@ -52,6 +54,42 @@
  */
 public class GraalCompiler {
 
+    private static final DebugTimer FrontEnd = Debug.timer("FrontEnd");
+    private static final DebugTimer BackEnd = Debug.timer("BackEnd");
+
+    private static final MethodFilter[] positiveIntrinsificationFilter;
+    private static final MethodFilter[] negativeIntrinsificationFilter;
+    static {
+        if (GraalDebugConfig.IntrinsificationsDisabled.getValue() != null) {
+            negativeIntrinsificationFilter = parse(GraalDebugConfig.IntrinsificationsDisabled.getValue());
+        } else {
+            negativeIntrinsificationFilter = null;
+        }
+
+        if (GraalDebugConfig.IntrinsificationsEnabled.getValue() != null) {
+            positiveIntrinsificationFilter = parse(GraalDebugConfig.IntrinsificationsEnabled.getValue());
+        } else if (negativeIntrinsificationFilter != null) {
+            positiveIntrinsificationFilter = new MethodFilter[0];
+        } else {
+            positiveIntrinsificationFilter = null;
+        }
+    }
+
+    /**
+     * Determines if a given method should be intrinsified based on the values of
+     * {@link GraalDebugConfig#IntrinsificationsEnabled} and
+     * {@link GraalDebugConfig#IntrinsificationsDisabled}.
+     */
+    public static boolean shouldIntrinsify(JavaMethod method) {
+        if (positiveIntrinsificationFilter == null) {
+            return true;
+        }
+        if (positiveIntrinsificationFilter.length == 0 || matches(positiveIntrinsificationFilter, method)) {
+            return negativeIntrinsificationFilter == null || !matches(negativeIntrinsificationFilter, method);
+        }
+        return false;
+    }
+
     /**
      * Requests compilation of a given graph.
      * 
@@ -62,13 +100,9 @@
      *            argument can be null.
      * @return the result of the compilation
      */
-    public static CompilationResult compileGraph(final StructuredGraph graph, final CallingConvention cc,
-                                                 final ResolvedJavaMethod installedCodeOwner, final GraalCodeCacheProvider runtime,
-                                                 final Replacements replacements, final Backend backend,
-                                                 final TargetDescription target, final GraphCache cache,
-                                                 final PhasePlan plan, final OptimisticOptimizations optimisticOpts,
-                                                 final SpeculationLog speculationLog, final Suites suites,
-                                                 final CompilationResult compilationResult) {
+    public static CompilationResult compileGraph(final StructuredGraph graph, final CallingConvention cc, final ResolvedJavaMethod installedCodeOwner, final GraalCodeCacheProvider runtime,
+                    final Replacements replacements, final Backend backend, final TargetDescription target, final GraphCache cache, final PhasePlan plan, final OptimisticOptimizations optimisticOpts,
+                    final SpeculationLog speculationLog, final Suites suites, final CompilationResult compilationResult) {
         Debug.scope("GraalCompiler", new Object[]{graph, runtime}, new Runnable() {
 
             public void run() {
@@ -76,22 +110,26 @@
                 final LIR lir = Debug.scope("FrontEnd", new Callable<LIR>() {
 
                     public LIR call() {
-                        return emitHIR(runtime, target, graph, replacements, assumptions, cache, plan, optimisticOpts, speculationLog, suites);
+                        try (TimerCloseable a = FrontEnd.start()) {
+                            return emitHIR(runtime, target, graph, replacements, assumptions, cache, plan, optimisticOpts, speculationLog, suites);
+                        }
                     }
                 });
-                final LIRGenerator lirGen = Debug.scope("BackEnd", lir, new Callable<LIRGenerator>() {
+                try (TimerCloseable a = BackEnd.start()) {
+                    final LIRGenerator lirGen = Debug.scope("BackEnd", lir, new Callable<LIRGenerator>() {
 
-                    public LIRGenerator call() {
-                        return emitLIR(backend, target, lir, graph, cc);
-                    }
-                });
-                Debug.scope("CodeGen", lirGen, new Runnable() {
+                        public LIRGenerator call() {
+                            return emitLIR(backend, target, lir, graph, cc);
+                        }
+                    });
+                    Debug.scope("CodeGen", lirGen, new Runnable() {
 
-                    public void run() {
-                        emitCode(backend, getLeafGraphIdArray(graph), assumptions, lirGen, compilationResult, installedCodeOwner);
-                    }
+                        public void run() {
+                            emitCode(backend, getLeafGraphIdArray(graph), assumptions, lirGen, compilationResult, installedCodeOwner);
+                        }
 
-                });
+                    });
+                }
             }
         });
 
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalDebugConfig.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalDebugConfig.java	Mon Sep 02 15:27:20 2013 +0200
@@ -38,15 +38,15 @@
     // @formatter:off
     @Option(help = "Enable scope-based debugging", name = "Debug")
     public static final OptionValue<Boolean> DebugEnabled = new OptionValue<>(true);
-    @Option(help = "Scopes to be dumped")
+    @Option(help = "Pattern for scope(s) to in which dumping is enabled (see DebugFilter and Debug.dump)")
     public static final OptionValue<String> Dump = new OptionValue<>(null);
-    @Option(help = "Scopes to be metered")
+    @Option(help = "Pattern for scope(s) to in which metering is enabled (see DebugFilter and Debug.metric)")
     public static final OptionValue<String> Meter = new OptionValue<>(null);
-    @Option(help = "Scopes to be timed")
+    @Option(help = "Pattern for scope(s) to in which timing is enabled (see DebugFilter and Debug.timer)")
     public static final OptionValue<String> Time = new OptionValue<>(null);
-    @Option(help = "Scopes to be logged")
+    @Option(help = "Pattern for scope(s) to in which logging is enabled (see DebugFilter and Debug.log)")
     public static final OptionValue<String> Log = new OptionValue<>(null);
-    @Option(help = "Filters debug scope output by method name/pattern")
+    @Option(help = "Pattern for filtering debug scope output based on method context (see MethodFilter)")
     public static final OptionValue<String> MethodFilter = new OptionValue<>(null);
     @Option(help = "How to print metric and timing values:%n" +
                    "Name - aggregate by unqualified name%n" +
@@ -66,6 +66,19 @@
             return enabled;
         }
     };
+    /**
+     * @see MethodFilter
+     */
+    @Option(help = "Pattern for method(s) to which intrinsification (if available) will be applied. " +
+                   "By default, all available intrinsifications are applied except for methods matched " +
+                   "by IntrinsificationsDisabled. See MethodFilter class for pattern syntax.")
+    public static final OptionValue<String> IntrinsificationsEnabled = new OptionValue<>(null);
+    /**
+     * @see MethodFilter
+     */
+    @Option(help = "Pattern for method(s) to which intrinsification will not be applied. " +
+                   "See MethodFilter class for pattern syntax.")
+    public static final OptionValue<String> IntrinsificationsDisabled = new OptionValue<>("Object.clone");
     // @formatter:on
 
     private final DebugFilter logFilter;
@@ -85,11 +98,7 @@
         if (methodFilter == null || methodFilter.isEmpty()) {
             this.methodFilter = null;
         } else {
-            String[] filters = methodFilter.split(",");
-            this.methodFilter = new MethodFilter[filters.length];
-            for (int i = 0; i < filters.length; i++) {
-                this.methodFilter[i] = new MethodFilter(filters[i]);
-            }
+            this.methodFilter = com.oracle.graal.compiler.MethodFilter.parse(methodFilter);
         }
 
         // Report the filters that have been configured so the user can verify it's what they expect
@@ -156,10 +165,8 @@
                 } else if (methodFilter != null) {
                     JavaMethod method = asJavaMethod(o);
                     if (method != null) {
-                        for (MethodFilter filter : methodFilter) {
-                            if (filter.matches(method)) {
-                                return true;
-                            }
+                        if (com.oracle.graal.compiler.MethodFilter.matches(methodFilter, method)) {
+                            return true;
                         }
                     }
                 }
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/MethodFilter.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/MethodFilter.java	Mon Sep 02 15:27:20 2013 +0200
@@ -88,6 +88,31 @@
     private final Pattern methodName;
     private final Pattern[] signature;
 
+    /**
+     * Parses a string containing list of comma separated filter patterns into an array of
+     * {@link MethodFilter}s.
+     */
+    public static MethodFilter[] parse(String commaSeparatedPatterns) {
+        String[] filters = commaSeparatedPatterns.split(",");
+        MethodFilter[] methodFilters = new MethodFilter[filters.length];
+        for (int i = 0; i < filters.length; i++) {
+            methodFilters[i] = new MethodFilter(filters[i]);
+        }
+        return methodFilters;
+    }
+
+    /**
+     * Determines if a given method is matched by a given array of filters.
+     */
+    public static boolean matches(MethodFilter[] filters, JavaMethod method) {
+        for (MethodFilter filter : filters) {
+            if (filter.matches(method)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     public MethodFilter(String sourcePattern) {
         String pattern = sourcePattern.trim();
 
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/gen/LIRGenerator.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/gen/LIRGenerator.java	Mon Sep 02 15:27:20 2013 +0200
@@ -452,12 +452,16 @@
         append(new JumpOp(getLIRBlock(merge)));
     }
 
+    protected PlatformKind getPhiKind(PhiNode phi) {
+        return phi.kind();
+    }
+
     private Value operandForPhi(PhiNode phi) {
         assert phi.type() == PhiType.Value : "wrong phi type: " + phi;
         Value result = operand(phi);
         if (result == null) {
             // allocate a variable for this phi
-            Variable newOperand = newVariable(phi.kind());
+            Variable newOperand = newVariable(getPhiKind(phi));
             setResult(phi, newOperand);
             return newOperand;
         } else {
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/gen/PhiResolver.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/gen/PhiResolver.java	Mon Sep 02 15:27:20 2013 +0200
@@ -230,7 +230,7 @@
 
     private void moveToTemp(Value src) {
         assert isIllegal(temp);
-        temp = gen.newVariable(src.getKind());
+        temp = gen.newVariable(src.getPlatformKind());
         emitMove(temp, src);
     }
 
--- a/graal/com.oracle.graal.graph/src/com/oracle/graal/graph/Node.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.graph/src/com/oracle/graal/graph/Node.java	Mon Sep 02 15:27:20 2013 +0200
@@ -26,7 +26,8 @@
 import java.util.*;
 
 import com.oracle.graal.graph.Graph.NodeChangedListener;
-import com.oracle.graal.graph.NodeClass.*;
+import com.oracle.graal.graph.NodeClass.NodeClassIterator;
+import com.oracle.graal.graph.NodeClass.Position;
 import com.oracle.graal.graph.iterators.*;
 
 /**
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/CompilationTask.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/CompilationTask.java	Mon Sep 02 15:27:20 2013 +0200
@@ -113,6 +113,8 @@
      */
     public static final DebugTimer CompilationTime = Debug.timer("CompilationTime");
 
+    public static final DebugTimer CodeInstallationTime = Debug.timer("CodeInstallation");
+
     public void runCompilation() {
         /*
          * no code must be outside this try/finally because it could happen otherwise that
@@ -164,7 +166,9 @@
                 }
             }
 
-            installMethod(result);
+            try (TimerCloseable b = CodeInstallationTime.start()) {
+                installMethod(result);
+            }
             stats.finish(method);
         } catch (BailoutException bailout) {
             Debug.metric("Bailouts").increment();
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotRuntime.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotRuntime.java	Mon Sep 02 15:27:20 2013 +0200
@@ -313,31 +313,15 @@
         linkForeignCall(r, G1WBPOSTCALL, c.writeBarrierPostAddress, PREPEND_THREAD, LEAF, REEXECUTABLE, NO_LOCATIONS);
         linkForeignCall(r, VALIDATE_OBJECT, c.validateObject, PREPEND_THREAD, LEAF, REEXECUTABLE, NO_LOCATIONS);
 
-        if (IntrinsifyObjectMethods.getValue()) {
-            r.registerSubstitutions(ObjectSubstitutions.class);
-        }
-        if (IntrinsifySystemMethods.getValue()) {
-            r.registerSubstitutions(SystemSubstitutions.class);
-        }
-        if (IntrinsifyThreadMethods.getValue()) {
-            r.registerSubstitutions(ThreadSubstitutions.class);
-        }
-        if (IntrinsifyUnsafeMethods.getValue()) {
-            r.registerSubstitutions(UnsafeSubstitutions.class);
-        }
-        if (IntrinsifyClassMethods.getValue()) {
-            r.registerSubstitutions(ClassSubstitutions.class);
-        }
-        if (IntrinsifyAESMethods.getValue()) {
-            r.registerSubstitutions(AESCryptSubstitutions.class);
-            r.registerSubstitutions(CipherBlockChainingSubstitutions.class);
-        }
-        if (IntrinsifyCRC32Methods.getValue()) {
-            r.registerSubstitutions(CRC32Substitutions.class);
-        }
-        if (IntrinsifyReflectionMethods.getValue()) {
-            r.registerSubstitutions(ReflectionSubstitutions.class);
-        }
+        r.registerSubstitutions(ObjectSubstitutions.class);
+        r.registerSubstitutions(SystemSubstitutions.class);
+        r.registerSubstitutions(ThreadSubstitutions.class);
+        r.registerSubstitutions(UnsafeSubstitutions.class);
+        r.registerSubstitutions(ClassSubstitutions.class);
+        r.registerSubstitutions(AESCryptSubstitutions.class);
+        r.registerSubstitutions(CipherBlockChainingSubstitutions.class);
+        r.registerSubstitutions(CRC32Substitutions.class);
+        r.registerSubstitutions(ReflectionSubstitutions.class);
 
         checkcastDynamicSnippets = new CheckCastDynamicSnippets.Templates(this, r, graalRuntime.getTarget());
         instanceofSnippets = new InstanceOfSnippets.Templates(this, r, graalRuntime.getTarget());
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/ArrayCopyNode.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/ArrayCopyNode.java	Mon Sep 02 15:27:20 2013 +0200
@@ -22,7 +22,7 @@
  */
 package com.oracle.graal.hotspot.replacements;
 
-import static com.oracle.graal.phases.GraalOptions.*;
+import static com.oracle.graal.compiler.GraalCompiler.*;
 
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.debug.*;
@@ -93,7 +93,7 @@
 
     @Override
     protected StructuredGraph getSnippetGraph(LoweringTool tool) {
-        if (!IntrinsifyArrayCopy.getValue()) {
+        if (!shouldIntrinsify(getTargetMethod())) {
             return null;
         }
 
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/CallSiteSubstitutions.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/CallSiteSubstitutions.java	Mon Sep 02 15:27:20 2013 +0200
@@ -22,8 +22,6 @@
  */
 package com.oracle.graal.hotspot.replacements;
 
-import static com.oracle.graal.phases.GraalOptions.*;
-
 import java.lang.invoke.*;
 
 import com.oracle.graal.api.code.*;
@@ -37,11 +35,9 @@
 
     @Override
     public void registerReplacements(MetaAccessProvider runtime, Replacements replacements, TargetDescription target) {
-        if (IntrinsifyCallSiteTarget.getValue()) {
-            replacements.registerSubstitutions(ConstantCallSiteSubstitutions.class);
-            replacements.registerSubstitutions(MutableCallSiteSubstitutions.class);
-            replacements.registerSubstitutions(VolatileCallSiteSubstitutions.class);
-        }
+        replacements.registerSubstitutions(ConstantCallSiteSubstitutions.class);
+        replacements.registerSubstitutions(MutableCallSiteSubstitutions.class);
+        replacements.registerSubstitutions(VolatileCallSiteSubstitutions.class);
     }
 
     @ClassSubstitution(ConstantCallSite.class)
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/ObjectCloneNode.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/ObjectCloneNode.java	Mon Sep 02 15:27:20 2013 +0200
@@ -22,7 +22,7 @@
  */
 package com.oracle.graal.hotspot.replacements;
 
-import static com.oracle.graal.phases.GraalOptions.*;
+import static com.oracle.graal.compiler.GraalCompiler.*;
 
 import java.lang.reflect.*;
 
@@ -52,7 +52,7 @@
 
     @Override
     protected StructuredGraph getSnippetGraph(LoweringTool tool) {
-        if (!IntrinsifyObjectClone.getValue()) {
+        if (!shouldIntrinsify(getTargetMethod())) {
             return null;
         }
 
@@ -121,7 +121,7 @@
             ResolvedJavaType type = getConcreteType(obj.stamp(), tool.getAssumptions());
             if (isCloneableType(type, tool.getMetaAccessProvider())) {
                 if (!type.isArray()) {
-                    VirtualInstanceNode newVirtual = new VirtualInstanceNode(type);
+                    VirtualInstanceNode newVirtual = new VirtualInstanceNode(type, true);
                     ResolvedJavaField[] fields = newVirtual.getFields();
 
                     ValueNode[] state = new ValueNode[fields.length];
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/ReflectionGetCallerClassNode.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/ReflectionGetCallerClassNode.java	Mon Sep 02 15:27:20 2013 +0200
@@ -22,7 +22,7 @@
  */
 package com.oracle.graal.hotspot.replacements;
 
-import static com.oracle.graal.phases.GraalOptions.*;
+import static com.oracle.graal.compiler.GraalCompiler.*;
 
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.graph.*;
@@ -65,7 +65,7 @@
      * @return ConstantNode of the caller class, or null
      */
     private ConstantNode getCallerClassNode(MetaAccessProvider runtime) {
-        if (!IntrinsifyReflectionMethods.getValue()) {
+        if (!shouldIntrinsify(getTargetMethod())) {
             return null;
         }
 
--- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXControlFlow.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXControlFlow.java	Mon Sep 02 15:27:20 2013 +0200
@@ -55,6 +55,19 @@
         }
     }
 
+    public static class ReturnNoValOp extends PTXLIRInstruction {
+
+        public ReturnNoValOp() { }
+
+        @Override
+        public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
+            if (tasm.frameContext != null) {
+                tasm.frameContext.leave(tasm);
+            }
+            masm.ret();
+        }
+    }
+
     public static class BranchOp extends PTXLIRInstruction implements StandardOp.BranchOp {
 
         protected Condition condition;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java	Mon Sep 02 15:27:20 2013 +0200
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.lir.ptx;
+
+import static com.oracle.graal.api.code.ValueUtil.*;
+import static com.oracle.graal.lir.LIRInstruction.OperandFlag.*;
+
+import com.oracle.graal.api.meta.*;
+import com.oracle.graal.asm.ptx.*;
+import com.oracle.graal.graph.*;
+import com.oracle.graal.lir.*;
+import com.oracle.graal.lir.asm.*;
+
+public class PTXMemOp {
+
+    // Load operation from .global state space
+    @Opcode("LOAD")
+    public static class LoadOp extends PTXLIRInstruction {
+
+        private final Kind kind;
+        @Def({REG}) protected AllocatableValue result;
+        @Use({COMPOSITE}) protected PTXAddressValue address;
+        @State protected LIRFrameState state;
+
+        public LoadOp(Kind kind, AllocatableValue result, PTXAddressValue address, LIRFrameState state) {
+            this.kind = kind;
+            this.result = result;
+            this.address = address;
+            this.state = state;
+        }
+
+        @Override
+        public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
+            PTXAddress addr = address.toAddress();
+            switch (kind) {
+                case Byte:
+                    masm.ld_global_s8(asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Short:
+                    masm.ld_global_s16(asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Char:
+                    masm.ld_global_u16(asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Int:
+                    masm.ld_global_s32(asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Long:
+                    masm.ld_global_s64(asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Float:
+                    masm.ld_global_f32(asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Double:
+                    masm.ld_global_f64(asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Object:
+                    masm.ld_global_u32(asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                default:
+                    throw GraalInternalError.shouldNotReachHere();
+            }
+        }
+    }
+
+    // Store operation from .global state space
+    @Opcode("STORE")
+    public static class StoreOp extends PTXLIRInstruction {
+
+        private final Kind kind;
+        @Use({COMPOSITE}) protected PTXAddressValue address;
+        @Use({REG}) protected AllocatableValue input;
+        @State protected LIRFrameState state;
+
+        public StoreOp(Kind kind, PTXAddressValue address, AllocatableValue input, LIRFrameState state) {
+            this.kind = kind;
+            this.address = address;
+            this.input = input;
+            this.state = state;
+        }
+
+        @Override
+        public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
+            assert isRegister(input);
+            PTXAddress addr = address.toAddress();
+            switch (kind) {
+                case Byte:
+                    masm.st_global_s8(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                case Short:
+                    masm.st_global_s8(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                case Int:
+                    masm.st_global_s32(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                case Long:
+                    masm.st_global_s64(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                case Float:
+                    masm.st_global_f32(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                case Double:
+                    masm.st_global_f64(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                case Object:
+                    masm.st_global_u64(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                default:
+                    throw GraalInternalError.shouldNotReachHere("missing: " + address.getKind());
+            }
+        }
+    }
+
+    // Load operation from .param state space
+    @Opcode("LOAD")
+    public static class LoadParamOp extends PTXLIRInstruction {
+
+        private final Kind kind;
+        @Def({REG}) protected AllocatableValue result;
+        @Use({COMPOSITE}) protected PTXAddressValue address;
+        @State protected LIRFrameState state;
+
+        public LoadParamOp(Kind kind, AllocatableValue result, PTXAddressValue address, LIRFrameState state) {
+            this.kind = kind;
+            this.result = result;
+            this.address = address;
+            this.state = state;
+        }
+
+        @Override
+        public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
+            PTXAddress addr = address.toAddress();
+            switch (kind) {
+                case Byte:
+                    masm.ld_from_state_space(".param.s8", asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Short:
+                    masm.ld_from_state_space(".param.s16", asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Char:
+                    masm.ld_from_state_space(".param.s16", asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Int:
+                    masm.ld_from_state_space(".param.s32", asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Long:
+                    masm.ld_from_state_space(".param.s64", asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Float:
+                    masm.ld_from_state_space(".param.f32", asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Double:
+                    masm.ld_from_state_space(".param.f64", asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Object:
+                    masm.ld_from_state_space(".param.u64", asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                default:
+                    throw GraalInternalError.shouldNotReachHere();
+            }
+        }
+    }
+
+    // Load contents of return value pointer from return argument in
+    // .param state space
+    @Opcode("LOAD_RET_ADDR")
+    public static class LoadReturnAddrOp extends PTXLIRInstruction {
+
+        private final Kind kind;
+        @Def({REG}) protected AllocatableValue result;
+        @Use({COMPOSITE}) protected PTXAddressValue address;
+        @State protected LIRFrameState state;
+
+        public LoadReturnAddrOp(Kind kind, AllocatableValue result, PTXAddressValue address, LIRFrameState state) {
+            this.kind = kind;
+            this.result = result;
+            this.address = address;
+            this.state = state;
+        }
+
+        @Override
+        public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
+            PTXAddress addr = address.toAddress();
+            switch (kind) {
+                case Int:
+                    masm.ld_return_address("u32", asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                case Long:
+                    masm.ld_return_address("u64", asRegister(result), addr.getBase(), addr.getDisplacement());
+                    break;
+                default:
+                    throw GraalInternalError.shouldNotReachHere();
+            }
+        }
+    }
+
+    // Store operation from .global state space
+    @Opcode("STORE_RETURN_VALUE")
+    public static class StoreReturnValOp extends PTXLIRInstruction {
+
+        private final Kind kind;
+        @Use({COMPOSITE}) protected PTXAddressValue address;
+        @Use({REG}) protected AllocatableValue input;
+        @State protected LIRFrameState state;
+
+        public StoreReturnValOp(Kind kind, PTXAddressValue address, AllocatableValue input, LIRFrameState state) {
+            this.kind = kind;
+            this.address = address;
+            this.input = input;
+            this.state = state;
+        }
+
+        @Override
+        public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
+            assert isRegister(input);
+            PTXAddress addr = address.toAddress();
+            // masm.st_global_return_value_s64(addr.getBase(), addr.getDisplacement(), asRegister(input));
+
+            switch (kind) {
+                case Byte:
+                case Short:
+                    masm.st_global_return_value_s8(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                case Int:
+                    masm.st_global_return_value_s32(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                case Long:
+                    masm.st_global_return_value_s64(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                case Float:
+                    masm.st_global_return_value_f32(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                case Double:
+                    masm.st_global_return_value_f64(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                case Object:
+                    masm.st_global_return_value_u64(addr.getBase(), addr.getDisplacement(), asRegister(input));
+                    break;
+                default:
+                    throw GraalInternalError.shouldNotReachHere("missing: " + address.getKind());
+            }
+        }
+    }
+}
--- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMove.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMove.java	Mon Sep 02 15:27:20 2013 +0200
@@ -116,100 +116,6 @@
         }
     }
 
-    public static class LoadOp extends PTXLIRInstruction {
-
-        private final Kind kind;
-        @Def({REG}) protected AllocatableValue result;
-        @Use({COMPOSITE}) protected PTXAddressValue address;
-        @State protected LIRFrameState state;
-
-        public LoadOp(Kind kind, AllocatableValue result, PTXAddressValue address, LIRFrameState state) {
-            this.kind = kind;
-            this.result = result;
-            this.address = address;
-            this.state = state;
-        }
-
-        @Override
-        public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
-            PTXAddress addr = address.toAddress();
-            switch (kind) {
-                case Byte:
-                    masm.ld_global_s8(asRegister(result), addr.getBase(), addr.getDisplacement());
-                    break;
-                case Short:
-                    masm.ld_global_s16(asRegister(result), addr.getBase(), addr.getDisplacement());
-                    break;
-                case Char:
-                    masm.ld_global_u16(asRegister(result), addr.getBase(), addr.getDisplacement());
-                    break;
-                case Int:
-                    masm.ld_global_s32(asRegister(result), addr.getBase(), addr.getDisplacement());
-                    break;
-                case Long:
-                    masm.ld_global_s64(asRegister(result), addr.getBase(), addr.getDisplacement());
-                    break;
-                case Float:
-                    masm.ld_global_f32(asRegister(result), addr.getBase(), addr.getDisplacement());
-                    break;
-                case Double:
-                    masm.ld_global_f64(asRegister(result), addr.getBase(), addr.getDisplacement());
-                    break;
-                case Object:
-                    masm.ld_global_u32(asRegister(result), addr.getBase(), addr.getDisplacement());
-                    break;
-                default:
-                    throw GraalInternalError.shouldNotReachHere();
-            }
-        }
-    }
-
-    public static class StoreOp extends PTXLIRInstruction {
-
-        private final Kind kind;
-        @Use({COMPOSITE}) protected PTXAddressValue address;
-        @Use({REG}) protected AllocatableValue input;
-        @State protected LIRFrameState state;
-
-        public StoreOp(Kind kind, PTXAddressValue address, AllocatableValue input, LIRFrameState state) {
-            this.kind = kind;
-            this.address = address;
-            this.input = input;
-            this.state = state;
-        }
-
-        @Override
-        public void emitCode(TargetMethodAssembler tasm, PTXAssembler masm) {
-            assert isRegister(input);
-            PTXAddress addr = address.toAddress();
-            switch (kind) {
-                case Byte:
-                    masm.st_global_s8(addr.getBase(), addr.getDisplacement(), asRegister(input));
-                    break;
-                case Short:
-                    masm.st_global_s8(addr.getBase(), addr.getDisplacement(), asRegister(input));
-                    break;
-                case Int:
-                    masm.st_global_s32(addr.getBase(), addr.getDisplacement(), asRegister(input));
-                    break;
-                case Long:
-                    masm.st_global_s64(addr.getBase(), addr.getDisplacement(), asRegister(input));
-                    break;
-                case Float:
-                    masm.st_global_f32(addr.getBase(), addr.getDisplacement(), asRegister(input));
-                    break;
-                case Double:
-                    masm.st_global_f64(addr.getBase(), addr.getDisplacement(), asRegister(input));
-                    break;
-                case Object:
-                    masm.st_global_s32(addr.getBase(), addr.getDisplacement(), asRegister(input));
-                    break;
-                default:
-                    throw GraalInternalError.shouldNotReachHere("missing: " + address.getKind());
-            }
-        }
-    }
-
     public static class LeaOp extends PTXLIRInstruction {
 
         @Def({REG}) protected AllocatableValue result;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXParameterOp.java	Mon Sep 02 15:27:20 2013 +0200
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package com.oracle.graal.lir.ptx;
+
+import static com.oracle.graal.api.code.ValueUtil.*;
+import static com.oracle.graal.lir.LIRInstruction.OperandFlag.*;
+
+import com.oracle.graal.api.meta.*;
+import com.oracle.graal.asm.ptx.*;
+import com.oracle.graal.graph.*;
+import com.oracle.graal.lir.*;
+import com.oracle.graal.lir.asm.*;
+
+public class PTXParameterOp extends LIRInstruction {
+
+    @Def({REG}) protected Value[] params;
+
+    public PTXParameterOp(Value[] params) {
+        this.params = params;
+    }
+
+    @Override
+    public void emitCode(TargetMethodAssembler tasm) {
+        PTXAssembler ptxasm = (PTXAssembler) tasm.asm;
+        // Emit parameter directives for arguments
+        int argCount = params.length;
+        for (int i = 0; i < argCount; i++) {
+            Kind paramKind = params[i].getKind();
+            switch (paramKind) {
+            case Int :
+                ptxasm.param_32_decl(asIntReg(params[i]), (i == (argCount - 1)));
+                break;
+            case Long :
+                ptxasm.param_64_decl(asLongReg(params[i]), (i == (argCount - 1)));
+                break;
+            case Float :
+                ptxasm.param_32_decl(asFloatReg(params[i]), (i == (argCount - 1)));
+                break;
+            case Double :
+                ptxasm.param_64_decl(asDoubleReg(params[i]), (i == (argCount - 1)));
+                break;
+            default :
+                throw GraalInternalError.shouldNotReachHere("unhandled parameter type "  + paramKind.toString());
+            }
+        }
+    }
+}
--- a/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/CompositeValue.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/CompositeValue.java	Mon Sep 02 15:27:20 2013 +0200
@@ -25,6 +25,7 @@
 import java.lang.annotation.*;
 
 import com.oracle.graal.api.meta.*;
+import com.oracle.graal.debug.*;
 import com.oracle.graal.lir.LIRInstruction.OperandFlag;
 import com.oracle.graal.lir.LIRInstruction.OperandMode;
 import com.oracle.graal.lir.LIRInstruction.ValueProcedure;
@@ -45,8 +46,11 @@
 
     private final CompositeValueClass valueClass;
 
+    private static final DebugMetric COMPOSITE_VALUE_COUNT = Debug.metric("CompositeValues");
+
     public CompositeValue(PlatformKind kind) {
         super(kind);
+        COMPOSITE_VALUE_COUNT.increment();
         valueClass = CompositeValueClass.get(getClass());
     }
 
--- a/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/LIRInstruction.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/LIRInstruction.java	Mon Sep 02 15:27:20 2013 +0200
@@ -30,6 +30,7 @@
 
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
+import com.oracle.graal.debug.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.lir.asm.*;
 
@@ -211,10 +212,13 @@
      */
     private int id;
 
+    private static final DebugMetric LIR_NODE_COUNT = Debug.metric("LIRNodes");
+
     /**
      * Constructs a new LIR instruction.
      */
     public LIRInstruction() {
+        LIR_NODE_COUNT.increment();
         instructionClass = LIRInstructionClass.get(getClass());
         id = -1;
     }
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/java/NewInstanceNode.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/java/NewInstanceNode.java	Mon Sep 02 15:27:20 2013 +0200
@@ -85,7 +85,7 @@
     public void virtualize(VirtualizerTool tool) {
         if (instanceClass != null) {
             assert !instanceClass().isArray();
-            VirtualInstanceNode virtualObject = new VirtualInstanceNode(instanceClass());
+            VirtualInstanceNode virtualObject = new VirtualInstanceNode(instanceClass(), true);
             ResolvedJavaField[] fields = virtualObject.getFields();
             ValueNode[] state = new ValueNode[fields.length];
             for (int i = 0; i < state.length; i++) {
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/virtual/VirtualArrayNode.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/virtual/VirtualArrayNode.java	Mon Sep 02 15:27:20 2013 +0200
@@ -36,6 +36,7 @@
     private final int length;
 
     public VirtualArrayNode(ResolvedJavaType componentType, int length) {
+        super(true);
         this.componentType = componentType;
         this.length = length;
     }
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/virtual/VirtualBoxingNode.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/virtual/VirtualBoxingNode.java	Mon Sep 02 15:27:20 2013 +0200
@@ -31,7 +31,7 @@
     private final Kind boxingKind;
 
     public VirtualBoxingNode(ResolvedJavaType type, Kind boxingKind) {
-        super(type);
+        super(type, false);
         this.boxingKind = boxingKind;
     }
 
@@ -41,11 +41,6 @@
     }
 
     @Override
-    public boolean hasIdentity() {
-        return false;
-    }
-
-    @Override
     public ValueNode getMaterializedRepresentation(FixedNode fixed, ValueNode[] entries, int[] locks) {
         assert entries.length == 1;
         assert locks.length == 0;
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/virtual/VirtualInstanceNode.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/virtual/VirtualInstanceNode.java	Mon Sep 02 15:27:20 2013 +0200
@@ -32,12 +32,12 @@
     private final ResolvedJavaType type;
     private final ResolvedJavaField[] fields;
 
-    public VirtualInstanceNode(ResolvedJavaType type) {
-        this.type = type;
-        this.fields = type.getInstanceFields(true);
+    public VirtualInstanceNode(ResolvedJavaType type, boolean hasIdentity) {
+        this(type, type.getInstanceFields(true), hasIdentity);
     }
 
-    public VirtualInstanceNode(ResolvedJavaType type, ResolvedJavaField[] fields) {
+    public VirtualInstanceNode(ResolvedJavaType type, ResolvedJavaField[] fields, boolean hasIdentity) {
+        super(hasIdentity);
         this.type = type;
         this.fields = fields;
     }
@@ -97,7 +97,7 @@
 
     @Override
     public VirtualInstanceNode duplicate() {
-        return new VirtualInstanceNode(type);
+        return new VirtualInstanceNode(type, fields, super.hasIdentity());
     }
 
     @Override
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/virtual/VirtualObjectNode.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/virtual/VirtualObjectNode.java	Mon Sep 02 15:27:20 2013 +0200
@@ -23,14 +23,18 @@
 package com.oracle.graal.nodes.virtual;
 
 import com.oracle.graal.api.meta.*;
+import com.oracle.graal.graph.*;
 import com.oracle.graal.nodes.*;
 import com.oracle.graal.nodes.spi.*;
 import com.oracle.graal.nodes.type.*;
 
-public abstract class VirtualObjectNode extends ValueNode implements LIRLowerable {
+public abstract class VirtualObjectNode extends ValueNode implements LIRLowerable, Node.IterableNodeType {
 
-    public VirtualObjectNode() {
+    private boolean hasIdentity;
+
+    public VirtualObjectNode(boolean hasIdentity) {
         super(StampFactory.virtual());
+        this.hasIdentity = hasIdentity;
     }
 
     /**
@@ -72,7 +76,11 @@
      * comparison of two virtual objects is determined by comparing their contents.
      */
     public boolean hasIdentity() {
-        return true;
+        return hasIdentity;
+    }
+
+    public void setIdentity(boolean identity) {
+        this.hasIdentity = identity;
     }
 
     /**
--- a/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/GraalOptions.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/GraalOptions.java	Mon Sep 02 15:27:20 2013 +0200
@@ -89,12 +89,12 @@
     @Option(help = "")
     public static final OptionValue<Integer> DeoptsToDisableOptimisticOptimization = new OptionValue<>(40);
 
-    // comilation queue
-    @Option(help = "")
+    // compilation queue
+    @Option(help = "Compile all methods in all classes on given class path")
     public static final OptionValue<String> CompileTheWorld = new OptionValue<>(null);
-    @Option(help = "")
+    @Option(help = "First class to consider when using CompileTheWorld")
     public static final OptionValue<Integer> CompileTheWorldStartAt = new OptionValue<>(1);
-    @Option(help = "")
+    @Option(help = "Last class to consider when using CompileTheWorld")
     public static final OptionValue<Integer> CompileTheWorldStopAt = new OptionValue<>(Integer.MAX_VALUE);
 
     // graph caching
@@ -290,33 +290,6 @@
     @Option(help = "")
     public static final OptionValue<Boolean> OptPushThroughPi = new OptionValue<>(true);
 
-    // Intrinsification settings
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifyObjectClone = new OptionValue<>(false);
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifyArrayCopy = new OptionValue<>(true);
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifyObjectMethods = new OptionValue<>(true);
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifySystemMethods = new OptionValue<>(true);
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifyClassMethods = new OptionValue<>(true);
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifyThreadMethods = new OptionValue<>(true);
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifyUnsafeMethods = new OptionValue<>(true);
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifyMathMethods = new OptionValue<>(true);
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifyAESMethods = new OptionValue<>(true);
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifyCRC32Methods = new OptionValue<>(true);
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifyReflectionMethods = new OptionValue<>(true);
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifyInstalledCodeMethods = new OptionValue<>(true);
-    @Option(help = "")
-    public static final OptionValue<Boolean> IntrinsifyCallSiteTarget = new OptionValue<>(true);
 
     /**
      * Counts the various paths taken through snippets.
--- a/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/ReplacementsImpl.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/ReplacementsImpl.java	Mon Sep 02 15:27:20 2013 +0200
@@ -23,6 +23,7 @@
 package com.oracle.graal.replacements;
 
 import static com.oracle.graal.api.meta.MetaUtil.*;
+import static com.oracle.graal.compiler.GraalCompiler.*;
 import static com.oracle.graal.phases.GraalOptions.*;
 
 import java.lang.reflect.*;
@@ -138,7 +139,7 @@
                 Member originalMethod = originalMethod(classSubstitution, methodSubstitution.optional(), originalName, originalParameters);
                 if (originalMethod != null) {
                     ResolvedJavaMethod original = registerMethodSubstitution(originalMethod, substituteMethod);
-                    if (original != null && methodSubstitution.forced()) {
+                    if (original != null && methodSubstitution.forced() && shouldIntrinsify(original)) {
                         forcedSubstitutions.add(original);
                     }
                 }
@@ -149,7 +150,7 @@
                 Member originalMethod = originalMethod(classSubstitution, macroSubstitution.optional(), originalName, originalParameters);
                 if (originalMethod != null) {
                     ResolvedJavaMethod original = registerMacroSubstitution(originalMethod, macroSubstitution.macro());
-                    if (original != null && macroSubstitution.forced()) {
+                    if (original != null && macroSubstitution.forced() && shouldIntrinsify(original)) {
                         forcedSubstitutions.add(original);
                     }
                 }
--- a/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/PartialEvaluator.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/PartialEvaluator.java	Mon Sep 02 15:27:20 2013 +0200
@@ -42,6 +42,7 @@
 import com.oracle.graal.nodes.java.MethodCallTargetNode.InvokeKind;
 import com.oracle.graal.nodes.spi.*;
 import com.oracle.graal.nodes.util.*;
+import com.oracle.graal.nodes.virtual.*;
 import com.oracle.graal.phases.*;
 import com.oracle.graal.phases.PhasePlan.PhasePosition;
 import com.oracle.graal.phases.common.*;
@@ -184,8 +185,17 @@
                     materializeNode.replaceAtUsages(materializeNode.getFrame());
                     graph.removeFixed(materializeNode);
                 }
-                for (VirtualOnlyInstanceNode virtualOnlyNode : graph.getNodes(VirtualOnlyInstanceNode.class)) {
-                    virtualOnlyNode.setAllowMaterialization(true);
+                for (VirtualObjectNode virtualObjectNode : graph.getNodes(VirtualObjectNode.class)) {
+                    if (virtualObjectNode instanceof VirtualOnlyInstanceNode) {
+                        VirtualOnlyInstanceNode virtualOnlyInstanceNode = (VirtualOnlyInstanceNode) virtualObjectNode;
+                        virtualOnlyInstanceNode.setAllowMaterialization(true);
+                    } else if (virtualObjectNode instanceof VirtualInstanceNode) {
+                        VirtualInstanceNode virtualInstanceNode = (VirtualInstanceNode) virtualObjectNode;
+                        ResolvedJavaType type = virtualInstanceNode.type();
+                        if (type.getAnnotation(CompilerDirectives.ValueType.class) != null) {
+                            virtualInstanceNode.setIdentity(false);
+                        }
+                    }
                 }
 
                 // Convert deopt to guards.
--- a/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/TruffleCache.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/TruffleCache.java	Mon Sep 02 15:27:20 2013 +0200
@@ -205,6 +205,10 @@
     }
 
     private void expandPath(StructuredGraph newGraph, int maxNodes, NodeBitMap visitedNodes, AbstractBeginNode start, Queue<AbstractBeginNode> workQueue) {
+        if (start.isDeleted()) {
+            return;
+        }
+
         FixedNode next = start;
         while (!visitedNodes.isMarked(next)) {
             visitedNodes.mark(next);
--- a/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/nodes/frame/NewFrameNode.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/nodes/frame/NewFrameNode.java	Mon Sep 02 15:27:20 2013 +0200
@@ -92,12 +92,12 @@
         throw new RuntimeException("Frame field not found: " + fieldName);
     }
 
-    public static class VirtualOnlyInstanceNode extends VirtualInstanceNode implements Node.IterableNodeType {
+    public static class VirtualOnlyInstanceNode extends VirtualInstanceNode {
 
         private boolean allowMaterialization;
 
         public VirtualOnlyInstanceNode(ResolvedJavaType type, ResolvedJavaField[] fields) {
-            super(type, fields);
+            super(type, fields, false);
         }
 
         @Override
--- a/graal/com.oracle.truffle.api/src/com/oracle/truffle/api/CompilerDirectives.java	Mon Sep 02 15:22:25 2013 +0200
+++ b/graal/com.oracle.truffle.api/src/com/oracle/truffle/api/CompilerDirectives.java	Mon Sep 02 15:27:20 2013 +0200
@@ -205,4 +205,13 @@
     @Target({ElementType.METHOD})
     public @interface SlowPath {
     }
+
+    /**
+     * Marks classes as value types. Reference comparisons (==) between instances of those classes
+     * have undefined semantics and can either return true or false.
+     */
+    @Retention(RetentionPolicy.RUNTIME)
+    @Target({ElementType.TYPE})
+    public @interface ValueType {
+    }
 }
--- a/mx/commands.py	Mon Sep 02 15:22:25 2013 +0200
+++ b/mx/commands.py	Mon Sep 02 15:27:20 2013 +0200
@@ -286,7 +286,7 @@
             with VM(vm, bld):
                 build([])
             return
-    mx.abort('You need to run "mx --vm ' + vm + '--vmbuild ' + bld + ' build" to build the selected VM')
+    mx.abort('You need to run "mx --vm ' + vm + ' --vmbuild ' + bld + ' build" to build the selected VM')
 
 def _jdk(build='product', vmToCheck=None, create=False, installGraalJar=True):
     """
@@ -1358,7 +1358,7 @@
     mx.add_argument('--vmcwd', dest='vm_cwd', help='current directory will be changed to <path> before the VM is executed', default=None, metavar='<path>')
     mx.add_argument('--installed-jdks', help='the base directory in which the JDKs cloned from $JAVA_HOME exist. ' +
                     'The VM selected by --vm and --vmbuild options is under this directory (i.e., ' +
-                    join('<path>', '<vmbuild>', 'jre', 'lib', '<vm>', mx.add_lib_prefix(mx.add_lib_suffix('jvm'))) + ')', default=None, metavar='<path>')
+                    join('<path>', '<jdk-version>', '<vmbuild>', 'jre', 'lib', '<vm>', mx.add_lib_prefix(mx.add_lib_suffix('jvm'))) + ')', default=None, metavar='<path>')
 
     if (_vmSourcesAvailable):
         mx.add_argument('--vm', action='store', dest='vm', choices=_vmChoices.keys(), help='the VM type to build/run')
--- a/mx/projects	Mon Sep 02 15:22:25 2013 +0200
+++ b/mx/projects	Mon Sep 02 15:27:20 2013 +0200
@@ -288,7 +288,7 @@
 # graal.nodes
 project@com.oracle.graal.nodes@subDir=graal
 project@com.oracle.graal.nodes@sourceDirs=src
-project@com.oracle.graal.nodes@dependencies=com.oracle.graal.api.code,com.oracle.graal.graph,com.oracle.graal.debug,com.oracle.graal.api.replacements
+project@com.oracle.graal.nodes@dependencies=com.oracle.graal.graph,com.oracle.graal.debug,com.oracle.graal.api.replacements,com.oracle.graal.api.code
 project@com.oracle.graal.nodes@checkstyle=com.oracle.graal.graph
 project@com.oracle.graal.nodes@javaCompliance=1.7
 project@com.oracle.graal.nodes@workingSets=Graal,Graph
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Mon Sep 02 15:22:25 2013 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Mon Sep 02 15:27:20 2013 +0200
@@ -29,12 +29,14 @@
 #include "utilities/ostream.hpp"
 #include "memory/allocation.hpp"
 #include "memory/allocation.inline.hpp"
+#include "kernelArguments.hpp"
 
 void * gpu::Ptx::_device_context;
+int    gpu::Ptx::_cu_device = 0;
 
 gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init;
 gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create;
-gpu::Ptx::cuda_cu_ctx_detach_func_t gpu::Ptx::_cuda_cu_ctx_detach;
+gpu::Ptx::cuda_cu_ctx_destroy_func_t gpu::Ptx::_cuda_cu_ctx_destroy;
 gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize;
 gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count;
 gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name;
@@ -44,6 +46,8 @@
 gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel;
 gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function;
 gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex;
+gpu::Ptx::cuda_cu_memcpy_dtoh_func_t gpu::Ptx::_cuda_cu_memcpy_dtoh;
+gpu::Ptx::cuda_cu_memfree_func_t gpu::Ptx::_cuda_cu_memfree;
 
 void gpu::probe_linkage() {
 #if defined(__APPLE__) || defined(LINUX)
@@ -67,9 +71,9 @@
   }
 }
 
-bool gpu::execute_kernel(address kernel, JavaCallArguments * jca) {
+bool gpu::execute_kernel(address kernel, PTXKernelArguments & ptxka, JavaValue& ret) {
   if (gpu::has_gpu_linkage()) {
-    return (gpu::Ptx::execute_kernel(kernel, jca));
+    return (gpu::Ptx::execute_kernel(kernel, ptxka, ret));
   } else {
     return false;
   }
@@ -108,8 +112,7 @@
   /* Get the handle to the first compute device */
   int device_id = 0;
   /* Compute-capable device handle */
-  int cu_device = 0;
-  status = _cuda_cu_device_get(&cu_device, device_id);
+  status = _cuda_cu_device_get(&_cu_device, device_id);
 
   if (status != GRAAL_CUDA_SUCCESS) {
     tty->print_cr("[CUDA] Failed to get handle of first compute-capable device i.e., the one at ordinal: %d", device_id);
@@ -122,42 +125,42 @@
 
   /* Get device attributes */
   int minor, major, unified_addressing;
-  status = _cuda_cu_device_get_attribute(&minor, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cu_device);
+  status = _cuda_cu_device_get_attribute(&minor, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, _cu_device);
 
   if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("[CUDA] Failed to get minor attribute of device: %d", cu_device);
+    tty->print_cr("[CUDA] Failed to get minor attribute of device: %d", _cu_device);
     return false;
   }
 
-  status = _cuda_cu_device_get_attribute(&major, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cu_device);
+  status = _cuda_cu_device_get_attribute(&major, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, _cu_device);
 
   if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("[CUDA] Failed to get major attribute of device: %d", cu_device);
+    tty->print_cr("[CUDA] Failed to get major attribute of device: %d", _cu_device);
     return false;
   }
 
   if (TraceGPUInteraction) {
-    tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", cu_device, major, minor);
+    tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", _cu_device, major, minor);
   }
 
-  status = _cuda_cu_device_get_attribute(&unified_addressing, GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cu_device);
+  status = _cuda_cu_device_get_attribute(&unified_addressing, GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, _cu_device);
 
   if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("[CUDA] Failed to query unified addressing mode of device: %d", cu_device);
+    tty->print_cr("[CUDA] Failed to query unified addressing mode of device: %d", _cu_device);
     return false;
   }
 
   if (TraceGPUInteraction) {
-    tty->print_cr("[CUDA] Unified addressing support on device %d: %d", cu_device, unified_addressing);
+    tty->print_cr("[CUDA] Unified addressing support on device %d: %d", _cu_device, unified_addressing);
   }
 
 
   /* Get device name */
   char device_name[256];
-  status = _cuda_cu_device_get_name(device_name, 256, cu_device);
+  status = _cuda_cu_device_get_name(device_name, 256, _cu_device);
 
   if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("[CUDA] Failed to get name of device: %d", cu_device);
+    tty->print_cr("[CUDA] Failed to get name of device: %d", _cu_device);
     return false;
   }
 
@@ -165,18 +168,6 @@
     tty->print_cr("[CUDA] Using %s", device_name);
   }
 
-  /* Create CUDA context */
-  status = _cuda_cu_ctx_create(&_device_context, 0, cu_device);
-
-  if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("[CUDA] Failed to create CUDA context for device: %d", cu_device);
-    return false;
-  }
-
-  if (TraceGPUInteraction) {
-    tty->print_cr("[CUDA] Success: Created context for device: %d", cu_device);
-  }
-
   return true;
 }
 
@@ -210,8 +201,20 @@
 
   }
 
+  /* Create CUDA context to compile and execute the kernel */
+  int status = _cuda_cu_ctx_create(&_device_context, 0, _cu_device);
+
+  if (status != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] Failed to create CUDA context for device: %d", _cu_device);
+    return NULL;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device);
+  }
+
   /* Load module's data with compiler options */
-  int status = _cuda_cu_module_load_data_ex(&cu_module, code, jit_num_options,
+  status = _cuda_cu_module_load_data_ex(&cu_module, (void*) code, jit_num_options,
                                             jit_options, (void **)jit_option_values);
   if (status != GRAAL_CUDA_SUCCESS) {
     if (status == GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU) {
@@ -238,10 +241,11 @@
   if (TraceGPUInteraction) {
     tty->print_cr("[CUDA] Got function handle for %s", name);
   }
+
   return cu_function;
 }
 
-bool gpu::Ptx::execute_kernel(address kernel, JavaCallArguments * jca) {
+bool gpu::Ptx::execute_kernel(address kernel, PTXKernelArguments &ptxka, JavaValue &ret) {
   // grid dimensionality
   unsigned int gridX = 1;
   unsigned int gridY = 1;
@@ -252,14 +256,11 @@
   unsigned int blockY = 1;
   unsigned int blockZ = 1;
   
-  int *cu_function = (int *)kernel;
+  struct CUfunc_st* cu_function = (struct CUfunc_st*) kernel;
 
-  char * paramBuffer = (char *) jca->parameters();
-  size_t paramBufferSz = (size_t) jca->size_of_parameters();
-
-  void * config[] = {
-    GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, paramBuffer,
-    GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &paramBufferSz,
+  void * config[5] = {
+    GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, ptxka._kernelArgBuffer,
+    GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &(ptxka._bufferOffset),
     GRAAL_CU_LAUNCH_PARAM_END
   };
 
@@ -270,10 +271,11 @@
   if (TraceGPUInteraction) {
     tty->print_cr("[CUDA] launching kernel");
   }
+
   int status = _cuda_cu_launch_kernel(cu_function,
                                       gridX, gridY, gridZ,
                                       blockX, blockY, blockZ,
-                                      0, NULL, NULL, config);
+                                      0, NULL, NULL, (void **) &config);
   if (status != GRAAL_CUDA_SUCCESS) {
     tty->print_cr("[CUDA] Failed to launch kernel");
     return false;
@@ -282,7 +284,72 @@
   if (TraceGPUInteraction) {
     tty->print_cr("[CUDA] Success: Kernel Launch");
   }
-  return status == 0;  // GRAAL_CUDA_SUCCESS
+
+  status = _cuda_cu_ctx_synchronize();
+
+  if (status != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] Failed to synchronize launched kernel (%d)", status);
+    return false;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Success: Synchronized launch kernel");
+  }
+
+
+  // Get the result. TODO: Move this code to get_return_oop()
+  BasicType return_type = ptxka.get_ret_type();
+  switch (return_type) {
+     case T_INT :
+       {
+         int return_val;
+         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_INT_BYTE_SIZE);
+         if (status != GRAAL_CUDA_SUCCESS) {
+           tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
+           return false;
+         }
+         ret.set_jint(return_val);
+       }
+       break;
+     case T_LONG :
+       {
+         long return_val;
+         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._return_value_ptr, T_LONG_BYTE_SIZE);
+         if (status != GRAAL_CUDA_SUCCESS) {
+           tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
+           return false;
+         }
+         ret.set_jlong(return_val);
+       }
+       break;
+     default:
+       tty->print_cr("[CUDA] TODO *** Unhandled return type");
+  }
+
+
+  // Free device memory allocated for result
+  status = gpu::Ptx::_cuda_cu_memfree(ptxka._return_value_ptr);
+  if (status != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] *** Error (%d) Failed to free device memory of return value", status);
+    return false;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Success: Freed device memory of return value");
+  }
+
+  // Destroy context
+  status = gpu::Ptx::_cuda_cu_ctx_destroy(_device_context);
+  if (status != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] *** Error (%d) Failed to destroy context", status);
+    return false;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Success: Destroy context");
+  }
+
+  return (status == GRAAL_CUDA_SUCCESS);
 }
 
 #if defined(LINUX)
@@ -305,8 +372,8 @@
         CAST_TO_FN_PTR(cuda_cu_init_func_t, os::dll_lookup(handle, "cuInit"));
       _cuda_cu_ctx_create =
         CAST_TO_FN_PTR(cuda_cu_ctx_create_func_t, os::dll_lookup(handle, "cuCtxCreate"));
-      _cuda_cu_ctx_detach =
-        CAST_TO_FN_PTR(cuda_cu_ctx_detach_func_t, os::dll_lookup(handle, "cuCtxDetach"));
+      _cuda_cu_ctx_destroy =
+        CAST_TO_FN_PTR(cuda_cu_ctx_destroy_func_t, os::dll_lookup(handle, "cuCtxDestroy"));
       _cuda_cu_ctx_synchronize =
         CAST_TO_FN_PTR(cuda_cu_ctx_synchronize_func_t, os::dll_lookup(handle, "cuCtxSynchronize"));
       _cuda_cu_device_get_count =
@@ -325,6 +392,15 @@
         CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, os::dll_lookup(handle, "cuModuleLoadDataEx"));
       _cuda_cu_launch_kernel =
         CAST_TO_FN_PTR(cuda_cu_launch_kernel_func_t, os::dll_lookup(handle, "cuLaunchKernel"));
+      _cuda_cu_memalloc =
+        CAST_TO_FN_PTR(cuda_cu_memalloc_func_t, os::dll_lookup(handle, "cuMemAlloc"));
+      _cuda_cu_memfree =
+        CAST_TO_FN_PTR(cuda_cu_memfree_func_t, os::dll_lookup(handle, "cuMemFree"));
+      _cuda_cu_memcpy_htod =
+        CAST_TO_FN_PTR(cuda_cu_memcpy_htod_func_t, os::dll_lookup(handle, "cuMemcpyHtoD"));
+      _cuda_cu_memcpy_dtoh =
+        CAST_TO_FN_PTR(cuda_cu_memcpy_dtoh_func_t, os::dll_lookup(handle, "cuMemcpyDtoH"));
+
       if (TraceGPUInteraction) {
         tty->print_cr("[CUDA] Success: library linkage");
       }
--- a/src/gpu/ptx/vm/gpu_ptx.hpp	Mon Sep 02 15:22:25 2013 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.hpp	Mon Sep 02 15:27:20 2013 +0200
@@ -25,7 +25,7 @@
 #ifndef GPU_PTX_HPP
 #define GPU_PTX_HPP
 
-/* 
+/*
  * Some useful macro definitions from publicly available cuda.h.
  * These definitions are for convenience.
  */
@@ -44,7 +44,7 @@
  * End of array terminator for the extra parameter to
  * ::cuLaunchKernel
  */
-#define GRAAL_CU_LAUNCH_PARAM_END            ((void *) 0x00)
+#define GRAAL_CU_LAUNCH_PARAM_END            ((void*) 0x00)
 
 /**
  * Indicator that the next value in the  extra parameter to
@@ -55,7 +55,7 @@
  *  extra array, then ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER will have no
  * effect.
  */
-#define GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER ((void *) 0x01)
+#define GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER ((void*) 0x01)
 
 /**
  * Indicator that the next value in the  extra parameter to
@@ -65,7 +65,7 @@
  * in the extra array if the value associated with
  * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero.
  */
-#define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE    ((void *) 0x02)
+#define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE    ((void*) 0x02)
 
 class Ptx {
   friend class gpu;
@@ -74,28 +74,39 @@
   static bool probe_linkage();
   static bool initialize_gpu();
   static void * generate_kernel(unsigned char *code, int code_len, const char *name);
-  static bool execute_kernel(address kernel, JavaCallArguments *);
-  
+  static bool execute_kernel(address kernel, PTXKernelArguments & ka, JavaValue &ret);
+public:
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+  typedef unsigned long long CUdeviceptr;
+#else
+  typedef unsigned int CUdeviceptr;
+#endif
+
 private:
   typedef int (*cuda_cu_init_func_t)(unsigned int);
-  typedef int (*cuda_cu_ctx_create_func_t)(void *, int, int);
-  typedef int (*cuda_cu_ctx_detach_func_t)(int *);
-  typedef int (*cuda_cu_ctx_synchronize_func_t)(int *);
-  typedef int (*cuda_cu_device_get_count_func_t)(int *);
-  typedef int (*cuda_cu_device_get_name_func_t)(char *, int, int);
-  typedef int (*cuda_cu_device_get_func_t)(int *, int);
-  typedef int (*cuda_cu_device_compute_capability_func_t)(int *, int *, int);
-  typedef int (*cuda_cu_device_get_attribute_func_t)(int *, int, int);
-  typedef int (*cuda_cu_launch_kernel_func_t)(void *,
+  typedef int (*cuda_cu_ctx_create_func_t)(void*, int, int);
+  typedef int (*cuda_cu_ctx_destroy_func_t)(void*);
+  typedef int (*cuda_cu_ctx_synchronize_func_t)(void);
+  typedef int (*cuda_cu_device_get_count_func_t)(int*);
+  typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int);
+  typedef int (*cuda_cu_device_get_func_t)(int*, int);
+  typedef int (*cuda_cu_device_compute_capability_func_t)(int*, int*, int);
+  typedef int (*cuda_cu_device_get_attribute_func_t)(int*, int, int);
+  typedef int (*cuda_cu_launch_kernel_func_t)(struct CUfunc_st*,
                                               unsigned int, unsigned int, unsigned int,
                                               unsigned int, unsigned int, unsigned int,
-                                              unsigned int, void *, void **, void **);
-  typedef int (*cuda_cu_module_get_function_func_t)(void *, void *, const char *);
-  typedef int (*cuda_cu_module_load_data_ex_func_t)(void *, void *, unsigned int, void *, void **);
+                                              unsigned int, void*, void**, void**);
+  typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*);
+  typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**);
+  typedef int (*cuda_cu_memalloc_func_t)(void*, unsigned int);
+  typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr);
+  typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int);
+  typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr,  unsigned int);
 
+public:
   static cuda_cu_init_func_t                      _cuda_cu_init;
   static cuda_cu_ctx_create_func_t                _cuda_cu_ctx_create;
-  static cuda_cu_ctx_detach_func_t                _cuda_cu_ctx_detach;
+  static cuda_cu_ctx_destroy_func_t               _cuda_cu_ctx_destroy;
   static cuda_cu_ctx_synchronize_func_t           _cuda_cu_ctx_synchronize;
   static cuda_cu_device_get_count_func_t          _cuda_cu_device_get_count;
   static cuda_cu_device_get_name_func_t           _cuda_cu_device_get_name;
@@ -105,8 +116,13 @@
   static cuda_cu_launch_kernel_func_t             _cuda_cu_launch_kernel;
   static cuda_cu_module_get_function_func_t       _cuda_cu_module_get_function;
   static cuda_cu_module_load_data_ex_func_t       _cuda_cu_module_load_data_ex;
+  static cuda_cu_memalloc_func_t                  _cuda_cu_memalloc;
+  static cuda_cu_memfree_func_t                   _cuda_cu_memfree;
+  static cuda_cu_memcpy_htod_func_t               _cuda_cu_memcpy_htod;
+  static cuda_cu_memcpy_dtoh_func_t               _cuda_cu_memcpy_dtoh;
 
 protected:
-  static void * _device_context;
+  static void* _device_context;
+  static int _cu_device;
 };
 #endif // GPU_PTX_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/gpu/ptx/vm/kernelArguments.cpp	Mon Sep 02 15:27:20 2013 +0200
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "kernelArguments.hpp"
+#include "runtime/javaCalls.hpp"
+
+gpu::Ptx::cuda_cu_memalloc_func_t gpu::Ptx::_cuda_cu_memalloc;
+gpu::Ptx::cuda_cu_memcpy_htod_func_t gpu::Ptx::_cuda_cu_memcpy_htod;
+
+// Get next java argument
+oop PTXKernelArguments::next_arg(BasicType expectedType) {
+  assert(_index < _args->length(), "out of bounds");
+  oop arg=((objArrayOop) (_args))->obj_at(_index++);
+  assert(expectedType == T_OBJECT || java_lang_boxing_object::is_instance(arg, expectedType), "arg type mismatch");
+  return arg;
+}
+
+void PTXKernelArguments::do_int()    { 
+  // If the parameter is a return value, 
+  if (is_return_type()) {
+    // Allocate device memory for T_INT return value pointer on device. Size in bytes
+    int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_INT_BYTE_SIZE);
+    if (status != GRAAL_CUDA_SUCCESS) {
+      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+      _success = false;
+      return;
+    }
+    // Push _return_value_ptr to _kernelBuffer
+    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+    _bufferOffset += sizeof(_return_value_ptr);
+  }
+  else {
+    // Get the next java argument and its value which should be a T_INT
+    oop arg = next_arg(T_INT);
+    // Copy the java argument value to kernelArgBuffer
+    jvalue intval;
+    if (java_lang_boxing_object::get_value(arg, &intval) != T_INT) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_INT");
+      _success = false;
+      return;
+    }
+    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = intval.i;
+    _bufferOffset += sizeof(intval.i);
+  }
+  return;
+}
+
+void PTXKernelArguments::do_long()    { 
+  // If the parameter is a return value, 
+  if (is_return_type()) {
+    // Allocate device memory for T_LONG return value pointer on device. Size in bytes
+    int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_LONG_BYTE_SIZE);
+    if (status != GRAAL_CUDA_SUCCESS) {
+      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+      _success = false;
+      return;
+    }
+    // Push _return_value_ptr to _kernelBuffer
+    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+    _bufferOffset += sizeof(_return_value_ptr);
+  }
+  else {
+    // Get the next java argument and its value which should be a T_LONG
+    oop arg = next_arg(T_LONG);
+    // Copy the java argument value to kernelArgBuffer
+    jvalue val;
+    if (java_lang_boxing_object::get_value(arg, &val) != T_LONG) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_LONG");
+      _success = false;
+      return;
+    }
+    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.j;
+    _bufferOffset += sizeof(val.j);
+  }
+  return;
+}
+
+void PTXKernelArguments::do_byte()    { 
+  // If the parameter is a return value, 
+  if (is_return_type()) {
+    // Allocate device memory for T_BYTE return value pointer on device. Size in bytes
+    int status = gpu::Ptx::_cuda_cu_memalloc(&_return_value_ptr, T_BYTE_SIZE);
+    if (status != GRAAL_CUDA_SUCCESS) {
+      tty->print_cr("[CUDA] *** Error (%d) Failed to allocate memory for return value pointer on device", status);
+      _success = false;
+      return;
+    }
+    // Push _return_value_ptr to _kernelBuffer
+    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = _return_value_ptr;
+    _bufferOffset += sizeof(_return_value_ptr);
+  }
+  else {
+    // Get the next java argument and its value which should be a T_BYTE
+    oop arg = next_arg(T_BYTE);
+    // Copy the java argument value to kernelArgBuffer
+    jvalue val;
+    if (java_lang_boxing_object::get_value(arg, &val) != T_BYTE) {
+      tty->print_cr("[CUDA] *** Error: Unexpected argument type; expecting T_BYTE");
+      _success = false;
+      return;
+    }
+    *((gpu::Ptx::CUdeviceptr*) &_kernelArgBuffer[_bufferOffset]) = val.b;
+    _bufferOffset += sizeof(val.b);
+  }
+  return;
+}
+
+// TODO implement other do_*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/gpu/ptx/vm/kernelArguments.hpp	Mon Sep 02 15:27:20 2013 +0200
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef KERNEL_ARGUMENTS_PTX_HPP
+#define KERNEL_ARGUMENTS_PTX_HPP
+
+#include "runtime/gpu.hpp"
+#include "runtime/signature.hpp"
+
+#define T_BYTE_SIZE       1
+#define T_INT_BYTE_SIZE   4
+#define T_LONG_BYTE_SIZE  8
+
+class PTXKernelArguments : public SignatureIterator {
+public:
+  // Buffer holding CUdeviceptr values that represent the kernel arguments
+  char _kernelArgBuffer[1024];
+  // Current offset into _kernelArgBuffer
+  size_t _bufferOffset;
+  gpu::Ptx::CUdeviceptr _return_value_ptr;
+private:
+  // Array of java argument oops
+  arrayOop _args;
+  // Current index into _args
+  int _index;
+  // Flag to indicate successful creation of kernel argument buffer
+  bool _success;
+  // Get next java argument
+  oop next_arg(BasicType expectedType);
+
+ public:
+  PTXKernelArguments(Symbol* signature, arrayOop args, bool is_static) : SignatureIterator(signature) {
+    this->_return_type = T_ILLEGAL;
+    _index = 0;
+    _args = args;
+    _success = true;
+    _bufferOffset = 0;
+    if (!is_static) {
+      // TODO : Create a device argument for receiver object and add it to _kernelBuffer
+      tty->print_cr("{CUDA] ****** TODO: Support for execution of non-static java methods not implemented yet.");
+    }
+    // Iterate over the entire signature
+    iterate();
+    assert((_success && (_index == args->length())), "arg count mismatch with signature");
+  }
+
+  inline char* device_argument_buffer() {
+    return _kernelArgBuffer;
+  }
+
+  inline size_t device_argument_buffer_size() {
+    return _bufferOffset;
+  }
+
+  // Get the return oop value
+  oop get_return_oop();
+
+  // get device return value ptr
+  gpu::Ptx::CUdeviceptr get_return_value_ptr() {
+      return _return_value_ptr;
+  }
+
+  
+  void do_byte();
+  void do_int();
+  void do_long();
+
+  inline void do_bool()   {
+    /* TODO : To be implemented */ 
+    guarantee(false, "NYI");
+  }
+  inline void do_char()   {
+    /* TODO : To be implemented */ 
+    guarantee(false, "NYI");
+  }
+  inline void do_short()  {
+    /* TODO : To be implemented */ 
+    guarantee(false, "NYI");
+  }
+  inline void do_float()  {
+    /* TODO : To be implemented */ 
+    guarantee(false, "NYI");
+  }
+  inline void do_double() {
+    /* TODO : To be implemented */ 
+    guarantee(false, "NYI");
+  }
+
+  inline void do_object() {
+    /* TODO : To be implemented */ 
+    guarantee(false, "NYI");
+  }
+  inline void do_object(int begin, int end) {
+    /* TODO : To be implemented */ 
+    guarantee(false, "NYI");
+  }
+  inline void do_array(int begin, int end)  {
+    /* TODO : To be implemented */ 
+    guarantee(false, "NYI");
+  }
+  inline void do_void() {
+    /* TODO : To be implemented */ 
+    guarantee(false, "NYI");
+  }
+};
+
+#endif  // KERNEL_ARGUMENTS_HPP
--- a/src/os_gpu/linux_ptx/vm/gpu_linux.cpp	Mon Sep 02 15:22:25 2013 +0200
+++ b/src/os_gpu/linux_ptx/vm/gpu_linux.cpp	Mon Sep 02 15:27:20 2013 +0200
@@ -39,7 +39,6 @@
  */
 
 static unsigned int nvidia_vendor_id = 0x10de;
-static unsigned int nvidia_gk110_dev_id = 0x1005;
 
 bool gpu::Linux::probe_gpu() {
   /* 
@@ -62,7 +61,7 @@
   while (fgets(contents, sizeof(contents)-1, pci_devices)) {
     sscanf(contents, "%04x%04x%04x", &bus_num_devfn_ign, &vendor, &device);
     /* Break after finding the first CUDA device. */
-    if ((vendor == nvidia_vendor_id) && (device = nvidia_gk110_dev_id)) {
+    if (vendor == nvidia_vendor_id) {
       cuda_device_exists = true;
       if (TraceGPUInteraction) {
         tty->print_cr("Found supported nVidia CUDA device vendor : 0x%04x device 0x%04x", vendor, device);
--- a/src/share/vm/graal/graalCompilerToGPU.cpp	Mon Sep 02 15:22:25 2013 +0200
+++ b/src/share/vm/graal/graalCompilerToGPU.cpp	Mon Sep 02 15:27:20 2013 +0200
@@ -24,13 +24,11 @@
 #include "precompiled.hpp"
 
 #include "graal/graalCompiler.hpp"
-#include "runtime/javaCalls.hpp"
-#include "graal/graalCompilerToVM.hpp"
 #include "graal/graalEnv.hpp"
 #include "graal/graalJavaAccess.hpp"
 #include "runtime/gpu.hpp"
 #include "runtime/javaCalls.hpp"
-
+# include "ptx/vm/kernelArguments.hpp"
 
 // Entry to native method implementation that transitions current thread to '_thread_in_vm'.
 #define C2V_VMENTRY(result_type, name, signature) \
@@ -81,27 +79,28 @@
   nmethod* nm = (nmethod*) (address) nmethodValue;
   methodHandle mh = nm->method();
   Symbol* signature = mh->signature();
-  JavaCallArguments jca(mh->size_of_parameters());
-
-  JavaArgumentUnboxer jap(signature, &jca, (arrayOop) JNIHandles::resolve(args), mh->is_static());
-  JavaValue result(jap.get_ret_type());
-  jca.set_alternative_target(nm);
 
   // start value is the kernel
   jlong startValue = HotSpotInstalledCode::codeStart(hotspotInstalledCode);
 
-  if (!gpu::execute_kernel((address)startValue, &jca)) {
+  PTXKernelArguments ptxka(signature, (arrayOop) JNIHandles::resolve(args), mh->is_static());
+  JavaValue result(ptxka.get_ret_type());
+  if (!gpu::execute_kernel((address)startValue, ptxka, result)) {
     return NULL;
   }
 
-  if (jap.get_ret_type() == T_VOID) {
+  if (ptxka.get_ret_type() == T_VOID) {
     return NULL;
-  } else if (jap.get_ret_type() == T_OBJECT || jap.get_ret_type() == T_ARRAY) {
+  } else if (ptxka.get_ret_type() == T_OBJECT || ptxka.get_ret_type() == T_ARRAY) {
     return JNIHandles::make_local((oop) result.get_jobject());
   } else {
-    oop o = java_lang_boxing_object::create(jap.get_ret_type(), (jvalue *) result.get_value_addr(), CHECK_NULL);
+    oop o = java_lang_boxing_object::create(ptxka.get_ret_type(), (jvalue *) result.get_value_addr(), CHECK_NULL);
+    if (TraceGPUInteraction) {
+      tty->print_cr("GPU execution returned %d", result.get_jint());
+    }
     return JNIHandles::make_local(o);
   }
+
 C2V_END
 
 C2V_VMENTRY(jboolean, deviceInit, (JNIEnv *env, jobject))
--- a/src/share/vm/runtime/gpu.hpp	Mon Sep 02 15:22:25 2013 +0200
+++ b/src/share/vm/runtime/gpu.hpp	Mon Sep 02 15:27:20 2013 +0200
@@ -26,6 +26,9 @@
 #define SHARE_VM_RUNTIME_GPU_HPP
 
 #include "runtime/atomic.hpp"
+#include "oops/symbol.hpp"
+
+class PTXKernelArguments;
 
 // gpu defines the interface to the graphics processor; this includes traditional
 // GPU services such as graphics kernel load and execute.
@@ -43,7 +46,7 @@
   
   static void * generate_kernel(unsigned char *code, int code_len, const char *name);
 
-  static bool execute_kernel(address kernel, JavaCallArguments * jca);
+  static bool execute_kernel(address kernel, PTXKernelArguments & ptxka, JavaValue & ret);
 
   static void set_available(bool value) {
     _available = value;
@@ -80,6 +83,7 @@
 # include "gpu_bsd.hpp"
 #endif
 
+public:
 # include "ptx/vm/gpu_ptx.hpp"
 
 };