changeset 11527:c99e65785936

Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
author bharadwaj
date Wed, 04 Sep 2013 10:47:37 -0400
parents db297343d44e
children 8f500c7a510a
files graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXArithmetic.java graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXParameterOp.java src/gpu/ptx/vm/gpu_ptx.cpp src/gpu/ptx/vm/gpu_ptx.hpp src/gpu/ptx/vm/kernelArguments.hpp
diffstat 9 files changed, 247 insertions(+), 156 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Wed Sep 04 14:56:30 2013 +0200
+++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Wed Sep 04 10:47:37 2013 -0400
@@ -272,69 +272,69 @@
     }
 
     public final void ld_global_b8(Register d, Register a, long immOff) {
-        emitString("ld.global.b8" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.b8" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_b16(Register d, Register a, long immOff) {
-        emitString("ld.global.b16" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.b16" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_b32(Register d, Register a, long immOff) {
-        emitString("ld.global.b32" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.b32" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_b64(Register d, Register a, long immOff) {
-        emitString("ld.global.b64" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.b64" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_u8(Register d, Register a, long immOff) {
-        emitString("ld.global.u8" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.u8" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_u16(Register d, Register a, long immOff) {
-        emitString("ld.global.u16" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.u16" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_u32(Register d, Register a, long immOff) {
-        emitString("ld.global.u32" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.u32" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_u64(Register d, Register a, long immOff) {
-        emitString("ld.global.u64" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.u64" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_s8(Register d, Register a, long immOff) {
-        emitString("ld.global.s8" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.s8" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_s16(Register d, Register a, long immOff) {
-        emitString("ld.global.s16" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.s16" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_s32(Register d, Register a, long immOff) {
-        emitString("ld.global.s32" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.s32" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_s64(Register d, Register a, long immOff) {
-        emitString("ld.global.s64" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.s64" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_f32(Register d, Register a, long immOff) {
-        emitString("ld.global.f32" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.f32" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void ld_global_f64(Register d, Register a, long immOff) {
-        emitString("ld.global.f64" + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.global.f64" + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     // Load from state space to destination register
     public final void ld_from_state_space(String s, Register d, Register a, long immOff) {
-        emitString("ld" + s + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld" + s + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     // Load return address from return parameter which is in .param state space
     public final void ld_return_address(String s, Register d, Register a, long immOff) {
-        emitString("ld.param." + s + " " + "%r" + d.encoding() + ", [" + a.toString() + " + " + immOff + "]" + ";" + "");
+        emitString("ld.param." + s + " " + "%r" + d.encoding() + ", [" + a + " + " + immOff + "]" + ";" + "");
     }
 
     public final void mov_b16(Register d, Register a) {
@@ -429,68 +429,68 @@
         emitString("mov.f64" + " " + "%r" + d.encoding() + ", " + f64 + ";" + "");
     }
 
-    public final void mul_f32(Register d, Register a, Register b) {
-        emitString("mul.f32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void mul_lo_f32(Register d, Register a, Register b) { // PTX float mul takes no .lo/.hi/.wide modifier
+        emitString("mul.f32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void mul_f64(Register d, Register a, Register b) {
-        emitString("smul.f64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void mul_lo_f64(Register d, Register a, Register b) { // PTX float mul takes no .lo/.hi/.wide modifier
+        emitString("mul.f64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void mul_s16(Register d, Register a, Register b) {
-        emitString("mul.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void mul_lo_s16(Register d, Register a, Register b) {
+        emitString("mul.lo.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void mul_s32(Register d, Register a, Register b) {
-        emitString("mul.s32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void mul_lo_s32(Register d, Register a, Register b) {
+        emitString("mul.lo.s32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void mul_s64(Register d, Register a, Register b) {
-        emitString("mul.s64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void mul_lo_s64(Register d, Register a, Register b) {
+        emitString("mul.lo.s64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void mul_s16(Register d, Register a, short s16) {
-        emitString("mul.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + s16 + ";" + "");
+    public final void mul_lo_s16(Register d, Register a, short s16) {
+        emitString("mul.lo.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + s16 + ";" + "");
     }
 
-    public final void mul_s32(Register d, Register a, int s32) {
-        emitString("mul.s32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + s32 + ";" + "");
+    public final void mul_lo_s32(Register d, Register a, int s32) {
+        emitString("mul.lo.s32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + s32 + ";" + "");
     }
 
-    public final void mul_s64(Register d, Register a, long s64) {
-        emitString("mul.s64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + s64 + ";" + "");
+    public final void mul_lo_s64(Register d, Register a, long s64) {
+        emitString("mul.lo.s64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + s64 + ";" + "");
     }
 
-    public final void mul_f32(Register d, Register a, float f32) {
-        emitString("mul.f32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + f32 + ";" + "");
+    public final void mul_lo_f32(Register d, Register a, float f32) { // PTX float mul takes no .lo/.hi/.wide modifier
+        emitString("mul.f32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + f32 + ";" + "");
     }
 
-    public final void mul_f64(Register d, Register a, double f64) {
-        emitString("mul.f64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + f64 + ";" + "");
+    public final void mul_lo_f64(Register d, Register a, double f64) { // PTX float mul takes no .lo/.hi/.wide modifier
+        emitString("mul.f64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + f64 + ";" + "");
     }
 
-    public final void mul_u16(Register d, Register a, Register b) {
-        emitString("mul.u16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void mul_lo_u16(Register d, Register a, Register b) {
+        emitString("mul.lo.u16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void mul_u32(Register d, Register a, Register b) {
-        emitString("mul.u32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void mul_lo_u32(Register d, Register a, Register b) {
+        emitString("mul.lo.u32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void mul_u64(Register d, Register a, Register b) {
-        emitString("mul.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void mul_lo_u64(Register d, Register a, Register b) {
+        emitString("mul.lo.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void mul_u16(Register d, Register a, short u16) {
-        emitString("mul.u16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u16 + ";" + "");
+    public final void mul_lo_u16(Register d, Register a, short u16) {
+        emitString("mul.lo.u16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u16 + ";" + "");
     }
 
-    public final void mul_u32(Register d, Register a, int u32) {
-        emitString("mul.u32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + "");
+    public final void mul_lo_u32(Register d, Register a, int u32) {
+        emitString("mul.lo.u32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + "");
     }
 
-    public final void mul_u64(Register d, Register a, long u64) {
-        emitString("mul.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u64 + ";" + "");
+    public final void mul_lo_u64(Register d, Register a, long u64) {
+        emitString("mul.lo.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u64 + ";" + "");
     }
 
     public final void neg_f32(Register d, Register a) {
@@ -550,15 +550,15 @@
     }
 
     public final void param_8_decl(Register d, boolean lastParam) {
-        emitString(".param" + " " + ".s8" + " " + d.toString() + (lastParam ? "" : ","));
+        emitString(".param" + " " + ".s8" + " " + d + (lastParam ? "" : ","));
     }
 
     public final void param_32_decl(Register d, boolean lastParam) {
-        emitString(".param" + " " + ".s32" + " " + d.toString() + (lastParam ? "" : ","));
+        emitString(".param" + " " + ".s32" + " " + d + (lastParam ? "" : ","));
     }
 
     public final void param_64_decl(Register d, boolean lastParam) {
-        emitString(".param" + " " + ".s64" + " " + d.toString() + (lastParam ? "" : ","));
+        emitString(".param" + " " + ".s64" + " " + d + (lastParam ? "" : ","));
     }
 
     public final void popc_b32(Register d, Register a) {
@@ -849,54 +849,32 @@
         emitString("setp.ge.u32" + " " + "%p" + ", " + u32 + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void shl_s16(Register d, Register a, Register b) {
-        emitString("shl.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    // Shift left - only types supported are .b16, .b32 and .b64
+    public final void shl_b16(Register d, Register a, Register b) {
+        emitString("shl.b16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void shl_s32(Register d, Register a, Register b) {
-        emitString("shl.s32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void shl_b32(Register d, Register a, Register b) {
+        emitString("shl.b32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void shl_s64(Register d, Register a, Register b) {
-        emitString("shl.s64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
+    public final void shl_b64(Register d, Register a, Register b) {
+        emitString("shl.b64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
 
-    public final void shl_s16(Register d, Register a, int u32) {
-        emitString("shl.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + "");
+    public final void shl_b16_const(Register d, Register a, int b) {
+        emitString("shl.b16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + b + ";" + "");
     }
 
-    public final void shl_s32(Register d, Register a, int u32) {
-        emitString("shl.s32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + "");
+    public final void shl_b32_const(Register d, Register a, int b) {
+        emitString("shl.b32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + b + ";" + "");
     }
 
-    public final void shl_s64(Register d, Register a, int u32) {
-        emitString("shl.s64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + "");
+    public final void shl_b64_const(Register d, Register a, int b) {
+        emitString("shl.b64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + b + ";" + "");
     }
 
-    public final void shl_u16(Register d, Register a, Register b) {
-        emitString("shl.u16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
-    }
-
-    public final void shl_u32(Register d, Register a, Register b) {
-        emitString("shl.u32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
-    }
-
-    public final void shl_u64(Register d, Register a, Register b) {
-        emitString("shl.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
-    }
-
-    public final void shl_u16(Register d, Register a, int u32) {
-        emitString("shl.u16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + "");
-    }
-
-    public final void shl_u32(Register d, Register a, int u32) {
-        emitString("shl.u32" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + "");
-    }
-
-    public final void shl_u64(Register d, Register a, int u32) {
-        emitString("shl.u64" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", " + u32 + ";" + "");
-    }
-
+    // Shift Right instruction
     public final void shr_s16(Register d, Register a, Register b) {
         emitString("shr.s16" + " " + "%r" + d.encoding() + ", %r" + a.encoding() + ", %r" + b.encoding() + ";" + "");
     }
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java	Wed Sep 04 14:56:30 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java	Wed Sep 04 10:47:37 2013 -0400
@@ -22,7 +22,7 @@
  */
 package com.oracle.graal.compiler.ptx.test;
 
-import org.junit.Test;
+import org.junit.*;
 
 import java.lang.reflect.Method;
 
@@ -32,10 +32,19 @@
     @Test
     public void testAdd() {
 
+        Integer r4 = (Integer) invoke(compile("testAdd2I"), 18, 24);
+        if (r4 == null) {
+            printReport("testAdd2I FAILED");
+        } else if (r4.intValue() == testAdd2I(18, 24)) {
+            printReport("testAdd2I PASSED");
+        } else {
+            printReport("testAdd2I FAILED");
+        }
+
         Long r2 = (Long) invoke(compile("testAdd2L"), (long) 12, (long) 6);
         if (r2 == null) {
             printReport("testAdd2L FAILED");
-        } else if (r2.longValue() == 18) {
+        } else if (r2.longValue() == testAdd2L(12, 6)) {
             printReport("testAdd2L PASSED");
         } else {
             printReport("testAdd2L FAILED");
@@ -43,10 +52,10 @@
 
         //invoke(compile("testAdd2B"), (byte) 6, (byte) 4);
 
-        Integer r4 = (Integer) invoke(compile("testAddIConst"), 5);
+        r4 = (Integer) invoke(compile("testAddIConst"), 5);
         if (r4 == null) {
             printReport("testAddIConst FAILED");
-        } else if (r4.intValue() == 37) {
+        } else if (r4.intValue() == testAddIConst(5)) {
             printReport("testAddIConst PASSED");
         } else {
             printReport("testAddIConst FAILED");
@@ -55,20 +64,12 @@
         r4 = (Integer) invoke(compile("testAddConstI"), 7);
         if (r4 == null) {
             printReport("testAddConstI FAILED");
-        } else if (r4.intValue() == 39) {
+        } else if (r4.intValue() == testAddConstI(7)) {
             printReport("testAddConstI PASSED");
         } else {
             printReport("testAddConstI FAILED");
         }
 
-        r4 = (Integer) invoke(compile("testAdd2I"), 18, 24);
-        if (r4 == null) {
-            printReport("testAdd2I FAILED");
-        } else if (r4.intValue() == 42) {
-            printReport("testAdd2I PASSED");
-        } else {
-            printReport("testAdd2I FAILED");
-        }
     }
 
     public static int testAdd2I(int a, int b) {
@@ -93,20 +94,21 @@
 
     @Test
     public void testSub() {
-        Long r2 = (Long) invoke(compile("testSub2L"), (long) 12, (long) 6);
-        if (r2 == null) {
-            printReport("testSub2I FAILED (null return value)");
-        } else if (r2.longValue() == 6) {
+
+        Integer r1 = (Integer) invoke(compile("testSub2I"), 18, 4);
+
+        if (r1 == null) {
+            printReport("testSub2I FAILED");
+        } else if (r1.intValue() == testSub2I(18, 4)) {
             printReport("testSub2I PASSED");
         } else {
             printReport("testSub2I FAILED");
         }
 
-        Integer r1 = (Integer) invoke(compile("testSub2I"), 18, 4);
-
-        if (r1 == null) {
-            printReport("testSub2I FAILED");
-        } else if (r1.intValue() == 14) {
+        Long r2 = (Long) invoke(compile("testSub2L"), (long) 12, (long) 6); // report under the testSub2L label
+        if (r2 == null) {
+            printReport("testSub2L FAILED (null return value)");
+        } else if (r2.longValue() == testSub2L(12, 6)) {
-            printReport("testSub2I PASSED");
+            printReport("testSub2L PASSED");
         } else {
-            printReport("testSub2I FAILED");
+            printReport("testSub2L FAILED");
@@ -115,7 +117,7 @@
         r1 = (Integer) invoke(compile("testSubIConst"), 35);
         if (r1 == null) {
             printReport("testSubIConst FAILED");
-        } else if (r1.intValue() == 3) {
+        } else if (r1.intValue() == testSubIConst(35)) {
             printReport("testSubIConst PASSED");
         } else {
             printReport("testSubIConst FAILED");
@@ -124,7 +126,7 @@
         r1 = (Integer) invoke(compile("testSubConstI"), 12);
         if (r1 == null) {
             printReport("testSubConstI FAILED");
-        } else if (r1.intValue() == 20) {
+        } else if (r1.intValue() == testSubConstI(12)) {
             printReport("testSubConstI PASSED");
         } else {
             printReport("testSubConstI FAILED");
@@ -149,10 +151,42 @@
 
     @Test
     public void testMul() {
-        invoke(compile("testMul2I"), 8, 4);
-        invoke(compile("testMul2L"), (long) 12, (long) 6);
-        invoke(compile("testMulIConst"), 4);
-        invoke(compile("testMulConstI"), 5);
+
+        Integer r1 = (Integer) invoke(compile("testMul2I"), 8, 4);
+        if (r1 == null) {
+            printReport("testMul2I FAILED");
+        } else if (r1.intValue() == testMul2I(8, 4)) {
+            printReport("testMul2I PASSED");
+        } else {
+            printReport("testMul2I FAILED");
+        }
+
+        Long r2 = (Long) invoke(compile("testMul2L"), (long) 12, (long) 6);
+        if (r2 == null) {
+            printReport("testMul2L FAILED");
+        } else if (r2.longValue() == testMul2L(12, 6)) {
+            printReport("testMul2L PASSED");
+        } else {
+            printReport("testMul2L FAILED");
+        }
+
+        r1 = (Integer) invoke(compile("testMulIConst"), 4);
+        if (r1 == null) {
+            printReport("testMulIConst FAILED");
+        } else if (r1.intValue() == testMulIConst(4)) {
+            printReport("testMulIConst PASSED");
+        } else {
+            printReport("testMulIConst FAILED");
+        }
+
+        r1 = (Integer) invoke(compile("testMulConstI"), 5);
+        if (r1 == null) {
+            printReport("testMulConstI FAILED");
+        } else if (r1.intValue() == testMulConstI(5)) {
+            printReport("testMulConstI PASSED");
+        } else {
+            printReport("testMulConstI FAILED");
+        }
     }
 
     public static int testMul2I(int a, int b) {
@@ -170,12 +204,44 @@
     public static int testMulConstI(int a) {
         return 32 * a;
     }
+
     @Test
     public void testDiv() {
-        invoke(compile("testDiv2I"), 8, 4);
-        invoke(compile("testDiv2L"), (long) 12, (long) 6);
-        invoke(compile("testDivIConst"), 64);
-        invoke(compile("testDivConstI"), 8);
+        Integer r1 = (Integer) invoke(compile("testDiv2I"), 8, 4);
+        if (r1 == null) {
+            printReport("testDiv2I FAILED (null value returned)");
+        } else if (r1.intValue() == testDiv2I(8, 4)) {
+            printReport("testDiv2I PASSED");
+        } else {
+            printReport("testDiv2I FAILED");
+        }
+
+        Long r2 = (Long) invoke(compile("testDiv2L"), (long) 12, (long) 6);
+        if (r2 == null) {
+            printReport("testDiv2L FAILED (null value returned)");
+        } else if (r2.longValue() == testDiv2L(12, 6)) {
+            printReport("testDiv2L PASSED");
+        } else {
+            printReport("testDiv2L FAILED");
+        }
+
+        r1 = (Integer) invoke(compile("testDivIConst"), 64);
+        if (r1 == null) {
+            printReport("testDivIConst FAILED (null value returned)");
+        } else if (r1.intValue() == testDivIConst(64)) {
+            printReport("testDivIConst PASSED");
+        } else {
+            printReport("testDivIConst FAILED");
+        }
+
+        r1 = (Integer) invoke(compile("testDivConstI"), 8);
+        if (r1 == null) {
+            printReport("testDivConstI FAILED (null value returned)");
+        } else if (r1.intValue() == testDivConstI(8)) {
+            printReport("testDivConstI PASSED");
+        } else {
+            printReport("testDivConstI FAILED");
+        }
     }
 
     public static int testDiv2I(int a, int b) {
@@ -196,8 +262,23 @@
 
     @Test
     public void testRem() {
-        invoke(compile("testRem2I"), 8, 4);
-        invoke(compile("testRem2L"), (long) 12, (long) 6);
+        Integer r1 = (Integer) invoke(compile("testRem2I"), 8, 4);
+        if (r1 == null) {
+            printReport("testRem2I FAILED (null value returned)");
+        } else if (r1.intValue() == testRem2I(8, 4)) {
+            printReport("testRem2I PASSED");
+        } else {
+            printReport("testRem2I FAILED");
+        }
+
+        Long r2 = (Long) invoke(compile("testRem2L"), (long) 12, (long) 6);
+        if (r2 == null) {
+            printReport("testRem2L FAILED (null value returned)");
+        } else if (r2.longValue() == testRem2L(12, 6)) { // check the long result (r2), not the int result r1
+            printReport("testRem2L PASSED");
+        } else {
+            printReport("testRem2L FAILED");
+        }
     }
 
     public static int testRem2I(int a, int b) {
@@ -207,11 +288,27 @@
     public static long testRem2L(long a, long b) {
         return a % b;
     }
-
+    @Ignore
     @Test
     public void testIntConversion() {
-        invoke(compile("testI2L"), 8);
-        invoke(compile("testL2I"), (long) 12);
+        Long r1 = (Long) invoke(compile("testI2L"), 8);
+        if (r1 == null) {
+            printReport("testI2L FAILED (null value returned)");
+        } else if (r1.longValue() == testI2L(8)) {
+            printReport("testI2L PASSED");
+        } else {
+            printReport("testI2L FAILED");
+        }
+
+        Integer r2 = (Integer) invoke(compile("testL2I"), (long) 12);
+        if (r2 == null) {
+            printReport("testL2I FAILED (null value returned)");
+        } else if (r2.intValue() == testL2I(12)) { // check the testL2I result (r2), not the testI2L result r1
+            printReport("testL2I PASSED");
+        } else {
+            printReport("testL2I FAILED");
+        }
+
         // invoke(compile("testI2C"), 65);
         // invoke(compile("testI2B"), 9);
         // invoke(compile("testI2F"), 17);
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java	Wed Sep 04 14:56:30 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java	Wed Sep 04 10:47:37 2013 -0400
@@ -174,7 +174,7 @@
         }
 
         for (Integer i : signed32) {
-            codeBuffer.emitString("  .reg .s32 %r" + i.intValue() + ";");
+            codeBuffer.emitString(".reg .s32 %r" + i.intValue() + ";");
         }
         for (Integer i : signed64) {
             codeBuffer.emitString(".reg .s64 %r" + i.intValue() + ";");
--- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXArithmetic.java	Wed Sep 04 14:56:30 2013 +0200
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXArithmetic.java	Wed Sep 04 10:47:37 2013 -0400
@@ -316,7 +316,7 @@
                     masm.cvt_f32_f64(asFloatReg(dst), asDoubleReg(src));
                     break;
                 case LSHL:
-                    masm.shl_s64(asLongReg(dst), asLongReg(dst), asIntReg(src));
+                    masm.shl_b64(asLongReg(dst), asLongReg(dst), asIntReg(src));
                     break;
                 case LSHR:
                     masm.shr_s64(asLongReg(dst), asLongReg(dst), asIntReg(src));
@@ -360,19 +360,19 @@
             switch (opcode) {
             case IADD:  masm.add_s32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
             case ISUB:  masm.sub_s32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
-            case IMUL:  masm.mul_s32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
+            case IMUL:  masm.mul_lo_s32(asIntReg(dst), asIntReg(src1),    tasm.asIntConst(src2));    break;
             case IAND:  masm.and_b32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
-            case ISHL:  masm.shl_s32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
+            case ISHL:  masm.shl_b32_const(asIntReg(dst), asIntReg(src1), tasm.asIntConst(src2));    break;
             case ISHR:  masm.shr_s32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
             case IUSHR: masm.shr_u32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
             case IXOR:  masm.xor_b32(asIntReg(dst),    asIntReg(src1),    tasm.asIntConst(src2));    break;
             case LXOR:  masm.xor_b64(asLongReg(dst),   asLongReg(src1),   tasm.asLongConst(src2));   break;
             case LUSHR: masm.shr_u64(asLongReg(dst),   asLongReg(src1),   tasm.asLongConst(src2));   break;
             case FADD:  masm.add_f32(asFloatReg(dst),  asFloatReg(src1),  tasm.asFloatConst(src2));  break;
-            case FMUL:  masm.mul_f32(asFloatReg(dst),  asFloatReg(src1),  tasm.asFloatConst(src2));  break;
+            case FMUL:  masm.mul_lo_f32(asFloatReg(dst), asFloatReg(src1), tasm.asFloatConst(src2)); break;
             case FDIV:  masm.div_f32(asFloatReg(dst),  asFloatReg(src1),  tasm.asFloatConst(src2));  break;
             case DADD:  masm.add_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break;
-            case DMUL:  masm.mul_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break;
+            case DMUL:  masm.mul_lo_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break;
             case DDIV:  masm.div_f64(asDoubleReg(dst), asDoubleReg(src1), tasm.asDoubleConst(src2)); break;
             default:
                 throw GraalInternalError.shouldNotReachHere();
@@ -387,34 +387,34 @@
             // case D:  new Mul(Double, dst, src1, src2);
             case IADD:  masm.add_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
             case ISUB:  masm.sub_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
-            case IMUL:  masm.mul_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case IMUL:  masm.mul_lo_s32(asIntReg(dst), asIntReg(src1),    asIntReg(src2));    break;
             case IDIV:  masm.div_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
             case IAND:  masm.and_b32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
             case IOR:    masm.or_b32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
             case IXOR:  masm.xor_b32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
-            case ISHL:  masm.shl_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
+            case ISHL:  masm.shl_b32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
             case ISHR:  masm.shr_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
             case IUSHR: masm.shr_u32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
             case IREM:  masm.rem_s32(asIntReg(dst),    asIntReg(src1),    asIntReg(src2));    break;
             case LADD:  masm.add_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
             case LSUB:  masm.sub_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
-            case LMUL:  masm.mul_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LMUL:  masm.mul_lo_s64(asLongReg(dst), asLongReg(src1),  asLongReg(src2));   break;
             case LDIV:  masm.div_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
             case LAND:  masm.and_b64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
-            case LOR:    masm.or_b64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LOR:   masm.or_b64(asLongReg(dst),    asLongReg(src1),   asLongReg(src2));   break;
             case LXOR:  masm.xor_b64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
-            case LSHL:  masm.shl_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
+            case LSHL:  masm.shl_b64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
             case LSHR:  masm.shr_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
             case LUSHR: masm.shr_u64(asLongReg(dst),   asLongReg(src1),   asIntReg(src2));    break;
             case LREM:  masm.rem_s64(asLongReg(dst),   asLongReg(src1),   asLongReg(src2));   break;
             case FADD:  masm.add_f32(asFloatReg(dst),  asFloatReg(src1),  asFloatReg(src2));  break;
             case FSUB:  masm.sub_f32(asFloatReg(dst),  asFloatReg(src1),  asFloatReg(src2));  break;
-            case FMUL:  masm.mul_f32(asFloatReg(dst),  asFloatReg(src1),  asFloatReg(src2));  break;
+            case FMUL:  masm.mul_lo_f32(asFloatReg(dst), asFloatReg(src1), asFloatReg(src2)); break;
             case FDIV:  masm.div_f32(asFloatReg(dst),  asFloatReg(src1),  asFloatReg(src2));  break;
             case FREM:  masm.div_f32(asFloatReg(dst),  asFloatReg(src1),  asFloatReg(src2));  break;
             case DADD:  masm.add_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
             case DSUB:  masm.sub_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
-            case DMUL:  masm.mul_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
+            case DMUL:  masm.mul_lo_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
             case DDIV:  masm.div_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
             case DREM:  masm.div_f64(asDoubleReg(dst), asDoubleReg(src1), asDoubleReg(src2)); break;
             default:
--- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java	Wed Sep 04 14:56:30 2013 +0200
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXMemOp.java	Wed Sep 04 10:47:37 2013 -0400
@@ -158,7 +158,7 @@
                     masm.ld_from_state_space(".param.s16", asRegister(result), addr.getBase(), addr.getDisplacement());
                     break;
                 case Char:
-                    masm.ld_from_state_space(".param.s16", asRegister(result), addr.getBase(), addr.getDisplacement());
+                    masm.ld_from_state_space(".param.u16", asRegister(result), addr.getBase(), addr.getDisplacement());
                     break;
                 case Int:
                     masm.ld_from_state_space(".param.s32", asRegister(result), addr.getBase(), addr.getDisplacement());
--- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXParameterOp.java	Wed Sep 04 14:56:30 2013 +0200
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/PTXParameterOp.java	Wed Sep 04 10:47:37 2013 -0400
@@ -42,23 +42,23 @@
 
     @Override
     public void emitCode(TargetMethodAssembler tasm) {
-        PTXAssembler ptxasm = (PTXAssembler) tasm.asm;
+        PTXAssembler masm = (PTXAssembler) tasm.asm;
         // Emit parameter directives for arguments
         int argCount = params.length;
         for (int i = 0; i < argCount; i++) {
             Kind paramKind = params[i].getKind();
             switch (paramKind) {
             case Int :
-                ptxasm.param_32_decl(asIntReg(params[i]), (i == (argCount - 1)));
+                masm.param_32_decl(asIntReg(params[i]), (i == (argCount - 1)));
                 break;
             case Long :
-                ptxasm.param_64_decl(asLongReg(params[i]), (i == (argCount - 1)));
+                masm.param_64_decl(asLongReg(params[i]), (i == (argCount - 1)));
                 break;
             case Float :
-                ptxasm.param_32_decl(asFloatReg(params[i]), (i == (argCount - 1)));
+                masm.param_32_decl(asFloatReg(params[i]), (i == (argCount - 1)));
                 break;
             case Double :
-                ptxasm.param_64_decl(asDoubleReg(params[i]), (i == (argCount - 1)));
+                masm.param_64_decl(asDoubleReg(params[i]), (i == (argCount - 1)));
                 break;
             default :
                 throw GraalInternalError.shouldNotReachHere("unhandled parameter type "  + paramKind.toString());
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Wed Sep 04 14:56:30 2013 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Wed Sep 04 10:47:37 2013 -0400
@@ -38,6 +38,7 @@
 gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create;
 gpu::Ptx::cuda_cu_ctx_destroy_func_t gpu::Ptx::_cuda_cu_ctx_destroy;
 gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize;
+gpu::Ptx::cuda_cu_ctx_set_current_func_t gpu::Ptx::_cuda_cu_ctx_set_current;
 gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count;
 gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name;
 gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get;
@@ -87,7 +88,7 @@
     tty->print_cr("Failed to initialize CUDA device");
     return false;
   }
- 
+
   if (TraceGPUInteraction) {
     tty->print_cr("CUDA driver initialization: Success");
   }
@@ -108,7 +109,7 @@
   if (TraceGPUInteraction) {
     tty->print_cr("[CUDA] Number of compute-capable devices found: %d", device_count);
   }
-  
+
   /* Get the handle to the first compute device */
   int device_id = 0;
   /* Compute-capable device handle */
@@ -195,12 +196,6 @@
   jit_options[2] = GRAAL_CU_JIT_MAX_REGISTERS;
   jit_option_values[2] = (void *)(size_t)jit_register_count;
 
-  if (TraceGPUInteraction) {
-    tty->print_cr("[CUDA] PTX Kernel\n%s", code);
-    tty->print_cr("[CUDA] Function name : %s", name);
-
-  }
-
   /* Create CUDA context to compile and execute the kernel */
   int status = _cuda_cu_ctx_create(&_device_context, 0, _cu_device);
 
@@ -213,6 +208,23 @@
     tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device);
   }
 
+  status = _cuda_cu_ctx_set_current(_device_context);
+
+  if (status != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] Failed to set current context for device: %d", _cu_device);
+    return NULL;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Success: Set current context for device: %d", _cu_device);
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] PTX Kernel\n%s", code);
+    tty->print_cr("[CUDA] Function name : %s", name);
+
+  }
+
   /* Load module's data with compiler options */
   status = _cuda_cu_module_load_data_ex(&cu_module, (void*) code, jit_num_options,
                                             jit_options, (void **)jit_option_values);
@@ -220,7 +232,7 @@
     if (status == GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU) {
       tty->print_cr("[CUDA] Check for malformed PTX kernel or incorrect PTX compilation options");
     }
-    tty->print_cr("[CUDA] *** Error (%d) Failed to load module data with online compiler options for method %s", 
+    tty->print_cr("[CUDA] *** Error (%d) Failed to load module data with online compiler options for method %s",
                   status, name);
     return NULL;
   }
@@ -255,7 +267,7 @@
   unsigned int blockX = 1;
   unsigned int blockY = 1;
   unsigned int blockZ = 1;
-  
+
   struct CUfunc_st* cu_function = (struct CUfunc_st*) kernel;
 
   void * config[5] = {
@@ -366,7 +378,7 @@
   if (cuda_library_name != NULL) {
     char *buffer = (char*)malloc(STD_BUFFER_SIZE);
     void *handle = os::dll_load(cuda_library_name, buffer, STD_BUFFER_SIZE);
-	free(buffer);
+        free(buffer);
     if (handle != NULL) {
       _cuda_cu_init =
         CAST_TO_FN_PTR(cuda_cu_init_func_t, os::dll_lookup(handle, "cuInit"));
@@ -376,6 +388,8 @@
         CAST_TO_FN_PTR(cuda_cu_ctx_destroy_func_t, os::dll_lookup(handle, "cuCtxDestroy"));
       _cuda_cu_ctx_synchronize =
         CAST_TO_FN_PTR(cuda_cu_ctx_synchronize_func_t, os::dll_lookup(handle, "cuCtxSynchronize"));
+      _cuda_cu_ctx_set_current =
+        CAST_TO_FN_PTR(cuda_cu_ctx_set_current_func_t, os::dll_lookup(handle, "cuCtxSetCurrent"));
       _cuda_cu_device_get_count =
         CAST_TO_FN_PTR(cuda_cu_device_get_count_func_t, os::dll_lookup(handle, "cuDeviceGetCount"));
       _cuda_cu_device_get_name =
@@ -416,4 +430,3 @@
   tty->print_cr("Failed to find CUDA linkage");
   return false;
 }
-
--- a/src/gpu/ptx/vm/gpu_ptx.hpp	Wed Sep 04 14:56:30 2013 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.hpp	Wed Sep 04 10:47:37 2013 -0400
@@ -87,6 +87,7 @@
   typedef int (*cuda_cu_ctx_create_func_t)(void*, int, int);
   typedef int (*cuda_cu_ctx_destroy_func_t)(void*);
   typedef int (*cuda_cu_ctx_synchronize_func_t)(void);
+  typedef int (*cuda_cu_ctx_set_current_func_t)(void*);
   typedef int (*cuda_cu_device_get_count_func_t)(int*);
   typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int);
   typedef int (*cuda_cu_device_get_func_t)(int*, int);
@@ -98,7 +99,7 @@
                                               unsigned int, void*, void**, void**);
   typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*);
   typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**);
-  typedef int (*cuda_cu_memalloc_func_t)(void*, unsigned int);
+  typedef int (*cuda_cu_memalloc_func_t)(void*, size_t);
   typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr);
   typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int);
   typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr,  unsigned int);
@@ -120,6 +121,7 @@
   static cuda_cu_memfree_func_t                   _cuda_cu_memfree;
   static cuda_cu_memcpy_htod_func_t               _cuda_cu_memcpy_htod;
   static cuda_cu_memcpy_dtoh_func_t               _cuda_cu_memcpy_dtoh;
+  static cuda_cu_ctx_set_current_func_t           _cuda_cu_ctx_set_current;
 
 protected:
   static void* _device_context;
--- a/src/gpu/ptx/vm/kernelArguments.hpp	Wed Sep 04 14:56:30 2013 +0200
+++ b/src/gpu/ptx/vm/kernelArguments.hpp	Wed Sep 04 10:47:37 2013 -0400
@@ -56,6 +56,7 @@
     _args = args;
     _success = true;
     _bufferOffset = 0;
+    _return_value_ptr = 0;
     if (!is_static) {
       // TODO : Create a device argument for receiver object and add it to _kernelBuffer
       tty->print_cr("{CUDA] ****** TODO: Support for execution of non-static java methods not implemented yet.");