[Beignet] [PATCH] support converting 64-bit integer to shorter integer

Homer Hsing homer.xing at intel.com
Sun Sep 1 18:25:10 PDT 2013


Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/gen_context.cpp        | 15 +++++++++
 backend/src/backend/gen_insn_selection.cpp | 13 ++++++--
 backend/src/backend/gen_insn_selection.hxx |  1 +
 kernels/compiler_long_convert.cl           |  7 ++++
 utests/compiler_long_convert.cpp           | 51 ++++++++++++++++++++++++++++++
 5 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index a029719..3e3a8fc 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -159,6 +159,21 @@ namespace gbe
       case SEL_OP_RNDE: p->RNDE(dst, src); break;
       case SEL_OP_RNDZ: p->RNDZ(dst, src); break;
       case SEL_OP_LOAD_INT64_IMM: p->LOAD_INT64_IMM(dst, src.value.i64); break;
+      case SEL_OP_CONVI64_TO_I:
+       {
+        int execWidth = p->curr.execWidth;
+        GenRegister xsrc = src.bottom_half(), xdst = dst;
+        p->push();
+        p->curr.execWidth = 8;
+        for(int i = 0; i < execWidth/4; i ++) {
+          p->curr.chooseNib(i);
+          p->MOV(xdst, xsrc);
+          xdst = GenRegister::suboffset(xdst, 4);
+          xsrc = GenRegister::suboffset(xsrc, 8);
+        }
+        p->pop();
+        break;
+       }
       default: NOT_IMPLEMENTED;
     }
   }
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index bca08ba..f1b85bb 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -459,6 +459,7 @@ namespace gbe
     ALU2(UPSAMPLE_INT)
     ALU2(UPSAMPLE_LONG)
     ALU1WithTemp(CONVI_TO_I64)
+    ALU1(CONVI64_TO_I)
     I64Shift(I64SHL)
     I64Shift(I64SHR)
     I64Shift(I64ASR)
@@ -2340,7 +2341,7 @@ namespace gbe
       const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
 
       // We need two instructions to make the conversion
-      if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && srcFamily == FAMILY_DWORD) {
+      if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
         GenRegister unpacked;
         if (dstFamily == FAMILY_WORD) {
           const uint32_t type = TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
@@ -2351,8 +2352,16 @@ namespace gbe
           unpacked = GenRegister::unpacked_ub(sel.reg(FAMILY_DWORD));
           unpacked = GenRegister::retype(unpacked, type);
         }
-        sel.MOV(unpacked, src);
+        if(srcFamily == FAMILY_QWORD) {
+          GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD));
+          tmp.type = GEN_TYPE_D;
+          sel.CONVI64_TO_I(tmp, src);
+          sel.MOV(unpacked, tmp);
+        } else
+          sel.MOV(unpacked, src);
         sel.MOV(dst, unpacked);
+      } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
+        sel.CONVI64_TO_I(dst, src);
       } else if (dst.isdf()) {
         ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
         sel.MOV_DF(dst, src, sel.selReg(r));
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 32c7a05..5c857dd 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -65,3 +65,4 @@ DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
 DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
 DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction)
 DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction)
+DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction)
diff --git a/kernels/compiler_long_convert.cl b/kernels/compiler_long_convert.cl
index f22914f..03df147 100644
--- a/kernels/compiler_long_convert.cl
+++ b/kernels/compiler_long_convert.cl
@@ -5,3 +5,10 @@ kernel void compiler_long_convert(global char *src1, global short *src2, global
   dst2[i] = src2[i];
   dst3[i] = src3[i];
 }
+
+kernel void compiler_long_convert_2(global char *dst1, global short *dst2, global int *dst3, global long *src) {
+  int i = get_global_id(0);
+  dst1[i] = src[i];
+  dst2[i] = src[i];
+  dst3[i] = src[i];
+}
diff --git a/utests/compiler_long_convert.cpp b/utests/compiler_long_convert.cpp
index 18e13ee..fe976be 100644
--- a/utests/compiler_long_convert.cpp
+++ b/utests/compiler_long_convert.cpp
@@ -3,6 +3,7 @@
 #include <iostream>
 #include "utest_helper.hpp"
 
+// convert shorter integer to 64-bit integer
 void compiler_long_convert(void)
 {
   const size_t n = 16;
@@ -65,3 +66,53 @@ void compiler_long_convert(void)
 }
 
 MAKE_UTEST_FROM_FUNCTION(compiler_long_convert);
+
+// convert 64-bit integer to shorter integer
+void compiler_long_convert_2(void)
+{
+  const size_t n = 16;
+  int64_t src[n];
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_long_convert", "compiler_long_convert_2");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(char), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL);
+  OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int64_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Run random tests
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src[i] = -i;
+  }
+  OCL_MAP_BUFFER(3);
+  memcpy(buf_data[3], src, sizeof(src));
+  OCL_UNMAP_BUFFER(3);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  char *dst1 = ((char *)buf_data[0]);
+  short *dst2 = ((short *)buf_data[1]);
+  int *dst3 = ((int *)buf_data[2]);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    //printf("%x %x %x\n", dst1[i], dst2[i], dst3[i]);
+    OCL_ASSERT(dst1[i] == -i);
+    OCL_ASSERT(dst2[i] == -i);
+    OCL_ASSERT(dst3[i] == -i);
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_2);
-- 
1.8.1.2



More information about the Beignet mailing list