[Beignet] [PATCH] support converting shorter int to 64bit int

Homer Hsing homer.xing at intel.com
Wed Aug 7 21:35:17 PDT 2013


converting byte/word/dword to int64
also add test case

Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/gen_context.cpp        | 26 ++++++++++++
 backend/src/backend/gen_insn_selection.cpp |  9 ++++
 backend/src/backend/gen_insn_selection.hxx |  1 +
 backend/src/backend/gen_register.hpp       |  6 +++
 kernels/compiler_long_convert.cl           |  7 ++++
 utests/CMakeLists.txt                      |  1 +
 utests/compiler_long_convert.cpp           | 67 ++++++++++++++++++++++++++++++
 7 files changed, 117 insertions(+)
 create mode 100644 kernels/compiler_long_convert.cl
 create mode 100644 utests/compiler_long_convert.cpp

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 0c5ecae..86f5924 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -158,6 +158,32 @@ namespace gbe
     switch (insn.opcode) {
       case SEL_OP_LOAD_DF_IMM: p->LOAD_DF_IMM(dst, src1, src0.value.df); break;
       case SEL_OP_MOV_DF: p->MOV_DF(dst, src0, src1); break;
+      case SEL_OP_CONVI_TO_I64: {
+        GenRegister middle;
+        if (src0.type == GEN_TYPE_B || src0.type == GEN_TYPE_D) {
+          middle = src1;
+          middle.type = src0.is_signed_int() ? GEN_TYPE_D : GEN_TYPE_UD;
+          p->MOV(middle, src0);
+        } else {
+          middle = src0;
+        }
+        int execWidth = p->curr.execWidth;
+        GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL);
+        p->push();
+        p->curr.execWidth = 8;
+        for (int nib = 0; nib < execWidth / 4; nib ++) {
+          p->curr.chooseNib(nib);
+          p->MOV(xdst.bottom_half(), middle);
+          if(middle.is_signed_int())
+            p->ASR(xdst.top_half(), middle, GenRegister::immud(31));
+          else
+            p->MOV(xdst.top_half(), GenRegister::immd(0));
+          xdst = GenRegister::suboffset(xdst, 4);
+          middle = GenRegister::suboffset(middle, 4);
+        }
+        p->pop();
+        break;
+      }
       case SEL_OP_SEL:  p->SEL(dst, src0, src1); break;
       case SEL_OP_SEL_INT64:
         {
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 1a3af68..3ef957f 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -451,6 +451,7 @@ namespace gbe
     ALU3(RHADD)
     ALU2(UPSAMPLE_SHORT)
     ALU2(UPSAMPLE_INT)
+    ALU2(CONVI_TO_I64)
 #undef ALU1
 #undef ALU2
 #undef ALU3
@@ -2223,6 +2224,14 @@ namespace gbe
       } else if (dst.isdf()) {
         ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
         sel.MOV_DF(dst, src, sel.selReg(r));
+      } else if (dst.isint64()) {
+        switch(src.type) {
+          case GEN_TYPE_F:
+          case GEN_TYPE_DF:
+            NOT_IMPLEMENTED;
+          default:
+            sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
+        }
       } else
         sel.MOV(dst, src);
       return true;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index eeca9af..66b8125 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -58,3 +58,4 @@ DECL_SELECTION_IR(HADD, TernaryInstruction)
 DECL_SELECTION_IR(RHADD, TernaryInstruction)
 DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
 DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
+DECL_SELECTION_IR(CONVI_TO_I64, BinaryInstruction)
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 2cad4c0..c953319 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -273,6 +273,12 @@ namespace gbe
       return r;
     }
 
+    INLINE bool is_signed_int(void) const {
+      if ((type == GEN_TYPE_B || type == GEN_TYPE_W || type == GEN_TYPE_D || type == GEN_TYPE_L) && file == GEN_GENERAL_REGISTER_FILE)
+        return true;
+      return false;
+    }
+
     INLINE bool isdf(void) const {
       if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE)
         return true;
diff --git a/kernels/compiler_long_convert.cl b/kernels/compiler_long_convert.cl
new file mode 100644
index 0000000..f22914f
--- /dev/null
+++ b/kernels/compiler_long_convert.cl
@@ -0,0 +1,7 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+kernel void compiler_long_convert(global char *src1, global short *src2, global int *src3, global long *dst1, global long *dst2, global long *dst3) {
+  int i = get_global_id(0);
+  dst1[i] = src1[i];
+  dst2[i] = src2[i];
+  dst3[i] = src3[i];
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 85c6902..33c3765 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -123,6 +123,7 @@ set (utests_sources
   compiler_double_4.cpp
   compiler_long.cpp
   compiler_long_2.cpp
+  compiler_long_convert.cpp
   utest_assert.cpp
   utest.cpp
   utest_file_map.cpp
diff --git a/utests/compiler_long_convert.cpp b/utests/compiler_long_convert.cpp
new file mode 100644
index 0000000..18e13ee
--- /dev/null
+++ b/utests/compiler_long_convert.cpp
@@ -0,0 +1,67 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+void compiler_long_convert(void)
+{
+  const size_t n = 16;
+  char src1[n];
+  short src2[n];
+  int src3[n];
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_long_convert");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(char), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL);
+  OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int64_t), NULL);
+  OCL_CREATE_BUFFER(buf[4], 0, n * sizeof(int64_t), NULL);
+  OCL_CREATE_BUFFER(buf[5], 0, n * sizeof(int64_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
+  OCL_SET_ARG(4, sizeof(cl_mem), &buf[4]);
+  OCL_SET_ARG(5, sizeof(cl_mem), &buf[5]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Run random tests
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src1[i] = -i;
+    src2[i] = -i;
+    src3[i] = -i;
+  }
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  memcpy(buf_data[0], src1, sizeof(src1));
+  memcpy(buf_data[1], src2, sizeof(src2));
+  memcpy(buf_data[2], src3, sizeof(src3));
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(3);
+  OCL_MAP_BUFFER(4);
+  OCL_MAP_BUFFER(5);
+  int64_t *dst1 = ((int64_t *)buf_data[3]);
+  int64_t *dst2 = ((int64_t *)buf_data[4]);
+  int64_t *dst3 = ((int64_t *)buf_data[5]);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    //printf("%lx %lx %lx\n", dst1[i], dst2[i], dst3[i]);
+    OCL_ASSERT(dst1[i] == -(int64_t)i);
+    OCL_ASSERT(dst2[i] == -(int64_t)i);
+    OCL_ASSERT(dst3[i] == -(int64_t)i);
+  }
+  OCL_UNMAP_BUFFER(3);
+  OCL_UNMAP_BUFFER(4);
+  OCL_UNMAP_BUFFER(5);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_long_convert);
-- 
1.8.1.2



More information about the Beignet mailing list