[Beignet] [PATCH v2] support converting shorter int to 64bit int
Homer Hsing
homer.xing at intel.com
Sun Aug 11 19:12:16 PDT 2013
converting byte/word/dword to int64
also add test case
v2: define temporary reg as dest reg of instruction
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/gen_context.cpp | 25 +++++++++++
backend/src/backend/gen_insn_selection.cpp | 9 ++++
backend/src/backend/gen_insn_selection.hxx | 1 +
backend/src/backend/gen_register.hpp | 6 +++
kernels/compiler_long_convert.cl | 7 ++++
utests/CMakeLists.txt | 1 +
utests/compiler_long_convert.cpp | 67 ++++++++++++++++++++++++++++++
7 files changed, 116 insertions(+)
create mode 100644 kernels/compiler_long_convert.cl
create mode 100644 utests/compiler_long_convert.cpp
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index f432fa5..5cf6a65 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -162,6 +162,31 @@ namespace gbe
case SEL_OP_MOV_DF:
p->MOV_DF(dst, src, tmp);
break;
+ case SEL_OP_CONVI_TO_I64: {
+ GenRegister middle;
+ if (src.type == GEN_TYPE_B || src.type == GEN_TYPE_D) {
+ middle = tmp;
+ middle.type = src.is_signed_int() ? GEN_TYPE_D : GEN_TYPE_UD;
+ p->MOV(middle, src);
+ } else {
+ middle = src;
+ }
+ int execWidth = p->curr.execWidth;
+ p->push();
+ p->curr.execWidth = 8;
+ for (int nib = 0; nib < execWidth / 4; nib ++) {
+ p->curr.chooseNib(nib);
+ p->MOV(dst.bottom_half(), middle);
+ if(middle.is_signed_int())
+ p->ASR(dst.top_half(), middle, GenRegister::immud(31));
+ else
+ p->MOV(dst.top_half(), GenRegister::immd(0));
+ dst = GenRegister::suboffset(dst, 4);
+ middle = GenRegister::suboffset(middle, 4);
+ }
+ p->pop();
+ break;
+ }
default:
NOT_IMPLEMENTED;
}
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 63638f3..8360b7b 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -455,6 +455,7 @@ namespace gbe
ALU2WithTemp(RHADD)
ALU2(UPSAMPLE_SHORT)
ALU2(UPSAMPLE_INT)
+ ALU1WithTemp(CONVI_TO_I64)
#undef ALU1
#undef ALU1WithTemp
#undef ALU2
@@ -2248,6 +2249,14 @@ namespace gbe
} else if (dst.isdf()) {
ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
sel.MOV_DF(dst, src, sel.selReg(r));
+ } else if (dst.isint64()) {
+ switch(src.type) {
+ case GEN_TYPE_F:
+ case GEN_TYPE_DF:
+ NOT_IMPLEMENTED;
+ default:
+ sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
+ }
} else
sel.MOV(dst, src);
return true;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 5660078..9e24dd9 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -58,3 +58,4 @@ DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction)
DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
+DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction)
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 2cad4c0..c953319 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -273,6 +273,12 @@ namespace gbe
return r;
}
+ INLINE bool is_signed_int(void) const {
+ if ((type == GEN_TYPE_B || type == GEN_TYPE_W || type == GEN_TYPE_D || type == GEN_TYPE_L) && file == GEN_GENERAL_REGISTER_FILE)
+ return true;
+ return false;
+ }
+
INLINE bool isdf(void) const {
if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE)
return true;
diff --git a/kernels/compiler_long_convert.cl b/kernels/compiler_long_convert.cl
new file mode 100644
index 0000000..f22914f
--- /dev/null
+++ b/kernels/compiler_long_convert.cl
@@ -0,0 +1,7 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+kernel void compiler_long_convert(global char *src1, global short *src2, global int *src3, global long *dst1, global long *dst2, global long *dst3) {
+ int i = get_global_id(0);
+ dst1[i] = src1[i];
+ dst2[i] = src2[i];
+ dst3[i] = src3[i];
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 85c6902..33c3765 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -123,6 +123,7 @@ set (utests_sources
compiler_double_4.cpp
compiler_long.cpp
compiler_long_2.cpp
+ compiler_long_convert.cpp
utest_assert.cpp
utest.cpp
utest_file_map.cpp
diff --git a/utests/compiler_long_convert.cpp b/utests/compiler_long_convert.cpp
new file mode 100644
index 0000000..18e13ee
--- /dev/null
+++ b/utests/compiler_long_convert.cpp
@@ -0,0 +1,67 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+void compiler_long_convert(void)
+{
+ const size_t n = 16;
+ char src1[n];
+ short src2[n];
+ int src3[n];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_long_convert");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(char), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL);
+ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL);
+ OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int64_t), NULL);
+ OCL_CREATE_BUFFER(buf[4], 0, n * sizeof(int64_t), NULL);
+ OCL_CREATE_BUFFER(buf[5], 0, n * sizeof(int64_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+ OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
+ OCL_SET_ARG(4, sizeof(cl_mem), &buf[4]);
+ OCL_SET_ARG(5, sizeof(cl_mem), &buf[5]);
+ globals[0] = n;
+ locals[0] = 16;
+
+ // Run random tests
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ src1[i] = -i;
+ src2[i] = -i;
+ src3[i] = -i;
+ }
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ OCL_MAP_BUFFER(2);
+ memcpy(buf_data[0], src1, sizeof(src1));
+ memcpy(buf_data[1], src2, sizeof(src2));
+ memcpy(buf_data[2], src3, sizeof(src3));
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+ OCL_UNMAP_BUFFER(2);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Compare
+ OCL_MAP_BUFFER(3);
+ OCL_MAP_BUFFER(4);
+ OCL_MAP_BUFFER(5);
+ int64_t *dst1 = ((int64_t *)buf_data[3]);
+ int64_t *dst2 = ((int64_t *)buf_data[4]);
+ int64_t *dst3 = ((int64_t *)buf_data[5]);
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ //printf("%lx %lx %lx\n", dst1[i], dst2[i], dst3[i]);
+ OCL_ASSERT(dst1[i] == -(int64_t)i);
+ OCL_ASSERT(dst2[i] == -(int64_t)i);
+ OCL_ASSERT(dst3[i] == -(int64_t)i);
+ }
+ OCL_UNMAP_BUFFER(3);
+ OCL_UNMAP_BUFFER(4);
+ OCL_UNMAP_BUFFER(5);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_long_convert);
--
1.8.1.2
More information about the Beignet
mailing list