[Beignet] [PATCH] support converting shorter int to 64bit int
Xing, Homer
homer.xing at intel.com
Thu Aug 8 19:53:54 PDT 2013
OK. Will put the temp register to destination register array.
-----Original Message-----
From: Zhigang Gong [mailto:zhigang.gong at gmail.com]
Sent: Friday, August 9, 2013 10:50 AM
To: Xing, Homer
Cc: beignet at lists.freedesktop.org
Subject: Re: [Beignet] [PATCH] support converting shorter int to 64bit int
Homer,
After analyzing the instruction scheduling code, I found there is a common issue in some of your code including this patch. You introducued some new instruction as below:
sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
And the src1 is a temporary register which is in CONVI_TO_I64's soruce register array. Thus the scheduler will treat it will not be modified. This will break the scheduler's algorithm and may cause very difficult bug.
When you introduce a new instruction, and to allocate some registers for its usage. You need to put all the registers which may be changed to the new instruction's destination register array rather than the source register.
On Thu, Aug 08, 2013 at 12:35:17PM +0800, Homer Hsing wrote:
> converting byte/word/dword to int64
> also add test case
>
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
> backend/src/backend/gen_context.cpp | 26 ++++++++++++
> backend/src/backend/gen_insn_selection.cpp | 9 ++++
> backend/src/backend/gen_insn_selection.hxx | 1 +
> backend/src/backend/gen_register.hpp | 6 +++
> kernels/compiler_long_convert.cl | 7 ++++
> utests/CMakeLists.txt | 1 +
> utests/compiler_long_convert.cpp | 67 ++++++++++++++++++++++++++++++
> 7 files changed, 117 insertions(+)
> create mode 100644 kernels/compiler_long_convert.cl create mode
> 100644 utests/compiler_long_convert.cpp
>
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 0c5ecae..86f5924 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -158,6 +158,32 @@ namespace gbe
> switch (insn.opcode) {
> case SEL_OP_LOAD_DF_IMM: p->LOAD_DF_IMM(dst, src1, src0.value.df); break;
> case SEL_OP_MOV_DF: p->MOV_DF(dst, src0, src1); break;
> + case SEL_OP_CONVI_TO_I64: {
> + GenRegister middle;
> + if (src0.type == GEN_TYPE_B || src0.type == GEN_TYPE_D) {
> + middle = src1;
> + middle.type = src0.is_signed_int() ? GEN_TYPE_D : GEN_TYPE_UD;
> + p->MOV(middle, src0);
> + } else {
> + middle = src0;
> + }
> + int execWidth = p->curr.execWidth;
> + GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL);
> + p->push();
> + p->curr.execWidth = 8;
> + for (int nib = 0; nib < execWidth / 4; nib ++) {
> + p->curr.chooseNib(nib);
> + p->MOV(xdst.bottom_half(), middle);
> + if(middle.is_signed_int())
> + p->ASR(xdst.top_half(), middle, GenRegister::immud(31));
> + else
> + p->MOV(xdst.top_half(), GenRegister::immd(0));
> + xdst = GenRegister::suboffset(xdst, 4);
> + middle = GenRegister::suboffset(middle, 4);
> + }
> + p->pop();
> + break;
> + }
> case SEL_OP_SEL: p->SEL(dst, src0, src1); break;
> case SEL_OP_SEL_INT64:
> {
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index 1a3af68..3ef957f 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -451,6 +451,7 @@ namespace gbe
> ALU3(RHADD)
> ALU2(UPSAMPLE_SHORT)
> ALU2(UPSAMPLE_INT)
> + ALU2(CONVI_TO_I64)
> #undef ALU1
> #undef ALU2
> #undef ALU3
> @@ -2223,6 +2224,14 @@ namespace gbe
> } else if (dst.isdf()) {
> ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
> sel.MOV_DF(dst, src, sel.selReg(r));
> + } else if (dst.isint64()) {
> + switch(src.type) {
> + case GEN_TYPE_F:
> + case GEN_TYPE_DF:
> + NOT_IMPLEMENTED;
> + default:
> + sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
> + }
> } else
> sel.MOV(dst, src);
> return true;
> diff --git a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index eeca9af..66b8125 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -58,3 +58,4 @@ DECL_SELECTION_IR(HADD, TernaryInstruction)
> DECL_SELECTION_IR(RHADD, TernaryInstruction)
> DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
> DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
> +DECL_SELECTION_IR(CONVI_TO_I64, BinaryInstruction)
> diff --git a/backend/src/backend/gen_register.hpp
> b/backend/src/backend/gen_register.hpp
> index 2cad4c0..c953319 100644
> --- a/backend/src/backend/gen_register.hpp
> +++ b/backend/src/backend/gen_register.hpp
> @@ -273,6 +273,12 @@ namespace gbe
> return r;
> }
>
> + INLINE bool is_signed_int(void) const {
> + if ((type == GEN_TYPE_B || type == GEN_TYPE_W || type == GEN_TYPE_D || type == GEN_TYPE_L) && file == GEN_GENERAL_REGISTER_FILE)
> + return true;
> + return false;
> + }
> +
> INLINE bool isdf(void) const {
> if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE)
> return true;
> diff --git a/kernels/compiler_long_convert.cl
> b/kernels/compiler_long_convert.cl
> new file mode 100644
> index 0000000..f22914f
> --- /dev/null
> +++ b/kernels/compiler_long_convert.cl
> @@ -0,0 +1,7 @@
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable kernel void
> +compiler_long_convert(global char *src1, global short *src2, global
> +int *src3, global long *dst1, global long *dst2, global long *dst3) {
> + int i = get_global_id(0);
> + dst1[i] = src1[i];
> + dst2[i] = src2[i];
> + dst3[i] = src3[i];
> +}
> diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index
> 85c6902..33c3765 100644
> --- a/utests/CMakeLists.txt
> +++ b/utests/CMakeLists.txt
> @@ -123,6 +123,7 @@ set (utests_sources
> compiler_double_4.cpp
> compiler_long.cpp
> compiler_long_2.cpp
> + compiler_long_convert.cpp
> utest_assert.cpp
> utest.cpp
> utest_file_map.cpp
> diff --git a/utests/compiler_long_convert.cpp
> b/utests/compiler_long_convert.cpp
> new file mode 100644
> index 0000000..18e13ee
> --- /dev/null
> +++ b/utests/compiler_long_convert.cpp
> @@ -0,0 +1,67 @@
> +#include <cstdint>
> +#include <cstring>
> +#include <iostream>
> +#include "utest_helper.hpp"
> +
> +void compiler_long_convert(void)
> +{
> + const size_t n = 16;
> + char src1[n];
> + short src2[n];
> + int src3[n];
> +
> + // Setup kernel and buffers
> + OCL_CREATE_KERNEL("compiler_long_convert");
> + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(char), NULL);
> + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL);
> + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL);
> + OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int64_t), NULL);
> + OCL_CREATE_BUFFER(buf[4], 0, n * sizeof(int64_t), NULL);
> + OCL_CREATE_BUFFER(buf[5], 0, n * sizeof(int64_t), NULL);
> + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1,
> + sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
> + OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); OCL_SET_ARG(4,
> + sizeof(cl_mem), &buf[4]); OCL_SET_ARG(5, sizeof(cl_mem), &buf[5]);
> + globals[0] = n; locals[0] = 16;
> +
> + // Run random tests
> + for (int32_t i = 0; i < (int32_t) n; ++i) {
> + src1[i] = -i;
> + src2[i] = -i;
> + src3[i] = -i;
> + }
> + OCL_MAP_BUFFER(0);
> + OCL_MAP_BUFFER(1);
> + OCL_MAP_BUFFER(2);
> + memcpy(buf_data[0], src1, sizeof(src1)); memcpy(buf_data[1], src2,
> + sizeof(src2)); memcpy(buf_data[2], src3, sizeof(src3));
> + OCL_UNMAP_BUFFER(0); OCL_UNMAP_BUFFER(1); OCL_UNMAP_BUFFER(2);
> +
> + // Run the kernel on GPU
> + OCL_NDRANGE(1);
> +
> + // Compare
> + OCL_MAP_BUFFER(3);
> + OCL_MAP_BUFFER(4);
> + OCL_MAP_BUFFER(5);
> + int64_t *dst1 = ((int64_t *)buf_data[3]);
> + int64_t *dst2 = ((int64_t *)buf_data[4]);
> + int64_t *dst3 = ((int64_t *)buf_data[5]);
> + for (int32_t i = 0; i < (int32_t) n; ++i) {
> + //printf("%lx %lx %lx\n", dst1[i], dst2[i], dst3[i]);
> + OCL_ASSERT(dst1[i] == -(int64_t)i);
> + OCL_ASSERT(dst2[i] == -(int64_t)i);
> + OCL_ASSERT(dst3[i] == -(int64_t)i);
> + }
> + OCL_UNMAP_BUFFER(3);
> + OCL_UNMAP_BUFFER(4);
> + OCL_UNMAP_BUFFER(5);
> +}
> +
> +MAKE_UTEST_FROM_FUNCTION(compiler_long_convert);
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list