[Beignet] [PATCH] support converting shorter int to 64bit int

Zhigang Gong zhigang.gong at gmail.com
Thu Aug 8 19:49:54 PDT 2013


Homer,

After analyzing the instruction scheduling code, I found there is a
common issue in some of your code including this patch. You introducued
some new instruction as below:

sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));

And the src1 is a temporary register which is in CONVI_TO_I64's
soruce register array. Thus the scheduler will treat it will not
be modified. This will break the scheduler's algorithm and may
cause very difficult bug.

When you introduce a new instruction, and to allocate some registers
for its usage. You need to put all the registers which may be changed
to the new instruction's destination register array rather than the
source register.

On Thu, Aug 08, 2013 at 12:35:17PM +0800, Homer Hsing wrote:
> converting byte/word/dword to int64
> also add test case
> 
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
>  backend/src/backend/gen_context.cpp        | 26 ++++++++++++
>  backend/src/backend/gen_insn_selection.cpp |  9 ++++
>  backend/src/backend/gen_insn_selection.hxx |  1 +
>  backend/src/backend/gen_register.hpp       |  6 +++
>  kernels/compiler_long_convert.cl           |  7 ++++
>  utests/CMakeLists.txt                      |  1 +
>  utests/compiler_long_convert.cpp           | 67 ++++++++++++++++++++++++++++++
>  7 files changed, 117 insertions(+)
>  create mode 100644 kernels/compiler_long_convert.cl
>  create mode 100644 utests/compiler_long_convert.cpp
> 
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 0c5ecae..86f5924 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -158,6 +158,32 @@ namespace gbe
>      switch (insn.opcode) {
>        case SEL_OP_LOAD_DF_IMM: p->LOAD_DF_IMM(dst, src1, src0.value.df); break;
>        case SEL_OP_MOV_DF: p->MOV_DF(dst, src0, src1); break;
> +      case SEL_OP_CONVI_TO_I64: {
> +        GenRegister middle;
> +        if (src0.type == GEN_TYPE_B || src0.type == GEN_TYPE_D) {
> +          middle = src1;
> +          middle.type = src0.is_signed_int() ? GEN_TYPE_D : GEN_TYPE_UD;
> +          p->MOV(middle, src0);
> +        } else {
> +          middle = src0;
> +        }
> +        int execWidth = p->curr.execWidth;
> +        GenRegister xdst = GenRegister::retype(dst, GEN_TYPE_UL);
> +        p->push();
> +        p->curr.execWidth = 8;
> +        for (int nib = 0; nib < execWidth / 4; nib ++) {
> +          p->curr.chooseNib(nib);
> +          p->MOV(xdst.bottom_half(), middle);
> +          if(middle.is_signed_int())
> +            p->ASR(xdst.top_half(), middle, GenRegister::immud(31));
> +          else
> +            p->MOV(xdst.top_half(), GenRegister::immd(0));
> +          xdst = GenRegister::suboffset(xdst, 4);
> +          middle = GenRegister::suboffset(middle, 4);
> +        }
> +        p->pop();
> +        break;
> +      }
>        case SEL_OP_SEL:  p->SEL(dst, src0, src1); break;
>        case SEL_OP_SEL_INT64:
>          {
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 1a3af68..3ef957f 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -451,6 +451,7 @@ namespace gbe
>      ALU3(RHADD)
>      ALU2(UPSAMPLE_SHORT)
>      ALU2(UPSAMPLE_INT)
> +    ALU2(CONVI_TO_I64)
>  #undef ALU1
>  #undef ALU2
>  #undef ALU3
> @@ -2223,6 +2224,14 @@ namespace gbe
>        } else if (dst.isdf()) {
>          ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD);
>          sel.MOV_DF(dst, src, sel.selReg(r));
> +      } else if (dst.isint64()) {
> +        switch(src.type) {
> +          case GEN_TYPE_F:
> +          case GEN_TYPE_DF:
> +            NOT_IMPLEMENTED;
> +          default:
> +            sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD)));
> +        }
>        } else
>          sel.MOV(dst, src);
>        return true;
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index eeca9af..66b8125 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -58,3 +58,4 @@ DECL_SELECTION_IR(HADD, TernaryInstruction)
>  DECL_SELECTION_IR(RHADD, TernaryInstruction)
>  DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
>  DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
> +DECL_SELECTION_IR(CONVI_TO_I64, BinaryInstruction)
> diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
> index 2cad4c0..c953319 100644
> --- a/backend/src/backend/gen_register.hpp
> +++ b/backend/src/backend/gen_register.hpp
> @@ -273,6 +273,12 @@ namespace gbe
>        return r;
>      }
>  
> +    INLINE bool is_signed_int(void) const {
> +      if ((type == GEN_TYPE_B || type == GEN_TYPE_W || type == GEN_TYPE_D || type == GEN_TYPE_L) && file == GEN_GENERAL_REGISTER_FILE)
> +        return true;
> +      return false;
> +    }
> +
>      INLINE bool isdf(void) const {
>        if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE)
>          return true;
> diff --git a/kernels/compiler_long_convert.cl b/kernels/compiler_long_convert.cl
> new file mode 100644
> index 0000000..f22914f
> --- /dev/null
> +++ b/kernels/compiler_long_convert.cl
> @@ -0,0 +1,7 @@
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> +kernel void compiler_long_convert(global char *src1, global short *src2, global int *src3, global long *dst1, global long *dst2, global long *dst3) {
> +  int i = get_global_id(0);
> +  dst1[i] = src1[i];
> +  dst2[i] = src2[i];
> +  dst3[i] = src3[i];
> +}
> diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
> index 85c6902..33c3765 100644
> --- a/utests/CMakeLists.txt
> +++ b/utests/CMakeLists.txt
> @@ -123,6 +123,7 @@ set (utests_sources
>    compiler_double_4.cpp
>    compiler_long.cpp
>    compiler_long_2.cpp
> +  compiler_long_convert.cpp
>    utest_assert.cpp
>    utest.cpp
>    utest_file_map.cpp
> diff --git a/utests/compiler_long_convert.cpp b/utests/compiler_long_convert.cpp
> new file mode 100644
> index 0000000..18e13ee
> --- /dev/null
> +++ b/utests/compiler_long_convert.cpp
> @@ -0,0 +1,67 @@
> +#include <cstdint>
> +#include <cstring>
> +#include <iostream>
> +#include "utest_helper.hpp"
> +
> +void compiler_long_convert(void)
> +{
> +  const size_t n = 16;
> +  char src1[n];
> +  short src2[n];
> +  int src3[n];
> +
> +  // Setup kernel and buffers
> +  OCL_CREATE_KERNEL("compiler_long_convert");
> +  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(char), NULL);
> +  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL);
> +  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL);
> +  OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int64_t), NULL);
> +  OCL_CREATE_BUFFER(buf[4], 0, n * sizeof(int64_t), NULL);
> +  OCL_CREATE_BUFFER(buf[5], 0, n * sizeof(int64_t), NULL);
> +  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> +  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
> +  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
> +  OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
> +  OCL_SET_ARG(4, sizeof(cl_mem), &buf[4]);
> +  OCL_SET_ARG(5, sizeof(cl_mem), &buf[5]);
> +  globals[0] = n;
> +  locals[0] = 16;
> +
> +  // Run random tests
> +  for (int32_t i = 0; i < (int32_t) n; ++i) {
> +    src1[i] = -i;
> +    src2[i] = -i;
> +    src3[i] = -i;
> +  }
> +  OCL_MAP_BUFFER(0);
> +  OCL_MAP_BUFFER(1);
> +  OCL_MAP_BUFFER(2);
> +  memcpy(buf_data[0], src1, sizeof(src1));
> +  memcpy(buf_data[1], src2, sizeof(src2));
> +  memcpy(buf_data[2], src3, sizeof(src3));
> +  OCL_UNMAP_BUFFER(0);
> +  OCL_UNMAP_BUFFER(1);
> +  OCL_UNMAP_BUFFER(2);
> +
> +  // Run the kernel on GPU
> +  OCL_NDRANGE(1);
> +
> +  // Compare
> +  OCL_MAP_BUFFER(3);
> +  OCL_MAP_BUFFER(4);
> +  OCL_MAP_BUFFER(5);
> +  int64_t *dst1 = ((int64_t *)buf_data[3]);
> +  int64_t *dst2 = ((int64_t *)buf_data[4]);
> +  int64_t *dst3 = ((int64_t *)buf_data[5]);
> +  for (int32_t i = 0; i < (int32_t) n; ++i) {
> +    //printf("%lx %lx %lx\n", dst1[i], dst2[i], dst3[i]);
> +    OCL_ASSERT(dst1[i] == -(int64_t)i);
> +    OCL_ASSERT(dst2[i] == -(int64_t)i);
> +    OCL_ASSERT(dst3[i] == -(int64_t)i);
> +  }
> +  OCL_UNMAP_BUFFER(3);
> +  OCL_UNMAP_BUFFER(4);
> +  OCL_UNMAP_BUFFER(5);
> +}
> +
> +MAKE_UTEST_FROM_FUNCTION(compiler_long_convert);
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list