[Beignet] [PATCH] support 64bit-integer multiplication

Zhigang Gong zhigang.gong at linux.intel.com
Tue Aug 13 03:02:29 PDT 2013


LGTM, pushed, thanks.

On Tue, Aug 13, 2013 at 11:05:28AM +0800, Homer Hsing wrote:
> also add test case
> 
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
>  backend/src/backend/gen_context.cpp                | 46 ++++++++++++++++++++
>  backend/src/backend/gen_context.hpp                |  3 ++
>  .../src/backend/gen_insn_gen7_schedule_info.hxx    |  1 +
>  backend/src/backend/gen_insn_selection.cpp         | 23 +++++++---
>  backend/src/backend/gen_insn_selection.hxx         |  1 +
>  kernels/compiler_long_mult.cl                      |  7 ++++
>  utests/CMakeLists.txt                              |  1 +
>  utests/compiler_long_mult.cpp                      | 49 ++++++++++++++++++++++
>  8 files changed, 126 insertions(+), 5 deletions(-)
>  create mode 100644 kernels/compiler_long_mult.cl
>  create mode 100644 utests/compiler_long_mult.cpp
> 
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 406cb80..86f3555 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -598,6 +598,52 @@ namespace gbe
>      p->pop();
>    }
>  
> +  void GenContext::I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1) {
> +    GenRegister acc = GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD);
> +    int execWidth = p->curr.execWidth;
> +    p->push();
> +    p->curr.execWidth = 8;
> +    for(int i = 0; i < execWidth; i += 8) {
> +      p->MUL(acc, src0, src1);
> +      p->curr.accWrEnable = 1;
> +      p->MACH(high, src0, src1);
> +      p->curr.accWrEnable = 0;
> +      p->MOV(low, acc);
> +      src0 = GenRegister::suboffset(src0, 8);
> +      src1 = GenRegister::suboffset(src1, 8);
> +      high = GenRegister::suboffset(high, 8);
> +      low = GenRegister::suboffset(low, 8);
> +    }
> +    p->pop();
> +  }
> +
> +  void GenContext::emitI64MULInstruction(const SelectionInstruction &insn) {
> +    GenRegister dest = ra->genReg(insn.dst(0));
> +    GenRegister x = ra->genReg(insn.src(0));
> +    GenRegister y = ra->genReg(insn.src(1));
> +    GenRegister a = ra->genReg(insn.dst(1));
> +    GenRegister b = ra->genReg(insn.dst(2));
> +    GenRegister c = ra->genReg(insn.dst(3));
> +    GenRegister d = ra->genReg(insn.dst(4));
> +    GenRegister e = ra->genReg(insn.dst(5));
> +    GenRegister f = ra->genReg(insn.dst(6));
> +    a.type = b.type = c.type = d.type = e.type = f.type = GEN_TYPE_UD;
> +    loadTopHalf(a, x);
> +    loadBottomHalf(b, x);
> +    loadTopHalf(c, y);
> +    loadBottomHalf(d, y);
> +    p->push();
> +    p->curr.predicate = GEN_PREDICATE_NONE;
> +    I32FullMult(GenRegister::null(), e, b, c);
> +    I32FullMult(GenRegister::null(), f, a, d);
> +    p->ADD(e, e, f);
> +    I32FullMult(f, a, b, d);
> +    p->ADD(e, e, f);
> +    p->pop();
> +    storeTopHalf(dest, e);
> +    storeBottomHalf(dest, a);
> +  }
> +
>    void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
>      const GenRegister dst = ra->genReg(insn.dst(0));
>      const GenRegister src0 = ra->genReg(insn.src(0));
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index b294042..1531961 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -86,6 +86,7 @@ namespace gbe
>  
>      void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1);
>      void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1);
> +    void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1);
>  
>      /*! Final Gen ISA emission helper functions */
>      void emitLabelInstruction(const SelectionInstruction &insn);
> @@ -116,8 +117,10 @@ namespace gbe
>      void emitSpillRegInstruction(const SelectionInstruction &insn);
>      void emitUnSpillRegInstruction(const SelectionInstruction &insn);
>      void emitGetImageInfoInstruction(const SelectionInstruction &insn);
> +    void emitI64MULInstruction(const SelectionInstruction &insn);
>      void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
>      void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode);
> +
>      /*! Implements base class */
>      virtual Kernel *allocateKernel(void);
>      /*! Store the position of each label instruction in the Gen ISA stream */
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index 4879b66..7f214ac 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -27,3 +27,4 @@ DECL_GEN7_SCHEDULE(SpillReg,        80,        1,        1)
>  DECL_GEN7_SCHEDULE(UnSpillReg,      80,        1,        1)
>  DECL_GEN7_SCHEDULE(GetImageInfo,    20,        4,        2)
>  DECL_GEN7_SCHEDULE(Atomic,          80,        1,        1)
> +DECL_GEN7_SCHEDULE(I64MUL,          20,        4,        2)
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index 929a3bd..9e3c535 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -523,6 +523,8 @@ namespace gbe
>      void TYPED_WRITE(GenRegister *src, uint32_t srcNum, GenRegister *msgs, uint32_t msgNum, uint32_t bti);
>      /*! Get image information */
>      void GET_IMAGE_INFO(uint32_t type, GenRegister *dst, uint32_t dst_num, uint32_t bti);
> +    /*! Multiply 64-bit integers */
> +    void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
>      /*! Use custom allocators */
>      GBE_CLASS(Opaque);
>      friend class SelectionBlock;
> @@ -1003,6 +1005,15 @@ namespace gbe
>      insn->extra.function = function;
>    }
>  
> +  void Selection::Opaque::I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
> +    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MUL, 7, 2);
> +    insn->dst(0) = dst;
> +    insn->src(0) = src0;
> +    insn->src(1) = src1;
> +    for(int i = 0; i < 6; i++)
> +      insn->dst(i + 1) = tmp[i];
> +  }
> +
>    void Selection::Opaque::ALU1(SelectionOpcode opcode, Reg dst, Reg src) {
>      SelectionInstruction *insn = this->appendInsn(opcode, 1, 1);
>      insn->dst(0) = dst;
> @@ -1610,12 +1621,14 @@ namespace gbe
>            if (type == TYPE_U32 || type == TYPE_S32) {
>              sel.pop();
>              return false;
> -          }
> -          else {
> -            GBE_ASSERTM((type != TYPE_S64 && type != TYPE_U64), "64bit integer not supported yet!" );
> +          } else if (type == TYPE_S64 || type == TYPE_U64) {
> +            GenRegister tmp[6];
> +            for(int i = 0; i < 6; i++)
> +              tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
> +            sel.I64MUL(dst, src0, src1, tmp);
> +          } else
>              sel.MUL(dst, src0, src1);
> -          }
> -        break;
> +          break;
>          case OP_HADD: {
>              GenRegister temp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), GEN_TYPE_D);
>              sel.HADD(dst, src0, src1, temp);
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index 06469ca..6ef50b8 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -30,6 +30,7 @@ DECL_SELECTION_IR(ADD, BinaryInstruction)
>  DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction)
>  DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
>  DECL_SELECTION_IR(MUL, BinaryInstruction)
> +DECL_SELECTION_IR(I64MUL, I64MULInstruction)
>  DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
>  DECL_SELECTION_IR(MACH, BinaryInstruction)
>  DECL_SELECTION_IR(CMP, CompareInstruction)
> diff --git a/kernels/compiler_long_mult.cl b/kernels/compiler_long_mult.cl
> new file mode 100644
> index 0000000..5b96d74
> --- /dev/null
> +++ b/kernels/compiler_long_mult.cl
> @@ -0,0 +1,7 @@
> +kernel void compiler_long_mult(global long *src1, global long *src2, global long *dst) {
> +  int i = get_global_id(0);
> +  if(i < 3)
> +    dst[i] = src1[i] + src2[i];
> +  else
> +    dst[i] = src1[i] * src2[i];
> +}
> diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
> index b3d039e..746d77b 100644
> --- a/utests/CMakeLists.txt
> +++ b/utests/CMakeLists.txt
> @@ -129,6 +129,7 @@ set (utests_sources
>    compiler_long_shl.cpp
>    compiler_long_shr.cpp
>    compiler_long_asr.cpp
> +  compiler_long_mult.cpp
>    utest_assert.cpp
>    utest.cpp
>    utest_file_map.cpp
> diff --git a/utests/compiler_long_mult.cpp b/utests/compiler_long_mult.cpp
> new file mode 100644
> index 0000000..06070f7
> --- /dev/null
> +++ b/utests/compiler_long_mult.cpp
> @@ -0,0 +1,49 @@
> +#include <cstdint>
> +#include <cstring>
> +#include <iostream>
> +#include "utest_helper.hpp"
> +
> +void compiler_long_mult(void)
> +{
> +  const size_t n = 16;
> +  int64_t src1[n], src2[n];
> +
> +  // Setup kernel and buffers
> +  OCL_CREATE_KERNEL("compiler_long_mult");
> +  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL);
> +  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL);
> +  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL);
> +  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> +  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
> +  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
> +  globals[0] = n;
> +  locals[0] = 16;
> +
> +  // Run random tests
> +  for (int32_t i = 0; i < (int32_t) n; ++i) {
> +    src1[i] = 0x77665544FFEEDDCCLL;
> +    src2[i] = ((int64_t)rand() << 32) + rand();
> +  }
> +  OCL_MAP_BUFFER(0);
> +  OCL_MAP_BUFFER(1);
> +  memcpy(buf_data[0], src1, sizeof(src1));
> +  memcpy(buf_data[1], src2, sizeof(src2));
> +  OCL_UNMAP_BUFFER(0);
> +  OCL_UNMAP_BUFFER(1);
> +
> +  // Run the kernel on GPU
> +  OCL_NDRANGE(1);
> +
> +  // Compare
> +  OCL_MAP_BUFFER(2);
> +  for (int32_t i = 0; i < (int32_t) n; ++i) {
> +    //printf("%lx\n", ((int64_t *)buf_data[2])[i]);
> +    if (i < 3)
> +      OCL_ASSERT(src1[i] + src2[i] == ((int64_t *)buf_data[2])[i]);
> +    else
> +      OCL_ASSERT(src1[i] * src2[i] == ((int64_t *)buf_data[2])[i]);
> +  }
> +  OCL_UNMAP_BUFFER(2);
> +}
> +
> +MAKE_UTEST_FROM_FUNCTION(compiler_long_mult);
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list