[Beignet] [PATCH v2] support 64bit-integer addition, subtraction

Zhigang Gong zhigang.gong at gmail.com
Tue Aug 6 00:56:21 PDT 2013


Pushed, thanks.

On Tue, Aug 06, 2013 at 03:41:40PM +0800, Homer Hsing wrote:
> also enable GPU command "subb" (subtract with borrow)
> 
> also add test cases
> 
> v2: renamed GEN_TYPE_UQ/GEN_TYPE_Q to GEN_TYPE_UL/GEN_TYPE_L
> 
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
>  backend/src/backend/gen/gen_mesa_disasm.c  |   1 +
>  backend/src/backend/gen_context.cpp        | 130 +++++++++++++++++++++++++++++
>  backend/src/backend/gen_context.hpp        |  11 +++
>  backend/src/backend/gen_defs.hpp           |   3 +
>  backend/src/backend/gen_encoder.cpp        |   7 ++
>  backend/src/backend/gen_encoder.hpp        |   1 +
>  backend/src/backend/gen_insn_selection.cpp |  22 ++++-
>  backend/src/backend/gen_insn_selection.hxx |   2 +
>  backend/src/backend/gen_register.hpp       |  22 +++++
>  kernels/compiler_long.cl                   |   7 ++
>  utests/CMakeLists.txt                      |   1 +
>  utests/compiler_long.cpp                   |  58 +++++++++++++
>  12 files changed, 262 insertions(+), 3 deletions(-)
>  create mode 100644 kernels/compiler_long.cl
>  create mode 100644 utests/compiler_long.cpp
> 
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
> index ce20e44..7d77e7a 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -83,6 +83,7 @@ static const struct {
>    [GEN_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
>    [GEN_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
>    [GEN_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
> +  [GEN_OPCODE_SUBB] = { .name = "subb", .nsrc = 2, .ndst = 1 },
>    [GEN_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
>    [GEN_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
>    [GEN_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 709122a..7a33b79 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -175,12 +175,142 @@ namespace gbe
>      }
>    }
>  
> +  void GenContext::loadTopHalf(GenRegister dest, GenRegister src) {
> +    int execWidth = p->curr.execWidth;
> +    src = src.top_half();
> +    p->push();
> +    p->curr.predicate = GEN_PREDICATE_NONE;
> +    p->curr.execWidth = 8;
> +    p->MOV(dest, src);
> +    p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 8));
> +    if (execWidth == 16) {
> +      p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 16));
> +      p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 24));
> +    }
> +    p->pop();
> +  }
> +
> +  void GenContext::storeTopHalf(GenRegister dest, GenRegister src) {
> +    int execWidth = p->curr.execWidth;
> +    dest = dest.top_half();
> +    p->push();
> +    p->curr.execWidth = 8;
> +    p->MOV(dest, src);
> +    p->curr.nibControl = 1;
> +    p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 4));
> +    if (execWidth == 16) {
> +      p->curr.quarterControl = 1;
> +      p->curr.nibControl = 0;
> +      p->MOV(GenRegister::suboffset(dest, 16), GenRegister::suboffset(src, 8));
> +      p->curr.nibControl = 1;
> +      p->MOV(GenRegister::suboffset(dest, 24), GenRegister::suboffset(src, 12));
> +    }
> +    p->pop();
> +  }
> +
> +  void GenContext::loadBottomHalf(GenRegister dest, GenRegister src) {
> +    int execWidth = p->curr.execWidth;
> +    src = src.bottom_half();
> +    p->push();
> +    p->curr.predicate = GEN_PREDICATE_NONE;
> +    p->curr.execWidth = 8;
> +    p->MOV(dest, src);
> +    p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 8));
> +    if (execWidth == 16) {
> +      p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 16));
> +      p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 24));
> +    }
> +    p->pop();
> +  }
> +
> +  void GenContext::storeBottomHalf(GenRegister dest, GenRegister src) {
> +    int execWidth = p->curr.execWidth;
> +    dest = dest.bottom_half();
> +    p->push();
> +    p->curr.execWidth = 8;
> +    p->MOV(dest, src);
> +    p->curr.nibControl = 1;
> +    p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 4));
> +    if (execWidth == 16) {
> +      p->curr.quarterControl = 1;
> +      p->curr.nibControl = 0;
> +      p->MOV(GenRegister::suboffset(dest, 16), GenRegister::suboffset(src, 8));
> +      p->curr.nibControl = 1;
> +      p->MOV(GenRegister::suboffset(dest, 24), GenRegister::suboffset(src, 12));
> +    }
> +    p->pop();
> +  }
> +
> +  void GenContext::addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1) {
> +    int execWidth = p->curr.execWidth;
> +    GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
> +    p->push();
> +    p->curr.predicate = GEN_PREDICATE_NONE;
> +    p->curr.execWidth = 8;
> +    p->ADDC(dest, src0, src1);
> +    p->MOV(src1, acc0);
> +    if (execWidth == 16) {
> +      p->ADDC(GenRegister::suboffset(dest, 8),
> +              GenRegister::suboffset(src0, 8),
> +              GenRegister::suboffset(src1, 8));
> +      p->MOV(GenRegister::suboffset(src1, 8), acc0);
> +    }
> +    p->pop();    
> +  }
> +
> +  void GenContext::subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1) {
> +    int execWidth = p->curr.execWidth;
> +    GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
> +    p->push();
> +    p->curr.predicate = GEN_PREDICATE_NONE;
> +    p->curr.execWidth = 8;
> +    p->SUBB(dest, src0, src1);
> +    p->MOV(src1, acc0);
> +    if (execWidth == 16) {
> +      p->SUBB(GenRegister::suboffset(dest, 8),
> +              GenRegister::suboffset(src0, 8),
> +              GenRegister::suboffset(src1, 8));
> +      p->MOV(GenRegister::suboffset(src1, 8), acc0);
> +    }
> +    p->pop();    
> +  }
> +
>    void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
>      const GenRegister dst = ra->genReg(insn.dst(0));
>      const GenRegister src0 = ra->genReg(insn.src(0));
>      const GenRegister src1 = ra->genReg(insn.src(1));
>      const GenRegister src2 = ra->genReg(insn.src(2));
>      switch (insn.opcode) {
> +      case SEL_OP_I64ADD:
> +        {
> +          GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
> +                      y = GenRegister::suboffset(x, p->curr.execWidth);
> +          loadBottomHalf(x, src0);
> +          loadBottomHalf(y, src1);
> +          addWithCarry(x, x, y);
> +          storeBottomHalf(dst, x);
> +          loadTopHalf(x, src0);
> +          p->ADD(x, x, y);
> +          loadTopHalf(y, src1);
> +          p->ADD(x, x, y);
> +          storeTopHalf(dst, x);
> +        }
> +        break;
> +      case SEL_OP_I64SUB:
> +        {
> +          GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
> +                      y = GenRegister::suboffset(x, p->curr.execWidth);
> +          loadBottomHalf(x, src0);
> +          loadBottomHalf(y, src1);
> +          subWithBorrow(x, x, y);
> +          storeBottomHalf(dst, x);
> +          loadTopHalf(x, src0);
> +          subWithBorrow(x, x, y);
> +          loadTopHalf(y, src1);
> +          subWithBorrow(x, x, y);
> +          storeTopHalf(dst, x);
> +        }
> +        break;
>        case SEL_OP_MUL_HI:
>         {
>          int w = p->curr.execWidth;
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 82d41b6..dc5dc45 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -27,6 +27,7 @@
>  
>  #include "backend/context.hpp"
>  #include "backend/program.h"
> +#include "backend/gen_register.hpp"
>  #include "ir/function.hpp"
>  #include "ir/liveness.hpp"
>  #include "sys/map.hpp"
> @@ -73,6 +74,16 @@ namespace gbe
>      INLINE const ir::Liveness::LiveOut &getLiveOut(const ir::BasicBlock *bb) const {
>        return this->liveness->getLiveOut(bb);
>      }
> +
> +    void loadTopHalf(GenRegister dest, GenRegister src);
> +    void storeTopHalf(GenRegister dest, GenRegister src);
> +
> +    void loadBottomHalf(GenRegister dest, GenRegister src);
> +    void storeBottomHalf(GenRegister dest, GenRegister src);
> +
> +    void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1);
> +    void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1);
> +
>      /*! Final Gen ISA emission helper functions */
>      void emitLabelInstruction(const SelectionInstruction &insn);
>      void emitUnaryInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
> index 5a9bb2d..5b15e30 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -157,6 +157,7 @@ enum opcode {
>    GEN_OPCODE_FBH = 75,
>    GEN_OPCODE_FBL = 76,
>    GEN_OPCODE_ADDC = 78,
> +  GEN_OPCODE_SUBB = 79,
>    GEN_OPCODE_SAD2 = 80,
>    GEN_OPCODE_SADA2 = 81,
>    GEN_OPCODE_DP4 = 84,
> @@ -242,6 +243,8 @@ enum GenMessageTarget {
>  #define GEN_TYPE_V   6 /* packed int vector, immediates only, uword dest only */
>  #define GEN_TYPE_DF  6
>  #define GEN_TYPE_F   7
> +#define GEN_TYPE_UL  8
> +#define GEN_TYPE_L   9
>  
>  #define GEN_ARF_NULL                  0x00
>  #define GEN_ARF_ADDRESS               0x10
> diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
> index 70e542f..64b5bd1 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -882,6 +882,13 @@ namespace gbe
>    ALU2(MACH)
>    ALU3(MAD)
>  
> +  void GenEncoder::SUBB(GenRegister dest, GenRegister src0, GenRegister src1) {
> +    push();
> +    curr.accWrEnable = 1;
> +    alu2(this, GEN_OPCODE_SUBB, dest, src0, src1);
> +    pop();
> +  }
> +
>    void GenEncoder::ADDC(GenRegister dest, GenRegister src0, GenRegister src1) {
>      push();
>      curr.accWrEnable = 1;
> diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
> index 054c343..083bd8c 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -92,6 +92,7 @@ namespace gbe
>      ALU1(MOV)
>      ALU1(FBH)
>      ALU1(FBL)
> +    ALU2(SUBB)
>      ALU2(UPSAMPLE_SHORT)
>      ALU2(UPSAMPLE_INT)
>      ALU1(RNDZ)
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index f3baa6d..eca62b4 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -128,6 +128,8 @@ namespace gbe
>        case TYPE_U16: return GEN_TYPE_UW;
>        case TYPE_S32: return GEN_TYPE_D;
>        case TYPE_U32: return GEN_TYPE_UD;
> +      case TYPE_S64: return GEN_TYPE_L;
> +      case TYPE_U64: return GEN_TYPE_UL;
>        case TYPE_FLOAT: return GEN_TYPE_F;
>        case TYPE_DOUBLE: return GEN_TYPE_DF;
>        default: NOT_SUPPORTED; return GEN_TYPE_F;
> @@ -426,6 +428,8 @@ namespace gbe
>      ALU2(RSL)
>      ALU2(ASR)
>      ALU2(ADD)
> +    ALU3(I64ADD)
> +    ALU3(I64SUB)
>      ALU2(MUL)
>      ALU1(FRC)
>      ALU1(RNDD)
> @@ -1193,7 +1197,7 @@ namespace gbe
>      using namespace ir;
>      const auto &childInsn = cast<LoadImmInstruction>(insn);
>      const auto &imm = childInsn.getImmediate();
> -    if(imm.type != TYPE_DOUBLE)
> +    if(imm.type != TYPE_DOUBLE && imm.type != TYPE_S64 && imm.type != TYPE_U64)
>        return true;
>      return false;
>    }
> @@ -1416,7 +1420,13 @@ namespace gbe
>  
>        // Output the binary instruction
>        switch (opcode) {
> -        case OP_ADD: sel.ADD(dst, src0, src1); break;
> +        case OP_ADD:
> +          if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
> +            GenRegister t = sel.selReg(sel.reg(RegisterFamily::FAMILY_QWORD), Type::TYPE_S64);
> +            sel.I64ADD(dst, src0, src1, t);
> +          } else
> +            sel.ADD(dst, src0, src1);
> +          break;
>          case OP_ADDSAT:
>            sel.push();
>              sel.curr.saturate = GEN_MATH_SATURATE_SATURATE;
> @@ -1426,7 +1436,13 @@ namespace gbe
>          case OP_XOR: sel.XOR(dst, src0, src1); break;
>          case OP_OR:  sel.OR(dst, src0,  src1); break;
>          case OP_AND: sel.AND(dst, src0, src1); break;
> -        case OP_SUB: sel.ADD(dst, src0, GenRegister::negate(src1)); break;
> +        case OP_SUB:
> +          if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
> +            GenRegister t = sel.selReg(sel.reg(RegisterFamily::FAMILY_QWORD), Type::TYPE_S64);
> +            sel.I64SUB(dst, src0, src1, t);
> +          } else
> +            sel.ADD(dst, src0, GenRegister::negate(src1));
> +          break;
>          case OP_SUBSAT:
>            sel.push();
>              sel.curr.saturate = GEN_MATH_SATURATE_SATURATE;
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index c6aede5..f2b86c4 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -19,6 +19,8 @@ DECL_SELECTION_IR(RSR, BinaryInstruction)
>  DECL_SELECTION_IR(RSL, BinaryInstruction)
>  DECL_SELECTION_IR(ASR, BinaryInstruction)
>  DECL_SELECTION_IR(ADD, BinaryInstruction)
> +DECL_SELECTION_IR(I64ADD, TernaryInstruction)
> +DECL_SELECTION_IR(I64SUB, TernaryInstruction)
>  DECL_SELECTION_IR(MUL, BinaryInstruction)
>  DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
>  DECL_SELECTION_IR(MACH, BinaryInstruction)
> diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
> index 7e48837..fda2e6c 100644
> --- a/backend/src/backend/gen_register.hpp
> +++ b/backend/src/backend/gen_register.hpp
> @@ -70,6 +70,8 @@ namespace gbe
>    INLINE int typeSize(uint32_t type) {
>      switch(type) {
>        case GEN_TYPE_DF:
> +      case GEN_TYPE_UL:
> +      case GEN_TYPE_L:
>          return 8;
>        case GEN_TYPE_UD:
>        case GEN_TYPE_D:
> @@ -222,12 +224,32 @@ namespace gbe
>        return r;
>      }
>  
> +    INLINE bool isint64(void) const {
> +      if ((type == GEN_TYPE_UL || type == GEN_TYPE_L) && file == GEN_GENERAL_REGISTER_FILE)
> +        return true;
> +      return false;
> +    }
> +
>      INLINE bool isimmdf(void) const {
>        if (type == GEN_TYPE_DF && file == GEN_IMMEDIATE_VALUE)
>          return true;
>        return false;
>      }
>  
> +    INLINE GenRegister top_half(void) const {
> +      GenRegister r = bottom_half();
> +      r.subnr += 4;
> +      return r;
> +    }
> +
> +    INLINE GenRegister bottom_half(void) const {
> +      GBE_ASSERT(isint64());
> +      GenRegister r = *this;
> +      r.type = type == GEN_TYPE_UL ? GEN_TYPE_UD : GEN_TYPE_D;
> +      r.hstride = GEN_HORIZONTAL_STRIDE_2;
> +      return r;
> +    }
> +
>      INLINE bool isdf(void) const {
>        if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE)
>          return true;
> diff --git a/kernels/compiler_long.cl b/kernels/compiler_long.cl
> new file mode 100644
> index 0000000..3087292
> --- /dev/null
> +++ b/kernels/compiler_long.cl
> @@ -0,0 +1,7 @@
> +kernel void compiler_long(global long *src1, global long *src2, global long *dst) {
> +  int i = get_global_id(0);
> +  if(i < 5)
> +    dst[i] = src1[i] + src2[i];
> +  if(i > 5)
> +    dst[i] = src1[i] - src2[i];
> +}
> diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
> index be565ea..a0dafa2 100644
> --- a/utests/CMakeLists.txt
> +++ b/utests/CMakeLists.txt
> @@ -121,6 +121,7 @@ set (utests_sources
>    compiler_double_2.cpp
>    compiler_double_3.cpp
>    compiler_double_4.cpp
> +  compiler_long.cpp
>    utest_assert.cpp
>    utest.cpp
>    utest_file_map.cpp
> diff --git a/utests/compiler_long.cpp b/utests/compiler_long.cpp
> new file mode 100644
> index 0000000..fad2744
> --- /dev/null
> +++ b/utests/compiler_long.cpp
> @@ -0,0 +1,58 @@
> +#include <cstdint>
> +#include <cstring>
> +#include <iostream>
> +#include "utest_helper.hpp"
> +
> +void compiler_long(void)
> +{
> +  const size_t n = 16;
> +  int64_t src1[n], src2[n];
> +
> +  // Setup kernel and buffers
> +  OCL_CREATE_KERNEL("compiler_long");
> +  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL);
> +  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL);
> +  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL);
> +  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> +  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
> +  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
> +  globals[0] = n;
> +  locals[0] = 16;
> +
> +  // Run random tests
> +  src1[0] = -1L,                  src2[0] = -1L;
> +  src1[1] = 0x8000000000000000UL, src2[1] = 0x8000000000000000UL;
> +  src1[2] = 0x7FFFFFFFFFFFFFFFL,  src2[2] = 1L;
> +  src1[3] = 0xFFFFFFFEL,          src2[3] = 1L;
> +  src1[4] = 0x7FFFFFFFL,          src2[4] = 0x80000000L;
> +  src1[5] = 0,                    src2[5] = 0;
> +  src1[6] = 0,                    src2[6] = 1;
> +  src1[7] = -2L,                  src2[7] = -1L;
> +  src1[8] = 0,                    src2[8] = 0x8000000000000000UL;
> +  for (int32_t i = 9; i < (int32_t) n; ++i) {
> +    src1[i] = ((long)rand() << 32) + rand();
> +    src2[i] = ((long)rand() << 32) + rand();
> +  }
> +  OCL_MAP_BUFFER(0);
> +  OCL_MAP_BUFFER(1);
> +  memcpy(buf_data[0], src1, sizeof(src1));
> +  memcpy(buf_data[1], src2, sizeof(src2));
> +  OCL_UNMAP_BUFFER(0);
> +  OCL_UNMAP_BUFFER(1);
> +
> +  // Run the kernel on GPU
> +  OCL_NDRANGE(1);
> +
> +  // Compare
> +  OCL_MAP_BUFFER(2);
> +  for (int32_t i = 0; i < (int32_t) n; ++i) {
> +    //printf("%lx\n", ((int64_t *)buf_data[2])[i]);
> +    if (i < 5)
> +      OCL_ASSERT(src1[i] + src2[i] == ((int64_t *)buf_data[2])[i]);
> +    if (i > 5)
> +      OCL_ASSERT(src1[i] - src2[i] == ((int64_t *)buf_data[2])[i]);
> +  }
> +  OCL_UNMAP_BUFFER(2);
> +}
> +
> +MAKE_UTEST_FROM_FUNCTION(compiler_long);
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list