[Beignet] [PATCH 2/2] support 64bit-integer addition, subtraction
Zhigang Gong
zhigang.gong at gmail.com
Tue Aug 6 00:43:03 PDT 2013
good, no need to send a new version, I will help to modify it when
I push it.
On Tue, Aug 06, 2013 at 07:33:55AM +0000, Xing, Homer wrote:
> Good idea. I will rename them.
>
> -----Original Message-----
> From: Zhigang Gong [mailto:zhigang.gong at gmail.com]
> Sent: Tuesday, August 6, 2013 3:33 PM
> To: Xing, Homer
> Cc: beignet at lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH 2/2] support 64bit-integer addition, subtraction
>
> Nice patch. One minor comment. You pick GEN_TYPE_UQ/GEN_TYPE_Q for the ulong and long type enum name.
> IMO, Q is more like a family name rather than a type name, How about change to use GEN_TYPE_L/GEN_TYPE_UL?
>
> On Tue, Aug 06, 2013 at 02:24:35PM +0800, Homer Hsing wrote:
> > also enable GPU command "subb" (subtract with borrow)
> >
> > also add test cases
> >
> > Signed-off-by: Homer Hsing <homer.xing at intel.com>
> > ---
> > backend/src/backend/gen/gen_mesa_disasm.c | 1 +
> > backend/src/backend/gen_context.cpp | 130 +++++++++++++++++++++++++++++
> > backend/src/backend/gen_context.hpp | 11 +++
> > backend/src/backend/gen_defs.hpp | 3 +
> > backend/src/backend/gen_encoder.cpp | 7 ++
> > backend/src/backend/gen_encoder.hpp | 1 +
> > backend/src/backend/gen_insn_selection.cpp | 22 ++++-
> > backend/src/backend/gen_insn_selection.hxx | 2 +
> > backend/src/backend/gen_register.hpp | 22 +++++
> > kernels/compiler_long.cl | 7 ++
> > utests/CMakeLists.txt | 1 +
> > utests/compiler_long.cpp | 58 +++++++++++++
> > 12 files changed, 262 insertions(+), 3 deletions(-) create mode
> > 100644 kernels/compiler_long.cl create mode 100644
> > utests/compiler_long.cpp
> >
> > diff --git a/backend/src/backend/gen/gen_mesa_disasm.c
> > b/backend/src/backend/gen/gen_mesa_disasm.c
> > index ce20e44..7d77e7a 100644
> > --- a/backend/src/backend/gen/gen_mesa_disasm.c
> > +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> > @@ -83,6 +83,7 @@ static const struct {
> > [GEN_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
> > [GEN_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
> > [GEN_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
> > + [GEN_OPCODE_SUBB] = { .name = "subb", .nsrc = 2, .ndst = 1 },
> > [GEN_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
> > [GEN_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
> > [GEN_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, diff
> > --git a/backend/src/backend/gen_context.cpp
> > b/backend/src/backend/gen_context.cpp
> > index 3d8ed1c..9447f97 100644
> > --- a/backend/src/backend/gen_context.cpp
> > +++ b/backend/src/backend/gen_context.cpp
> > @@ -175,12 +175,142 @@ namespace gbe
> > }
> > }
> >
> > + void GenContext::loadTopHalf(GenRegister dest, GenRegister src) {
> > + int execWidth = p->curr.execWidth;
> > + src = src.top_half();
> > + p->push();
> > + p->curr.predicate = GEN_PREDICATE_NONE;
> > + p->curr.execWidth = 8;
> > + p->MOV(dest, src);
> > + p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 8));
> > + if (execWidth == 16) {
> > + p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 16));
> > + p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 24));
> > + }
> > + p->pop();
> > + }
> > +
> > + void GenContext::storeTopHalf(GenRegister dest, GenRegister src) {
> > + int execWidth = p->curr.execWidth;
> > + dest = dest.top_half();
> > + p->push();
> > + p->curr.execWidth = 8;
> > + p->MOV(dest, src);
> > + p->curr.nibControl = 1;
> > + p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 4));
> > + if (execWidth == 16) {
> > + p->curr.quarterControl = 1;
> > + p->curr.nibControl = 0;
> > + p->MOV(GenRegister::suboffset(dest, 16), GenRegister::suboffset(src, 8));
> > + p->curr.nibControl = 1;
> > + p->MOV(GenRegister::suboffset(dest, 24), GenRegister::suboffset(src, 12));
> > + }
> > + p->pop();
> > + }
> > +
> > + void GenContext::loadBottomHalf(GenRegister dest, GenRegister src) {
> > + int execWidth = p->curr.execWidth;
> > + src = src.bottom_half();
> > + p->push();
> > + p->curr.predicate = GEN_PREDICATE_NONE;
> > + p->curr.execWidth = 8;
> > + p->MOV(dest, src);
> > + p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 8));
> > + if (execWidth == 16) {
> > + p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 16));
> > + p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 24));
> > + }
> > + p->pop();
> > + }
> > +
> > + void GenContext::storeBottomHalf(GenRegister dest, GenRegister src) {
> > + int execWidth = p->curr.execWidth;
> > + dest = dest.bottom_half();
> > + p->push();
> > + p->curr.execWidth = 8;
> > + p->MOV(dest, src);
> > + p->curr.nibControl = 1;
> > + p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 4));
> > + if (execWidth == 16) {
> > + p->curr.quarterControl = 1;
> > + p->curr.nibControl = 0;
> > + p->MOV(GenRegister::suboffset(dest, 16), GenRegister::suboffset(src, 8));
> > + p->curr.nibControl = 1;
> > + p->MOV(GenRegister::suboffset(dest, 24), GenRegister::suboffset(src, 12));
> > + }
> > + p->pop();
> > + }
> > +
> > + void GenContext::addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1) {
> > + int execWidth = p->curr.execWidth;
> > + GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
> > + p->push();
> > + p->curr.predicate = GEN_PREDICATE_NONE;
> > + p->curr.execWidth = 8;
> > + p->ADDC(dest, src0, src1);
> > + p->MOV(src1, acc0);
> > + if (execWidth == 16) {
> > + p->ADDC(GenRegister::suboffset(dest, 8),
> > + GenRegister::suboffset(src0, 8),
> > + GenRegister::suboffset(src1, 8));
> > + p->MOV(GenRegister::suboffset(src1, 8), acc0);
> > + }
> > + p->pop();
> > + }
> > +
> > + void GenContext::subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1) {
> > + int execWidth = p->curr.execWidth;
> > + GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
> > + p->push();
> > + p->curr.predicate = GEN_PREDICATE_NONE;
> > + p->curr.execWidth = 8;
> > + p->SUBB(dest, src0, src1);
> > + p->MOV(src1, acc0);
> > + if (execWidth == 16) {
> > + p->SUBB(GenRegister::suboffset(dest, 8),
> > + GenRegister::suboffset(src0, 8),
> > + GenRegister::suboffset(src1, 8));
> > + p->MOV(GenRegister::suboffset(src1, 8), acc0);
> > + }
> > + p->pop();
> > + }
> > +
> > void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
> > const GenRegister dst = ra->genReg(insn.dst(0));
> > const GenRegister src0 = ra->genReg(insn.src(0));
> > const GenRegister src1 = ra->genReg(insn.src(1));
> > const GenRegister src2 = ra->genReg(insn.src(2));
> > switch (insn.opcode) {
> > + case SEL_OP_I64ADD:
> > + {
> > + GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
> > + y = GenRegister::suboffset(x, p->curr.execWidth);
> > + loadBottomHalf(x, src0);
> > + loadBottomHalf(y, src1);
> > + addWithCarry(x, x, y);
> > + storeBottomHalf(dst, x);
> > + loadTopHalf(x, src0);
> > + p->ADD(x, x, y);
> > + loadTopHalf(y, src1);
> > + p->ADD(x, x, y);
> > + storeTopHalf(dst, x);
> > + }
> > + break;
> > + case SEL_OP_I64SUB:
> > + {
> > + GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
> > + y = GenRegister::suboffset(x, p->curr.execWidth);
> > + loadBottomHalf(x, src0);
> > + loadBottomHalf(y, src1);
> > + subWithBorrow(x, x, y);
> > + storeBottomHalf(dst, x);
> > + loadTopHalf(x, src0);
> > + subWithBorrow(x, x, y);
> > + loadTopHalf(y, src1);
> > + subWithBorrow(x, x, y);
> > + storeTopHalf(dst, x);
> > + }
> > + break;
> > case SEL_OP_MUL_HI:
> > {
> > int w = p->curr.execWidth;
> > diff --git a/backend/src/backend/gen_context.hpp
> > b/backend/src/backend/gen_context.hpp
> > index 82d41b6..dc5dc45 100644
> > --- a/backend/src/backend/gen_context.hpp
> > +++ b/backend/src/backend/gen_context.hpp
> > @@ -27,6 +27,7 @@
> >
> > #include "backend/context.hpp"
> > #include "backend/program.h"
> > +#include "backend/gen_register.hpp"
> > #include "ir/function.hpp"
> > #include "ir/liveness.hpp"
> > #include "sys/map.hpp"
> > @@ -73,6 +74,16 @@ namespace gbe
> > INLINE const ir::Liveness::LiveOut &getLiveOut(const ir::BasicBlock *bb) const {
> > return this->liveness->getLiveOut(bb);
> > }
> > +
> > + void loadTopHalf(GenRegister dest, GenRegister src);
> > + void storeTopHalf(GenRegister dest, GenRegister src);
> > +
> > + void loadBottomHalf(GenRegister dest, GenRegister src);
> > + void storeBottomHalf(GenRegister dest, GenRegister src);
> > +
> > + void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1);
> > + void subWithBorrow(GenRegister dest, GenRegister src0,
> > + GenRegister src1);
> > +
> > /*! Final Gen ISA emission helper functions */
> > void emitLabelInstruction(const SelectionInstruction &insn);
> > void emitUnaryInstruction(const SelectionInstruction &insn); diff
> > --git a/backend/src/backend/gen_defs.hpp
> > b/backend/src/backend/gen_defs.hpp
> > index 5a9bb2d..6c8297a 100644
> > --- a/backend/src/backend/gen_defs.hpp
> > +++ b/backend/src/backend/gen_defs.hpp
> > @@ -157,6 +157,7 @@ enum opcode {
> > GEN_OPCODE_FBH = 75,
> > GEN_OPCODE_FBL = 76,
> > GEN_OPCODE_ADDC = 78,
> > + GEN_OPCODE_SUBB = 79,
> > GEN_OPCODE_SAD2 = 80,
> > GEN_OPCODE_SADA2 = 81,
> > GEN_OPCODE_DP4 = 84,
> > @@ -242,6 +243,8 @@ enum GenMessageTarget {
> > #define GEN_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
> > #define GEN_TYPE_DF 6
> > #define GEN_TYPE_F 7
> > +#define GEN_TYPE_UQ 8
> > +#define GEN_TYPE_Q 9
> >
> > #define GEN_ARF_NULL 0x00
> > #define GEN_ARF_ADDRESS 0x10
> > diff --git a/backend/src/backend/gen_encoder.cpp
> > b/backend/src/backend/gen_encoder.cpp
> > index 70e542f..64b5bd1 100644
> > --- a/backend/src/backend/gen_encoder.cpp
> > +++ b/backend/src/backend/gen_encoder.cpp
> > @@ -882,6 +882,13 @@ namespace gbe
> > ALU2(MACH)
> > ALU3(MAD)
> >
> > + void GenEncoder::SUBB(GenRegister dest, GenRegister src0, GenRegister src1) {
> > + push();
> > + curr.accWrEnable = 1;
> > + alu2(this, GEN_OPCODE_SUBB, dest, src0, src1);
> > + pop();
> > + }
> > +
> > void GenEncoder::ADDC(GenRegister dest, GenRegister src0, GenRegister src1) {
> > push();
> > curr.accWrEnable = 1;
> > diff --git a/backend/src/backend/gen_encoder.hpp
> > b/backend/src/backend/gen_encoder.hpp
> > index 054c343..083bd8c 100644
> > --- a/backend/src/backend/gen_encoder.hpp
> > +++ b/backend/src/backend/gen_encoder.hpp
> > @@ -92,6 +92,7 @@ namespace gbe
> > ALU1(MOV)
> > ALU1(FBH)
> > ALU1(FBL)
> > + ALU2(SUBB)
> > ALU2(UPSAMPLE_SHORT)
> > ALU2(UPSAMPLE_INT)
> > ALU1(RNDZ)
> > diff --git a/backend/src/backend/gen_insn_selection.cpp
> > b/backend/src/backend/gen_insn_selection.cpp
> > index f3baa6d..2ef17e9 100644
> > --- a/backend/src/backend/gen_insn_selection.cpp
> > +++ b/backend/src/backend/gen_insn_selection.cpp
> > @@ -128,6 +128,8 @@ namespace gbe
> > case TYPE_U16: return GEN_TYPE_UW;
> > case TYPE_S32: return GEN_TYPE_D;
> > case TYPE_U32: return GEN_TYPE_UD;
> > + case TYPE_S64: return GEN_TYPE_Q;
> > + case TYPE_U64: return GEN_TYPE_UQ;
> > case TYPE_FLOAT: return GEN_TYPE_F;
> > case TYPE_DOUBLE: return GEN_TYPE_DF;
> > default: NOT_SUPPORTED; return GEN_TYPE_F; @@ -426,6 +428,8 @@
> > namespace gbe
> > ALU2(RSL)
> > ALU2(ASR)
> > ALU2(ADD)
> > + ALU3(I64ADD)
> > + ALU3(I64SUB)
> > ALU2(MUL)
> > ALU1(FRC)
> > ALU1(RNDD)
> > @@ -1193,7 +1197,7 @@ namespace gbe
> > using namespace ir;
> > const auto &childInsn = cast<LoadImmInstruction>(insn);
> > const auto &imm = childInsn.getImmediate();
> > - if(imm.type != TYPE_DOUBLE)
> > + if(imm.type != TYPE_DOUBLE && imm.type != TYPE_S64 && imm.type !=
> > + TYPE_U64)
> > return true;
> > return false;
> > }
> > @@ -1416,7 +1420,13 @@ namespace gbe
> >
> > // Output the binary instruction
> > switch (opcode) {
> > - case OP_ADD: sel.ADD(dst, src0, src1); break;
> > + case OP_ADD:
> > + if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
> > + GenRegister t = sel.selReg(sel.reg(RegisterFamily::FAMILY_QWORD), Type::TYPE_S64);
> > + sel.I64ADD(dst, src0, src1, t);
> > + } else
> > + sel.ADD(dst, src0, src1);
> > + break;
> > case OP_ADDSAT:
> > sel.push();
> > sel.curr.saturate = GEN_MATH_SATURATE_SATURATE; @@
> > -1426,7 +1436,13 @@ namespace gbe
> > case OP_XOR: sel.XOR(dst, src0, src1); break;
> > case OP_OR: sel.OR(dst, src0, src1); break;
> > case OP_AND: sel.AND(dst, src0, src1); break;
> > - case OP_SUB: sel.ADD(dst, src0, GenRegister::negate(src1)); break;
> > + case OP_SUB:
> > + if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
> > + GenRegister t = sel.selReg(sel.reg(RegisterFamily::FAMILY_QWORD), Type::TYPE_S64);
> > + sel.I64SUB(dst, src0, src1, t);
> > + } else
> > + sel.ADD(dst, src0, GenRegister::negate(src1));
> > + break;
> > case OP_SUBSAT:
> > sel.push();
> > sel.curr.saturate = GEN_MATH_SATURATE_SATURATE; diff
> > --git a/backend/src/backend/gen_insn_selection.hxx
> > b/backend/src/backend/gen_insn_selection.hxx
> > index c6aede5..f2b86c4 100644
> > --- a/backend/src/backend/gen_insn_selection.hxx
> > +++ b/backend/src/backend/gen_insn_selection.hxx
> > @@ -19,6 +19,8 @@ DECL_SELECTION_IR(RSR, BinaryInstruction)
> > DECL_SELECTION_IR(RSL, BinaryInstruction) DECL_SELECTION_IR(ASR,
> > BinaryInstruction) DECL_SELECTION_IR(ADD, BinaryInstruction)
> > +DECL_SELECTION_IR(I64ADD, TernaryInstruction)
> > +DECL_SELECTION_IR(I64SUB, TernaryInstruction)
> > DECL_SELECTION_IR(MUL, BinaryInstruction) DECL_SELECTION_IR(ATOMIC,
> > AtomicInstruction) DECL_SELECTION_IR(MACH, BinaryInstruction) diff
> > --git a/backend/src/backend/gen_register.hpp
> > b/backend/src/backend/gen_register.hpp
> > index 7e48837..fd70b90 100644
> > --- a/backend/src/backend/gen_register.hpp
> > +++ b/backend/src/backend/gen_register.hpp
> > @@ -70,6 +70,8 @@ namespace gbe
> > INLINE int typeSize(uint32_t type) {
> > switch(type) {
> > case GEN_TYPE_DF:
> > + case GEN_TYPE_UQ:
> > + case GEN_TYPE_Q:
> > return 8;
> > case GEN_TYPE_UD:
> > case GEN_TYPE_D:
> > @@ -222,12 +224,32 @@ namespace gbe
> > return r;
> > }
> >
> > + INLINE bool isint64(void) const {
> > + if ((type == GEN_TYPE_UQ || type == GEN_TYPE_Q) && file == GEN_GENERAL_REGISTER_FILE)
> > + return true;
> > + return false;
> > + }
> > +
> > INLINE bool isimmdf(void) const {
> > if (type == GEN_TYPE_DF && file == GEN_IMMEDIATE_VALUE)
> > return true;
> > return false;
> > }
> >
> > + INLINE GenRegister top_half(void) const {
> > + GenRegister r = bottom_half();
> > + r.subnr += 4;
> > + return r;
> > + }
> > +
> > + INLINE GenRegister bottom_half(void) const {
> > + GBE_ASSERT(isint64());
> > + GenRegister r = *this;
> > + r.type = type == GEN_TYPE_UQ ? GEN_TYPE_UD : GEN_TYPE_D;
> > + r.hstride = GEN_HORIZONTAL_STRIDE_2;
> > + return r;
> > + }
> > +
> > INLINE bool isdf(void) const {
> > if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE)
> > return true;
> > diff --git a/kernels/compiler_long.cl b/kernels/compiler_long.cl new
> > file mode 100644 index 0000000..3087292
> > --- /dev/null
> > +++ b/kernels/compiler_long.cl
> > @@ -0,0 +1,7 @@
> > +kernel void compiler_long(global long *src1, global long *src2,
> > +global long *dst) {
> > + int i = get_global_id(0);
> > + if(i < 5)
> > + dst[i] = src1[i] + src2[i];
> > + if(i > 5)
> > + dst[i] = src1[i] - src2[i];
> > +}
> > diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index
> > be565ea..a0dafa2 100644
> > --- a/utests/CMakeLists.txt
> > +++ b/utests/CMakeLists.txt
> > @@ -121,6 +121,7 @@ set (utests_sources
> > compiler_double_2.cpp
> > compiler_double_3.cpp
> > compiler_double_4.cpp
> > + compiler_long.cpp
> > utest_assert.cpp
> > utest.cpp
> > utest_file_map.cpp
> > diff --git a/utests/compiler_long.cpp b/utests/compiler_long.cpp new
> > file mode 100644 index 0000000..fad2744
> > --- /dev/null
> > +++ b/utests/compiler_long.cpp
> > @@ -0,0 +1,58 @@
> > +#include <cstdint>
> > +#include <cstring>
> > +#include <iostream>
> > +#include "utest_helper.hpp"
> > +
> > +void compiler_long(void)
> > +{
> > + const size_t n = 16;
> > + int64_t src1[n], src2[n];
> > +
> > + // Setup kernel and buffers
> > + OCL_CREATE_KERNEL("compiler_long");
> > + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL);
> > + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL);
> > + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL);
> > + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1,
> > + sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
> > + globals[0] = n; locals[0] = 16;
> > +
> > + // Run random tests
> > + src1[0] = -1L, src2[0] = -1L;
> > + src1[1] = 0x8000000000000000UL, src2[1] = 0x8000000000000000UL;
> > + src1[2] = 0x7FFFFFFFFFFFFFFFL, src2[2] = 1L;
> > + src1[3] = 0xFFFFFFFEL, src2[3] = 1L;
> > + src1[4] = 0x7FFFFFFFL, src2[4] = 0x80000000L;
> > + src1[5] = 0, src2[5] = 0;
> > + src1[6] = 0, src2[6] = 1;
> > + src1[7] = -2L, src2[7] = -1L;
> > + src1[8] = 0, src2[8] = 0x8000000000000000UL;
> > + for (int32_t i = 9; i < (int32_t) n; ++i) {
> > + src1[i] = ((long)rand() << 32) + rand();
> > + src2[i] = ((long)rand() << 32) + rand(); } OCL_MAP_BUFFER(0);
> > + OCL_MAP_BUFFER(1); memcpy(buf_data[0], src1, sizeof(src1));
> > + memcpy(buf_data[1], src2, sizeof(src2)); OCL_UNMAP_BUFFER(0);
> > + OCL_UNMAP_BUFFER(1);
> > +
> > + // Run the kernel on GPU
> > + OCL_NDRANGE(1);
> > +
> > + // Compare
> > + OCL_MAP_BUFFER(2);
> > + for (int32_t i = 0; i < (int32_t) n; ++i) {
> > + //printf("%lx\n", ((int64_t *)buf_data[2])[i]);
> > + if (i < 5)
> > + OCL_ASSERT(src1[i] + src2[i] == ((int64_t *)buf_data[2])[i]);
> > + if (i > 5)
> > + OCL_ASSERT(src1[i] - src2[i] == ((int64_t *)buf_data[2])[i]);
> > + }
> > + OCL_UNMAP_BUFFER(2);
> > +}
> > +
> > +MAKE_UTEST_FROM_FUNCTION(compiler_long);
> > --
> > 1.8.1.2
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list