[Beignet] [PATCH 2/2] support 64bit-integer addition, subtraction
Xing, Homer
homer.xing at intel.com
Tue Aug 6 00:33:55 PDT 2013
Good idea. I will rename them.
-----Original Message-----
From: Zhigang Gong [mailto:zhigang.gong at gmail.com]
Sent: Tuesday, August 6, 2013 3:33 PM
To: Xing, Homer
Cc: beignet at lists.freedesktop.org
Subject: Re: [Beignet] [PATCH 2/2] support 64bit-integer addition, subtraction
Nice patch. One minor comment. You pick GEN_TYPE_UQ/GEN_TYPE_Q for the ulong and long type enum name.
IMO, Q is more like a family name rather than a type name, How about change to use GEN_TYPE_L/GEN_TYPE_UL?
On Tue, Aug 06, 2013 at 02:24:35PM +0800, Homer Hsing wrote:
> also enable GPU command "subb" (subtract with borrow)
>
> also add test cases
>
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
> backend/src/backend/gen/gen_mesa_disasm.c | 1 +
> backend/src/backend/gen_context.cpp | 130 +++++++++++++++++++++++++++++
> backend/src/backend/gen_context.hpp | 11 +++
> backend/src/backend/gen_defs.hpp | 3 +
> backend/src/backend/gen_encoder.cpp | 7 ++
> backend/src/backend/gen_encoder.hpp | 1 +
> backend/src/backend/gen_insn_selection.cpp | 22 ++++-
> backend/src/backend/gen_insn_selection.hxx | 2 +
> backend/src/backend/gen_register.hpp | 22 +++++
> kernels/compiler_long.cl | 7 ++
> utests/CMakeLists.txt | 1 +
> utests/compiler_long.cpp | 58 +++++++++++++
> 12 files changed, 262 insertions(+), 3 deletions(-) create mode
> 100644 kernels/compiler_long.cl create mode 100644
> utests/compiler_long.cpp
>
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c
> b/backend/src/backend/gen/gen_mesa_disasm.c
> index ce20e44..7d77e7a 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -83,6 +83,7 @@ static const struct {
> [GEN_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
> [GEN_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
> [GEN_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
> + [GEN_OPCODE_SUBB] = { .name = "subb", .nsrc = 2, .ndst = 1 },
> [GEN_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
> [GEN_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
> [GEN_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, diff
> --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 3d8ed1c..9447f97 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -175,12 +175,142 @@ namespace gbe
> }
> }
>
> + void GenContext::loadTopHalf(GenRegister dest, GenRegister src) {
> + int execWidth = p->curr.execWidth;
> + src = src.top_half();
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.execWidth = 8;
> + p->MOV(dest, src);
> + p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 8));
> + if (execWidth == 16) {
> + p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 16));
> + p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 24));
> + }
> + p->pop();
> + }
> +
> + void GenContext::storeTopHalf(GenRegister dest, GenRegister src) {
> + int execWidth = p->curr.execWidth;
> + dest = dest.top_half();
> + p->push();
> + p->curr.execWidth = 8;
> + p->MOV(dest, src);
> + p->curr.nibControl = 1;
> + p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 4));
> + if (execWidth == 16) {
> + p->curr.quarterControl = 1;
> + p->curr.nibControl = 0;
> + p->MOV(GenRegister::suboffset(dest, 16), GenRegister::suboffset(src, 8));
> + p->curr.nibControl = 1;
> + p->MOV(GenRegister::suboffset(dest, 24), GenRegister::suboffset(src, 12));
> + }
> + p->pop();
> + }
> +
> + void GenContext::loadBottomHalf(GenRegister dest, GenRegister src) {
> + int execWidth = p->curr.execWidth;
> + src = src.bottom_half();
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.execWidth = 8;
> + p->MOV(dest, src);
> + p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 8));
> + if (execWidth == 16) {
> + p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 16));
> + p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 24));
> + }
> + p->pop();
> + }
> +
> + void GenContext::storeBottomHalf(GenRegister dest, GenRegister src) {
> + int execWidth = p->curr.execWidth;
> + dest = dest.bottom_half();
> + p->push();
> + p->curr.execWidth = 8;
> + p->MOV(dest, src);
> + p->curr.nibControl = 1;
> + p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 4));
> + if (execWidth == 16) {
> + p->curr.quarterControl = 1;
> + p->curr.nibControl = 0;
> + p->MOV(GenRegister::suboffset(dest, 16), GenRegister::suboffset(src, 8));
> + p->curr.nibControl = 1;
> + p->MOV(GenRegister::suboffset(dest, 24), GenRegister::suboffset(src, 12));
> + }
> + p->pop();
> + }
> +
> + void GenContext::addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1) {
> + int execWidth = p->curr.execWidth;
> + GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.execWidth = 8;
> + p->ADDC(dest, src0, src1);
> + p->MOV(src1, acc0);
> + if (execWidth == 16) {
> + p->ADDC(GenRegister::suboffset(dest, 8),
> + GenRegister::suboffset(src0, 8),
> + GenRegister::suboffset(src1, 8));
> + p->MOV(GenRegister::suboffset(src1, 8), acc0);
> + }
> + p->pop();
> + }
> +
> + void GenContext::subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1) {
> + int execWidth = p->curr.execWidth;
> + GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
> + p->push();
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.execWidth = 8;
> + p->SUBB(dest, src0, src1);
> + p->MOV(src1, acc0);
> + if (execWidth == 16) {
> + p->SUBB(GenRegister::suboffset(dest, 8),
> + GenRegister::suboffset(src0, 8),
> + GenRegister::suboffset(src1, 8));
> + p->MOV(GenRegister::suboffset(src1, 8), acc0);
> + }
> + p->pop();
> + }
> +
> void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
> const GenRegister dst = ra->genReg(insn.dst(0));
> const GenRegister src0 = ra->genReg(insn.src(0));
> const GenRegister src1 = ra->genReg(insn.src(1));
> const GenRegister src2 = ra->genReg(insn.src(2));
> switch (insn.opcode) {
> + case SEL_OP_I64ADD:
> + {
> + GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
> + y = GenRegister::suboffset(x, p->curr.execWidth);
> + loadBottomHalf(x, src0);
> + loadBottomHalf(y, src1);
> + addWithCarry(x, x, y);
> + storeBottomHalf(dst, x);
> + loadTopHalf(x, src0);
> + p->ADD(x, x, y);
> + loadTopHalf(y, src1);
> + p->ADD(x, x, y);
> + storeTopHalf(dst, x);
> + }
> + break;
> + case SEL_OP_I64SUB:
> + {
> + GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
> + y = GenRegister::suboffset(x, p->curr.execWidth);
> + loadBottomHalf(x, src0);
> + loadBottomHalf(y, src1);
> + subWithBorrow(x, x, y);
> + storeBottomHalf(dst, x);
> + loadTopHalf(x, src0);
> + subWithBorrow(x, x, y);
> + loadTopHalf(y, src1);
> + subWithBorrow(x, x, y);
> + storeTopHalf(dst, x);
> + }
> + break;
> case SEL_OP_MUL_HI:
> {
> int w = p->curr.execWidth;
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index 82d41b6..dc5dc45 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -27,6 +27,7 @@
>
> #include "backend/context.hpp"
> #include "backend/program.h"
> +#include "backend/gen_register.hpp"
> #include "ir/function.hpp"
> #include "ir/liveness.hpp"
> #include "sys/map.hpp"
> @@ -73,6 +74,16 @@ namespace gbe
> INLINE const ir::Liveness::LiveOut &getLiveOut(const ir::BasicBlock *bb) const {
> return this->liveness->getLiveOut(bb);
> }
> +
> + void loadTopHalf(GenRegister dest, GenRegister src);
> + void storeTopHalf(GenRegister dest, GenRegister src);
> +
> + void loadBottomHalf(GenRegister dest, GenRegister src);
> + void storeBottomHalf(GenRegister dest, GenRegister src);
> +
> + void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1);
> + void subWithBorrow(GenRegister dest, GenRegister src0,
> + GenRegister src1);
> +
> /*! Final Gen ISA emission helper functions */
> void emitLabelInstruction(const SelectionInstruction &insn);
> void emitUnaryInstruction(const SelectionInstruction &insn); diff
> --git a/backend/src/backend/gen_defs.hpp
> b/backend/src/backend/gen_defs.hpp
> index 5a9bb2d..6c8297a 100644
> --- a/backend/src/backend/gen_defs.hpp
> +++ b/backend/src/backend/gen_defs.hpp
> @@ -157,6 +157,7 @@ enum opcode {
> GEN_OPCODE_FBH = 75,
> GEN_OPCODE_FBL = 76,
> GEN_OPCODE_ADDC = 78,
> + GEN_OPCODE_SUBB = 79,
> GEN_OPCODE_SAD2 = 80,
> GEN_OPCODE_SADA2 = 81,
> GEN_OPCODE_DP4 = 84,
> @@ -242,6 +243,8 @@ enum GenMessageTarget {
> #define GEN_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
> #define GEN_TYPE_DF 6
> #define GEN_TYPE_F 7
> +#define GEN_TYPE_UQ 8
> +#define GEN_TYPE_Q 9
>
> #define GEN_ARF_NULL 0x00
> #define GEN_ARF_ADDRESS 0x10
> diff --git a/backend/src/backend/gen_encoder.cpp
> b/backend/src/backend/gen_encoder.cpp
> index 70e542f..64b5bd1 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -882,6 +882,13 @@ namespace gbe
> ALU2(MACH)
> ALU3(MAD)
>
> + void GenEncoder::SUBB(GenRegister dest, GenRegister src0, GenRegister src1) {
> + push();
> + curr.accWrEnable = 1;
> + alu2(this, GEN_OPCODE_SUBB, dest, src0, src1);
> + pop();
> + }
> +
> void GenEncoder::ADDC(GenRegister dest, GenRegister src0, GenRegister src1) {
> push();
> curr.accWrEnable = 1;
> diff --git a/backend/src/backend/gen_encoder.hpp
> b/backend/src/backend/gen_encoder.hpp
> index 054c343..083bd8c 100644
> --- a/backend/src/backend/gen_encoder.hpp
> +++ b/backend/src/backend/gen_encoder.hpp
> @@ -92,6 +92,7 @@ namespace gbe
> ALU1(MOV)
> ALU1(FBH)
> ALU1(FBL)
> + ALU2(SUBB)
> ALU2(UPSAMPLE_SHORT)
> ALU2(UPSAMPLE_INT)
> ALU1(RNDZ)
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index f3baa6d..2ef17e9 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -128,6 +128,8 @@ namespace gbe
> case TYPE_U16: return GEN_TYPE_UW;
> case TYPE_S32: return GEN_TYPE_D;
> case TYPE_U32: return GEN_TYPE_UD;
> + case TYPE_S64: return GEN_TYPE_Q;
> + case TYPE_U64: return GEN_TYPE_UQ;
> case TYPE_FLOAT: return GEN_TYPE_F;
> case TYPE_DOUBLE: return GEN_TYPE_DF;
> default: NOT_SUPPORTED; return GEN_TYPE_F; @@ -426,6 +428,8 @@
> namespace gbe
> ALU2(RSL)
> ALU2(ASR)
> ALU2(ADD)
> + ALU3(I64ADD)
> + ALU3(I64SUB)
> ALU2(MUL)
> ALU1(FRC)
> ALU1(RNDD)
> @@ -1193,7 +1197,7 @@ namespace gbe
> using namespace ir;
> const auto &childInsn = cast<LoadImmInstruction>(insn);
> const auto &imm = childInsn.getImmediate();
> - if(imm.type != TYPE_DOUBLE)
> + if(imm.type != TYPE_DOUBLE && imm.type != TYPE_S64 && imm.type !=
> + TYPE_U64)
> return true;
> return false;
> }
> @@ -1416,7 +1420,13 @@ namespace gbe
>
> // Output the binary instruction
> switch (opcode) {
> - case OP_ADD: sel.ADD(dst, src0, src1); break;
> + case OP_ADD:
> + if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
> + GenRegister t = sel.selReg(sel.reg(RegisterFamily::FAMILY_QWORD), Type::TYPE_S64);
> + sel.I64ADD(dst, src0, src1, t);
> + } else
> + sel.ADD(dst, src0, src1);
> + break;
> case OP_ADDSAT:
> sel.push();
> sel.curr.saturate = GEN_MATH_SATURATE_SATURATE; @@
> -1426,7 +1436,13 @@ namespace gbe
> case OP_XOR: sel.XOR(dst, src0, src1); break;
> case OP_OR: sel.OR(dst, src0, src1); break;
> case OP_AND: sel.AND(dst, src0, src1); break;
> - case OP_SUB: sel.ADD(dst, src0, GenRegister::negate(src1)); break;
> + case OP_SUB:
> + if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
> + GenRegister t = sel.selReg(sel.reg(RegisterFamily::FAMILY_QWORD), Type::TYPE_S64);
> + sel.I64SUB(dst, src0, src1, t);
> + } else
> + sel.ADD(dst, src0, GenRegister::negate(src1));
> + break;
> case OP_SUBSAT:
> sel.push();
> sel.curr.saturate = GEN_MATH_SATURATE_SATURATE; diff
> --git a/backend/src/backend/gen_insn_selection.hxx
> b/backend/src/backend/gen_insn_selection.hxx
> index c6aede5..f2b86c4 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -19,6 +19,8 @@ DECL_SELECTION_IR(RSR, BinaryInstruction)
> DECL_SELECTION_IR(RSL, BinaryInstruction) DECL_SELECTION_IR(ASR,
> BinaryInstruction) DECL_SELECTION_IR(ADD, BinaryInstruction)
> +DECL_SELECTION_IR(I64ADD, TernaryInstruction)
> +DECL_SELECTION_IR(I64SUB, TernaryInstruction)
> DECL_SELECTION_IR(MUL, BinaryInstruction) DECL_SELECTION_IR(ATOMIC,
> AtomicInstruction) DECL_SELECTION_IR(MACH, BinaryInstruction) diff
> --git a/backend/src/backend/gen_register.hpp
> b/backend/src/backend/gen_register.hpp
> index 7e48837..fd70b90 100644
> --- a/backend/src/backend/gen_register.hpp
> +++ b/backend/src/backend/gen_register.hpp
> @@ -70,6 +70,8 @@ namespace gbe
> INLINE int typeSize(uint32_t type) {
> switch(type) {
> case GEN_TYPE_DF:
> + case GEN_TYPE_UQ:
> + case GEN_TYPE_Q:
> return 8;
> case GEN_TYPE_UD:
> case GEN_TYPE_D:
> @@ -222,12 +224,32 @@ namespace gbe
> return r;
> }
>
> + INLINE bool isint64(void) const {
> + if ((type == GEN_TYPE_UQ || type == GEN_TYPE_Q) && file == GEN_GENERAL_REGISTER_FILE)
> + return true;
> + return false;
> + }
> +
> INLINE bool isimmdf(void) const {
> if (type == GEN_TYPE_DF && file == GEN_IMMEDIATE_VALUE)
> return true;
> return false;
> }
>
> + INLINE GenRegister top_half(void) const {
> + GenRegister r = bottom_half();
> + r.subnr += 4;
> + return r;
> + }
> +
> + INLINE GenRegister bottom_half(void) const {
> + GBE_ASSERT(isint64());
> + GenRegister r = *this;
> + r.type = type == GEN_TYPE_UQ ? GEN_TYPE_UD : GEN_TYPE_D;
> + r.hstride = GEN_HORIZONTAL_STRIDE_2;
> + return r;
> + }
> +
> INLINE bool isdf(void) const {
> if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE)
> return true;
> diff --git a/kernels/compiler_long.cl b/kernels/compiler_long.cl new
> file mode 100644 index 0000000..3087292
> --- /dev/null
> +++ b/kernels/compiler_long.cl
> @@ -0,0 +1,7 @@
> +kernel void compiler_long(global long *src1, global long *src2,
> +global long *dst) {
> + int i = get_global_id(0);
> + if(i < 5)
> + dst[i] = src1[i] + src2[i];
> + if(i > 5)
> + dst[i] = src1[i] - src2[i];
> +}
> diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index
> be565ea..a0dafa2 100644
> --- a/utests/CMakeLists.txt
> +++ b/utests/CMakeLists.txt
> @@ -121,6 +121,7 @@ set (utests_sources
> compiler_double_2.cpp
> compiler_double_3.cpp
> compiler_double_4.cpp
> + compiler_long.cpp
> utest_assert.cpp
> utest.cpp
> utest_file_map.cpp
> diff --git a/utests/compiler_long.cpp b/utests/compiler_long.cpp new
> file mode 100644 index 0000000..fad2744
> --- /dev/null
> +++ b/utests/compiler_long.cpp
> @@ -0,0 +1,58 @@
> +#include <cstdint>
> +#include <cstring>
> +#include <iostream>
> +#include "utest_helper.hpp"
> +
> +void compiler_long(void)
> +{
> + const size_t n = 16;
> + int64_t src1[n], src2[n];
> +
> + // Setup kernel and buffers
> + OCL_CREATE_KERNEL("compiler_long");
> + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL);
> + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL);
> + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL);
> + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1,
> + sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
> + globals[0] = n; locals[0] = 16;
> +
> + // Run random tests
> + src1[0] = -1L, src2[0] = -1L;
> + src1[1] = 0x8000000000000000UL, src2[1] = 0x8000000000000000UL;
> + src1[2] = 0x7FFFFFFFFFFFFFFFL, src2[2] = 1L;
> + src1[3] = 0xFFFFFFFEL, src2[3] = 1L;
> + src1[4] = 0x7FFFFFFFL, src2[4] = 0x80000000L;
> + src1[5] = 0, src2[5] = 0;
> + src1[6] = 0, src2[6] = 1;
> + src1[7] = -2L, src2[7] = -1L;
> + src1[8] = 0, src2[8] = 0x8000000000000000UL;
> + for (int32_t i = 9; i < (int32_t) n; ++i) {
> + src1[i] = ((long)rand() << 32) + rand();
> + src2[i] = ((long)rand() << 32) + rand(); } OCL_MAP_BUFFER(0);
> + OCL_MAP_BUFFER(1); memcpy(buf_data[0], src1, sizeof(src1));
> + memcpy(buf_data[1], src2, sizeof(src2)); OCL_UNMAP_BUFFER(0);
> + OCL_UNMAP_BUFFER(1);
> +
> + // Run the kernel on GPU
> + OCL_NDRANGE(1);
> +
> + // Compare
> + OCL_MAP_BUFFER(2);
> + for (int32_t i = 0; i < (int32_t) n; ++i) {
> + //printf("%lx\n", ((int64_t *)buf_data[2])[i]);
> + if (i < 5)
> + OCL_ASSERT(src1[i] + src2[i] == ((int64_t *)buf_data[2])[i]);
> + if (i > 5)
> + OCL_ASSERT(src1[i] - src2[i] == ((int64_t *)buf_data[2])[i]);
> + }
> + OCL_UNMAP_BUFFER(2);
> +}
> +
> +MAKE_UTEST_FROM_FUNCTION(compiler_long);
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list