[Beignet] [PATCH v2] support 64bit-integer addition, subtraction
Homer Hsing
homer.xing at intel.com
Tue Aug 6 00:41:40 PDT 2013
also enable GPU command "subb" (subtract with borrow)
also add test cases
v2: renamed GEN_TYPE_UQ/GEN_TYPE_Q to GEN_TYPE_UL/GEN_TYPE_L
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/gen/gen_mesa_disasm.c | 1 +
backend/src/backend/gen_context.cpp | 130 +++++++++++++++++++++++++++++
backend/src/backend/gen_context.hpp | 11 +++
backend/src/backend/gen_defs.hpp | 3 +
backend/src/backend/gen_encoder.cpp | 7 ++
backend/src/backend/gen_encoder.hpp | 1 +
backend/src/backend/gen_insn_selection.cpp | 22 ++++-
backend/src/backend/gen_insn_selection.hxx | 2 +
backend/src/backend/gen_register.hpp | 22 +++++
kernels/compiler_long.cl | 7 ++
utests/CMakeLists.txt | 1 +
utests/compiler_long.cpp | 58 +++++++++++++
12 files changed, 262 insertions(+), 3 deletions(-)
create mode 100644 kernels/compiler_long.cl
create mode 100644 utests/compiler_long.cpp
diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index ce20e44..7d77e7a 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -83,6 +83,7 @@ static const struct {
[GEN_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
+ [GEN_OPCODE_SUBB] = { .name = "subb", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
[GEN_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 709122a..7a33b79 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -175,12 +175,142 @@ namespace gbe
}
}
+ void GenContext::loadTopHalf(GenRegister dest, GenRegister src) {
+ int execWidth = p->curr.execWidth;
+ src = src.top_half();
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.execWidth = 8;
+ p->MOV(dest, src);
+ p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 8));
+ if (execWidth == 16) {
+ p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 16));
+ p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 24));
+ }
+ p->pop();
+ }
+
+ void GenContext::storeTopHalf(GenRegister dest, GenRegister src) {
+ int execWidth = p->curr.execWidth;
+ dest = dest.top_half();
+ p->push();
+ p->curr.execWidth = 8;
+ p->MOV(dest, src);
+ p->curr.nibControl = 1;
+ p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 4));
+ if (execWidth == 16) {
+ p->curr.quarterControl = 1;
+ p->curr.nibControl = 0;
+ p->MOV(GenRegister::suboffset(dest, 16), GenRegister::suboffset(src, 8));
+ p->curr.nibControl = 1;
+ p->MOV(GenRegister::suboffset(dest, 24), GenRegister::suboffset(src, 12));
+ }
+ p->pop();
+ }
+
+ void GenContext::loadBottomHalf(GenRegister dest, GenRegister src) {
+ int execWidth = p->curr.execWidth;
+ src = src.bottom_half();
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.execWidth = 8;
+ p->MOV(dest, src);
+ p->MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(src, 8));
+ if (execWidth == 16) {
+ p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 16));
+ p->MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(src, 24));
+ }
+ p->pop();
+ }
+
+ void GenContext::storeBottomHalf(GenRegister dest, GenRegister src) {
+ int execWidth = p->curr.execWidth;
+ dest = dest.bottom_half();
+ p->push();
+ p->curr.execWidth = 8;
+ p->MOV(dest, src);
+ p->curr.nibControl = 1;
+ p->MOV(GenRegister::suboffset(dest, 8), GenRegister::suboffset(src, 4));
+ if (execWidth == 16) {
+ p->curr.quarterControl = 1;
+ p->curr.nibControl = 0;
+ p->MOV(GenRegister::suboffset(dest, 16), GenRegister::suboffset(src, 8));
+ p->curr.nibControl = 1;
+ p->MOV(GenRegister::suboffset(dest, 24), GenRegister::suboffset(src, 12));
+ }
+ p->pop();
+ }
+
+ void GenContext::addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1) {
+ int execWidth = p->curr.execWidth;
+ GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.execWidth = 8;
+ p->ADDC(dest, src0, src1);
+ p->MOV(src1, acc0);
+ if (execWidth == 16) {
+ p->ADDC(GenRegister::suboffset(dest, 8),
+ GenRegister::suboffset(src0, 8),
+ GenRegister::suboffset(src1, 8));
+ p->MOV(GenRegister::suboffset(src1, 8), acc0);
+ }
+ p->pop();
+ }
+
+ void GenContext::subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1) {
+ int execWidth = p->curr.execWidth;
+ GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.execWidth = 8;
+ p->SUBB(dest, src0, src1);
+ p->MOV(src1, acc0);
+ if (execWidth == 16) {
+ p->SUBB(GenRegister::suboffset(dest, 8),
+ GenRegister::suboffset(src0, 8),
+ GenRegister::suboffset(src1, 8));
+ p->MOV(GenRegister::suboffset(src1, 8), acc0);
+ }
+ p->pop();
+ }
+
void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
const GenRegister dst = ra->genReg(insn.dst(0));
const GenRegister src0 = ra->genReg(insn.src(0));
const GenRegister src1 = ra->genReg(insn.src(1));
const GenRegister src2 = ra->genReg(insn.src(2));
switch (insn.opcode) {
+ case SEL_OP_I64ADD:
+ {
+ GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
+ y = GenRegister::suboffset(x, p->curr.execWidth);
+ loadBottomHalf(x, src0);
+ loadBottomHalf(y, src1);
+ addWithCarry(x, x, y);
+ storeBottomHalf(dst, x);
+ loadTopHalf(x, src0);
+ p->ADD(x, x, y);
+ loadTopHalf(y, src1);
+ p->ADD(x, x, y);
+ storeTopHalf(dst, x);
+ }
+ break;
+ case SEL_OP_I64SUB:
+ {
+ GenRegister x = GenRegister::retype(src2, GEN_TYPE_UD),
+ y = GenRegister::suboffset(x, p->curr.execWidth);
+ loadBottomHalf(x, src0);
+ loadBottomHalf(y, src1);
+ subWithBorrow(x, x, y);
+ storeBottomHalf(dst, x);
+ loadTopHalf(x, src0);
+ subWithBorrow(x, x, y);
+ loadTopHalf(y, src1);
+ subWithBorrow(x, x, y);
+ storeTopHalf(dst, x);
+ }
+ break;
case SEL_OP_MUL_HI:
{
int w = p->curr.execWidth;
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 82d41b6..dc5dc45 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -27,6 +27,7 @@
#include "backend/context.hpp"
#include "backend/program.h"
+#include "backend/gen_register.hpp"
#include "ir/function.hpp"
#include "ir/liveness.hpp"
#include "sys/map.hpp"
@@ -73,6 +74,16 @@ namespace gbe
INLINE const ir::Liveness::LiveOut &getLiveOut(const ir::BasicBlock *bb) const {
return this->liveness->getLiveOut(bb);
}
+
+ void loadTopHalf(GenRegister dest, GenRegister src);
+ void storeTopHalf(GenRegister dest, GenRegister src);
+
+ void loadBottomHalf(GenRegister dest, GenRegister src);
+ void storeBottomHalf(GenRegister dest, GenRegister src);
+
+ void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1);
+ void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1);
+
/*! Final Gen ISA emission helper functions */
void emitLabelInstruction(const SelectionInstruction &insn);
void emitUnaryInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 5a9bb2d..5b15e30 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -157,6 +157,7 @@ enum opcode {
GEN_OPCODE_FBH = 75,
GEN_OPCODE_FBL = 76,
GEN_OPCODE_ADDC = 78,
+ GEN_OPCODE_SUBB = 79,
GEN_OPCODE_SAD2 = 80,
GEN_OPCODE_SADA2 = 81,
GEN_OPCODE_DP4 = 84,
@@ -242,6 +243,8 @@ enum GenMessageTarget {
#define GEN_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define GEN_TYPE_DF 6
#define GEN_TYPE_F 7
+#define GEN_TYPE_UL 8
+#define GEN_TYPE_L 9
#define GEN_ARF_NULL 0x00
#define GEN_ARF_ADDRESS 0x10
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 70e542f..64b5bd1 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -882,6 +882,13 @@ namespace gbe
ALU2(MACH)
ALU3(MAD)
+ void GenEncoder::SUBB(GenRegister dest, GenRegister src0, GenRegister src1) {
+ push();
+ curr.accWrEnable = 1;
+ alu2(this, GEN_OPCODE_SUBB, dest, src0, src1);
+ pop();
+ }
+
void GenEncoder::ADDC(GenRegister dest, GenRegister src0, GenRegister src1) {
push();
curr.accWrEnable = 1;
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 054c343..083bd8c 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -92,6 +92,7 @@ namespace gbe
ALU1(MOV)
ALU1(FBH)
ALU1(FBL)
+ ALU2(SUBB)
ALU2(UPSAMPLE_SHORT)
ALU2(UPSAMPLE_INT)
ALU1(RNDZ)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index f3baa6d..eca62b4 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -128,6 +128,8 @@ namespace gbe
case TYPE_U16: return GEN_TYPE_UW;
case TYPE_S32: return GEN_TYPE_D;
case TYPE_U32: return GEN_TYPE_UD;
+ case TYPE_S64: return GEN_TYPE_L;
+ case TYPE_U64: return GEN_TYPE_UL;
case TYPE_FLOAT: return GEN_TYPE_F;
case TYPE_DOUBLE: return GEN_TYPE_DF;
default: NOT_SUPPORTED; return GEN_TYPE_F;
@@ -426,6 +428,8 @@ namespace gbe
ALU2(RSL)
ALU2(ASR)
ALU2(ADD)
+ ALU3(I64ADD)
+ ALU3(I64SUB)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
@@ -1193,7 +1197,7 @@ namespace gbe
using namespace ir;
const auto &childInsn = cast<LoadImmInstruction>(insn);
const auto &imm = childInsn.getImmediate();
- if(imm.type != TYPE_DOUBLE)
+ if(imm.type != TYPE_DOUBLE && imm.type != TYPE_S64 && imm.type != TYPE_U64)
return true;
return false;
}
@@ -1416,7 +1420,13 @@ namespace gbe
// Output the binary instruction
switch (opcode) {
- case OP_ADD: sel.ADD(dst, src0, src1); break;
+ case OP_ADD:
+ if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
+ GenRegister t = sel.selReg(sel.reg(RegisterFamily::FAMILY_QWORD), Type::TYPE_S64);
+ sel.I64ADD(dst, src0, src1, t);
+ } else
+ sel.ADD(dst, src0, src1);
+ break;
case OP_ADDSAT:
sel.push();
sel.curr.saturate = GEN_MATH_SATURATE_SATURATE;
@@ -1426,7 +1436,13 @@ namespace gbe
case OP_XOR: sel.XOR(dst, src0, src1); break;
case OP_OR: sel.OR(dst, src0, src1); break;
case OP_AND: sel.AND(dst, src0, src1); break;
- case OP_SUB: sel.ADD(dst, src0, GenRegister::negate(src1)); break;
+ case OP_SUB:
+ if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
+ GenRegister t = sel.selReg(sel.reg(RegisterFamily::FAMILY_QWORD), Type::TYPE_S64);
+ sel.I64SUB(dst, src0, src1, t);
+ } else
+ sel.ADD(dst, src0, GenRegister::negate(src1));
+ break;
case OP_SUBSAT:
sel.push();
sel.curr.saturate = GEN_MATH_SATURATE_SATURATE;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index c6aede5..f2b86c4 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -19,6 +19,8 @@ DECL_SELECTION_IR(RSR, BinaryInstruction)
DECL_SELECTION_IR(RSL, BinaryInstruction)
DECL_SELECTION_IR(ASR, BinaryInstruction)
DECL_SELECTION_IR(ADD, BinaryInstruction)
+DECL_SELECTION_IR(I64ADD, TernaryInstruction)
+DECL_SELECTION_IR(I64SUB, TernaryInstruction)
DECL_SELECTION_IR(MUL, BinaryInstruction)
DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
DECL_SELECTION_IR(MACH, BinaryInstruction)
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 7e48837..fda2e6c 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -70,6 +70,8 @@ namespace gbe
INLINE int typeSize(uint32_t type) {
switch(type) {
case GEN_TYPE_DF:
+ case GEN_TYPE_UL:
+ case GEN_TYPE_L:
return 8;
case GEN_TYPE_UD:
case GEN_TYPE_D:
@@ -222,12 +224,32 @@ namespace gbe
return r;
}
+ INLINE bool isint64(void) const {
+ if ((type == GEN_TYPE_UL || type == GEN_TYPE_L) && file == GEN_GENERAL_REGISTER_FILE)
+ return true;
+ return false;
+ }
+
INLINE bool isimmdf(void) const {
if (type == GEN_TYPE_DF && file == GEN_IMMEDIATE_VALUE)
return true;
return false;
}
+ INLINE GenRegister top_half(void) const {
+ GenRegister r = bottom_half();
+ r.subnr += 4;
+ return r;
+ }
+
+ INLINE GenRegister bottom_half(void) const {
+ GBE_ASSERT(isint64());
+ GenRegister r = *this;
+ r.type = type == GEN_TYPE_UL ? GEN_TYPE_UD : GEN_TYPE_D;
+ r.hstride = GEN_HORIZONTAL_STRIDE_2;
+ return r;
+ }
+
INLINE bool isdf(void) const {
if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE)
return true;
diff --git a/kernels/compiler_long.cl b/kernels/compiler_long.cl
new file mode 100644
index 0000000..3087292
--- /dev/null
+++ b/kernels/compiler_long.cl
@@ -0,0 +1,7 @@
+kernel void compiler_long(global long *src1, global long *src2, global long *dst) {
+ int i = get_global_id(0);
+ if(i < 5)
+ dst[i] = src1[i] + src2[i];
+ if(i > 5)
+ dst[i] = src1[i] - src2[i];
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index be565ea..a0dafa2 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -121,6 +121,7 @@ set (utests_sources
compiler_double_2.cpp
compiler_double_3.cpp
compiler_double_4.cpp
+ compiler_long.cpp
utest_assert.cpp
utest.cpp
utest_file_map.cpp
diff --git a/utests/compiler_long.cpp b/utests/compiler_long.cpp
new file mode 100644
index 0000000..fad2744
--- /dev/null
+++ b/utests/compiler_long.cpp
@@ -0,0 +1,58 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+void compiler_long(void)
+{
+ const size_t n = 16;
+ int64_t src1[n], src2[n];
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_long");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL);
+ OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+ globals[0] = n;
+ locals[0] = 16;
+
+ // Run random tests
+ src1[0] = -1L, src2[0] = -1L;
+ src1[1] = 0x8000000000000000UL, src2[1] = 0x8000000000000000UL;
+ src1[2] = 0x7FFFFFFFFFFFFFFFL, src2[2] = 1L;
+ src1[3] = 0xFFFFFFFEL, src2[3] = 1L;
+ src1[4] = 0x7FFFFFFFL, src2[4] = 0x80000000L;
+ src1[5] = 0, src2[5] = 0;
+ src1[6] = 0, src2[6] = 1;
+ src1[7] = -2L, src2[7] = -1L;
+ src1[8] = 0, src2[8] = 0x8000000000000000UL;
+ for (int32_t i = 9; i < (int32_t) n; ++i) {
+ src1[i] = ((long)rand() << 32) + rand();
+ src2[i] = ((long)rand() << 32) + rand();
+ }
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ memcpy(buf_data[0], src1, sizeof(src1));
+ memcpy(buf_data[1], src2, sizeof(src2));
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Compare
+ OCL_MAP_BUFFER(2);
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ //printf("%lx\n", ((int64_t *)buf_data[2])[i]);
+ if (i < 5)
+ OCL_ASSERT(src1[i] + src2[i] == ((int64_t *)buf_data[2])[i]);
+ if (i > 5)
+ OCL_ASSERT(src1[i] - src2[i] == ((int64_t *)buf_data[2])[i]);
+ }
+ OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_long);
--
1.8.1.2
More information about the Beignet
mailing list