[Beignet] [PATCH 2/9] support 64bit-integer addition, subtraction

Homer Hsing homer.xing at intel.com
Sun Aug 4 22:06:36 PDT 2013


also enable GPU command "subb" (subtract with borrow)

also add test cases

Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/gen/gen_mesa_disasm.c  |  1 +
 backend/src/backend/gen_context.cpp        | 56 +++++++++++++++++++++++++++++
 backend/src/backend/gen_defs.hpp           |  3 ++
 backend/src/backend/gen_encoder.cpp        |  7 ++++
 backend/src/backend/gen_encoder.hpp        |  1 +
 backend/src/backend/gen_insn_selection.cpp | 22 ++++++++++--
 backend/src/backend/gen_insn_selection.hxx |  2 ++
 backend/src/backend/gen_register.hpp       | 22 ++++++++++++
 kernels/compiler_long.cl                   |  7 ++++
 utests/CMakeLists.txt                      |  1 +
 utests/compiler_long.cpp                   | 57 ++++++++++++++++++++++++++++++
 11 files changed, 176 insertions(+), 3 deletions(-)
 create mode 100644 kernels/compiler_long.cl
 create mode 100644 utests/compiler_long.cpp

diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
index ce20e44..7d77e7a 100644
--- a/backend/src/backend/gen/gen_mesa_disasm.c
+++ b/backend/src/backend/gen/gen_mesa_disasm.c
@@ -83,6 +83,7 @@ static const struct {
   [GEN_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
+  [GEN_OPCODE_SUBB] = { .name = "subb", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
   [GEN_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 83011cb..c8c6e49 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -181,6 +181,62 @@ namespace gbe
     const GenRegister src1 = ra->genReg(insn.src(1));
     const GenRegister src2 = ra->genReg(insn.src(2));
     switch (insn.opcode) {
+      case SEL_OP_I64ADD:
+        {
+          GenRegister acc = GenRegister::acc(),
+                      xdst = GenRegister::retype(dst, GEN_TYPE_UQ),
+                      xsrc0 = GenRegister::retype(src0, GEN_TYPE_UQ),
+                      xsrc1 = GenRegister::retype(src1, GEN_TYPE_UQ);
+          acc.type = GEN_TYPE_D;
+          acc.hstride = GEN_HORIZONTAL_STRIDE_2;
+          GenRegister temp = src2;
+          temp.hstride = GEN_HORIZONTAL_STRIDE_2;
+          p->push();
+          p->curr.quarterControl = 0;
+          p->curr.nibControl = 0;
+          p->ADDC(xdst.bottom_half(), xsrc0.bottom_half(), xsrc1.bottom_half());
+          p->MOV(temp, acc);
+          p->ADD(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half());
+          p->ADD(xdst.top_half(), xdst.top_half(), temp);
+          p->curr.nibControl = 1;
+          xdst = GenRegister::suboffset(xdst, 4),
+          xsrc0 = GenRegister::suboffset(xsrc0, 4),
+          xsrc1 = GenRegister::suboffset(xsrc1, 4);
+          p->ADDC(xdst.bottom_half(), xsrc0.bottom_half(), xsrc1.bottom_half());
+          p->MOV(temp, acc);
+          p->ADD(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half());
+          p->ADD(xdst.top_half(), xdst.top_half(), temp);
+          p->pop();
+        }
+        break;
+      case SEL_OP_I64SUB:
+        {
+          GenRegister acc = GenRegister::acc(),
+                      xdst = GenRegister::retype(dst, GEN_TYPE_UQ),
+                      xsrc0 = GenRegister::retype(src0, GEN_TYPE_UQ),
+                      xsrc1 = GenRegister::retype(src1, GEN_TYPE_UQ);
+          acc.type = GEN_TYPE_D;
+          acc.hstride = GEN_HORIZONTAL_STRIDE_2;
+          GenRegister temp = src2;
+          temp.hstride = GEN_HORIZONTAL_STRIDE_2;
+          p->push();
+          p->curr.quarterControl = 0;
+          p->curr.nibControl = 0;
+          p->SUBB(xdst.bottom_half(), xsrc0.bottom_half(), xsrc1.bottom_half());
+          p->MOV(temp, acc);
+          p->SUBB(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half());
+          p->SUBB(xdst.top_half(), xdst.top_half(), temp);
+          p->curr.nibControl = 1;
+          xdst = GenRegister::suboffset(xdst, 4),
+          xsrc0 = GenRegister::suboffset(xsrc0, 4),
+          xsrc1 = GenRegister::suboffset(xsrc1, 4);
+          p->SUBB(xdst.bottom_half(), xsrc0.bottom_half(), xsrc1.bottom_half());
+          p->MOV(temp, acc);
+          p->SUBB(xdst.top_half(), xsrc0.top_half(), xsrc1.top_half());
+          p->SUBB(xdst.top_half(), xdst.top_half(), temp);
+          p->pop();
+        }
+        break;
       case SEL_OP_MUL_HI:
        {
         int w = p->curr.execWidth;
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 5a9bb2d..6c8297a 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -157,6 +157,7 @@ enum opcode {
   GEN_OPCODE_FBH = 75,
   GEN_OPCODE_FBL = 76,
   GEN_OPCODE_ADDC = 78,
+  GEN_OPCODE_SUBB = 79,
   GEN_OPCODE_SAD2 = 80,
   GEN_OPCODE_SADA2 = 81,
   GEN_OPCODE_DP4 = 84,
@@ -242,6 +243,8 @@ enum GenMessageTarget {
 #define GEN_TYPE_V   6 /* packed int vector, immediates only, uword dest only */
 #define GEN_TYPE_DF  6
 #define GEN_TYPE_F   7
+#define GEN_TYPE_UQ  8
+#define GEN_TYPE_Q   9
 
 #define GEN_ARF_NULL                  0x00
 #define GEN_ARF_ADDRESS               0x10
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 33f5950..73eebe6 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -903,6 +903,13 @@ namespace gbe
   ALU2(MACH)
   ALU3(MAD)
 
+  void GenEncoder::SUBB(GenRegister dest, GenRegister src0, GenRegister src1) {
+    push();
+    curr.accWrEnable = 1;
+    alu2(this, GEN_OPCODE_SUBB, dest, src0, src1);
+    pop();
+  }
+
   void GenEncoder::ADDC(GenRegister dest, GenRegister src0, GenRegister src1) {
     push();
     curr.accWrEnable = 1;
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index cd8990e..a272ff8 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -92,6 +92,7 @@ namespace gbe
     ALU1(MOV)
     ALU1(FBH)
     ALU1(FBL)
+    ALU2(SUBB)
     ALU2(UPSAMPLE_SHORT)
     ALU2(UPSAMPLE_INT)
     ALU1(RNDZ)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index b1c9cfb..072fc23 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -128,6 +128,8 @@ namespace gbe
       case TYPE_U16: return GEN_TYPE_UW;
       case TYPE_S32: return GEN_TYPE_D;
       case TYPE_U32: return GEN_TYPE_UD;
+      case TYPE_S64: return GEN_TYPE_Q;
+      case TYPE_U64: return GEN_TYPE_UQ;
       case TYPE_FLOAT: return GEN_TYPE_F;
       case TYPE_DOUBLE: return GEN_TYPE_DF;
       default: NOT_SUPPORTED; return GEN_TYPE_F;
@@ -426,6 +428,8 @@ namespace gbe
     ALU2(RSL)
     ALU2(ASR)
     ALU2(ADD)
+    ALU3(I64ADD)
+    ALU3(I64SUB)
     ALU2(MUL)
     ALU1(FRC)
     ALU1(RNDD)
@@ -1185,7 +1189,7 @@ namespace gbe
     using namespace ir;
     const auto &childInsn = cast<LoadImmInstruction>(insn);
     const auto &imm = childInsn.getImmediate();
-    if(imm.type != TYPE_DOUBLE)
+    if(imm.type != TYPE_DOUBLE && imm.type != TYPE_S64 && imm.type != TYPE_U64)
       return true;
     return false;
   }
@@ -1408,7 +1412,13 @@ namespace gbe
 
       // Output the binary instruction
       switch (opcode) {
-        case OP_ADD: sel.ADD(dst, src0, src1); break;
+        case OP_ADD:
+          if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
+            GenRegister t = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD), Type::TYPE_S32);
+            sel.I64ADD(dst, src0, src1, t);
+          } else
+            sel.ADD(dst, src0, src1);
+          break;
         case OP_ADDSAT:
           sel.push();
             sel.curr.saturate = GEN_MATH_SATURATE_SATURATE;
@@ -1418,7 +1428,13 @@ namespace gbe
         case OP_XOR: sel.XOR(dst, src0, src1); break;
         case OP_OR:  sel.OR(dst, src0,  src1); break;
         case OP_AND: sel.AND(dst, src0, src1); break;
-        case OP_SUB: sel.ADD(dst, src0, GenRegister::negate(src1)); break;
+        case OP_SUB:
+          if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
+            GenRegister t = sel.selReg(sel.reg(RegisterFamily::FAMILY_DWORD), Type::TYPE_S32);
+            sel.I64SUB(dst, src0, src1, t);
+          } else
+            sel.ADD(dst, src0, GenRegister::negate(src1));
+          break;
         case OP_SUBSAT:
           sel.push();
             sel.curr.saturate = GEN_MATH_SATURATE_SATURATE;
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index c6aede5..f2b86c4 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -19,6 +19,8 @@ DECL_SELECTION_IR(RSR, BinaryInstruction)
 DECL_SELECTION_IR(RSL, BinaryInstruction)
 DECL_SELECTION_IR(ASR, BinaryInstruction)
 DECL_SELECTION_IR(ADD, BinaryInstruction)
+DECL_SELECTION_IR(I64ADD, TernaryInstruction)
+DECL_SELECTION_IR(I64SUB, TernaryInstruction)
 DECL_SELECTION_IR(MUL, BinaryInstruction)
 DECL_SELECTION_IR(ATOMIC, AtomicInstruction)
 DECL_SELECTION_IR(MACH, BinaryInstruction)
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index fedb743..177690b 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -70,6 +70,8 @@ namespace gbe
   INLINE int typeSize(uint32_t type) {
     switch(type) {
       case GEN_TYPE_DF:
+      case GEN_TYPE_UQ:
+      case GEN_TYPE_Q:
         return 8;
       case GEN_TYPE_UD:
       case GEN_TYPE_D:
@@ -222,12 +224,32 @@ namespace gbe
       return r;
     }
 
+    INLINE bool isint64(void) const {
+      if ((type == GEN_TYPE_UQ || type == GEN_TYPE_Q) && file == GEN_GENERAL_REGISTER_FILE)
+        return true;
+      return false;
+    }
+
     INLINE bool isimmdf(void) const {
       if (type == GEN_TYPE_DF && file == GEN_IMMEDIATE_VALUE)
         return true;
       return false;
     }
 
+    INLINE GenRegister top_half(void) const {
+      GenRegister r = bottom_half();
+      r.subnr += 4;
+      return r;
+    }
+
+    INLINE GenRegister bottom_half(void) const {
+      GBE_ASSERT(isint64());
+      GenRegister r = *this;
+      r.type = type == GEN_TYPE_UQ ? GEN_TYPE_UD : GEN_TYPE_D;
+      r.hstride = GEN_HORIZONTAL_STRIDE_2;
+      return r;
+    }
+
     INLINE bool isdf(void) const {
       if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE)
         return true;
diff --git a/kernels/compiler_long.cl b/kernels/compiler_long.cl
new file mode 100644
index 0000000..3087292
--- /dev/null
+++ b/kernels/compiler_long.cl
@@ -0,0 +1,7 @@
+kernel void compiler_long(global long *src1, global long *src2, global long *dst) {
+  int i = get_global_id(0);
+  if(i < 5)
+    dst[i] = src1[i] + src2[i];
+  if(i > 5)
+    dst[i] = src1[i] - src2[i];
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index e067d74..103b55a 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -120,6 +120,7 @@ set (utests_sources
   compiler_double_2.cpp
   compiler_double_3.cpp
   compiler_double_4.cpp
+  compiler_long.cpp
   utest_assert.cpp
   utest.cpp
   utest_file_map.cpp
diff --git a/utests/compiler_long.cpp b/utests/compiler_long.cpp
new file mode 100644
index 0000000..140a075
--- /dev/null
+++ b/utests/compiler_long.cpp
@@ -0,0 +1,57 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+void compiler_long(void)
+{
+  const size_t n = 16;
+  int64_t src1[n], src2[n];
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_long");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int64_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Run random tests
+  src1[0] = -1L,                  src2[0] = -1L;
+  src1[1] = 0x8000000000000000UL, src2[1] = 0x8000000000000000UL;
+  src1[2] = 0x7FFFFFFFFFFFFFFFL,  src2[2] = 1L;
+  src1[3] = 0xFFFFFFFEL,          src2[3] = 1L;
+  src1[4] = 0x7FFFFFFFL,          src2[4] = 0x80000000L;
+  src1[5] = 0,                    src2[5] = 0;
+  src1[6] = 0,                    src2[5] = 1;
+  src1[7] = -2L,                  src2[6] = -1L;
+  src1[8] = 0,                    src2[7] = 0x8000000000000000UL;
+  for (int32_t i = 9; i < (int32_t) n; ++i) {
+    src1[i] = ((long)rand() << 32) + rand();
+    src2[i] = ((long)rand() << 32) + rand();
+  }
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  memcpy(buf_data[0], src1, sizeof(src1));
+  memcpy(buf_data[1], src2, sizeof(src2));
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(2);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    if (i < 5)
+      OCL_ASSERT(src1[i] + src2[i] == ((int64_t *)buf_data[2])[i]);
+    if (i > 5)
+      OCL_ASSERT(src1[i] - src2[i] == ((int64_t *)buf_data[2])[i]);
+  }
+  OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_long);
-- 
1.8.1.2



More information about the Beignet mailing list