[Beignet] [PATCH] support 64-bit version "add_sat"

Homer Hsing homer.xing at intel.com
Fri Sep 13 00:57:41 PDT 2013


tested by piglit:
  piglit/bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-add_sat-1.0.generated.cl
  piglit/bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-add_sat-1.0.generated.cl

Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
 backend/src/backend/gen_context.cpp                | 53 ++++++++++++++++++++++
 backend/src/backend/gen_context.hpp                |  1 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |  1 +
 backend/src/backend/gen_insn_selection.cpp         | 20 ++++++++
 backend/src/backend/gen_insn_selection.hxx         |  1 +
 backend/src/ocl_stdlib.tmpl.h                      | 13 +++++-
 6 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index fdb305b..b88b6b4 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -945,6 +945,59 @@ namespace gbe
     p->pop();
   }
 
+  void GenContext::emitI64SATADDInstruction(const SelectionInstruction &insn) {
+    GenRegister x = ra->genReg(insn.src(0));
+    GenRegister y = ra->genReg(insn.src(1));
+    GenRegister dst = ra->genReg(insn.dst(0));
+    GenRegister a = ra->genReg(insn.dst(1));
+    GenRegister b = ra->genReg(insn.dst(2));
+    GenRegister c = ra->genReg(insn.dst(3));
+    GenRegister d = ra->genReg(insn.dst(4));
+    GenRegister e = ra->genReg(insn.dst(5));
+    loadTopHalf(a, x);
+    loadBottomHalf(b, x);
+    loadTopHalf(c, y);
+    loadBottomHalf(d, y);
+    switch(insn.opcode) {
+      case SEL_OP_I64SATADD:
+        if(dst.is_signed_int())
+          p->SHR(e, a, GenRegister::immud(31));
+        addWithCarry(b, b, d);
+        addWithCarry(a, a, d);
+        addWithCarry(a, a, c);
+        p->ADD(c, c, d);
+        p->push();
+        p->curr.predicate = GEN_PREDICATE_NONE;
+        p->curr.physicalFlag = 1;
+        p->curr.flag = 1;
+        p->curr.subFlag = 0;
+        if(! dst.is_signed_int()) {
+          p->CMP(GEN_CONDITIONAL_NZ, c, GenRegister::immud(0));
+          p->curr.predicate = GEN_PREDICATE_NORMAL;
+          p->MOV(a, GenRegister::immud(0xFFFFFFFFu));
+          p->MOV(b, GenRegister::immud(0xFFFFFFFFu));
+        } else {
+          p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(1));
+          p->curr.predicate = GEN_PREDICATE_NORMAL;
+          p->CMP(GEN_CONDITIONAL_L, a, GenRegister::immud(0x80000000u));
+          p->MOV(a, GenRegister::immud(0x80000000u));
+          p->MOV(b, GenRegister::immud(0));
+          p->curr.predicate = GEN_PREDICATE_NONE;
+          p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(0));
+          p->curr.predicate = GEN_PREDICATE_NORMAL;
+          p->CMP(GEN_CONDITIONAL_GE, a, GenRegister::immud(0x80000000u));
+          p->MOV(a, GenRegister::immud(0x7FFFFFFFu));
+          p->MOV(b, GenRegister::immud(0xFFFFFFFFu));
+        }
+        p->pop();
+        break;
+      default:
+        NOT_IMPLEMENTED;
+    }
+    storeTopHalf(dst, a);
+    storeBottomHalf(dst, b);
+  }
+
   void GenContext::loadTopHalf(GenRegister dest, GenRegister src) {
     int execWidth = p->curr.execWidth;
     src = src.top_half();
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 23da406..e7a9d3c 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -106,6 +106,7 @@ namespace gbe
     void emitI64RHADDInstruction(const SelectionInstruction &insn);
     void emitI64ShiftInstruction(const SelectionInstruction &insn);
     void emitI64CompareInstruction(const SelectionInstruction &insn);
+    void emitI64SATADDInstruction(const SelectionInstruction &insn);
     void emitI64ToFloatInstruction(const SelectionInstruction &insn);
     void emitCompareInstruction(const SelectionInstruction &insn);
     void emitJumpInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 0a12a91..98db756 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -35,3 +35,4 @@ DECL_GEN7_SCHEDULE(UnSpillReg,      80,        1,        1)
 DECL_GEN7_SCHEDULE(GetImageInfo,    20,        4,        2)
 DECL_GEN7_SCHEDULE(Atomic,          80,        1,        1)
 DECL_GEN7_SCHEDULE(I64MUL,          20,        4,        2)
+DECL_GEN7_SCHEDULE(I64SATADD,       20,        4,        2)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index a38d8c4..55720ff 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -483,6 +483,8 @@ namespace gbe
     void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
     /*! Compare 64-bit integer */
     void I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]);
+    /*! Saturated addition of 64-bit integer */
+    void I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[5]);
     /*! Encode a barrier instruction */
     void BARRIER(GenRegister src);
     /*! Encode a barrier instruction */
@@ -1083,6 +1085,15 @@ namespace gbe
     insn->extra.function = conditional;
   }
 
+  void Selection::Opaque::I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[5]) {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATADD, 6, 2);
+    insn->dst(0) = dst;
+    insn->src(0) = src0;
+    insn->src(1) = src1;
+    for(int i=0; i<5; i++)
+      insn->dst(i + 1) = tmp[i];
+  }
+
   void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[3]) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 4, 1);
     insn->dst(0) = dst;
@@ -1632,6 +1643,15 @@ namespace gbe
             sel.ADD(dst, src0, src1);
           break;
         case OP_ADDSAT:
+          if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
+            GenRegister tmp[5];
+            for(int i=0; i<5; i++) {
+              tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+              tmp[i].type = GEN_TYPE_UD;
+            }
+            sel.I64SATADD(dst, src0, src1, tmp);
+            break;
+          }
           sel.push();
             sel.curr.saturate = GEN_MATH_SATURATE_SATURATE;
             sel.ADD(dst, src0, src1);
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 63ad810..9814cc0 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -28,6 +28,7 @@ DECL_SELECTION_IR(I64SHL, I64ShiftInstruction)
 DECL_SELECTION_IR(I64ASR, I64ShiftInstruction)
 DECL_SELECTION_IR(ADD, BinaryInstruction)
 DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction)
+DECL_SELECTION_IR(I64SATADD, I64SATADDInstruction)
 DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
 DECL_SELECTION_IR(MUL, BinaryInstruction)
 DECL_SELECTION_IR(I64MUL, I64MULInstruction)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index 692440f..0d63777 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -182,7 +182,6 @@ INLINE_OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_sadd_sat(x, y); }
 INLINE_OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y) { return ocl_ssub_sat(x, y); }
 SDEF(char);
 SDEF(short);
-SDEF(long);
 #undef SDEF
 OVERLOADABLE int ocl_sadd_sat(int x, int y);
 INLINE_OVERLOADABLE int add_sat(int x, int y) { return ocl_sadd_sat(x, y); }
@@ -190,6 +189,17 @@ OVERLOADABLE int ocl_ssub_sat(int x, int y);
 INLINE_OVERLOADABLE int sub_sat(int x, int y) {
   return (y == 0x80000000u) ? (x & 0x7FFFFFFF) : ocl_ssub_sat(x, y);
 }
+OVERLOADABLE long ocl_sadd_sat(long x, long y);
+INLINE_OVERLOADABLE long add_sat(long x, long y) {
+  union {long l; uint i[2];} ux, uy;
+  ux.l = x;
+  uy.l = y;
+  if((ux.i[1] ^ uy.i[1]) & 0x80000000u)
+    return x + y;
+  return ocl_sadd_sat(x, y);
+}
+OVERLOADABLE long ocl_ssub_sat(long x, long y);
+INLINE_OVERLOADABLE long sub_sat(long x, long y) { return ocl_ssub_sat(x, y); }
 #define UDEF(TYPE)                                                              \
 OVERLOADABLE TYPE ocl_uadd_sat(TYPE x, TYPE y);                          \
 OVERLOADABLE TYPE ocl_usub_sat(TYPE x, TYPE y);                          \
@@ -201,7 +211,6 @@ UDEF(uint);
 UDEF(ulong);
 #undef UDEF
 
-
 uchar INLINE_OVERLOADABLE convert_uchar_sat(float x) {
     return add_sat((uchar)x, (uchar)0);
 }
-- 
1.8.1.2



More information about the Beignet mailing list