[Beignet] [PATCH version 3] support 64-bit version "add_sat"
Homer Hsing
homer.xing at intel.com
Sat Sep 21 23:18:02 PDT 2013
tested by piglit:
piglit/bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-add_sat-1.0.generated.cl
piglit/bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-add_sat-1.0.generated.cl
version 2:
temp flag register is now allocated by RA
version 3:
divide subnr of temp flag reg by typesize
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/gen_context.cpp | 46 ++++++++++++++++++++++
backend/src/backend/gen_context.hpp | 1 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
backend/src/backend/gen_insn_selection.cpp | 21 ++++++++++
backend/src/backend/gen_insn_selection.hxx | 1 +
backend/src/ocl_stdlib.tmpl.h | 13 +++++-
6 files changed, 81 insertions(+), 2 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index fa635de..3d4f616 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -937,6 +937,52 @@ namespace gbe
p->pop();
}
+ void GenContext::emitI64SATADDInstruction(const SelectionInstruction &insn) {
+ GenRegister x = ra->genReg(insn.src(0));
+ GenRegister y = ra->genReg(insn.src(1));
+ GenRegister dst = ra->genReg(insn.dst(0));
+ GenRegister a = ra->genReg(insn.dst(1));
+ GenRegister b = ra->genReg(insn.dst(2));
+ GenRegister c = ra->genReg(insn.dst(3));
+ GenRegister d = ra->genReg(insn.dst(4));
+ GenRegister e = ra->genReg(insn.dst(5));
+ GenRegister flagReg = ra->genReg(insn.dst(6));
+ loadTopHalf(a, x);
+ loadBottomHalf(b, x);
+ loadTopHalf(c, y);
+ loadBottomHalf(d, y);
+ if(dst.is_signed_int())
+ p->SHR(e, a, GenRegister::immud(31));
+ addWithCarry(b, b, d);
+ addWithCarry(a, a, d);
+ addWithCarry(a, a, c);
+ p->ADD(c, c, d);
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.useFlag(flagReg.flag_nr(), flagReg.flag_subnr());
+ if(! dst.is_signed_int()) {
+ p->CMP(GEN_CONDITIONAL_NZ, c, GenRegister::immud(0));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->MOV(a, GenRegister::immud(0xFFFFFFFFu));
+ p->MOV(b, GenRegister::immud(0xFFFFFFFFu));
+ } else {
+ p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(1));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->CMP(GEN_CONDITIONAL_L, a, GenRegister::immud(0x80000000u));
+ p->MOV(a, GenRegister::immud(0x80000000u));
+ p->MOV(b, GenRegister::immud(0));
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->CMP(GEN_CONDITIONAL_EQ, e, GenRegister::immud(0));
+ p->curr.predicate = GEN_PREDICATE_NORMAL;
+ p->CMP(GEN_CONDITIONAL_GE, a, GenRegister::immud(0x80000000u));
+ p->MOV(a, GenRegister::immud(0x7FFFFFFFu));
+ p->MOV(b, GenRegister::immud(0xFFFFFFFFu));
+ }
+ p->pop();
+ storeTopHalf(dst, a);
+ storeBottomHalf(dst, b);
+ }
+
void GenContext::loadTopHalf(GenRegister dest, GenRegister src) {
int execWidth = p->curr.execWidth;
src = src.top_half();
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index c9b74eb..fa43571 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -106,6 +106,7 @@ namespace gbe
void emitI64RHADDInstruction(const SelectionInstruction &insn);
void emitI64ShiftInstruction(const SelectionInstruction &insn);
void emitI64CompareInstruction(const SelectionInstruction &insn);
+ void emitI64SATADDInstruction(const SelectionInstruction &insn);
void emitI64ToFloatInstruction(const SelectionInstruction &insn);
void emitCompareInstruction(const SelectionInstruction &insn);
void emitJumpInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 46537c6..1ff10de 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -35,3 +35,4 @@ DECL_GEN7_SCHEDULE(UnSpillReg, 80, 1, 1)
DECL_GEN7_SCHEDULE(GetImageInfo, 20, 4, 2)
DECL_GEN7_SCHEDULE(Atomic, 80, 1, 1)
DECL_GEN7_SCHEDULE(I64MUL, 20, 4, 2)
+DECL_GEN7_SCHEDULE(I64SATADD, 20, 4, 2)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 0691a58..bc3c884 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -483,6 +483,8 @@ namespace gbe
void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, GenRegister tmp[7]);
/*! Compare 64-bit integer */
void I64CMP(uint32_t conditional, Reg src0, Reg src1, GenRegister tmp[3]);
+ /*! Saturated addition of 64-bit integer */
+ void I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]);
/*! Encode a barrier instruction */
void BARRIER(GenRegister src);
/*! Encode a barrier instruction */
@@ -1083,6 +1085,15 @@ namespace gbe
insn->extra.function = conditional;
}
+ void Selection::Opaque::I64SATADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64SATADD, 7, 2);
+ insn->dst(0) = dst;
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ for(int i=0; i<6; i++)
+ insn->dst(i + 1) = tmp[i];
+ }
+
void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[4]) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 5, 1);
insn->dst(0) = dst;
@@ -1632,6 +1643,16 @@ namespace gbe
sel.ADD(dst, src0, src1);
break;
case OP_ADDSAT:
+ if (type == Type::TYPE_U64 || type == Type::TYPE_S64) {
+ GenRegister tmp[6];
+ for(int i=0; i<5; i++) {
+ tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD));
+ tmp[i].type = GEN_TYPE_UD;
+ }
+ tmp[5] = sel.selReg(sel.reg(FAMILY_BOOL));
+ sel.I64SATADD(dst, src0, src1, tmp);
+ break;
+ }
sel.push();
sel.curr.saturate = GEN_MATH_SATURATE_SATURATE;
sel.ADD(dst, src0, src1);
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 63ad810..9814cc0 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -28,6 +28,7 @@ DECL_SELECTION_IR(I64SHL, I64ShiftInstruction)
DECL_SELECTION_IR(I64ASR, I64ShiftInstruction)
DECL_SELECTION_IR(ADD, BinaryInstruction)
DECL_SELECTION_IR(I64ADD, BinaryWithTempInstruction)
+DECL_SELECTION_IR(I64SATADD, I64SATADDInstruction)
DECL_SELECTION_IR(I64SUB, BinaryWithTempInstruction)
DECL_SELECTION_IR(MUL, BinaryInstruction)
DECL_SELECTION_IR(I64MUL, I64MULInstruction)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index abbb1a6..bc0eb2c 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -182,7 +182,6 @@ INLINE_OVERLOADABLE TYPE add_sat(TYPE x, TYPE y) { return ocl_sadd_sat(x, y); }
INLINE_OVERLOADABLE TYPE sub_sat(TYPE x, TYPE y) { return ocl_ssub_sat(x, y); }
SDEF(char);
SDEF(short);
-SDEF(long);
#undef SDEF
OVERLOADABLE int ocl_sadd_sat(int x, int y);
INLINE_OVERLOADABLE int add_sat(int x, int y) { return ocl_sadd_sat(x, y); }
@@ -190,6 +189,17 @@ OVERLOADABLE int ocl_ssub_sat(int x, int y);
INLINE_OVERLOADABLE int sub_sat(int x, int y) {
return (y == 0x80000000u) ? (x & 0x7FFFFFFF) : ocl_ssub_sat(x, y);
}
+OVERLOADABLE long ocl_sadd_sat(long x, long y);
+INLINE_OVERLOADABLE long add_sat(long x, long y) {
+ union {long l; uint i[2];} ux, uy;
+ ux.l = x;
+ uy.l = y;
+ if((ux.i[1] ^ uy.i[1]) & 0x80000000u)
+ return x + y;
+ return ocl_sadd_sat(x, y);
+}
+OVERLOADABLE long ocl_ssub_sat(long x, long y);
+INLINE_OVERLOADABLE long sub_sat(long x, long y) { return ocl_ssub_sat(x, y); }
#define UDEF(TYPE) \
OVERLOADABLE TYPE ocl_uadd_sat(TYPE x, TYPE y); \
OVERLOADABLE TYPE ocl_usub_sat(TYPE x, TYPE y); \
@@ -201,7 +211,6 @@ UDEF(uint);
UDEF(ulong);
#undef UDEF
-
uchar INLINE_OVERLOADABLE convert_uchar_sat(float x) {
return add_sat((uchar)x, (uchar)0);
}
--
1.8.1.2
More information about the Beignet
mailing list