[Beignet] [PATCH] add 64-bit version of "mul_hi"
Homer Hsing
homer.xing at intel.com
Thu Sep 5 01:34:34 PDT 2013
This patch passes the following piglit test cases:
piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-long-mul_hi-1.0.generated.cl
piglit/framework/../bin/cl-program-tester generated_tests/cl/builtin/int/builtin-ulong-mul_hi-1.0.generated.cl
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/gen_context.cpp | 84 +++++++++++++++++++++-
backend/src/backend/gen_context.hpp | 4 ++
.../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
backend/src/backend/gen_insn_selection.cpp | 21 ++++++
backend/src/backend/gen_insn_selection.hpp | 4 +-
backend/src/backend/gen_insn_selection.hxx | 1 +
backend/src/ir/instruction.cpp | 1 +
backend/src/ir/instruction.hpp | 2 +
backend/src/ir/instruction.hxx | 1 +
backend/src/llvm/llvm_gen_backend.cpp | 18 +++++
backend/src/llvm/llvm_gen_ocl_function.hxx | 2 +
backend/src/ocl_stdlib.tmpl.h | 6 +-
12 files changed, 140 insertions(+), 5 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index bf4e94b..d7f37c2 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -458,6 +458,88 @@ namespace gbe
p->pop();
}
+ void GenContext::I64FullAdd(GenRegister high1, GenRegister low1, GenRegister high2, GenRegister low2) { // (high1:low1) += (high2:low2); the carry out of the 64-bit sum is left in high2, low2 is clobbered
+ addWithCarry(low1, low1, low2); // low1 += low2; addWithCarry moves acc0 (the carry) into its last operand, so low2 now holds the low-word carry
+ addWithCarry(high1, high1, high2); // high1 += high2; high2 now holds the carry out of the high words
+ addWithCarry(high1, high1, low2); // fold in the low-word carry; this step can itself overflow (high1 == 0xFFFFFFFF), so capture that carry in low2 instead of dropping it
+ p->ADD(high2, high2, low2); // merge the two possible high-word carries (at most one can be set) so callers such as I64FullMult see the complete carry-out
+ }
+
+ void GenContext::I64FullMult(GenRegister dst1, GenRegister dst2, GenRegister dst3, GenRegister dst4, GenRegister x_high, GenRegister x_low, GenRegister y_high, GenRegister y_low) { // multiply x_high:x_low by y_high:y_low from 32-bit partial products; the caller reads the upper 64 bits from dst1:dst2
+ GenRegister &e = dst1, &f = dst2, &g = dst3, &h = dst4, // short aliases for the four result dwords (presumably e most significant, h least — the signed caller negates e,f,g,h as one value)
+ &a = x_high, &b = x_low, &c = y_high, &d = y_low; // NOTE: b and d are written below, so the caller's x_low/y_low registers do not survive this call
+ I32FullMult(e, h, b, d); // e:h = x_low * y_low (assumes I32FullMult writes high:low of src0*src1 into its first two operands — confirm)
+ I32FullMult(f, g, a, d); // f:g = x_high * y_low
+ addWithCarry(g, g, e); // g += high word of x_low*y_low; addWithCarry moves acc0 into its last operand, so e becomes the carry
+ addWithCarry(f, f, e); // propagate that carry into f
+ I32FullMult(e, d, b, c); // e:d = x_low * y_high (reuses the y_low register d as output)
+ I64FullAdd(f, g, e, d); // f:g += e:d; e is expected to hold the carry-out afterwards
+ I32FullMult(b, d, a, c); // b:d = x_high * y_high (reuses the x_low and y_low registers as outputs)
+ I64FullAdd(e, f, b, d); // e:f = carry:f + b:d, leaving the upper 64 bits of the product in e:f
+ }
+
+ void GenContext::I64ABS(GenRegister sign, GenRegister high, GenRegister low, GenRegister tmp) { // record the top bit of high in sign and, where it is set, replace high:low with its two's-complement negation; tmp is scratch
+ p->SHR(sign, high, GenRegister::immud(31)); // sign = high >> 31 (callers pass UD-typed temporaries, so presumably a logical shift giving 0 or 1)
+ p->push(); // presumably saves the encoder state so the flag/predicate changes below stay local — confirm
+ p->curr.predicate = GEN_PREDICATE_NONE; // the CMP itself is emitted unpredicated
+ p->curr.physicalFlag = 1;
+ p->curr.flag = 1; // use flag register 1.0 for the comparison result
+ p->curr.subFlag = 0;
+ p->CMP(GEN_CONDITIONAL_NZ, sign, GenRegister::immud(0)); // flag set where sign != 0, i.e. where the input is negative
+ p->curr.predicate = GEN_PREDICATE_NORMAL; // instructions below are presumably predicated on that flag
+ p->NOT(high, high); // ~x ...
+ p->NOT(low, low);
+ p->MOV(tmp, GenRegister::immud(1)); // ... + 1
+ addWithCarry(low, low, tmp); // low += 1; addWithCarry moves acc0 (the carry) back into tmp
+ p->ADD(high, high, tmp); // high += carry from the low word
+ p->pop();
+ }
+
+ void GenContext::emitI64MULHIInstruction(const SelectionInstruction &insn) { // emit the 64-bit mul_hi sequence: dst(0) receives the high 64 bits of src(0)*src(1); dst(1)..dst(9) are temporaries
+ GenRegister dest = ra->genReg(insn.dst(0));
+ GenRegister x = ra->genReg(insn.src(0));
+ GenRegister y = ra->genReg(insn.src(1));
+ GenRegister a = ra->genReg(insn.dst(1)); // a..d: the 32-bit halves of x and y
+ GenRegister b = ra->genReg(insn.dst(2));
+ GenRegister c = ra->genReg(insn.dst(3));
+ GenRegister d = ra->genReg(insn.dst(4));
+ GenRegister e = ra->genReg(insn.dst(5)); // e..h: the four product dwords written by I64FullMult
+ GenRegister f = ra->genReg(insn.dst(6));
+ GenRegister g = ra->genReg(insn.dst(7));
+ GenRegister h = ra->genReg(insn.dst(8));
+ GenRegister i = ra->genReg(insn.dst(9)); // i: scratch (sign mask, then the constant 1 / carry)
+ loadTopHalf(a, x); // presumably a = upper 32 bits of x, b = lower 32 bits (helpers not visible here)
+ loadBottomHalf(b, x);
+ loadTopHalf(c, y);
+ loadBottomHalf(d, y);
+ if(x.type == GEN_TYPE_UL) { // unsigned operands: multiply directly
+ I64FullMult(e, f, g, h, a, b, c, d);
+ } else { // signed operands: multiply magnitudes, then fix up the sign
+ I64ABS(e, a, b, i); // e = sign of x, a:b = |x|
+ I64ABS(f, c, d, i); // f = sign of y, c:d = |y|
+ p->XOR(i, e, f); // i != 0 exactly when the signs differ, i.e. the product is negative
+ I64FullMult(e, f, g, h, a, b, c, d); // e and f are reused as outputs; their sign values were consumed by the XOR above
+ p->push();
+ p->curr.predicate = GEN_PREDICATE_NONE; // the CMP itself is emitted unpredicated
+ p->curr.physicalFlag = 1;
+ p->curr.flag = 1; // flag register 1.0 holds the "negate" condition
+ p->curr.subFlag = 0;
+ p->CMP(GEN_CONDITIONAL_NZ, i, GenRegister::immud(0)); // flag set where the product must be negated
+ p->curr.predicate = GEN_PREDICATE_NORMAL; // everything up to pop() is presumably predicated on that flag
+ p->NOT(e, e); // negate e:f:g:h as one value: invert all four dwords ...
+ p->NOT(f, f);
+ p->NOT(g, g);
+ p->NOT(h, h);
+ p->MOV(i, GenRegister::immud(1)); // ... then add 1, rippling the carry upwards through i
+ addWithCarry(h, h, i); // addWithCarry moves acc0 (the carry) back into i after each step
+ addWithCarry(g, g, i);
+ addWithCarry(f, f, i);
+ p->ADD(e, e, i); // top dword: a carry out of it is not needed
+ p->pop();
+ }
+ storeTopHalf(dest, e); // dest = e:f (presumably upper and lower 32 bits of the 64-bit destination)
+ storeBottomHalf(dest, f);
+ }
+
void GenContext::emitI64HADDInstruction(const SelectionInstruction &insn) {
GenRegister dest = ra->genReg(insn.dst(0));
GenRegister x = ra->genReg(insn.src(0));
@@ -768,11 +850,11 @@ namespace gbe
int execWidth = p->curr.execWidth;
GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D);
p->push();
- p->curr.predicate = GEN_PREDICATE_NONE;
p->curr.execWidth = 8;
p->ADDC(dest, src0, src1);
p->MOV(src1, acc0);
if (execWidth == 16) {
+ p->curr.quarterControl = 1;
p->ADDC(GenRegister::suboffset(dest, 8),
GenRegister::suboffset(src0, 8),
GenRegister::suboffset(src1, 8));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 9c9299d..8e473ca 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -86,7 +86,10 @@ namespace gbe
void addWithCarry(GenRegister dest, GenRegister src0, GenRegister src1);
void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1);
+ void I64ABS(GenRegister sign, GenRegister high, GenRegister low, GenRegister tmp);
+ void I64FullAdd(GenRegister high1, GenRegister low1, GenRegister high2, GenRegister low2);
void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, GenRegister src1);
+ void I64FullMult(GenRegister dst1, GenRegister dst2, GenRegister dst3, GenRegister dst4, GenRegister x_high, GenRegister x_low, GenRegister y_high, GenRegister y_low);
void saveFlag(GenRegister dest, int flag, int subFlag);
/*! Final Gen ISA emission helper functions */
@@ -96,6 +99,7 @@ namespace gbe
void emitBinaryInstruction(const SelectionInstruction &insn);
void emitBinaryWithTempInstruction(const SelectionInstruction &insn);
void emitTernaryInstruction(const SelectionInstruction &insn);
+ void emitI64MULHIInstruction(const SelectionInstruction &insn);
void emitI64HADDInstruction(const SelectionInstruction &insn);
void emitI64RHADDInstruction(const SelectionInstruction &insn);
void emitI64ShiftInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 2035741..6cbdecd 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -8,6 +8,7 @@ DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64Shift, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64RHADD, 20, 4, 2)
+DECL_GEN7_SCHEDULE(I64MULHI, 20, 4, 2)
DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2)
DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 2d34022..f0a98dd 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -470,6 +470,8 @@ namespace gbe
#undef ALU2WithTemp
#undef ALU3
#undef I64Shift
+ /*! High 64bit of x*y */
+ void I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[9]);
/*! (x+y)>>1 without mod. overflow */
void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]);
/*! (x+y+1)>>1 without mod. overflow */
@@ -1078,6 +1080,15 @@ namespace gbe
insn->extra.function = conditional;
}
+ void Selection::Opaque::I64_MUL_HI(Reg dst, Reg src0, Reg src1, GenRegister tmp[9]) { // append a SEL_OP_I64_MUL_HI instruction carrying the result, both operands and nine temporaries
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_I64_MUL_HI, 10, 2); // 10 destinations (result + 9 temporaries), 2 sources
+ insn->dst(0) = dst; // dst(0) is the actual result
+ insn->src(0) = src0;
+ insn->src(1) = src1;
+ for(int i = 0; i < 9; i ++)
+ insn->dst(i + 1) = tmp[i]; // temporaries occupy dst(1)..dst(9), the slots the emitter reads them from
+ }
+
void Selection::Opaque::I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_I64HADD, 5, 2);
insn->dst(0) = dst;
@@ -1668,6 +1679,16 @@ namespace gbe
sel.MUL_HI(dst, src0, src1, temp);
break;
}
+ case OP_I64_MUL_HI: // select the 64-bit mul_hi IR instruction
+ {
+ GenRegister temp[9]; // nine temporaries, matching the tmp[9] parameter of sel.I64_MUL_HI
+ for(int i=0; i<9; i++) {
+ temp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); // fresh DWORD-family register for each temporary
+ temp[i].type = GEN_TYPE_UD; // typed UD so the emitted arithmetic on the halves uses that type
+ }
+ sel.I64_MUL_HI(dst, src0, src1, temp);
+ break;
+ }
case OP_MUL:
if (type == TYPE_U32 || type == TYPE_S32) {
sel.pop();
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 79b73e2..aebdc6f 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -87,7 +87,7 @@ namespace gbe
/*! No more than 17 sources (used by typed writes on simd8 mode.) */
enum { MAX_SRC_NUM = 17 };
/*! No more than 4 destinations (used by samples and untyped reads) */
- enum { MAX_DST_NUM = 4 };
+ enum { MAX_DST_NUM = 10 };
/*! State of the instruction (extra fields neeed for the encoding) */
GenInstructionState state;
union {
@@ -115,7 +115,7 @@ namespace gbe
/*! Gen opcode */
uint8_t opcode;
/*! Number of destinations */
- uint8_t dstNum:3;
+ uint8_t dstNum:4;
/*! Number of sources */
uint8_t srcNum:5;
/*! To store various indices */
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 86773cd..ee0e740 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -58,6 +58,7 @@ DECL_SELECTION_IR(GET_IMAGE_INFO, GetImageInfoInstruction)
DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction)
DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction)
DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction)
+DECL_SELECTION_IR(I64_MUL_HI, I64MULHIInstruction)
DECL_SELECTION_IR(FBH, UnaryInstruction)
DECL_SELECTION_IR(FBL, UnaryInstruction)
DECL_SELECTION_IR(HADD, BinaryWithTempInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 1925d93..8130b8b 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1333,6 +1333,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
DECL_EMIT_FUNCTION(SUB)
DECL_EMIT_FUNCTION(SUBSAT)
DECL_EMIT_FUNCTION(MUL_HI)
+ DECL_EMIT_FUNCTION(I64_MUL_HI)
DECL_EMIT_FUNCTION(UPSAMPLE_SHORT)
DECL_EMIT_FUNCTION(UPSAMPLE_INT)
DECL_EMIT_FUNCTION(UPSAMPLE_LONG)
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 2a06f76..f165595 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -519,6 +519,8 @@ namespace ir {
Instruction SIN(Type type, Register dst, Register src);
/*! mul_hi.type dst src */
Instruction MUL_HI(Type type, Register dst, Register src0, Register src1);
+ /*! i64_mul_hi.type dst src */
+ Instruction I64_MUL_HI(Type type, Register dst, Register src0, Register src1);
/*! upsample_short.type dst src */
Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1);
/*! upsample_int.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 7ead344..135dc82 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -73,6 +73,7 @@ DECL_INSN(SYNC, SyncInstruction)
DECL_INSN(LABEL, LabelInstruction)
DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction)
DECL_INSN(MUL_HI, BinaryInstruction)
+DECL_INSN(I64_MUL_HI, BinaryInstruction)
DECL_INSN(FBH, UnaryInstruction)
DECL_INSN(FBL, UnaryInstruction)
DECL_INSN(HADD, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 6018d7d..8373cc9 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1820,6 +1820,8 @@ namespace gbe
}
case GEN_OCL_MUL_HI_INT:
case GEN_OCL_MUL_HI_UINT:
+ case GEN_OCL_MUL_HI_I64:
+ case GEN_OCL_MUL_HI_UI64:
case GEN_OCL_UPSAMPLE_SHORT:
case GEN_OCL_UPSAMPLE_INT:
case GEN_OCL_UPSAMPLE_LONG:
@@ -2205,6 +2207,22 @@ namespace gbe
ctx.MUL_HI(getUnsignedType(ctx, I.getType()), dst, src0, src1);
break;
}
+ case GEN_OCL_MUL_HI_I64: // call to the builtin registered as _Z16__gen_ocl_mul_hill (long, long)
+ {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; // first call argument
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; // second call argument
+ const ir::Register dst = this->getRegister(&I); // register holding the call's result
+ ctx.I64_MUL_HI(getType(ctx, I.getType()), dst, src0, src1); // IR type comes from getType on the call's return type (the unsigned case uses getUnsignedType instead)
+ break;
+ }
+ case GEN_OCL_MUL_HI_UI64: // call to the builtin registered as _Z16__gen_ocl_mul_himm (ulong, ulong)
+ {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI; // first call argument
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI; // second call argument
+ const ir::Register dst = this->getRegister(&I); // register holding the call's result
+ ctx.I64_MUL_HI(getUnsignedType(ctx, I.getType()), dst, src0, src1); // request the unsigned IR type explicitly; presumably this is what selects the GEN_TYPE_UL path in the emitter — confirm
+ break;
+ }
case GEN_OCL_UPSAMPLE_SHORT:
{
GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 58df2b0..7b5a2d3 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -128,6 +128,8 @@ DECL_LLVM_GEN_FUNCTION(USUB_SAT_LONG, _Z12ocl_usub_satmm)
// integer built-in functions
DECL_LLVM_GEN_FUNCTION(MUL_HI_INT, _Z16__gen_ocl_mul_hiii)
DECL_LLVM_GEN_FUNCTION(MUL_HI_UINT, _Z16__gen_ocl_mul_hijj)
+DECL_LLVM_GEN_FUNCTION(MUL_HI_I64, _Z16__gen_ocl_mul_hill)
+DECL_LLVM_GEN_FUNCTION(MUL_HI_UI64, _Z16__gen_ocl_mul_himm)
DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh)
DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index eadcc00..688becc 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -343,6 +343,8 @@ INLINE_OVERLOADABLE ulong clz(ulong x) {
OVERLOADABLE int __gen_ocl_mul_hi(int x, int y);
OVERLOADABLE uint __gen_ocl_mul_hi(uint x, uint y);
+OVERLOADABLE long __gen_ocl_mul_hi(long x, long y);
+OVERLOADABLE ulong __gen_ocl_mul_hi(ulong x, ulong y);
INLINE_OVERLOADABLE char mul_hi(char x, char y) { return (x * y) >> 8; }
INLINE_OVERLOADABLE uchar mul_hi(uchar x, uchar y) { return (x * y) >> 8; }
INLINE_OVERLOADABLE short mul_hi(short x, short y) { return (x * y) >> 16; }
@@ -350,10 +352,10 @@ INLINE_OVERLOADABLE ushort mul_hi(ushort x, ushort y) { return (x * y) >> 16; }
INLINE_OVERLOADABLE int mul_hi(int x, int y) { return __gen_ocl_mul_hi(x, y); }
INLINE_OVERLOADABLE uint mul_hi(uint x, uint y) { return __gen_ocl_mul_hi(x, y); }
INLINE_OVERLOADABLE long mul_hi(long x, long y) {
- return 0;
+ return __gen_ocl_mul_hi(x, y);
}
INLINE_OVERLOADABLE ulong mul_hi(ulong x, ulong y) {
- return 0;
+ return __gen_ocl_mul_hi(x, y);
}
#define DEF(type) INLINE_OVERLOADABLE type mad_hi(type a, type b, type c) { return mul_hi(a, b) + c; }
--
1.8.1.2
More information about the Beignet
mailing list