[Beignet] [PATCH 1/2] support built-in function "upsample"
Song, Ruiling
ruiling.song at intel.com
Fri Jul 5 00:53:06 PDT 2013
LGTM.
-----Original Message-----
From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of Homer Hsing
Sent: Friday, July 05, 2013 3:38 PM
To: beignet at lists.freedesktop.org
Cc: Xing, Homer
Subject: [Beignet] [PATCH 1/2] support built-in function "upsample"
Signed-off-by: Homer Hsing <homer.xing at intel.com>
---
backend/src/backend/gen_context.cpp | 2 ++
backend/src/backend/gen_encoder.cpp | 24 +++++++++++++++++
backend/src/backend/gen_encoder.hpp | 2 ++
backend/src/backend/gen_insn_selection.cpp | 8 ++++++
backend/src/backend/gen_insn_selection.hxx | 2 ++
backend/src/ir/instruction.cpp | 2 ++
backend/src/ir/instruction.hpp | 4 +++
backend/src/ir/instruction.hxx | 2 ++
backend/src/llvm/llvm_gen_backend.cpp | 18 +++++++++++++
backend/src/llvm/llvm_gen_ocl_function.hxx | 2 ++
backend/src/ocl_stdlib.h | 41 ++++++++++++++++++++++++++++++
11 files changed, 107 insertions(+)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index acd9c1d..e33d8da 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -169,6 +169,8 @@ namespace gbe
case SEL_OP_ADD: p->ADD(dst, src0, src1); break;
case SEL_OP_MUL: p->MUL(dst, src0, src1); break;
case SEL_OP_MACH: p->MACH(dst, src0, src1); break;
+ case SEL_OP_UPSAMPLE_SHORT: p->UPSAMPLE_SHORT(dst, src0, src1); break;
+ case SEL_OP_UPSAMPLE_INT: p->UPSAMPLE_INT(dst, src0, src1); break;
default: NOT_IMPLEMENTED;
}
}
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 25303b4..f84c6dd 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -812,6 +812,30 @@ namespace gbe
pop();
}
+ void GenEncoder::UPSAMPLE_SHORT(GenRegister dest, GenRegister src0, GenRegister src1) {
+ dest.type = GEN_TYPE_B;
+ dest.hstride = GEN_HORIZONTAL_STRIDE_2;
+ src0.type = GEN_TYPE_B;
+ src0.hstride = GEN_HORIZONTAL_STRIDE_2;
+ src1.type = GEN_TYPE_B;
+ src1.hstride = GEN_HORIZONTAL_STRIDE_2;
+ MOV(dest, src1);
+ dest.subnr ++;
+ MOV(dest, src0);
+ }
+
+ void GenEncoder::UPSAMPLE_INT(GenRegister dest, GenRegister src0, GenRegister src1) {
+ dest.type = GEN_TYPE_W;
+ dest.hstride = GEN_HORIZONTAL_STRIDE_2;
+ src0.type = GEN_TYPE_W;
+ src0.hstride = GEN_HORIZONTAL_STRIDE_2;
+ src1.type = GEN_TYPE_W;
+ src1.hstride = GEN_HORIZONTAL_STRIDE_2;
+ MOV(dest, src1);
+ dest.subnr += 2;
+ MOV(dest, src0);
+ }
+
void GenEncoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister r) {
int w = curr.execWidth;
if (src0.isdf()) {
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index a7cbc89..d3a7165 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -92,6 +92,8 @@ namespace gbe
ALU1(MOV)
ALU1(FBH)
ALU1(FBL)
+ ALU2(UPSAMPLE_SHORT)
+ ALU2(UPSAMPLE_INT)
ALU1(RNDZ)
ALU1(RNDE)
ALU1(RNDD)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index bfe1e28..d4be8bf 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -438,6 +438,8 @@ namespace gbe
ALU1(FBL)
ALU3(HADD)
ALU3(RHADD)
+ ALU2(UPSAMPLE_SHORT)
+ ALU2(UPSAMPLE_INT)
#undef ALU1
#undef ALU2
#undef ALU3
@@ -1451,6 +1453,12 @@ namespace gbe
sel.RHADD(dst, src0, src1, temp);
break;
}
+ case OP_UPSAMPLE_SHORT:
+ sel.UPSAMPLE_SHORT(dst, src0, src1);
+ break;
+ case OP_UPSAMPLE_INT:
+ sel.UPSAMPLE_INT(dst, src0, src1);
+ break;
default: NOT_IMPLEMENTED;
}
sel.pop();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index c85d328..33c3937 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -47,3 +47,5 @@ DECL_SELECTION_IR(FBH, UnaryInstruction)
DECL_SELECTION_IR(FBL, UnaryInstruction)
DECL_SELECTION_IR(HADD, TernaryInstruction)
DECL_SELECTION_IR(RHADD, TernaryInstruction)
+DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction)
+DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 21b82ce..2a77454 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1330,6 +1330,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
DECL_EMIT_FUNCTION(SUB)
DECL_EMIT_FUNCTION(SUBSAT)
DECL_EMIT_FUNCTION(MUL_HI)
+ DECL_EMIT_FUNCTION(UPSAMPLE_SHORT)
+ DECL_EMIT_FUNCTION(UPSAMPLE_INT)
DECL_EMIT_FUNCTION(DIV)
DECL_EMIT_FUNCTION(REM)
DECL_EMIT_FUNCTION(SHL)
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index fc1c984..48e6963 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -519,6 +519,10 @@ namespace ir {
Instruction SIN(Type type, Register dst, Register src);
/*! mul_hi.type dst src */
Instruction MUL_HI(Type type, Register dst, Register src0, Register src1);
+ /*! upsample_short.type dst src */
+ Instruction UPSAMPLE_SHORT(Type type, Register dst, Register src0, Register src1);
+ /*! upsample_int.type dst src */
+ Instruction UPSAMPLE_INT(Type type, Register dst, Register src0, Register src1);
/*! fbh.type dst src */
Instruction FBH(Type type, Register dst, Register src);
/*! fbl.type dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 0e1c575..b9f0e73 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -77,3 +77,5 @@ DECL_INSN(FBH, UnaryInstruction)
DECL_INSN(FBL, UnaryInstruction)
DECL_INSN(HADD, BinaryInstruction)
DECL_INSN(RHADD, BinaryInstruction)
+DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction)
+DECL_INSN(UPSAMPLE_INT, BinaryInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 260e3c1..564e441 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1829,6 +1829,8 @@ namespace gbe
}
case GEN_OCL_MUL_HI_INT:
case GEN_OCL_MUL_HI_UINT:
+ case GEN_OCL_UPSAMPLE_SHORT:
+ case GEN_OCL_UPSAMPLE_INT:
case GEN_OCL_SADD_SAT_CHAR:
case GEN_OCL_SADD_SAT_SHORT:
case GEN_OCL_SADD_SAT_INT:
@@ -2209,6 +2211,22 @@ namespace gbe
ctx.MUL_HI(getUnsignedType(ctx, I.getType()), dst, src0, src1);
break;
}
+ case GEN_OCL_UPSAMPLE_SHORT:
+ {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+ const ir::Register dst = this->getRegister(&I);
+ ctx.UPSAMPLE_SHORT(getType(ctx, I.getType()), dst, src0, src1);
+ break;
+ }
+ case GEN_OCL_UPSAMPLE_INT:
+ {
+ GBE_ASSERT(AI != AE); const ir::Register src0 = this->getRegister(*AI); ++AI;
+ GBE_ASSERT(AI != AE); const ir::Register src1 = this->getRegister(*AI); ++AI;
+ const ir::Register dst = this->getRegister(&I);
+ ctx.UPSAMPLE_INT(getType(ctx, I.getType()), dst, src0, src1);
+ break;
+ }
case GEN_OCL_SADD_SAT_CHAR:
case GEN_OCL_SADD_SAT_SHORT:
case GEN_OCL_SADD_SAT_INT:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index f448a50..8e940bc 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -133,3 +133,5 @@ DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl)
DECL_LLVM_GEN_FUNCTION(ABS, __gen_ocl_abs)
DECL_LLVM_GEN_FUNCTION(HADD, __gen_ocl_hadd)
DECL_LLVM_GEN_FUNCTION(RHADD, __gen_ocl_rhadd)
+DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless)
+DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii)
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index bceac86..090e2b7 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -4480,6 +4480,47 @@ DEF(16)
#undef DEC8
#undef DEC16
+OVERLOADABLE short __gen_ocl_upsample(short hi, short lo);
+OVERLOADABLE int __gen_ocl_upsample(int hi, int lo);
+INLINE_OVERLOADABLE short upsample(char hi, uchar lo) { return __gen_ocl_upsample((short)hi, (short)lo); }
+INLINE_OVERLOADABLE ushort upsample(uchar hi, uchar lo) { return __gen_ocl_upsample((short)hi, (short)lo); }
+INLINE_OVERLOADABLE int upsample(short hi, ushort lo) { return __gen_ocl_upsample((int)hi, (int)lo); }
+INLINE_OVERLOADABLE uint upsample(ushort hi, ushort lo) { return __gen_ocl_upsample((int)hi, (int)lo); }
+#define DEC2(type, type2) INLINE_OVERLOADABLE type2##2 upsample(type##2 a, type##2 b) { return (type2##2)(upsample(a.s0, b.s0), upsample(a.s1, b.s1)); }
+#define DEC3(type, type2) INLINE_OVERLOADABLE type2##3 upsample(type##3 a, type##3 b) { return (type2##3)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2)); }
+#define DEC4(type, type2) INLINE_OVERLOADABLE type2##4 upsample(type##4 a, type##4 b) { return (type2##4)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2), upsample(a.s3, b.s3)); }
+#define DEC8(type, type2) INLINE_OVERLOADABLE type2##8 upsample(type##8 a, type##8 b) { return (type2##8)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2), upsample(a.s3, b.s3), upsample(a.s4, b.s4), upsample(a.s5, b.s5), upsample(a.s6, b.s6), upsample(a.s7, b.s7)); }
+#define DEC16(type, type2) INLINE_OVERLOADABLE type2##16 upsample(type##16 a, type##16 b) { return (type2##16)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2), upsample(a.s3, b.s3), upsample(a.s4, b.s4), upsample(a.s5, b.s5), upsample(a.s6, b.s6), upsample(a.s7, b.s7), upsample(a.s8, b.s8), upsample(a.s9, b.s9), upsample(a.sa, b.sa), upsample(a.sb, b.sb), upsample(a.sc, b.sc), upsample(a.sd, b.sd), upsample(a.se, b.se), upsample(a.sf, b.sf)); }
+#define DEF(n) DEC##n(uchar, ushort); DEC##n(ushort, uint)
+DEF(2)
+DEF(3)
+DEF(4)
+DEF(8)
+DEF(16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+#define DEC2(type, type2) INLINE_OVERLOADABLE type2##2 upsample(type##2 a, u##type##2 b) { return (type2##2)(upsample(a.s0, b.s0), upsample(a.s1, b.s1)); }
+#define DEC3(type, type2) INLINE_OVERLOADABLE type2##3 upsample(type##3 a, u##type##3 b) { return (type2##3)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2)); }
+#define DEC4(type, type2) INLINE_OVERLOADABLE type2##4 upsample(type##4 a, u##type##4 b) { return (type2##4)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2), upsample(a.s3, b.s3)); }
+#define DEC8(type, type2) INLINE_OVERLOADABLE type2##8 upsample(type##8 a, u##type##8 b) { return (type2##8)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2), upsample(a.s3, b.s3), upsample(a.s4, b.s4), upsample(a.s5, b.s5), upsample(a.s6, b.s6), upsample(a.s7, b.s7)); }
+#define DEC16(type, type2) INLINE_OVERLOADABLE type2##16 upsample(type##16 a, u##type##16 b) { return (type2##16)(upsample(a.s0, b.s0), upsample(a.s1, b.s1), upsample(a.s2, b.s2), upsample(a.s3, b.s3), upsample(a.s4, b.s4), upsample(a.s5, b.s5), upsample(a.s6, b.s6), upsample(a.s7, b.s7), upsample(a.s8, b.s8), upsample(a.s9, b.s9), upsample(a.sa, b.sa), upsample(a.sb, b.sb), upsample(a.sc, b.sc), upsample(a.sd, b.sd), upsample(a.se, b.se), upsample(a.sf, b.sf)); }
+#define DEF(n) DEC##n(char, short); DEC##n(short, int)
+DEF(2)
+DEF(3)
+DEF(4)
+DEF(8)
+DEF(16)
+#undef DEF
+#undef DEC2
+#undef DEC3
+#undef DEC4
+#undef DEC8
+#undef DEC16
+
PURE CONST uint __gen_ocl_hadd(uint x, uint y);
PURE CONST uint __gen_ocl_rhadd(uint x, uint y);
#define DEC DEF(char); DEF(uchar); DEF(short); DEF(ushort)
--
1.8.1.2
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet mailing list