[Beignet] [PATCH V2 8/9] Backend: Add half type for mad
Xiuli Pan
xiuli.pan at intel.com
Mon Aug 8 03:31:26 UTC 2016
From: Pan Xiuli <xiuli.pan at intel.com>
Add half-float (HF) type support for the mad instruction in the Gen8 encoder and in libocl.
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
backend/src/backend/gen8_encoder.cpp | 13 +++++++++----
backend/src/backend/gen_insn_compact.cpp | 2 ++
backend/src/libocl/tmpl/ocl_math.tmpl.cl | 8 +++-----
3 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index 2a79e30..277260f 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -509,11 +509,19 @@ namespace gbe
assert(dest.file == GEN_GENERAL_REGISTER_FILE);
assert(dest.nr < 128);
assert(dest.address_mode == GEN_ADDRESS_DIRECT);
- assert(dest.type == GEN_TYPE_F);
+ assert(src0.type == GEN_TYPE_HF || src0.type == GEN_TYPE_F || src0.type == GEN_TYPE_DF);
+ assert(src0.type == dest.type);
+ assert(src0.type == src1.type);
+ assert(src0.type == src2.type);
+ int32_t dataType = src0.type == GEN_TYPE_DF ? 3 : (src0.type == GEN_TYPE_HF ? 4 : 0);
//gen8_insn->bits1.da3src.dest_reg_file = 0;
gen8_insn->bits1.da3src.dest_reg_nr = dest.nr;
gen8_insn->bits1.da3src.dest_subreg_nr = dest.subnr / 4;
gen8_insn->bits1.da3src.dest_writemask = 0xf;
+ gen8_insn->bits1.da3src.dest_type = dataType;
+ gen8_insn->bits1.da3src.src_type = dataType;
+ gen8_insn->bits1.da3src.src1_type = src1.type == GEN_TYPE_HF;
+ gen8_insn->bits1.da3src.src2_type = src2.type == GEN_TYPE_HF;
this->setHeader(insn);
gen8_insn->header.access_mode = GEN_ALIGN_16;
gen8_insn->header.execution_size = execution_size;
@@ -521,7 +529,6 @@ namespace gbe
assert(src0.file == GEN_GENERAL_REGISTER_FILE);
assert(src0.address_mode == GEN_ADDRESS_DIRECT);
assert(src0.nr < 128);
- assert(src0.type == GEN_TYPE_F);
gen8_insn->bits2.da3src.src0_swizzle = NO_SWIZZLE;
gen8_insn->bits2.da3src.src0_subreg_nr = src0.subnr / 4 ;
gen8_insn->bits2.da3src.src0_reg_nr = src0.nr;
@@ -532,7 +539,6 @@ namespace gbe
assert(src1.file == GEN_GENERAL_REGISTER_FILE);
assert(src1.address_mode == GEN_ADDRESS_DIRECT);
assert(src1.nr < 128);
- assert(src1.type == GEN_TYPE_F);
gen8_insn->bits2.da3src.src1_swizzle = NO_SWIZZLE;
gen8_insn->bits2.da3src.src1_subreg_nr_low = (src1.subnr / 4) & 0x3;
gen8_insn->bits3.da3src.src1_subreg_nr_high = (src1.subnr / 4) >> 2;
@@ -544,7 +550,6 @@ namespace gbe
assert(src2.file == GEN_GENERAL_REGISTER_FILE);
assert(src2.address_mode == GEN_ADDRESS_DIRECT);
assert(src2.nr < 128);
- assert(src2.type == GEN_TYPE_F);
gen8_insn->bits3.da3src.src2_swizzle = NO_SWIZZLE;
gen8_insn->bits3.da3src.src2_subreg_nr = src2.subnr / 4;
gen8_insn->bits3.da3src.src2_rep_ctrl = src2.vstride == GEN_VERTICAL_STRIDE_0;
diff --git a/backend/src/backend/gen_insn_compact.cpp b/backend/src/backend/gen_insn_compact.cpp
index 036d057..5de451c 100644
--- a/backend/src/backend/gen_insn_compact.cpp
+++ b/backend/src/backend/gen_insn_compact.cpp
@@ -788,6 +788,8 @@ namespace gbe {
return false;
if(opcode != GEN_OPCODE_MAD && opcode != GEN_OPCODE_LRP)
return false;
+ if(src0.type != GEN_TYPE_F)
+ return false;
assert(src0.file == GEN_GENERAL_REGISTER_FILE);
assert(src0.address_mode == GEN_ADDRESS_DIRECT);
assert(src0.nr < 128);
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 0d2a57d..9f10713 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -2494,7 +2494,8 @@ OVERLOADABLE float ldexp(float x, int n) {
return __gen_ocl_internal_ldexp(x, n);
}
-CONST float __gen_ocl_mad(float a, float b, float c) __asm("llvm.fma" ".f32");
+CONST OVERLOADABLE float __gen_ocl_mad(float a, float b, float c) __asm("llvm.fma" ".f32");
+CONST OVERLOADABLE half __gen_ocl_mad(half a, half b, half c) __asm("llvm.fma" ".f16");
PURE CONST float __gen_ocl_fmax(float a, float b);
PURE CONST float __gen_ocl_fmin(float a, float b);
@@ -3722,10 +3723,7 @@ OVERLOADABLE half exp2(half x) {
return (half)exp2(_x);
}
OVERLOADABLE half mad(half a, half b, half c) {
- float _a = (float)a;
- float _b = (float)b;
- float _c = (float)c;
- return (half)mad(_a, _b, _c);
+ return __gen_ocl_mad(a,b,c);
}
OVERLOADABLE half sin(half x) {
float _x = (float)x;
--
2.7.4
More information about the Beignet mailing list