[Beignet] [PATCH V2 8/9] Backend: Add half type for mad

Xiuli Pan xiuli.pan at intel.com
Mon Aug 8 03:31:26 UTC 2016


From: Pan Xiuli <xiuli.pan at intel.com>

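Add half-precision (half) type support for mad in the Gen8 encoder and in
libocl. The three-source instruction compaction path now only handles float
sources, and libocl's half mad() calls a new half overload of __gen_ocl_mad
instead of converting its operands to float.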

Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
 backend/src/backend/gen8_encoder.cpp     | 13 +++++++++----
 backend/src/backend/gen_insn_compact.cpp |  2 ++
 backend/src/libocl/tmpl/ocl_math.tmpl.cl |  8 +++-----
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index 2a79e30..277260f 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -509,11 +509,19 @@ namespace gbe
      assert(dest.file == GEN_GENERAL_REGISTER_FILE);
      assert(dest.nr < 128);
      assert(dest.address_mode == GEN_ADDRESS_DIRECT);
-     assert(dest.type == GEN_TYPE_F);
+     assert(src0.type == GEN_TYPE_HF || src0.type == GEN_TYPE_F || src0.type == GEN_TYPE_DF);
+     assert(src0.type == dest.type);
+     assert(src0.type == src1.type);
+     assert(src0.type == src2.type);
+     int32_t dataType = src0.type == GEN_TYPE_DF ? 3 : (src0.type == GEN_TYPE_HF ? 4 : 0);
      //gen8_insn->bits1.da3src.dest_reg_file = 0;
      gen8_insn->bits1.da3src.dest_reg_nr = dest.nr;
      gen8_insn->bits1.da3src.dest_subreg_nr = dest.subnr / 4;
      gen8_insn->bits1.da3src.dest_writemask = 0xf;
+     gen8_insn->bits1.da3src.dest_type = dataType;
+     gen8_insn->bits1.da3src.src_type = dataType;
+     gen8_insn->bits1.da3src.src1_type = src1.type == GEN_TYPE_HF;
+     gen8_insn->bits1.da3src.src2_type = src2.type == GEN_TYPE_HF;
      this->setHeader(insn);
      gen8_insn->header.access_mode = GEN_ALIGN_16;
      gen8_insn->header.execution_size = execution_size;
@@ -521,7 +529,6 @@ namespace gbe
      assert(src0.file == GEN_GENERAL_REGISTER_FILE);
      assert(src0.address_mode == GEN_ADDRESS_DIRECT);
      assert(src0.nr < 128);
-     assert(src0.type == GEN_TYPE_F);
      gen8_insn->bits2.da3src.src0_swizzle = NO_SWIZZLE;
      gen8_insn->bits2.da3src.src0_subreg_nr = src0.subnr / 4 ;
      gen8_insn->bits2.da3src.src0_reg_nr = src0.nr;
@@ -532,7 +539,6 @@ namespace gbe
      assert(src1.file == GEN_GENERAL_REGISTER_FILE);
      assert(src1.address_mode == GEN_ADDRESS_DIRECT);
      assert(src1.nr < 128);
-     assert(src1.type == GEN_TYPE_F);
      gen8_insn->bits2.da3src.src1_swizzle = NO_SWIZZLE;
      gen8_insn->bits2.da3src.src1_subreg_nr_low = (src1.subnr / 4) & 0x3;
      gen8_insn->bits3.da3src.src1_subreg_nr_high = (src1.subnr / 4) >> 2;
@@ -544,7 +550,6 @@ namespace gbe
      assert(src2.file == GEN_GENERAL_REGISTER_FILE);
      assert(src2.address_mode == GEN_ADDRESS_DIRECT);
      assert(src2.nr < 128);
-     assert(src2.type == GEN_TYPE_F);
      gen8_insn->bits3.da3src.src2_swizzle = NO_SWIZZLE;
      gen8_insn->bits3.da3src.src2_subreg_nr = src2.subnr / 4;
      gen8_insn->bits3.da3src.src2_rep_ctrl = src2.vstride == GEN_VERTICAL_STRIDE_0;
diff --git a/backend/src/backend/gen_insn_compact.cpp b/backend/src/backend/gen_insn_compact.cpp
index 036d057..5de451c 100644
--- a/backend/src/backend/gen_insn_compact.cpp
+++ b/backend/src/backend/gen_insn_compact.cpp
@@ -788,6 +788,8 @@ namespace gbe {
       return false;
     if(opcode != GEN_OPCODE_MAD && opcode != GEN_OPCODE_LRP)
       return false;
+    if(src0.type != GEN_TYPE_F)
+      return false;
     assert(src0.file == GEN_GENERAL_REGISTER_FILE);
     assert(src0.address_mode == GEN_ADDRESS_DIRECT);
     assert(src0.nr < 128);
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 0d2a57d..9f10713 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -2494,7 +2494,8 @@ OVERLOADABLE float ldexp(float x, int n) {
   return __gen_ocl_internal_ldexp(x, n);
 }
 
-CONST float __gen_ocl_mad(float a, float b, float c) __asm("llvm.fma" ".f32");
+CONST OVERLOADABLE float __gen_ocl_mad(float a, float b, float c) __asm("llvm.fma" ".f32");
+CONST OVERLOADABLE half __gen_ocl_mad(half a, half b, half c) __asm("llvm.fma" ".f16");
 PURE CONST float __gen_ocl_fmax(float a, float b);
 PURE CONST float __gen_ocl_fmin(float a, float b);
 
@@ -3722,10 +3723,7 @@ OVERLOADABLE half exp2(half x) {
   return (half)exp2(_x);
 }
 OVERLOADABLE half mad(half a, half b, half c) {
-  float _a = (float)a;
-  float _b = (float)b;
-  float _c = (float)c;
-  return (half)mad(_a, _b, _c);
+  return __gen_ocl_mad(a,b,c);
 }
 OVERLOADABLE half sin(half x) {
   float _x = (float)x;
-- 
2.7.4



More information about the Beignet mailing list