[Beignet] [PATCH] set SIMD width to 1 for MAD when the dst is uniform

Guo Yejun yejun.guo at intel.com
Mon Apr 25 01:54:25 UTC 2016


Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 backend/src/backend/gen8_encoder.cpp       |  6 +++++-
 backend/src/backend/gen_insn_selection.cpp | 12 ++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index 16b3fc6..32a096b 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -503,7 +503,11 @@ namespace gbe
      gen8_insn->bits1.da3src.dest_writemask = 0xf;
      this->setHeader(insn);
      gen8_insn->header.access_mode = GEN_ALIGN_16;
-     gen8_insn->header.execution_size = GEN_WIDTH_8;
+
+     if (this->curr.execWidth == 1)
+       gen8_insn->header.execution_size = GEN_WIDTH_1;
+     else
+       gen8_insn->header.execution_size = GEN_WIDTH_8;
 
      assert(src0.file == GEN_GENERAL_REGISTER_FILE);
      assert(src0.address_mode == GEN_ADDRESS_DIRECT);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index d157009..9e6c6be 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3133,7 +3133,11 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
         const GenRegister src1 = sel.selReg(child0->insn.getSrc(1), TYPE_FLOAT);
         GenRegister src2 = sel.selReg(insn.getSrc(1), TYPE_FLOAT);
         if(insn.getOpcode() == ir::OP_SUB) src2 = GenRegister::negate(src2);
+        sel.push();
+        if (sel.isScalarReg(insn.getDst(0)))
+          sel.curr.execWidth = 1;
         sel.MAD(dst, src2, src0, src1); // order different on HW!
+        sel.pop();
         if (child0->child[0]) child0->child[0]->isRoot = 1;
         if (child0->child[1]) child0->child[1]->isRoot = 1;
         if (child1) child1->isRoot = 1;
@@ -3145,7 +3149,11 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
         const GenRegister src1 = sel.selReg(child1->insn.getSrc(1), TYPE_FLOAT);
         const GenRegister src2 = sel.selReg(insn.getSrc(0), TYPE_FLOAT);
         if(insn.getOpcode() == ir::OP_SUB) src0 = GenRegister::negate(src0);
+        sel.push();
+        if (sel.isScalarReg(insn.getDst(0)))
+          sel.curr.execWidth = 1;
         sel.MAD(dst, src2, src0, src1); // order different on HW!
+        sel.pop();
         if (child1->child[0]) child1->child[0]->isRoot = 1;
         if (child1->child[1]) child1->child[1]->isRoot = 1;
         if (child0) child0->isRoot = 1;
@@ -5285,7 +5293,11 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
          }
         case OP_MAD:
          {
+          sel.push();
+          if (sel.isScalarReg(insn.getDst(0)))
+            sel.curr.execWidth = 1;
           sel.MAD(dst, src2, src0, src1);
+          sel.pop();
           break;
          }
         case OP_LRP:
-- 
1.9.1



More information about the Beignet mailing list