[Beignet] [PATCH] GBE: fixed a predication bug for DW multiplication.

Zhigang Gong zhigang.gong at linux.intel.com
Mon Jun 3 20:10:14 PDT 2013


Per bspec:
  mul (8) acc0:d r2.0<8;8,1>:d r3.0<8;8,1>:d //All channels must be enabled
  mach (8) rTemp<1>:d r2.0<8;8,1>:d r3.0<8;8,1>:d //All channels must be enabled
  mov (8) r5.0<1>:d rTemp<8;8,1>:d // High 32 bits
  mov (8) r6.0<1>:d acc0:d // Low 32 bits

  The mul and mach instructions must have all channels enabled.
  The first mov should have channel enable from the destHI of IMUL,
  the second mov should have the channel enable from the destLO of IMUL.

We need to disable both the predication and the mask, rather than only setting noMask to 1.
Strangely, this does not seem to be necessary for the first quarter.
Changing both quarters to this style would waste some registers, which causes some
kernels to fail to compile (compiler_box_blur.cl), so I only change the second quarter
to fully comply with the bspec here. In practice, this works fine with all unit
test cases and with Homer's specific test case.

Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 561e32f..88f9e94 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1429,7 +1429,10 @@ namespace gbe
 
         // Right part of the 16-wide register now
         if (simdWidth == 16) {
+          int predicate = sel.curr.predicate;
+          int noMask = sel.curr.noMask;
           sel.curr.noMask = 1;
+          sel.curr.predicate = GEN_PREDICATE_NONE;
           const GenRegister nextSrc0 = sel.selRegQn(insn.getSrc(0), 1, TYPE_S32);
           const GenRegister nextSrc1 = sel.selRegQn(insn.getSrc(1), 1, TYPE_S32);
           sel.MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_D), nextSrc0, nextSrc1);
@@ -1437,11 +1440,15 @@ namespace gbe
           sel.MACH(GenRegister::retype(GenRegister::null(), GEN_TYPE_D), nextSrc0, nextSrc1);
           sel.curr.accWrEnable = 0;
           sel.curr.quarterControl = GEN_COMPRESSION_Q2;
-          const ir::Register reg = sel.reg(FAMILY_DWORD);
-          sel.MOV(GenRegister::f8grf(reg), GenRegister::acc());
-          sel.curr.noMask = 0;
-          sel.MOV(GenRegister::retype(GenRegister::next(dst), GEN_TYPE_F),
-                  GenRegister::f8grf(reg));
+          if (predicate != GEN_PREDICATE_NONE || noMask != 1) {
+            const ir::Register reg = sel.reg(FAMILY_DWORD);
+            sel.MOV(GenRegister::f8grf(reg), GenRegister::acc());
+            sel.curr.noMask = noMask;;
+            sel.curr.predicate = predicate;
+            sel.MOV(GenRegister::retype(GenRegister::next(dst), GEN_TYPE_F),
+                    GenRegister::f8grf(reg));
+          } else
+            sel.MOV(GenRegister::retype(GenRegister::next(dst), GEN_TYPE_F), GenRegister::acc());
         }
 
         sel.pop();
-- 
1.7.11.7



More information about the Beignet mailing list