[Beignet] [PATCH 1/2] GBE: fix uniform/scalar related bugs.

Wed May 28 18:30:29 PDT 2014

One major fix is that even a register is a scalar, when
we move a scalar Dword to a scalar Byte, we have to set
the hstride to 4, otherwise, some piglit case fails.

The following instruction may doesn't take effect.
mov.sat(1)  g127.4<1>:B  g127.24<0,1,0>:D
We have to change it to
mov.sat(1)  g127.4<4>:B  g127.24<0,1,0>:D

To fix that, I change the hstrid to 4 and remainds the
width to 1 for the unpacked_uw/ub.

And all the other scalar checking code, need to consider
the case that hstride is not zero bug the width is 1.

And if all operands are scalar, we don't need to split
an instruction which has simd16 byte vector.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/gen_encoder.cpp        |  8 ++++----
 backend/src/backend/gen_insn_selection.cpp | 29 ++++++++++++++++++++++++++---
 backend/src/backend/gen_reg_allocation.cpp |  4 ++++
 backend/src/backend/gen_register.hpp       |  8 ++++----
 4 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 0091e81..6099745 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -60,7 +60,7 @@ namespace gbe
   // Some helper functions to encode
   //////////////////////////////////////////////////////////////////////////
   INLINE bool isVectorOfBytes(GenRegister reg) {
-    if (reg.hstride != GEN_HORIZONTAL_STRIDE_0 &&
+    if (reg.width != GEN_WIDTH_1 &&
         (reg.type == GEN_TYPE_UB || reg.type == GEN_TYPE_B))
       return true;
     else
@@ -68,14 +68,14 @@ namespace gbe
   }
 
   INLINE bool needToSplitAlu1(GenEncoder *p, GenRegister dst, GenRegister src) {
-    if (p->curr.execWidth != 16) return false;
+    if (p->curr.execWidth != 16 || src.width == GEN_WIDTH_1) return false;
     if (isVectorOfBytes(dst) == true) return true;
     if (isVectorOfBytes(src) == true) return true;
     return false;
   }
 
   INLINE bool needToSplitAlu2(GenEncoder *p, GenRegister dst, GenRegister src0, GenRegister src1) {
-    if (p->curr.execWidth != 16) return false;
+    if (p->curr.execWidth != 16 || (src0.width == GEN_WIDTH_1 && src1.width == GEN_WIDTH_1)) return false;
     if (isVectorOfBytes(dst) == true) return true;
     if (isVectorOfBytes(src0) == true) return true;
     if (isVectorOfBytes(src1) == true) return true;
@@ -83,7 +83,7 @@ namespace gbe
   }
 
   INLINE bool needToSplitCmp(GenEncoder *p, GenRegister src0, GenRegister src1) {
-    if (p->curr.execWidth != 16) return false;
+    if (p->curr.execWidth != 16 || (src0.width == GEN_WIDTH_1 && src1.width == GEN_WIDTH_1)) return false;
     if (isVectorOfBytes(src0) == true) return true;
     if (isVectorOfBytes(src1) == true) return true;
     if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_F)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index cf0af9d..19921d4 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3076,6 +3076,13 @@ namespace gbe
         narrowDst = 0;
       }
 
+      sel.push();
+      if (sel.isScalarReg(insn.getDst(0)) == true) {
+        sel.curr.execWidth = 1;
+        sel.curr.predicate = GEN_PREDICATE_NONE;
+        sel.curr.noMask = 1;
+      }
+
       for(int i = 0; i < narrowNum; i++, index++) {
         GenRegister narrowReg, wideReg;
         if(narrowDst) {
@@ -3120,6 +3127,7 @@ namespace gbe
         } else
           sel.MOV(xdst, xsrc);
       }
+      sel.pop();
 
       return true;
     }
@@ -3154,7 +3162,14 @@ namespace gbe
       } else if (opcode == OP_F32TO16) {
         GenRegister unpacked;
         unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
-        sel.F32TO16(unpacked, src);
+        sel.push();
+          if (sel.isScalarReg(insn.getSrc(0))) {
+            sel.curr.execWidth = 1;
+            sel.curr.predicate = GEN_PREDICATE_NONE;
+            sel.curr.noMask = 1;
+          }
+          sel.F32TO16(unpacked, src);
+        sel.pop();
         sel.MOV(dst, unpacked);
       } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
         GenRegister unpacked;
@@ -3172,8 +3187,16 @@ namespace gbe
           tmp.type = GEN_TYPE_D;
           sel.CONVI64_TO_I(tmp, src);
           sel.MOV(unpacked, tmp);
-        } else
-          sel.MOV(unpacked, src);
+        } else {
+          sel.push();
+            if (sel.isScalarReg(insn.getSrc(0))) {
+              sel.curr.execWidth = 1;
+              sel.curr.predicate = GEN_PREDICATE_NONE;
+              sel.curr.noMask = 1;
+            }
+            sel.MOV(unpacked, src);
+          sel.pop();
+        }
         sel.MOV(dst, unpacked);
       } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
         sel.CONVI64_TO_I(dst, src);
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index f642c2e..3d8b0b3 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -941,6 +941,10 @@ namespace gbe
         || ctx.reservedSpillRegs != 0)
       this->expireGRF(interval);
     tick++;
+    // For some scalar byte register, it may be used as a destination register
+    // and the source is a scalar Dword. If that is the case, the byte register
+    // must get 4byte alignment register offset.
+    alignment = (alignment + 3) & ~3;
     while ((grfOffset = ctx.allocate(size, alignment)) == 0) {
       const bool success = this->expireGRF(interval);
       if (success == false) {
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 50a6dcd..d0b3afa 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -349,7 +349,7 @@ namespace gbe
 
     static INLINE GenRegister QnVirtual(GenRegister reg, uint32_t quarter) {
       GBE_ASSERT(reg.physical == 0);
-      if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
+      if (reg.hstride == GEN_HORIZONTAL_STRIDE_0 || reg.width == GEN_WIDTH_1) // scalar register
         return reg;
       else {
         reg.quarter = quarter;
@@ -359,7 +359,7 @@ namespace gbe
 
     static INLINE GenRegister QnPhysical(GenRegister reg, uint32_t quarter) {
       GBE_ASSERT(reg.physical);
-      if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
+      if (reg.hstride == GEN_HORIZONTAL_STRIDE_0 || reg.width == GEN_WIDTH_1) // scalar register
         return reg;
       else {
         const uint32_t typeSz = typeSize(reg.type);
@@ -497,7 +497,7 @@ namespace gbe
                            GEN_TYPE_UW,
                            uniform ? GEN_VERTICAL_STRIDE_0 : GEN_VERTICAL_STRIDE_16,
                            uniform ? GEN_WIDTH_1 : GEN_WIDTH_8,
-                           uniform ? GEN_HORIZONTAL_STRIDE_0 : GEN_HORIZONTAL_STRIDE_2);
+                           GEN_HORIZONTAL_STRIDE_2);
     }
 
     static INLINE GenRegister unpacked_ub(ir::Register reg, bool uniform = false) {
@@ -506,7 +506,7 @@ namespace gbe
                          GEN_TYPE_UB,
                          uniform ? GEN_VERTICAL_STRIDE_0 : GEN_VERTICAL_STRIDE_32,
                          uniform ? GEN_WIDTH_1 : GEN_WIDTH_8,
-                         uniform ? GEN_HORIZONTAL_STRIDE_0 : GEN_HORIZONTAL_STRIDE_4);
+                         GEN_HORIZONTAL_STRIDE_4);
     }
 
     static INLINE GenRegister imm(uint32_t type) {
-- 
1.8.3.2