[Beignet] [PATCH 1/2] GBE: fix uniform/scalar related bugs.
Song, Ruiling
ruiling.song at intel.com
Wed May 28 23:05:29 PDT 2014
It seems like hardware limitation for the stride setting.
Another suggested fix is deal with scalar byte/short hstride setting in GenEncoder::setDst(). That is change below logic.
if (dest.hstride == GEN_HORIZONTAL_STRIDE_0)
dest.hstride = GEN_HORIZONTAL_STRIDE_1;
For your fix, as you make GEN_WIDTH_1 stands for scalar register now, so we should replace all the check for
HORIZONTAL_STRIDE_0 using GEN_WIDTH_1, right? There are still some checks for hstride_0 in gen_register.hpp.
If it is possible, it is better we remove the hstride_0 check, only keep gen_width_1 check. That's more clear.
What do you think?
-----Original Message-----
From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Zhigang Gong
Sent: Thursday, May 29, 2014 9:30 AM
To: beignet at lists.freedesktop.org
Cc: Gong, Zhigang
Subject: [Beignet] [PATCH 1/2] GBE: fix uniform/scalar related bugs.
One major fix is that even a register is a scalar, when we move a scalar Dword to a scalar Byte, we have to set the hstride to 4, otherwise, some piglit case fails.
The following instruction may not take effect.
mov.sat(1) g127.4<1>:B g127.24<0,1,0>:D
We have to change it to
mov.sat(1) g127.4<4>:B g127.24<0,1,0>:D
To fix that, I change the hstride to 4 and keep the width at 1 for the unpacked_uw/ub.
And all the other scalar checking code needs to consider the case where the hstride is not zero but the width is 1.
And if all operands are scalar, we don't need to split an instruction which has simd16 byte vector.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/gen_encoder.cpp | 8 ++++----
backend/src/backend/gen_insn_selection.cpp | 29 ++++++++++++++++++++++++++--- backend/src/backend/gen_reg_allocation.cpp | 4 ++++
backend/src/backend/gen_register.hpp | 8 ++++----
4 files changed, 38 insertions(+), 11 deletions(-)
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 0091e81..6099745 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -60,7 +60,7 @@ namespace gbe
// Some helper functions to encode
//////////////////////////////////////////////////////////////////////////
INLINE bool isVectorOfBytes(GenRegister reg) {
- if (reg.hstride != GEN_HORIZONTAL_STRIDE_0 &&
+ if (reg.width != GEN_WIDTH_1 &&
(reg.type == GEN_TYPE_UB || reg.type == GEN_TYPE_B))
return true;
else
@@ -68,14 +68,14 @@ namespace gbe
}
INLINE bool needToSplitAlu1(GenEncoder *p, GenRegister dst, GenRegister src) {
- if (p->curr.execWidth != 16) return false;
+ if (p->curr.execWidth != 16 || src.width == GEN_WIDTH_1) return
+ false;
if (isVectorOfBytes(dst) == true) return true;
if (isVectorOfBytes(src) == true) return true;
return false;
}
INLINE bool needToSplitAlu2(GenEncoder *p, GenRegister dst, GenRegister src0, GenRegister src1) {
- if (p->curr.execWidth != 16) return false;
+ if (p->curr.execWidth != 16 || (src0.width == GEN_WIDTH_1 &&
+ src1.width == GEN_WIDTH_1)) return false;
if (isVectorOfBytes(dst) == true) return true;
if (isVectorOfBytes(src0) == true) return true;
if (isVectorOfBytes(src1) == true) return true; @@ -83,7 +83,7 @@ namespace gbe
}
INLINE bool needToSplitCmp(GenEncoder *p, GenRegister src0, GenRegister src1) {
- if (p->curr.execWidth != 16) return false;
+ if (p->curr.execWidth != 16 || (src0.width == GEN_WIDTH_1 &&
+ src1.width == GEN_WIDTH_1)) return false;
if (isVectorOfBytes(src0) == true) return true;
if (isVectorOfBytes(src1) == true) return true;
if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_F) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index cf0af9d..19921d4 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3076,6 +3076,13 @@ namespace gbe
narrowDst = 0;
}
+ sel.push();
+ if (sel.isScalarReg(insn.getDst(0)) == true) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+
for(int i = 0; i < narrowNum; i++, index++) {
GenRegister narrowReg, wideReg;
if(narrowDst) {
@@ -3120,6 +3127,7 @@ namespace gbe
} else
sel.MOV(xdst, xsrc);
}
+ sel.pop();
return true;
}
@@ -3154,7 +3162,14 @@ namespace gbe
} else if (opcode == OP_F32TO16) {
GenRegister unpacked;
unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
- sel.F32TO16(unpacked, src);
+ sel.push();
+ if (sel.isScalarReg(insn.getSrc(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+ sel.F32TO16(unpacked, src);
+ sel.pop();
sel.MOV(dst, unpacked);
} else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
GenRegister unpacked;
@@ -3172,8 +3187,16 @@ namespace gbe
tmp.type = GEN_TYPE_D;
sel.CONVI64_TO_I(tmp, src);
sel.MOV(unpacked, tmp);
- } else
- sel.MOV(unpacked, src);
+ } else {
+ sel.push();
+ if (sel.isScalarReg(insn.getSrc(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+ sel.MOV(unpacked, src);
+ sel.pop();
+ }
sel.MOV(dst, unpacked);
} else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
sel.CONVI64_TO_I(dst, src);
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index f642c2e..3d8b0b3 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -941,6 +941,10 @@ namespace gbe
|| ctx.reservedSpillRegs != 0)
this->expireGRF(interval);
tick++;
+ // For some scalar byte register, it may be used as a destination register
+ // and the source is a scalar Dword. If that is the case, the byte register
+ // must get 4byte alignment register offset.
+ alignment = (alignment + 3) & ~3;
while ((grfOffset = ctx.allocate(size, alignment)) == 0) {
const bool success = this->expireGRF(interval);
if (success == false) {
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 50a6dcd..d0b3afa 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -349,7 +349,7 @@ namespace gbe
static INLINE GenRegister QnVirtual(GenRegister reg, uint32_t quarter) {
GBE_ASSERT(reg.physical == 0);
- if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
+ if (reg.hstride == GEN_HORIZONTAL_STRIDE_0 || reg.width ==
+ GEN_WIDTH_1) // scalar register
return reg;
else {
reg.quarter = quarter;
@@ -359,7 +359,7 @@ namespace gbe
static INLINE GenRegister QnPhysical(GenRegister reg, uint32_t quarter) {
GBE_ASSERT(reg.physical);
- if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
+ if (reg.hstride == GEN_HORIZONTAL_STRIDE_0 || reg.width ==
+ GEN_WIDTH_1) // scalar register
return reg;
else {
const uint32_t typeSz = typeSize(reg.type); @@ -497,7 +497,7 @@ namespace gbe
GEN_TYPE_UW,
uniform ? GEN_VERTICAL_STRIDE_0 : GEN_VERTICAL_STRIDE_16,
uniform ? GEN_WIDTH_1 : GEN_WIDTH_8,
- uniform ? GEN_HORIZONTAL_STRIDE_0 : GEN_HORIZONTAL_STRIDE_2);
+ GEN_HORIZONTAL_STRIDE_2);
}
static INLINE GenRegister unpacked_ub(ir::Register reg, bool uniform = false) { @@ -506,7 +506,7 @@ namespace gbe
GEN_TYPE_UB,
uniform ? GEN_VERTICAL_STRIDE_0 : GEN_VERTICAL_STRIDE_32,
uniform ? GEN_WIDTH_1 : GEN_WIDTH_8,
- uniform ? GEN_HORIZONTAL_STRIDE_0 : GEN_HORIZONTAL_STRIDE_4);
+ GEN_HORIZONTAL_STRIDE_4);
}
static INLINE GenRegister imm(uint32_t type) {
--
1.8.3.2
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list