[Beignet] [PATCH 1/2] GBE: fix uniform/scalar related bugs.
Zhigang Gong
zhigang.gong at linux.intel.com
Wed May 28 22:45:04 PDT 2014
On Thu, May 29, 2014 at 06:05:29AM +0000, Song, Ruiling wrote:
> It seems like hardware limitation for the stride setting.
> Another suggested fix is deal with scalar byte/short hstride setting in GenEncoder::setDst(). That is change below logic.
> if (dest.hstride == GEN_HORIZONTAL_STRIDE_0)
> dest.hstride = GEN_HORIZONTAL_STRIDE_1;
The above logic should not be changed as it just prevent to set a destination register to have a GEN_HORIZONTAL_STRIDE_0.
If you change it to dest.width == GEN_WIDTH_1, then the hstride will be changed back to 1 and it will trigger the same
bug. Right?
>
> For your fix, as you make GEN_WIDTH_1 stands for scalar register now, so we should replace all the check for
> HORIZONTAL_STIRDE_0 using GEN_WIDTH_1, right? There are still some checks for hstride_0 in gen_register.hpp.
> If it is possible, it is better we remove the hstride_0 check, only keep gen_width_1 check. That's more clear.
Actually, the only affected case is for temporary scalar dst register which isused to upacked byte/word from a dword register.
It will not affect any other cases. I just changed the possible code path which may hit this special case. And haven't changed
the other cases. The other cases which check HORIZONTAL_STRIDE_0, such as:
suboffset,splitReg,top_half,h2.
They will never be used for a temporary scalar dst register for pack/unpack.
What's your opinion?
>
> What do you think?
>
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Zhigang Gong
> Sent: Thursday, May 29, 2014 9:30 AM
> To: beignet at lists.freedesktop.org
> Cc: Gong, Zhigang
> Subject: [Beignet] [PATCH 1/2] GBE: fix uniform/scalar related bugs.
>
> One major fix is that even a register is a scalar, when we move a scalar Dword to a scalar Byte, we have to set the hstride to 4, otherwise, some piglit case fails.
>
> The following instruction may doesn't take effect.
> mov.sat(1) g127.4<1>:B g127.24<0,1,0>:D We have to change it to
> mov.sat(1) g127.4<4>:B g127.24<0,1,0>:D
>
> To fix that, I change the hstrid to 4 and remainds the width to 1 for the unpacked_uw/ub.
>
> And all the other scalar checking code, need to consider the case that hstride is not zero bug the width is 1.
>
> And if all operands are scalar, we don't need to split an instruction which has simd16 byte vector.
>
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
> backend/src/backend/gen_encoder.cpp | 8 ++++----
> backend/src/backend/gen_insn_selection.cpp | 29 ++++++++++++++++++++++++++--- backend/src/backend/gen_reg_allocation.cpp | 4 ++++
> backend/src/backend/gen_register.hpp | 8 ++++----
> 4 files changed, 38 insertions(+), 11 deletions(-)
>
> diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
> index 0091e81..6099745 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -60,7 +60,7 @@ namespace gbe
> // Some helper functions to encode
> //////////////////////////////////////////////////////////////////////////
> INLINE bool isVectorOfBytes(GenRegister reg) {
> - if (reg.hstride != GEN_HORIZONTAL_STRIDE_0 &&
> + if (reg.width != GEN_WIDTH_1 &&
> (reg.type == GEN_TYPE_UB || reg.type == GEN_TYPE_B))
> return true;
> else
> @@ -68,14 +68,14 @@ namespace gbe
> }
>
> INLINE bool needToSplitAlu1(GenEncoder *p, GenRegister dst, GenRegister src) {
> - if (p->curr.execWidth != 16) return false;
> + if (p->curr.execWidth != 16 || src.width == GEN_WIDTH_1) return
> + false;
> if (isVectorOfBytes(dst) == true) return true;
> if (isVectorOfBytes(src) == true) return true;
> return false;
> }
>
> INLINE bool needToSplitAlu2(GenEncoder *p, GenRegister dst, GenRegister src0, GenRegister src1) {
> - if (p->curr.execWidth != 16) return false;
> + if (p->curr.execWidth != 16 || (src0.width == GEN_WIDTH_1 &&
> + src1.width == GEN_WIDTH_1)) return false;
> if (isVectorOfBytes(dst) == true) return true;
> if (isVectorOfBytes(src0) == true) return true;
> if (isVectorOfBytes(src1) == true) return true; @@ -83,7 +83,7 @@ namespace gbe
> }
>
> INLINE bool needToSplitCmp(GenEncoder *p, GenRegister src0, GenRegister src1) {
> - if (p->curr.execWidth != 16) return false;
> + if (p->curr.execWidth != 16 || (src0.width == GEN_WIDTH_1 &&
> + src1.width == GEN_WIDTH_1)) return false;
> if (isVectorOfBytes(src0) == true) return true;
> if (isVectorOfBytes(src1) == true) return true;
> if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_F) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index cf0af9d..19921d4 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -3076,6 +3076,13 @@ namespace gbe
> narrowDst = 0;
> }
>
> + sel.push();
> + if (sel.isScalarReg(insn.getDst(0)) == true) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> +
> for(int i = 0; i < narrowNum; i++, index++) {
> GenRegister narrowReg, wideReg;
> if(narrowDst) {
> @@ -3120,6 +3127,7 @@ namespace gbe
> } else
> sel.MOV(xdst, xsrc);
> }
> + sel.pop();
>
> return true;
> }
> @@ -3154,7 +3162,14 @@ namespace gbe
> } else if (opcode == OP_F32TO16) {
> GenRegister unpacked;
> unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
> - sel.F32TO16(unpacked, src);
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + sel.F32TO16(unpacked, src);
> + sel.pop();
> sel.MOV(dst, unpacked);
> } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
> GenRegister unpacked;
> @@ -3172,8 +3187,16 @@ namespace gbe
> tmp.type = GEN_TYPE_D;
> sel.CONVI64_TO_I(tmp, src);
> sel.MOV(unpacked, tmp);
> - } else
> - sel.MOV(unpacked, src);
> + } else {
> + sel.push();
> + if (sel.isScalarReg(insn.getSrc(0))) {
> + sel.curr.execWidth = 1;
> + sel.curr.predicate = GEN_PREDICATE_NONE;
> + sel.curr.noMask = 1;
> + }
> + sel.MOV(unpacked, src);
> + sel.pop();
> + }
> sel.MOV(dst, unpacked);
> } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
> sel.CONVI64_TO_I(dst, src);
> diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
> index f642c2e..3d8b0b3 100644
> --- a/backend/src/backend/gen_reg_allocation.cpp
> +++ b/backend/src/backend/gen_reg_allocation.cpp
> @@ -941,6 +941,10 @@ namespace gbe
> || ctx.reservedSpillRegs != 0)
> this->expireGRF(interval);
> tick++;
> + // For some scalar byte register, it may be used as a destination register
> + // and the source is a scalar Dword. If that is the case, the byte register
> + // must get 4byte alignment register offset.
> + alignment = (alignment + 3) & ~3;
> while ((grfOffset = ctx.allocate(size, alignment)) == 0) {
> const bool success = this->expireGRF(interval);
> if (success == false) {
> diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
> index 50a6dcd..d0b3afa 100644
> --- a/backend/src/backend/gen_register.hpp
> +++ b/backend/src/backend/gen_register.hpp
> @@ -349,7 +349,7 @@ namespace gbe
>
> static INLINE GenRegister QnVirtual(GenRegister reg, uint32_t quarter) {
> GBE_ASSERT(reg.physical == 0);
> - if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
> + if (reg.hstride == GEN_HORIZONTAL_STRIDE_0 || reg.width ==
> + GEN_WIDTH_1) // scalar register
> return reg;
> else {
> reg.quarter = quarter;
> @@ -359,7 +359,7 @@ namespace gbe
>
> static INLINE GenRegister QnPhysical(GenRegister reg, uint32_t quarter) {
> GBE_ASSERT(reg.physical);
> - if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) // scalar register
> + if (reg.hstride == GEN_HORIZONTAL_STRIDE_0 || reg.width ==
> + GEN_WIDTH_1) // scalar register
> return reg;
> else {
> const uint32_t typeSz = typeSize(reg.type); @@ -497,7 +497,7 @@ namespace gbe
> GEN_TYPE_UW,
> uniform ? GEN_VERTICAL_STRIDE_0 : GEN_VERTICAL_STRIDE_16,
> uniform ? GEN_WIDTH_1 : GEN_WIDTH_8,
> - uniform ? GEN_HORIZONTAL_STRIDE_0 : GEN_HORIZONTAL_STRIDE_2);
> + GEN_HORIZONTAL_STRIDE_2);
> }
>
> static INLINE GenRegister unpacked_ub(ir::Register reg, bool uniform = false) { @@ -506,7 +506,7 @@ namespace gbe
> GEN_TYPE_UB,
> uniform ? GEN_VERTICAL_STRIDE_0 : GEN_VERTICAL_STRIDE_32,
> uniform ? GEN_WIDTH_1 : GEN_WIDTH_8,
> - uniform ? GEN_HORIZONTAL_STRIDE_0 : GEN_HORIZONTAL_STRIDE_4);
> + GEN_HORIZONTAL_STRIDE_4);
> }
>
> static INLINE GenRegister imm(uint32_t type) {
> --
> 1.8.3.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list