[Beignet] [PATCH V2] GBE: fix uniform/scalar related bugs.
Zhigang Gong
zhigang.gong at intel.com
Wed May 28 23:31:48 PDT 2014
One major fix is that even a register is a scalar, when
we move a scalar Dword to a scalar Byte, we have to set
the hstride to 4, otherwise, it breaks the following
register restication:
B. When the Execution Data Type is wider than the destination data type,
the destination must be aligned as required by the wider execution data
type and specify a HorzStride equal to the ratio in sizes of the two data
types. For example, a mov with a D source and B destination must use a
4-byte aligned destination and a Dst.HorzStride of 4.
The following instruction may doesn't take effect.
mov.sat(1) g127.4<1>:B g126<0,1,0>:D
We have to change it to
mov.sat(1) g127.4<4>:B g126<0,1,0>:D
v2: keep the instruction selection stage unchanged, we fix this restircation
in setDst only.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/gen_encoder.cpp | 23 +++++++++++++++++------
backend/src/backend/gen_insn_selection.cpp | 29 ++++++++++++++++++++++++++---
backend/src/backend/gen_reg_allocation.cpp | 4 ++++
3 files changed, 47 insertions(+), 9 deletions(-)
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 0091e81..ed2fd32 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -68,14 +68,17 @@ namespace gbe
}
INLINE bool needToSplitAlu1(GenEncoder *p, GenRegister dst, GenRegister src) {
- if (p->curr.execWidth != 16) return false;
+ if (p->curr.execWidth != 16 || src.hstride == GEN_HORIZONTAL_STRIDE_0) return false;
if (isVectorOfBytes(dst) == true) return true;
if (isVectorOfBytes(src) == true) return true;
return false;
}
INLINE bool needToSplitAlu2(GenEncoder *p, GenRegister dst, GenRegister src0, GenRegister src1) {
- if (p->curr.execWidth != 16) return false;
+ if (p->curr.execWidth != 16 ||
+ (src0.hstride == GEN_HORIZONTAL_STRIDE_0 &&
+ src1.hstride == GEN_HORIZONTAL_STRIDE_0))
+ return false;
if (isVectorOfBytes(dst) == true) return true;
if (isVectorOfBytes(src0) == true) return true;
if (isVectorOfBytes(src1) == true) return true;
@@ -83,7 +86,10 @@ namespace gbe
}
INLINE bool needToSplitCmp(GenEncoder *p, GenRegister src0, GenRegister src1) {
- if (p->curr.execWidth != 16) return false;
+ if (p->curr.execWidth != 16 ||
+ (src0.hstride == GEN_HORIZONTAL_STRIDE_0 &&
+ src1.hstride == GEN_HORIZONTAL_STRIDE_0))
+ return false;
if (isVectorOfBytes(src0) == true) return true;
if (isVectorOfBytes(src1) == true) return true;
if (src0.type == GEN_TYPE_D || src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_F)
@@ -93,7 +99,6 @@ namespace gbe
return false;
}
-
void GenEncoder::setMessageDescriptor(GenNativeInstruction *inst, enum GenMessageTarget sfid,
unsigned msg_length, unsigned response_length,
bool header_present, bool end_of_thread)
@@ -268,8 +273,14 @@ namespace gbe
insn->bits1.da1.dest_address_mode = dest.address_mode;
insn->bits1.da1.dest_reg_nr = dest.nr;
insn->bits1.da1.dest_subreg_nr = dest.subnr;
- if (dest.hstride == GEN_HORIZONTAL_STRIDE_0)
- dest.hstride = GEN_HORIZONTAL_STRIDE_1;
+ if (dest.hstride == GEN_HORIZONTAL_STRIDE_0) {
+ if (dest.type == GEN_TYPE_UB || dest.type == GEN_TYPE_B)
+ dest.hstride = GEN_HORIZONTAL_STRIDE_4;
+ else if (dest.type == GEN_TYPE_UW || dest.type == GEN_TYPE_W)
+ dest.hstride = GEN_HORIZONTAL_STRIDE_2;
+ else
+ dest.hstride = GEN_HORIZONTAL_STRIDE_1;
+ }
insn->bits1.da1.dest_horiz_stride = dest.hstride;
}
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index cf0af9d..19921d4 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3076,6 +3076,13 @@ namespace gbe
narrowDst = 0;
}
+ sel.push();
+ if (sel.isScalarReg(insn.getDst(0)) == true) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+
for(int i = 0; i < narrowNum; i++, index++) {
GenRegister narrowReg, wideReg;
if(narrowDst) {
@@ -3120,6 +3127,7 @@ namespace gbe
} else
sel.MOV(xdst, xsrc);
}
+ sel.pop();
return true;
}
@@ -3154,7 +3162,14 @@ namespace gbe
} else if (opcode == OP_F32TO16) {
GenRegister unpacked;
unpacked = sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))));
- sel.F32TO16(unpacked, src);
+ sel.push();
+ if (sel.isScalarReg(insn.getSrc(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+ sel.F32TO16(unpacked, src);
+ sel.pop();
sel.MOV(dst, unpacked);
} else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
GenRegister unpacked;
@@ -3172,8 +3187,16 @@ namespace gbe
tmp.type = GEN_TYPE_D;
sel.CONVI64_TO_I(tmp, src);
sel.MOV(unpacked, tmp);
- } else
- sel.MOV(unpacked, src);
+ } else {
+ sel.push();
+ if (sel.isScalarReg(insn.getSrc(0))) {
+ sel.curr.execWidth = 1;
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ }
+ sel.MOV(unpacked, src);
+ sel.pop();
+ }
sel.MOV(dst, unpacked);
} else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && srcFamily == FAMILY_QWORD) {
sel.CONVI64_TO_I(dst, src);
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index f642c2e..3d8b0b3 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -941,6 +941,10 @@ namespace gbe
|| ctx.reservedSpillRegs != 0)
this->expireGRF(interval);
tick++;
+ // For some scalar byte register, it may be used as a destination register
+ // and the source is a scalar Dword. If that is the case, the byte register
+ // must get 4byte alignment register offset.
+ alignment = (alignment + 3) & ~3;
while ((grfOffset = ctx.allocate(size, alignment)) == 0) {
const bool success = this->expireGRF(interval);
if (success == false) {
--
1.8.3.2
More information about the Beignet
mailing list