[Beignet] [PATCH] Backend: for BDW and later, according to BSpec there is no need to split CMP when src is DW or DF
rander
rander.wang at intel.com
Fri Feb 17 02:42:03 UTC 2017
Signed-off-by: rander <rander.wang at intel.com>
---
backend/src/backend/gen8_encoder.cpp | 130 +++++++++++++++++++++++++++++++++++
backend/src/backend/gen8_encoder.hpp | 1 +
backend/src/backend/gen_encoder.hpp | 2 +-
3 files changed, 132 insertions(+), 1 deletion(-)
diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index a33fbac..0b0f4ea 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -37,6 +37,7 @@ static const uint32_t untypedRWMask[] = {
namespace gbe
{
+ extern bool compactAlu2(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, uint32_t condition, bool split); // declaration only (body is not in this hunk); Gen8Encoder::CMP returns early when this yields true
extern bool compactAlu3(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, GenRegister src2);
void Gen8Encoder::setHeader(GenNativeInstruction *insn) {
Gen8NativeInstruction *gen8_insn = &insn->gen8_insn;
@@ -883,4 +884,133 @@ namespace gbe
msg_length,
response_length);
}
+
+ /* True when `reg` is not uniform (horizontal stride other than 0) and its
+    type is GEN_TYPE_B or GEN_TYPE_UB. */
+ INLINE bool isVectorOfBytes(GenRegister reg) {
+   const bool uniform = (reg.hstride == GEN_HORIZONTAL_STRIDE_0);
+   const bool byteTyped = (reg.type == GEN_TYPE_B || reg.type == GEN_TYPE_UB);
+   return !uniform && byteTyped;
+ }
+
+ /* True when `reg` is not uniform (horizontal stride other than 0) and its
+    type is GEN_TYPE_L or GEN_TYPE_UL. */
+ INLINE bool isVectorOfLongs(GenRegister reg) {
+   const bool uniform = (reg.hstride == GEN_HORIZONTAL_STRIDE_0);
+   const bool longTyped = (reg.type == GEN_TYPE_L || reg.type == GEN_TYPE_UL);
+   return !uniform && longTyped;
+ }
+
+ /* True when 16 elements of `reg`, laid out at its horizontal stride,
+    occupy more than 2 * GEN_REG_SIZE. A uniform register (stride 0)
+    never does. */
+ INLINE bool isCrossMoreThan2(GenRegister reg) {
+   if (reg.hstride == GEN_HORIZONTAL_STRIDE_0)
+     return false;
+   // Footprint of a 16-wide access: stride * element size * 16.
+   const uint32_t elemSize = typeSize(reg.type);
+   const uint32_t footprint = stride(reg.hstride) * elemSize * 16;
+   return footprint > GEN_REG_SIZE * 2;
+ }
+
+ INLINE bool isSrcDstDiffSpan(GenRegister dst, GenRegister src) { // true when dst and src cover a different number of GEN_REG_SIZE units (one D<-W case excepted below)
+ if (src.hstride == GEN_HORIZONTAL_STRIDE_0) return false; // uniform src: nothing to compare
+ // A non-uniform src must come with a non-uniform dst.
+ GBE_ASSERT(dst.hstride != GEN_HORIZONTAL_STRIDE_0 && "dst register is uniform but src is not.");
+ // dst extent for 16 elements. NOTE(review): units presumed to be bytes -- confirm against typeSize() and GEN_REG_SIZE.
+ uint32_t typeSz = typeSize(dst.type);
+ uint32_t horizontal = stride(dst.hstride);
+ uint32_t spans = (dst.subnr / (horizontal * typeSz)) * (horizontal * typeSz) + horizontal * typeSz * 16; // subnr rounded down to a multiple of the element pitch, plus 16 pitches
+ uint32_t dstSpan = spans / GEN_REG_SIZE;
+ dstSpan = dstSpan + (spans % GEN_REG_SIZE == 0 ? 0 : 1); // round up to whole GEN_REG_SIZE units
+ if (dstSpan < 2) return false; // dst stays within a single unit: never reported as a mismatch
+ // Same extent computation for src, reusing the locals.
+ typeSz = typeSize(src.type);
+ horizontal = stride(src.hstride);
+ spans = (src.subnr / (horizontal * typeSz)) * (horizontal * typeSz) + horizontal * typeSz * 16;
+ uint32_t srcSpan = (horizontal * typeSz * 16) / GEN_REG_SIZE; // NOTE(review): quotient ignores the subnr offset, unlike dstSpan above -- confirm this is intended
+ srcSpan = srcSpan + (spans % GEN_REG_SIZE == 0 ? 0 : 1); // the remainder test does use the offset extent
+ // Past this point only srcSpan <= 2 and dstSpan == 2 are expected.
+ GBE_ASSERT(srcSpan <= 2);
+ GBE_ASSERT(dstSpan == 2);
+
+ if (srcSpan == dstSpan) return false; // equal spans: no mismatch
+
+ /* Special case: dst is D/UD and src is W/UW, both at subnr 0, with dst
+ spanning 2 and src spanning 1, e.g.
+ mov (16) r10.0<1>:d r12<8;8,1>:w
+ This combination is allowed, so it is not reported as a mismatch. */
+ if ((dst.type == GEN_TYPE_UD || dst.type == GEN_TYPE_D)
+ && (src.type == GEN_TYPE_UW || src.type == GEN_TYPE_W)
+ && dstSpan == 2 && srcSpan == 1
+ && dst.subnr == 0 && src.subnr == 0) return false;
+
+ return true; // spans differ and the special case does not apply
+ }
+
+ /* Returns true when a CMP should be emitted as two 8-wide halves (see
+    Gen8Encoder::CMP); anything other than 16-wide execution never splits. */
+ INLINE bool needToSplitCmp(GenEncoder *p, GenRegister src0, GenRegister src1, GenRegister dst) {
+   if (p->curr.execWidth != 16)
+     return false;
+   // The destination on its own can force the split.
+   if (isVectorOfLongs(dst) || isCrossMoreThan2(dst))
+     return true;
+   // With two uniform sources none of the remaining checks apply.
+   const bool src0Uniform = (src0.hstride == GEN_HORIZONTAL_STRIDE_0);
+   const bool src1Uniform = (src1.hstride == GEN_HORIZONTAL_STRIDE_0);
+   if (src0Uniform && src1Uniform)
+     return false;
+   // Per-source checks; the span comparison (which contains asserts) stays last.
+   if (isVectorOfBytes(src0) || isVectorOfBytes(src1))
+     return true;
+   if (isVectorOfLongs(src0) || isVectorOfLongs(src1))
+     return true;
+   if (isCrossMoreThan2(src0) || isCrossMoreThan2(src1))
+     return true;
+   return isSrcDstDiffSpan(dst, src0) || isSrcDstDiffSpan(dst, src1);
+ }
+
+ /* CMP for BDW and later: DW/DF sources no longer force a split. The compare
+    is emitted once, or as two 8-wide instructions (Q1 then Q2) when
+    needToSplitCmp() asks for it. */
+ void Gen8Encoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst) {
+   if (needToSplitCmp(this, src0, src1, dst)) {
+     const bool nullDst = GenRegister::isNull(dst);
+     // Lower half: quarter Q1 on the operands as given.
+     GenNativeInstruction *lower = this->next(GEN_OPCODE_CMP);
+     this->setHeader(lower);
+     lower->header.destreg_or_condmod = conditional;
+     lower->header.execution_size = GEN_WIDTH_8;
+     lower->header.quarter_control = GEN_COMPRESSION_Q1;
+     if (nullDst)
+       lower->header.thread_control = GEN_THREAD_SWITCH;
+     this->setDst(lower, dst);
+     this->setSrc0(lower, src0);
+     this->setSrc1(lower, src1);
+
+     // Upper half: quarter Q2 on GenRegister::Qn(reg, 1) of every operand.
+     GenNativeInstruction *upper = this->next(GEN_OPCODE_CMP);
+     this->setHeader(upper);
+     upper->header.destreg_or_condmod = conditional;
+     upper->header.execution_size = GEN_WIDTH_8;
+     upper->header.quarter_control = GEN_COMPRESSION_Q2;
+     if (nullDst)
+       upper->header.thread_control = GEN_THREAD_SWITCH;
+     this->setDst(upper, GenRegister::Qn(dst, 1));
+     this->setSrc0(upper, GenRegister::Qn(src0, 1));
+     this->setSrc1(upper, GenRegister::Qn(src1, 1));
+     return;
+   }
+   // Unsplit: a null dst skips compactAlu2(); if compactAlu2() returns true we are done.
+   if (!GenRegister::isNull(dst) && compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false))
+     return;
+   GenNativeInstruction *insn = this->next(GEN_OPCODE_CMP);
+   this->setHeader(insn);
+   insn->header.destreg_or_condmod = conditional;
+   if (GenRegister::isNull(dst)) insn->header.thread_control = GEN_THREAD_SWITCH;
+   this->setDst(insn, dst);
+   this->setSrc0(insn, src0);
+   this->setSrc1(insn, src1);
+ }
} /* End of the name space. */
diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp
index fa62a8d..31ad5d6 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -83,6 +83,7 @@ namespace gbe
virtual void OBREADA64(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize);
/*! A64 OBlock write */
virtual void OBWRITEA64(GenRegister header, uint32_t bti, uint32_t elemSize);
+ virtual void CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst = GenRegister::null()); /*!< Compare instructions; Gen8 implementation of GenEncoder::CMP */
};
}
#endif /* __GBE_GEN8_ENCODER_HPP__ */
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 3e45c81..14d456a 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -161,7 +161,7 @@ namespace gbe
/*! BRD indexed instruction */
void BRD(GenRegister src);
/*! Compare instructions */
- void CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst = GenRegister::null());
+ virtual void CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst = GenRegister::null()); // virtual so that Gen8Encoder can supply its own CMP
/*! Select with embedded compare (like sel.le ...) */
void SEL_CMP(uint32_t conditional, GenRegister dst, GenRegister src0, GenRegister src1);
/*! EOT is used to finish GPGPU threads */
--
2.7.4
More information about the Beignet
mailing list