[Beignet] [PATCH] Backend: for BDW and after, According to BSpec no need to split CMP when src is DW DF

rander rander.wang at intel.com
Fri Feb 17 02:42:03 UTC 2017


Signed-off-by: rander <rander.wang at intel.com>
---
 backend/src/backend/gen8_encoder.cpp | 130 +++++++++++++++++++++++++++++++++++
 backend/src/backend/gen8_encoder.hpp |   1 +
 backend/src/backend/gen_encoder.hpp  |   2 +-
 3 files changed, 132 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp
index a33fbac..0b0f4ea 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -37,6 +37,7 @@ static const uint32_t untypedRWMask[] = {
 
 namespace gbe
 {
+  extern bool compactAlu2(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, uint32_t condition, bool split);
   extern bool compactAlu3(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, GenRegister src2);
   void Gen8Encoder::setHeader(GenNativeInstruction *insn) {
     Gen8NativeInstruction *gen8_insn = &insn->gen8_insn;
@@ -883,4 +884,133 @@ namespace gbe
                    msg_length,
                    response_length);
    }
+
+  INLINE bool isVectorOfBytes(GenRegister reg) {
+    /* A stride-0 region is never reported as a vector, whatever its type. */
+    if (reg.hstride == GEN_HORIZONTAL_STRIDE_0)
+      return false;
+    /* Otherwise the element type alone decides: GEN_TYPE_UB or GEN_TYPE_B. */
+    return reg.type == GEN_TYPE_UB || reg.type == GEN_TYPE_B;
+  }
+
+  INLINE bool isVectorOfLongs(GenRegister reg) {
+    /* Both conditions must hold: the horizontal stride is not 0 and the
+       element type is GEN_TYPE_UL or GEN_TYPE_L. */
+    const bool strided = reg.hstride != GEN_HORIZONTAL_STRIDE_0;
+    const bool longType = reg.type == GEN_TYPE_UL || reg.type == GEN_TYPE_L;
+    return strided && longType;
+  }
+
+  INLINE bool isCrossMoreThan2(GenRegister reg) {
+    /* Stride 0: the answer is always false, and typeSize()/stride() are
+       not consulted. */
+    if (reg.hstride == GEN_HORIZONTAL_STRIDE_0) return false;
+
+    /* Footprint of 16 elements: stride * element size * 16. */
+    const uint32_t elemSize = typeSize(reg.type);
+    const uint32_t elemStride = stride(reg.hstride);
+    const uint32_t footprint = elemStride * elemSize * 16;
+    return footprint > GEN_REG_SIZE * 2;
+  }
+
+  INLINE bool isSrcDstDiffSpan(GenRegister dst, GenRegister src) {
+    if (src.hstride == GEN_HORIZONTAL_STRIDE_0) return false;
+    /* src is strided from here on: compare how many GEN_REG_SIZE units dst and src span. */
+    GBE_ASSERT(dst.hstride != GEN_HORIZONTAL_STRIDE_0 && "dst register is uniform but src is not.");
+    /* dst: spans = subnr rounded down to an element slot + 16 slots; dstSpan = ceil(spans / GEN_REG_SIZE). */
+    uint32_t typeSz = typeSize(dst.type);
+    uint32_t horizontal = stride(dst.hstride);
+    uint32_t spans = (dst.subnr / (horizontal * typeSz)) * (horizontal * typeSz)  + horizontal * typeSz * 16;
+    uint32_t dstSpan = spans / GEN_REG_SIZE;
+    dstSpan = dstSpan + (spans % GEN_REG_SIZE == 0 ? 0 : 1);
+    if (dstSpan < 2) return false;
+    /* src: same 'spans', but the quotient uses the 16-slot size alone - NOTE(review): confirm this asymmetry is intended. */
+    typeSz = typeSize(src.type);
+    horizontal = stride(src.hstride);
+    spans = (src.subnr / (horizontal * typeSz)) * (horizontal * typeSz)  + horizontal * typeSz * 16;
+    uint32_t srcSpan = (horizontal * typeSz * 16) / GEN_REG_SIZE;
+    srcSpan = srcSpan + (spans % GEN_REG_SIZE == 0 ? 0 : 1);
+    /* Expected invariants; the dstSpan < 2 case has already returned above. */
+    GBE_ASSERT(srcSpan <= 2);
+    GBE_ASSERT(dstSpan == 2);
+    /* Same span count on both sides: not a mismatch. */
+    if (srcSpan == dstSpan) return false;
+
+    /* Special case: dst is D/UD and src is W/UW, both at sub-register 0,
+       with dst spanning 2 and src spanning 1, e.g.
+         mov (16) r10.0<1>:d r12<8;8,1>:w
+       This combination is allowed, so it is not reported as a mismatch. */
+    if ((dst.type == GEN_TYPE_UD || dst.type == GEN_TYPE_D)
+          && (src.type == GEN_TYPE_UW || src.type == GEN_TYPE_W)
+          && dstSpan == 2 && srcSpan == 1
+          && dst.subnr == 0 && src.subnr == 0) return false;
+
+    return true;
+  }
+
+    INLINE bool needToSplitCmp(GenEncoder *p, GenRegister src0, GenRegister src1, GenRegister dst) {
+      /* Only an execution width of 16 is ever considered for splitting. */
+      if (p->curr.execWidth != 16)
+        return false;
+      /* Destination-only reasons, checked before the sources are looked at. */
+      if (isVectorOfLongs(dst) || isCrossMoreThan2(dst))
+        return true;
+      /* Two stride-0 sources: none of the source-side checks below is run. */
+      const bool src0Uniform = src0.hstride == GEN_HORIZONTAL_STRIDE_0;
+      const bool src1Uniform = src1.hstride == GEN_HORIZONTAL_STRIDE_0;
+      if (src0Uniform && src1Uniform)
+        return false;
+      /* Source-side reasons, in order: byte vectors, long vectors, regions
+         exceeding two GEN_REG_SIZE, then a span mismatch against dst. */
+      if (isVectorOfBytes(src0) || isVectorOfBytes(src1))
+        return true;
+      if (isVectorOfLongs(src0) || isVectorOfLongs(src1))
+        return true;
+      if (isCrossMoreThan2(src0) || isCrossMoreThan2(src1))
+        return true;
+      return isSrcDstDiffSpan(dst, src0) || isSrcDstDiffSpan(dst, src1);
+    }
+
+    /* For BDW and later there is no need to split CMP when src is DW or DF. */
+    void Gen8Encoder::CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst) {
+      const bool nullDst = GenRegister::isNull(dst);
+      if (needToSplitCmp(this, src0, src1, dst)) {
+        /* Split form, first quarter: a GEN_WIDTH_8 CMP on the operands as given. */
+        GenNativeInstruction *lower = this->next(GEN_OPCODE_CMP);
+        this->setHeader(lower);
+        if (nullDst)
+          lower->header.thread_control = GEN_THREAD_SWITCH;
+        lower->header.quarter_control = GEN_COMPRESSION_Q1;
+        lower->header.execution_size = GEN_WIDTH_8;
+        lower->header.destreg_or_condmod = conditional;
+        this->setDst(lower, dst);
+        this->setSrc0(lower, src0);
+        this->setSrc1(lower, src1);
+
+        /* Second quarter: same header fields, operands taken through Qn(.., 1). */
+        GenNativeInstruction *upper = this->next(GEN_OPCODE_CMP);
+        this->setHeader(upper);
+        if (nullDst)
+          upper->header.thread_control = GEN_THREAD_SWITCH;
+        upper->header.quarter_control = GEN_COMPRESSION_Q2;
+        upper->header.execution_size = GEN_WIDTH_8;
+        upper->header.destreg_or_condmod = conditional;
+        this->setDst(upper, GenRegister::Qn(dst, 1));
+        this->setSrc0(upper, GenRegister::Qn(src0, 1));
+        this->setSrc1(upper, GenRegister::Qn(src1, 1));
+        return;
+      }
+
+      /* Unsplit form: try compactAlu2() first, but never for a null dst. */
+      if (!nullDst && compactAlu2(this, GEN_OPCODE_CMP, dst, src0, src1, conditional, false))
+        return;
+      GenNativeInstruction *insn = this->next(GEN_OPCODE_CMP);
+      this->setHeader(insn);
+      insn->header.destreg_or_condmod = conditional;
+      if (nullDst)
+        insn->header.thread_control = GEN_THREAD_SWITCH;
+      this->setDst(insn, dst);
+      this->setSrc0(insn, src0);
+      this->setSrc1(insn, src1);
+    }
 } /* End of the name space. */
diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp
index fa62a8d..31ad5d6 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -83,6 +83,7 @@ namespace gbe
     virtual void OBREADA64(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize);
     /*! A64 OBlock write */
     virtual void OBWRITEA64(GenRegister header, uint32_t bti, uint32_t elemSize);
+    virtual void CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst = GenRegister::null());
   };
 }
 #endif /* __GBE_GEN8_ENCODER_HPP__ */
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 3e45c81..14d456a 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -161,7 +161,7 @@ namespace gbe
     /*! BRD indexed instruction */
     void BRD(GenRegister src);
     /*! Compare instructions */
-    void CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst = GenRegister::null());
+    virtual void CMP(uint32_t conditional, GenRegister src0, GenRegister src1, GenRegister dst = GenRegister::null());
     /*! Select with embedded compare (like sel.le ...) */
     void SEL_CMP(uint32_t conditional, GenRegister dst, GenRegister src0, GenRegister src1);
     /*! EOT is used to finish GPGPU threads */
-- 
2.7.4



More information about the Beignet mailing list