[Beignet] [PATCH V2 2/2] GBE: Optimize constant load with sampler.

Yang Rong rong.r.yang at intel.com
Mon Jul 14 02:24:38 PDT 2014


From: Ruiling Song <ruiling.song at intel.com>

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/backend/gen_context.cpp        |    2 +-
 backend/src/backend/gen_defs.hpp           |    2 +-
 backend/src/backend/gen_encoder.cpp        |    9 ++++++++-
 backend/src/backend/gen_encoder.hpp        |    3 ++-
 backend/src/backend/gen_insn_selection.cpp |   22 ++++++++++++++++------
 backend/src/backend/gen_insn_selection.hpp |    1 +
 src/intel/intel_gpgpu.c                    |    2 +-
 7 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 7b7dec3..c7222a5 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1720,7 +1720,7 @@ namespace gbe
     const unsigned char sampler = insn.extra.sampler;
     const unsigned int msgLen = insn.extra.rdmsglen;
     uint32_t simdWidth = p->curr.execWidth;
-    p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1, 0, insn.extra.isLD);
+    p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1, 0, insn.extra.isLD, insn.extra.isUniform);
   }
 
   void GenContext::scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode) {
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 1b43864..f0da50a 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -396,7 +396,7 @@ enum GenMessageTarget {
 #define GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
 #define GEN_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
 #define GEN_SAMPLER_MESSAGE_SIMD16_RESINFO            2
-#define GEN_SAMPLER_MESSAGE_SIMD4X2_LD                3
+#define GEN_SAMPLER_MESSAGE_SIMD4X2_LD                7
 #define GEN_SAMPLER_MESSAGE_SIMD8_LD                  7
 #define GEN_SAMPLER_MESSAGE_SIMD16_LD                 7
 
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 26337e9..182752a 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1214,7 +1214,8 @@ namespace gbe
                           uint32_t simdWidth,
                           uint32_t writemask,
                           uint32_t return_format,
-                          bool isLD)
+                          bool isLD,
+                          bool isUniform)
   {
      if (writemask == 0) return;
      uint32_t msg_type = isLD ? GEN_SAMPLER_MESSAGE_SIMD8_LD :
@@ -1225,6 +1226,12 @@ namespace gbe
        msg_length++;
      uint32_t simd_mode = (simdWidth == 16) ?
                             GEN_SAMPLER_SIMD_MODE_SIMD16 : GEN_SAMPLER_SIMD_MODE_SIMD8;
+    if(isUniform) {
+      response_length = 1;
+      msg_type = GEN_SAMPLER_MESSAGE_SIMD4X2_LD;
+      msg_length = 1;
+      simd_mode = GEN_SAMPLER_SIMD_MODE_SIMD4X2;
+    }
      GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
      this->setHeader(insn);
      this->setDst(insn, dest);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index eb2d3d7..d6e2b97 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -192,7 +192,8 @@ namespace gbe
                 unsigned int simdWidth,
                 uint32_t writemask,
                 uint32_t return_format,
-                bool isLD);
+                bool isLD,
+                bool isUniform);
 
     /*! TypedWrite instruction for texture */
     virtual void TYPED_WRITE(GenRegister header,
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index cd2240d..d70fd8f 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -592,7 +592,7 @@ namespace gbe
     /*! Encode ternary instructions */
     void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
     /*! Encode sample instructions */
-    void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler, bool isLD);
+    void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler, bool isLD, bool isUniform);
     /*! Encode typed write instructions */
     void TYPED_WRITE(GenRegister *msgs, uint32_t msgNum, uint32_t bti, bool is3D);
     /*! Get image information */
@@ -1598,7 +1598,7 @@ namespace gbe
 
   void Selection::Opaque::SAMPLE(GenRegister *dst, uint32_t dstNum,
                                  GenRegister *msgPayloads, uint32_t msgNum,
-                                 uint32_t bti, uint32_t sampler, bool isLD) {
+                                 uint32_t bti, uint32_t sampler, bool isLD, bool isUniform) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_SAMPLE, dstNum, msgNum);
     SelectionVector *dstVector = this->appendVector();
     SelectionVector *msgVector = this->appendVector();
@@ -1623,6 +1623,7 @@ namespace gbe
     insn->extra.sampler = sampler;
     insn->extra.rdmsglen = msgNum;
     insn->extra.isLD = isLD;
+    insn->extra.isUniform = isUniform;
   }
 
   ///////////////////////////////////////////////////////////////////////////
@@ -2717,14 +2718,23 @@ namespace gbe
       using namespace ir;
       const uint32_t simdWidth = sel.isScalarReg(insn.getValue(0)) ? 1 : sel.ctx.getSimdWidth();
       GBE_ASSERT(insn.getValueNum() == 1);
+
+      if(simdWidth == 1) {
+        GenRegister dst = sel.selReg(insn.getValue(0), ir::TYPE_U32);
+        sel.push();
+          sel.curr.noMask = 1;
+          sel.SAMPLE(&dst, 1, &addr, 1, bti, 0, true, true);
+        sel.pop();
+        return;
+      }
+
       GenRegister dst = GenRegister::retype(sel.selReg(insn.getValue(0)), GEN_TYPE_F);
       // get dword based address
-      GenRegister addrDW = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD, simdWidth == 1));
+      GenRegister addrDW = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
 
       sel.push();
-        if (simdWidth == 1) {
+        if (sel.isScalarReg(addr.reg())) {
           sel.curr.noMask = 1;
-          sel.curr.execWidth = 1;
         }
         sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2));
       sel.pop();
@@ -3612,7 +3622,7 @@ namespace gbe
       }
       uint32_t sampler = insn.getSamplerIndex();
 
-      sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset() != 0);
+      sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset() != 0, false);
       return true;
     }
     DECL_CTOR(SampleInstruction, 1, 1);
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 508a37e..9bcce6f 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -126,6 +126,7 @@ namespace gbe
         uint16_t sampler:5;
         uint16_t rdmsglen:3;
         bool     isLD;  // is this a ld message?
+        bool     isUniform;
       };
       uint32_t barrierType;
       bool longjmp;
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index d00bc83..9531001 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -688,7 +688,7 @@ intel_gpgpu_alloc_constant_buffer(intel_gpgpu_t *gpgpu, uint32_t size)
   gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[2];
   memset(ss2, 0, sizeof(gen7_surface_state_t));
   ss2->ss0.surface_type = I965_SURFACE_BUFFER;
-  ss2->ss0.surface_format = I965_SURFACEFORMAT_RAW;
+  ss2->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_UINT;
   ss2->ss2.width  = s & 0x7f;            /* bits 6:0 of sz */
   ss2->ss2.height = (s >> 7) & 0x3fff;   /* bits 20:7 of sz */
   ss2->ss3.depth  = (s >> 21) & 0x3ff;   /* bits 30:21 of sz */
-- 
1.7.10.4



More information about the Beignet mailing list