[Beignet] [PATCH 2/2] GBE: Optimize constant load with sampler.

Ruiling Song ruiling.song at intel.com
Fri Jul 11 01:23:08 PDT 2014


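When the destination of a constant load is a uniform (scalar) register,
issue the load as a SIMD4x2 sampler LD message instead of a DWORD gather.
The SAMPLE encoder and selection paths gain an isUniform flag for this.
GEN_SAMPLER_MESSAGE_SIMD4X2_LD is changed from 3 to 7, the same value as
the SIMD8/SIMD16 LD messages, and the constant buffer surface format is
changed from RAW to R32G32B32A32_UINT.

Signed-off-by: Ruiling Song <ruiling.song at intel.com>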
---
 backend/src/backend/gen_context.cpp        |    2 +-
 backend/src/backend/gen_defs.hpp           |    2 +-
 backend/src/backend/gen_encoder.cpp        |    9 +++++++-
 backend/src/backend/gen_encoder.hpp        |    3 ++-
 backend/src/backend/gen_insn_selection.cpp |   34 ++++++++++++++++------------
 backend/src/backend/gen_insn_selection.hpp |    1 +
 src/intel/intel_gpgpu.c                    |    2 +-
 7 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 7b7dec3..c7222a5 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1720,7 +1720,7 @@ namespace gbe
     const unsigned char sampler = insn.extra.sampler;
     const unsigned int msgLen = insn.extra.rdmsglen;
     uint32_t simdWidth = p->curr.execWidth;
-    p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1, 0, insn.extra.isLD);
+    p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1, 0, insn.extra.isLD, insn.extra.isUniform);
   }
 
   void GenContext::scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode) {
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 1b43864..f0da50a 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -396,7 +396,7 @@ enum GenMessageTarget {
 #define GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
 #define GEN_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
 #define GEN_SAMPLER_MESSAGE_SIMD16_RESINFO            2
-#define GEN_SAMPLER_MESSAGE_SIMD4X2_LD                3
+#define GEN_SAMPLER_MESSAGE_SIMD4X2_LD                7
 #define GEN_SAMPLER_MESSAGE_SIMD8_LD                  7
 #define GEN_SAMPLER_MESSAGE_SIMD16_LD                 7
 
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 26337e9..182752a 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1214,7 +1214,8 @@ namespace gbe
                           uint32_t simdWidth,
                           uint32_t writemask,
                           uint32_t return_format,
-                          bool isLD)
+                          bool isLD,
+                          bool isUniform)
   {
      if (writemask == 0) return;
      uint32_t msg_type = isLD ? GEN_SAMPLER_MESSAGE_SIMD8_LD :
@@ -1225,6 +1226,12 @@ namespace gbe
        msg_length++;
      uint32_t simd_mode = (simdWidth == 16) ?
                             GEN_SAMPLER_SIMD_MODE_SIMD16 : GEN_SAMPLER_SIMD_MODE_SIMD8;
+    if(isUniform) {
+      response_length = 1;
+      msg_type = GEN_SAMPLER_MESSAGE_SIMD4X2_LD;
+      msg_length = 1;
+      simd_mode = GEN_SAMPLER_SIMD_MODE_SIMD4X2;
+    }
      GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
      this->setHeader(insn);
      this->setDst(insn, dest);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index eb2d3d7..d6e2b97 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -192,7 +192,8 @@ namespace gbe
                 unsigned int simdWidth,
                 uint32_t writemask,
                 uint32_t return_format,
-                bool isLD);
+                bool isLD,
+                bool isUniform);
 
     /*! TypedWrite instruction for texture */
     virtual void TYPED_WRITE(GenRegister header,
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 565f203..26c52aa 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -592,7 +592,7 @@ namespace gbe
     /*! Encode ternary instructions */
     void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
     /*! Encode sample instructions */
-    void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler, bool isLD);
+    void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler, bool isLD, bool isUniform);
     /*! Encode typed write instructions */
     void TYPED_WRITE(GenRegister *msgs, uint32_t msgNum, uint32_t bti, bool is3D);
     /*! Get image information */
@@ -1598,7 +1598,7 @@ namespace gbe
 
   void Selection::Opaque::SAMPLE(GenRegister *dst, uint32_t dstNum,
                                  GenRegister *msgPayloads, uint32_t msgNum,
-                                 uint32_t bti, uint32_t sampler, bool isLD) {
+                                 uint32_t bti, uint32_t sampler, bool isLD, bool isUniform) {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_SAMPLE, dstNum, msgNum);
     SelectionVector *dstVector = this->appendVector();
     SelectionVector *msgVector = this->appendVector();
@@ -1623,6 +1623,7 @@ namespace gbe
     insn->extra.sampler = sampler;
     insn->extra.rdmsglen = msgNum;
     insn->extra.isLD = isLD;
+    insn->extra.isUniform = isUniform;
   }
 
   ///////////////////////////////////////////////////////////////////////////
@@ -2762,8 +2763,21 @@ namespace gbe
       using namespace ir;
       const bool isUniformDst = sel.isScalarReg(insn.getValue(0));
       const bool isUniformSrc = sel.isScalarReg(addr.reg());
-
       GBE_ASSERT(insn.getValueNum() == 1);
+
+      if(isUniformDst) {
+        GenRegister tmpAddr = replaceUniformSource(sel, addr.reg(), ir::TYPE_U32);
+        GenRegister tmp = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+
+        sel.push();
+          sel.curr.noMask = 1;
+          sel.SAMPLE(&tmp, 1, &tmpAddr, 1, bti, 0, true, true);
+          sel.curr.execWidth = 1;
+          sel.MOV(sel.selReg(insn.getValue(0), ir::TYPE_U32), GenRegister::vec1(tmp));
+        sel.pop();
+        return;
+      }
+
       GenRegister dst = GenRegister::retype(sel.selReg(insn.getValue(0)), GEN_TYPE_F);
       // Get dword based address
       GenRegister addrDW = GenRegister::udxgrf(sel.ctx.getSimdWidth(), sel.reg(FAMILY_DWORD));
@@ -2775,17 +2789,7 @@ namespace gbe
         sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2));
       sel.pop();
 
-      if(isUniformDst) {
-        GenRegister tmp = GenRegister::fxgrf(sel.ctx.getSimdWidth(), sel.reg(FAMILY_DWORD));
-        sel.push();
-          sel.curr.noMask = 1;
-          sel.DWORD_GATHER(tmp, addrDW, bti);
-          sel.curr.execWidth = 1;
-          sel.MOV(dst, GenRegister::vec1(tmp));
-        sel.pop();
-      } else
-        sel.DWORD_GATHER(dst, addrDW, bti);
-
+      sel.DWORD_GATHER(dst, addrDW, bti);
     }
 
     void emitRead64(Selection::Opaque &sel,
@@ -3689,7 +3693,7 @@ namespace gbe
       }
       uint32_t sampler = insn.getSamplerIndex();
 
-      sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset() != 0);
+      sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset() != 0, false);
       return true;
     }
     DECL_CTOR(SampleInstruction, 1, 1);
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 508a37e..9bcce6f 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -126,6 +126,7 @@ namespace gbe
         uint16_t sampler:5;
         uint16_t rdmsglen:3;
         bool     isLD;  // is this a ld message?
+        bool     isUniform;
       };
       uint32_t barrierType;
       bool longjmp;
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index d00bc83..9531001 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -688,7 +688,7 @@ intel_gpgpu_alloc_constant_buffer(intel_gpgpu_t *gpgpu, uint32_t size)
   gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[2];
   memset(ss2, 0, sizeof(gen7_surface_state_t));
   ss2->ss0.surface_type = I965_SURFACE_BUFFER;
-  ss2->ss0.surface_format = I965_SURFACEFORMAT_RAW;
+  ss2->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_UINT;
   ss2->ss2.width  = s & 0x7f;            /* bits 6:0 of sz */
   ss2->ss2.height = (s >> 7) & 0x3fff;   /* bits 20:7 of sz */
   ss2->ss3.depth  = (s >> 21) & 0x3ff;   /* bits 30:21 of sz */
-- 
1.7.10.4



More information about the Beignet mailing list