[Beignet] [PATCH 5/5] GBE: Refine the curbe entry allocation for sampler/image information.

Zhigang Gong zhigang.gong at linux.intel.com
Wed Sep 25 04:13:19 PDT 2013


After the previous patch, we can move the image infomation curbe
entry allocation to prior to the instruction selection.

Then we can concentrate all curbe allocation before we do the
normal register allocation. This way can bring two advantages:
1. Avoid the image information curbe entry is allocated among the normal registers.
2. The register interval analyzing could handle the image/sampler information correctly.

Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
---
 backend/src/backend/context.cpp            |   34 +++++++++++++++++++++++-----
 backend/src/backend/gen_insn_selection.cpp |   13 ++++-------
 backend/src/backend/gen_reg_allocation.cpp |    3 ++-
 backend/src/ir/instruction.cpp             |   24 ++++++++++----------
 backend/src/ir/instruction.hpp             |    6 ++---
 backend/src/llvm/llvm_gen_backend.cpp      |   15 ++----------
 kernels/compiler_box_blur_image.cl         |    2 +-
 src/cl_command_queue_gen7.c                |    8 ++++---
 8 files changed, 58 insertions(+), 47 deletions(-)

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index 6b5d11e..c278bd7 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -316,8 +316,6 @@ namespace gbe
       this->kernel = NULL;
     }
     if(this->kernel != NULL) {
-      // Align it on 32 bytes properly
-      this->kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
       this->kernel->scratchSize = alignScratchSize(this->scratchOffset);
       this->kernel->ctx = this;
     }
@@ -405,7 +403,7 @@ namespace gbe
     offset = kernel->getCurbeOffset(GBE_CURBE_IMAGE_INFO, key.data);
     GBE_ASSERT(offset >= 0); // XXX do we need to spill it out to bo?
     fn.getImageSet()->appendInfo(key, offset);
-    return offset;
+    return offset + GEN_REG_SIZE;
   }
 
 
@@ -440,7 +438,7 @@ namespace gbe
     insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize));
     insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize));
     insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize));
-    insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32));
+            insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32));
 
     // Go over all the instructions and find the special register we need
     // to push
@@ -451,10 +449,34 @@ namespace gbe
   } else
 
     bool useStackPtr = false;
-    fn.foreachInstruction([&](const ir::Instruction &insn) {
+    fn.foreachInstruction([&](ir::Instruction &insn) {
       const uint32_t srcNum = insn.getSrcNum();
       for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
         const ir::Register reg = insn.getSrc(srcID);
+        if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
+          if (srcID != 0) continue;
+          const unsigned char bti = fn.getImageSet()->getIdx(insn.getSrc(srcID));
+          const unsigned char type =  ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();;
+          ir::ImageInfoKey key;
+          key.index = bti;
+          key.type = type;
+          const ir::Register imageInfo(key.data | 0x8000);
+          ir::Register realImageInfo;
+          if (curbeRegs.find(imageInfo) == curbeRegs.end()) {
+            uint32_t offset = this->getImageInfoCurbeOffset(key, 4);
+            realImageInfo = insn.getSrc(1);
+            insertCurbeReg(realImageInfo, offset);
+            insertCurbeReg(imageInfo, (uint32_t)realImageInfo);
+          } else
+            realImageInfo = ir::Register(curbeRegs.find(imageInfo)->second);
+          insn.setSrc(srcID, realImageInfo);
+          continue;
+        } else if (insn.getOpcode() == ir::OP_GET_SAMPLER_INFO) {
+          /* change the src to sampler information register. */
+          if (curbeRegs.find(ir::ocl::samplerinfo) == curbeRegs.end())
+            insertCurbeReg(ir::ocl::samplerinfo, this->newCurbeEntry(GBE_CURBE_SAMPLER_INFO, 0, 32));
+          continue;
+        }
         if (fn.isSpecialReg(reg) == false) continue;
         if (curbeRegs.find(reg) != curbeRegs.end()) continue;
         if (reg == ir::ocl::stackptr) useStackPtr = true;
@@ -472,7 +494,7 @@ namespace gbe
         INSERT_REG(numgroup1, GROUP_NUM_Y, 1)
         INSERT_REG(numgroup2, GROUP_NUM_Z, 1)
         INSERT_REG(stackptr, STACK_POINTER, this->simdWidth)
-        do {} while (0);
+        do {} while(0);
       }
     });
 #undef INSERT_REG
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index de9a64d..b695422 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2679,14 +2679,11 @@ namespace gbe
     INLINE bool emitOne(Selection::Opaque &sel, const ir::GetImageInfoInstruction &insn) const
     {
       using namespace ir;
-      const uint32_t infoType = insn.getInfoType();
-      GenRegister dst[4];
-      uint32_t dstNum = ir::GetImageInfoInstruction::getDstNum4Type(infoType);
-      for (uint32_t valueID = 0; valueID < dstNum; ++valueID)
-        dst[valueID] = sel.selReg(insn.getDst(valueID), TYPE_U32);
-      uint32_t bti = sel.ctx.getFunction().getImageSet()->getIdx
-                       (insn.getSrc(0));
-      sel.GET_IMAGE_INFO(infoType, dst, dstNum, bti);
+      GenRegister dst;
+      dst = sel.selReg(insn.getDst(0), TYPE_U32);
+      GenRegister imageInfoReg = GenRegister::ud1grf(insn.getSrc(0));
+      sel.MOV(dst, imageInfoReg);
+
       return true;
     }
     DECL_CTOR(GetImageInfoInstruction, 1, 1);
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index a72333d..ab8b7ee 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -142,7 +142,8 @@ namespace gbe
   INLINE void GenRegAllocator::Opaque::allocatePayloadRegs(void) {
     using namespace ir;
     for(auto &it : this->ctx.curbeRegs)
-      allocatePayloadReg(it.first, it.second);
+      if (it.first.value() < 0x8000)
+        allocatePayloadReg(it.first, it.second);
 
     // Allocate all pushed registers (i.e. structure kernel arguments)
     const Function &fn = ctx.getFunction();
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 8a72929..5f2de07 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -532,18 +532,20 @@ namespace ir {
 
     class ALIGNED_INSTRUCTION GetImageInfoInstruction :
       public BasePolicy,
-      public NSrcPolicy<GetImageInfoInstruction, 1>,
-      public TupleDstPolicy<GetImageInfoInstruction>
+      public NSrcPolicy<GetImageInfoInstruction, 2>,
+      public NDstPolicy<GetImageInfoInstruction, 1>
     {
     public:
       GetImageInfoInstruction( int type,
-                               Tuple dst,
-                               Register src)
+                               Register dst,
+                               Register src,
+                               Register infoReg)
       {
         this->opcode = OP_GET_IMAGE_INFO;
         this->infoType = type;
-        this->dst = dst;
+        this->dst[0] = dst;
         this->src[0] = src;
+        this->src[1] = infoReg;
       }
 
       INLINE uint32_t getInfoType(void) const { return infoType; }
@@ -556,11 +558,9 @@ namespace ir {
       }
 
       uint8_t infoType;                 //!< Type of the requested information.
-      Register src[1];                  //!< Surface to get info
-      Tuple dst;                        //!< dest register to put the information.
-      static const uint32_t dstNum = 4; //! The maximum dst number. Not the actual number
-                                        // of destination tuple. We use the infoType to determin
-                                        // the actual num.
+      Register src[2];                  //!< Surface to get info
+      Register dst[1];                        //!< dest register to put the information.
+      static const uint32_t dstNum = 1;
     };
 
     class ALIGNED_INSTRUCTION LoadImmInstruction :
@@ -1471,8 +1471,8 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
     return internal::TypedWriteInstruction(src, srcType, coordType).convert();
   }
 
-  Instruction GET_IMAGE_INFO(int infoType, Tuple dst, Register src) {
-    return internal::GetImageInfoInstruction(infoType, dst, src).convert();
+  Instruction GET_IMAGE_INFO(int infoType, Register dst, Register src, Register infoReg) {
+    return internal::GetImageInfoInstruction(infoType, dst, src, infoReg).convert();
   }
 
   Instruction GET_SAMPLER_INFO(Register dst, Register src) {
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 4a8ff72..a9e6038 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -353,8 +353,8 @@ namespace ir {
 
   typedef union {
     struct {
-     uint16_t index; /*! the allocated image index */
-     uint16_t type;  /*! the information type */
+     uint8_t index; /*! the allocated image index */
+     uint8_t  type;  /*! the information type */
     };
     uint32_t data;
   } ImageInfoKey;
@@ -633,7 +633,7 @@ namespace ir {
   /*! sample textures */
   Instruction SAMPLE(Tuple dst, Tuple src, Type dstType, Type srcType);
   /*! get image information , such as width/height/depth/... */
-  Instruction GET_IMAGE_INFO(int infoType, Tuple dst, Register src);
+  Instruction GET_IMAGE_INFO(int infoType, Register dst, Register src, Register infoReg);
   /*! get sampler information  */
   Instruction GET_SAMPLER_INFO(Register dst, Register src);
   /*! label labelIndex */
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 99e6f4e..c23ba38 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2093,21 +2093,10 @@ namespace gbe
             GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
             uint32_t elemNum;
             (void)getVectorInfo(ctx, I.getType(), &I, elemNum);
-            vector<ir::Register> dstTupleData;
-            ir::Register lastReg;
-            for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
-              const ir::Register reg = this->getRegister(&I, elemID);
-              dstTupleData.push_back(reg);
-              lastReg = reg;
-            }
-            // A walk around for the gen IR limitation.
-            for (uint32_t elemID = elemNum; elemID < 4; ++ elemID) {
-              dstTupleData.push_back(lastReg);
-            }
-            const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], 4);
+            const ir::Register reg = this->getRegister(&I, 0);
             int infoType = it->second - GEN_OCL_GET_IMAGE_WIDTH;
 
-            ctx.GET_IMAGE_INFO(infoType, dstTuple, surface_id);
+            ctx.GET_IMAGE_INFO(infoType, reg, surface_id, ctx.reg(ir::FAMILY_DWORD));
             break;
           }
           case GEN_OCL_GET_SAMPLER_INFO:
diff --git a/kernels/compiler_box_blur_image.cl b/kernels/compiler_box_blur_image.cl
index 7bcbdeb..42f463b 100644
--- a/kernels/compiler_box_blur_image.cl
+++ b/kernels/compiler_box_blur_image.cl
@@ -10,7 +10,7 @@ __kernel void compiler_box_blur_image(__read_only image2d_t src,
 
   for (offset.y = -1; offset.y <= 1; offset.y++) {
     for (offset.x = -1; offset.x <= 1; offset.x++) {
-      sum += read_imagef(src, sampler, coord + offset);
+      sum +=  read_imagef(src, sampler, coord + offset);
     }
   }
 
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index f2c051b..b85c0cd 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -182,9 +182,11 @@ cl_curbe_fill(cl_kernel ker,
 
   /* Upload sampler information. */
   offset = gbe_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_SAMPLER_INFO, 0);
-  uint32_t i;
-  for(i = 0; i < ker->sampler_sz; i++, offset += 2) {
-    *((uint16_t *) (ker->curbe + offset)) = ker->samplers[i] & 0xFF;
+  if (offset >= 0) {
+    uint32_t i;
+    for(i = 0; i < ker->sampler_sz; i++, offset += 2) {
+      *((uint16_t *) (ker->curbe + offset)) = ker->samplers[i] & 0xFF;
+    }
   }
 
   /* Write identity for the stack pointer. This is required by the stack pointer
-- 
1.7.9.5



More information about the Beignet mailing list