[Beignet] [PATCH] GBE: fixup/refine a bug for image1D array's extra binding index handling.

Zhigang Gong zhigang.gong at intel.com
Wed Sep 3 17:06:09 PDT 2014


Due to a hardware limitation on Gen7/Gen75 when sampling an
integer image1Darray type surface with clamp address mode
and nearest filter mode, we have to bind one buffer to two
btis. The previous implementation hard coded the extra bti
to 128 + the original index, and when the driver layer checked
whether a bti is of this type, it assumed that the number of
reserved btis is 3, which is wrong now.

This patch fixes those hard coded values and uses the
macros defined in program.h instead.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |  2 +-
 backend/src/backend/program.h              |  2 ++
 backend/src/llvm/llvm_gen_backend.cpp      | 13 +++++++++----
 src/cl_command_queue.c                     |  5 ++++-
 src/intel/intel_gpgpu.c                    | 28 +++++++++++++++++++---------
 5 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index abbff77..d631579 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3862,7 +3862,7 @@ namespace gbe
         msgLen = srcNum;
       }
       // We switch to a fixup bti for linear filter on a image1d array sampling.
-      uint32_t bti = insn.getImageIndex() + (insn.getSamplerOffset() == 2 ? 128 : 0);
+      uint32_t bti = insn.getImageIndex() + (insn.getSamplerOffset() == 2 ? BTI_MAX_IMAGE_NUM : 0);
       if (bti > 253) {
         std::cerr << "Too large bti " << bti;
         return false;
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 254df92..c63ae6a 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -66,6 +66,8 @@ enum gbe_get_arg_info_value {
 #define BTI_CONSTANT 0
 #define BTI_PRIVATE 1
 #define BTI_RESERVED_NUM 2
+#define BTI_MAX_IMAGE_NUM 128
+#define BTI_MAX_ID (BTI_MAX_IMAGE_NUM + BTI_RESERVED_NUM - 1)
 
 /*! Constant buffer values (ie values to setup in the constant buffer) */
 enum gbe_curbe_type {
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 1604ede..738f7d3 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -500,6 +500,11 @@ namespace gbe
     ir::ImmediateIndex processConstantImmIndex(Constant *CPV, int32_t index = 0u);
     const ir::Immediate &processConstantImm(Constant *CPV, int32_t index = 0u);
 
+    uint32_t incBtiBase() {
+      GBE_ASSERT(btiBase <= BTI_MAX_ID);
+      return btiBase++;
+    }
+
     bool runOnFunction(Function &F) {
      // Do not codegen any 'available_externally' functions at all, they have
      // definitions outside the translation unit.
@@ -1363,7 +1368,7 @@ namespace gbe
                 globalPointer.insert(std::make_pair(I, btiBase));
                 ctx.appendSurface(btiBase, reg);
                 ctx.input(argName, ir::FunctionArgument::GLOBAL_POINTER, reg, llvmInfo, ptrSize, align, btiBase);
-                btiBase++;
+                incBtiBase();
               break;
               case ir::MEM_LOCAL:
                 ctx.input(argName, ir::FunctionArgument::LOCAL_POINTER, reg,  llvmInfo, ptrSize, align, 0xfe);
@@ -1374,7 +1379,7 @@ namespace gbe
               break;
               case ir::IMAGE:
                 ctx.input(argName, ir::FunctionArgument::IMAGE, reg, llvmInfo, ptrSize, align, 0x0);
-                ctx.getFunction().getImageSet()->append(reg, &ctx, btiBase++);
+                ctx.getFunction().getImageSet()->append(reg, &ctx, incBtiBase());
               break;
               default: GBE_ASSERT(addrSpace != ir::MEM_PRIVATE);
             }
@@ -1720,12 +1725,12 @@ namespace gbe
         if(v.getName().equals(StringRef("__gen_ocl_printf_buf"))) {
           ctx.appendSurface(btiBase, ir::ocl::printfbptr);
           ctx.getFunction().getPrintfSet()->setBufBTI(btiBase);
-          globalPointer.insert(std::make_pair(&v, btiBase++));
+          globalPointer.insert(std::make_pair(&v, incBtiBase()));
           regTranslator.newScalarProxy(ir::ocl::printfbptr, const_cast<GlobalVariable*>(&v));
         } else if(v.getName().equals(StringRef("__gen_ocl_printf_index_buf"))) {
           ctx.appendSurface(btiBase, ir::ocl::printfiptr);
           ctx.getFunction().getPrintfSet()->setIndexBufBTI(btiBase);
-          globalPointer.insert(std::make_pair(&v, btiBase++));
+          globalPointer.insert(std::make_pair(&v, incBtiBase()));
           regTranslator.newScalarProxy(ir::ocl::printfiptr, const_cast<GlobalVariable*>(&v));
 	} else if(v.getName().str().substr(0, 4) == ".str") {
           /* When there are multi printf statements in multi kernel fucntions within the same
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 52e91ae..4cbb4eb 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -17,6 +17,7 @@
  * Author: Benjamin Segovia <benjamin.segovia at intel.com>
  */
 
+#include "program.h" // for BTI_MAX_IMAGE_NUM
 #include "cl_command_queue.h"
 #include "cl_context.h"
 #include "cl_program.h"
@@ -142,8 +143,10 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
                         image->intel_fmt, image->image_type,
                         image->w, image->h, image->depth,
                         image->row_pitch, (cl_gpgpu_tiling)image->tiling);
+    // TODO, this workaround is for GEN7/GEN75 only, we may need to do it in the driver layer
+    // on demand.
     if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
-      cl_gpgpu_bind_image(gpgpu, k->images[i].idx + 128, image->base.bo, image->offset,
+      cl_gpgpu_bind_image(gpgpu, k->images[i].idx + BTI_MAX_IMAGE_NUM, image->base.bo, image->offset,
                           image->intel_fmt, image->image_type,
                           image->w, image->h, image->depth,
                           image->row_pitch, (cl_gpgpu_tiling)image->tiling);
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 867ab4c..c4b9156 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -35,6 +35,7 @@
 #include "intel/intel_structs.h"
 #include "intel/intel_batchbuffer.h"
 #include "intel/intel_driver.h"
+#include "program.h" // for BTI_RESERVED_NUM
 
 #include "cl_alloc.h"
 #include "cl_utils.h"
@@ -819,6 +820,22 @@ intel_get_surface_type(cl_mem_object_type type)
   return 0;
 }
 
+/* Get fixed surface type. If it is a 1D array image with a large index,
+   we need to fixup it to 2D type due to a Gen7/Gen75's sampler issue
+   on a integer type surface with clamp address mode and nearest filter mode.
+*/
+static uint32_t get_surface_type(intel_gpgpu_t *gpgpu, int index, cl_mem_object_type type)
+{
+  uint32_t surface_type;
+  if (((IS_IVYBRIDGE(gpgpu->drv->device_id) || IS_HASWELL(gpgpu->drv->device_id))) &&
+      index >= 128 + BTI_RESERVED_NUM &&
+      type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+    surface_type = I965_SURFACE_2D;
+  else
+    surface_type = intel_get_surface_type(type);
+  return surface_type;
+}
+
 static void
 intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
                               uint32_t index,
@@ -836,12 +853,8 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
   gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
 
   memset(ss, 0, sizeof(*ss));
-
   ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
-  if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
-    ss->ss0.surface_type = I965_SURFACE_2D;
-  else
-    ss->ss0.surface_type = intel_get_surface_type(type);
+  ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
   if (intel_is_surface_array(type)) {
     ss->ss0.surface_array = 1;
     ss->ss0.surface_array_spacing = 1;
@@ -886,10 +899,7 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
   gen7_surface_state_t *ss = (gen7_surface_state_t *) heap->surface[index];
   memset(ss, 0, sizeof(*ss));
   ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
-  if (index > 128 + 2 && type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
-    ss->ss0.surface_type = I965_SURFACE_2D;
-  else
-    ss->ss0.surface_type = intel_get_surface_type(type);
+  ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
   if (intel_is_surface_array(type)) {
     ss->ss0.surface_array = 1;
     ss->ss0.surface_array_spacing = 1;
-- 
1.8.3.2



More information about the Beignet mailing list