[Beignet] [PATCH 7/8] SKL: Add function intel_gpgpu_bind_image_gen9.

Yang Rong rong.r.yang at intel.com
Thu Jan 29 00:16:22 PST 2015


SKL's qpitch is different from BDW's. For SURFTYPE_1D, qpitch is the distance in pixels between array slices.
So add two parameters, slice_pitch and bpp, to calculate it.

Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 src/cl_command_queue.c   |  8 ++---
 src/cl_driver.h          |  2 ++
 src/intel/intel_driver.c |  2 +-
 src/intel/intel_gpgpu.c  | 84 +++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 90 insertions(+), 6 deletions(-)

diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 3c04d6d..be6def1 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -140,16 +140,16 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
     image = cl_mem_image(k->args[id].mem);
     set_image_info(k->curbe, &k->images[i], image);
     cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image->offset,
-                        image->intel_fmt, image->image_type,
+                        image->intel_fmt, image->image_type, image->bpp,
                         image->w, image->h, image->depth,
-                        image->row_pitch, (cl_gpgpu_tiling)image->tiling);
+                        image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling);
     // TODO, this workaround is for GEN7/GEN75 only, we may need to do it in the driver layer
     // on demand.
     if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
       cl_gpgpu_bind_image(gpgpu, k->images[i].idx + BTI_WORKAROUND_IMAGE_OFFSET, image->base.bo, image->offset,
-                          image->intel_fmt, image->image_type,
+                          image->intel_fmt, image->image_type, image->bpp,
                           image->w, image->h, image->depth,
-                          image->row_pitch, (cl_gpgpu_tiling)image->tiling);
+                          image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling);
   }
   return CL_SUCCESS;
 }
diff --git a/src/cl_driver.h b/src/cl_driver.h
index c88b9be..16f8bba 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -145,11 +145,13 @@ typedef void (cl_gpgpu_bind_image_cb)(cl_gpgpu state,
                                       cl_buffer obj_bo,
                                       uint32_t obj_bo_offset,
                                       uint32_t format,
+                                      uint32_t bpp,
                                       uint32_t type,
                                       int32_t w,
                                       int32_t h,
                                       int32_t depth,
                                       int pitch,
+                                      int32_t slice_pitch,
                                       cl_gpgpu_tiling tiling);
 
 extern cl_gpgpu_bind_image_cb *cl_gpgpu_bind_image;
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 9e989b6..afa4486 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -479,7 +479,7 @@ static uint32_t intel_buffer_get_tiling_align(cl_context ctx, uint32_t tiling_mo
 
   case CL_NO_TILE:
     if (dim == 1) { //vertical alignment
-      if (gen_ver == 8)
+      if (gen_ver == 8 || gen_ver == 9) //SKL 1D array need 4 alignment qpitch
         ret = 4;
       else
         ret = 2;
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index a4e2b7a..36f6eef 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1158,10 +1158,12 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
                               uint32_t obj_bo_offset,
                               uint32_t format,
                               cl_mem_object_type type,
+                              uint32_t bpp,
                               int32_t w,
                               int32_t h,
                               int32_t depth,
                               int32_t pitch,
+                              int32_t slice_pitch,
                               int32_t tiling)
 {
   surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
@@ -1204,10 +1206,12 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
                               uint32_t obj_bo_offset,
                               uint32_t format,
                               cl_mem_object_type type,
+                              uint32_t bpp,
                               int32_t w,
                               int32_t h,
                               int32_t depth,
                               int32_t pitch,
+                              int32_t slice_pitch,
                               int32_t tiling)
 {
   surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
@@ -1252,10 +1256,12 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
                             uint32_t obj_bo_offset,
                             uint32_t format,
                             cl_mem_object_type type,
+                            uint32_t bpp,
                             int32_t w,
                             int32_t h,
                             int32_t depth,
                             int32_t pitch,
+                            int32_t slice_pitch,
                             int32_t tiling)
 {
   surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
@@ -1311,6 +1317,82 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
 }
 
 static void
+intel_gpgpu_bind_image_gen9(intel_gpgpu_t *gpgpu,
+                            uint32_t index,
+                            dri_bo* obj_bo,
+                            uint32_t obj_bo_offset,
+                            uint32_t format,
+                            cl_mem_object_type type,
+                            uint32_t bpp,
+                            int32_t w,
+                            int32_t h,
+                            int32_t depth,
+                            int32_t pitch,
+                            int32_t slice_pitch,
+                            int32_t tiling)
+{
+  surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+  gen8_surface_state_t *ss = (gen8_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
+  memset(ss, 0, sizeof(*ss));
+  ss->ss0.vertical_line_stride = 0; // always choose VALIGN_2
+  ss->ss0.surface_type = get_surface_type(gpgpu, index, type);
+  ss->ss0.surface_format = format;
+  if (intel_is_surface_array(type) && ss->ss0.surface_type == I965_SURFACE_1D) {
+    ss->ss0.surface_array = 1;
+    ss->ss1.surface_qpitch = (slice_pitch/bpp + 3)/4;   //align_h
+  }
+
+  if (intel_is_surface_array(type) && ss->ss0.surface_type == I965_SURFACE_2D) {
+    ss->ss0.surface_array = 1;
+    ss->ss1.surface_qpitch = (h + 3)/4;
+  }
+
+  if(ss->ss0.surface_type == I965_SURFACE_3D)
+    ss->ss1.surface_qpitch = (h + 3)/4;
+
+  ss->ss0.horizontal_alignment = 1;
+  ss->ss0.vertical_alignment = 1;
+
+  if (tiling == GPGPU_TILE_X) {
+    ss->ss0.tile_mode = GEN8_TILEMODE_XMAJOR;
+  } else if (tiling == GPGPU_TILE_Y) {
+    ss->ss0.tile_mode = GEN8_TILEMODE_YMAJOR;
+  } else
+    assert(tiling == GPGPU_NO_TILE);// W mode is not supported now.
+
+  ss->ss2.width = w - 1;
+  ss->ss2.height = h - 1;
+  ss->ss3.depth = depth - 1;
+
+  ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff;
+  ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff;
+
+  ss->ss4.render_target_view_ext = depth - 1;
+  ss->ss4.min_array_elt = 0;
+  ss->ss3.surface_pitch = pitch - 1;
+
+  ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
+  ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
+  ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
+  ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
+  ss->ss7.shader_channel_select_alpha = I965_SURCHAN_SELECT_ALPHA;
+  ss->ss0.render_cache_rw_mode = 1; /* XXX do we need to set it? */
+
+  heap->binding_table[index] = offsetof(surface_heap_t, surface) +
+                               index * surface_state_sz;
+  dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+                    I915_GEM_DOMAIN_RENDER,
+                    I915_GEM_DOMAIN_RENDER,
+                    obj_bo_offset,
+                    gpgpu->aux_offset.surface_heap_offset +
+                    heap->binding_table[index] +
+                    offsetof(gen8_surface_state_t, ss8),
+                    obj_bo);
+
+  assert(index < GEN_MAX_SURFACES);
+}
+
+static void
 intel_gpgpu_bind_buf(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t offset,
                      uint32_t internal_offset, uint32_t size, uint8_t bti)
 {
@@ -2094,7 +2176,7 @@ intel_set_gpgpu_callbacks(int device_id)
     return;
   }
   if (IS_SKYLAKE(device_id)) {
-    cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen8;
+    cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9;
     intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8;
     cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9;
     intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8;
-- 
2.1.0



More information about the Beignet mailing list