[Beignet] [PATCH 4/5] Runtime: enable border color state support.

Zhigang Gong zhigang.gong at linux.intel.com
Thu Sep 5 22:43:06 PDT 2013


Also fix the wrong clamp mode for CL_ADDRESS_CLAMP.
According to Gen Bspec, when the surface format is
int/uint, it doesn't support clamp border. We need
to workaround it in the kernel side.

Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
---
 src/intel/intel_gpgpu.c   |   57 +++++++++++++++++++++++++++++++--------------
 src/intel/intel_structs.h |    4 ++++
 2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 144e10f..6ea0ddb 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -94,6 +94,7 @@ struct intel_gpgpu
   struct { drm_intel_bo *bo; } vfe_state_b;
   struct { drm_intel_bo *bo; } curbe_b;
   struct { drm_intel_bo *bo; } sampler_state_b;
+  struct { drm_intel_bo *bo; } sampler_border_color_state_b;
   struct { drm_intel_bo *bo; } perf_b;
   struct { drm_intel_bo *bo; } scratch_b;
   struct { drm_intel_bo *bo; } constant_b;
@@ -132,6 +133,8 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
     drm_intel_bo_unreference(gpgpu->curbe_b.bo);
   if (gpgpu->sampler_state_b.bo)
     drm_intel_bo_unreference(gpgpu->sampler_state_b.bo);
+  if (gpgpu->sampler_border_color_state_b.bo)
+    drm_intel_bo_unreference(gpgpu->sampler_border_color_state_b.bo);
   if (gpgpu->perf_b.bo)
     drm_intel_bo_unreference(gpgpu->perf_b.bo);
   if (gpgpu->stack_b.bo)
@@ -201,7 +204,10 @@ intel_gpgpu_set_base_address(intel_gpgpu_t *gpgpu)
   OUT_BATCH(gpgpu->batch, 0x04000000 | BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */
 #else
   OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
-  OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
+  /* According to mesa i965 driver code, we must set the dynamic state access upper bound
+   * to a valid bound value, otherwise, the border color pointer may be rejected and you
+   * may get incorrect border color. This is a known hardware bug. */
+  OUT_BATCH(gpgpu->batch, 0xfffff000 | BASE_ADDRESS_MODIFY);
   OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
   OUT_BATCH(gpgpu->batch, 0 | BASE_ADDRESS_MODIFY);
 #endif /* USE_FULSIM */
@@ -288,6 +294,7 @@ intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu)
   pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
   pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
   pc->dw1.render_target_cache_flush_enable = 1;
+  pc->dw1.texture_cache_invalidation_enable = 1;
   pc->dw1.cs_stall = 1;
   pc->dw1.dc_flush_enable = 1;
   ADVANCE_BATCH(gpgpu->batch);
@@ -376,6 +383,7 @@ intel_gpgpu_check_binded_buf_address(intel_gpgpu_t *gpgpu)
   for (i = 0; i < gpgpu->binded_n; ++i)
     assert(gpgpu->binded_buf[i]->offset != 0);
 }
+
 static void
 intel_gpgpu_flush(intel_gpgpu_t *gpgpu)
 {
@@ -451,6 +459,18 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
   memset(bo->virtual, 0, sizeof(gen6_sampler_state_t) * GEN_MAX_SAMPLERS);
   gpgpu->sampler_state_b.bo = bo;
 
+  /* sampler border color state */
+  if (gpgpu->sampler_border_color_state_b.bo)
+    dri_bo_unreference(gpgpu->sampler_border_color_state_b.bo);
+  bo = dri_bo_alloc(gpgpu->drv->bufmgr,
+                    "SAMPLER_BORDER_COLOR_STATE",
+                    sizeof(gen7_sampler_border_color_t),
+                    32);
+  assert(bo);
+  dri_bo_map(bo, 1);
+  memset(bo->virtual, 0, sizeof(gen7_sampler_border_color_t));
+  gpgpu->sampler_border_color_state_b.bo = bo;
+
   /* stack */
   if (gpgpu->stack_b.bo)
     dri_bo_unreference(gpgpu->stack_b.bo);
@@ -681,7 +701,7 @@ intel_gpgpu_build_idrt(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
                     ker_bo);
 
   dri_bo_emit_reloc(bo,
-                    I915_GEM_DOMAIN_INSTRUCTION, 0,
+                    I915_GEM_DOMAIN_SAMPLER, 0,
                     0,
                     offsetof(gen6_interface_descriptor_t, desc2),
                     gpgpu->sampler_state_b.bo);
@@ -731,19 +751,7 @@ int translate_wrap_mode(uint32_t cl_address_mode, int using_nearest)
    case CLK_ADDRESS_REPEAT:
       return GEN_TEXCOORDMODE_WRAP;
    case CLK_ADDRESS_CLAMP:
-      /* GL_CLAMP is the weird mode where coordinates are clamped to
-       * [0.0, 1.0], so linear filtering of coordinates outside of
-       * [0.0, 1.0] give you half edge texel value and half border
-       * color.  The fragment shader will clamp the coordinates, and
-       * we set clamp_border here, which gets the result desired.  We
-       * just use clamp(_to_edge) for nearest, because for nearest
-       * clamping to 1.0 gives border color instead of the desired
-       * edge texels.
-       */
-      if (using_nearest)
-         return GEN_TEXCOORDMODE_CLAMP;
-      else
-         return GEN_TEXCOORDMODE_CLAMP_BORDER;
+      return GEN_TEXCOORDMODE_CLAMP_BORDER;
    case CLK_ADDRESS_CLAMP_TO_EDGE:
       return GEN_TEXCOORDMODE_CLAMP;
    case CLK_ADDRESS_MIRRORED_REPEAT:
@@ -760,7 +768,9 @@ intel_gpgpu_insert_sampler(intel_gpgpu_t *gpgpu, uint32_t index, uint32_t clk_sa
   uint32_t wrap_mode;
   gen7_sampler_state_t *sampler;
 
-  sampler = (gen7_sampler_state_t *)gpgpu->sampler_state_b.bo->virtual + index;
+  sampler = (gen7_sampler_state_t *)(gpgpu->sampler_state_b.bo->virtual)  + index;
+  memset(sampler, 0, sizeof(*sampler));
+  sampler->ss2.default_color_pointer = (gpgpu->sampler_border_color_state_b.bo->offset) >> 5;
   if ((clk_sampler & __CLK_NORMALIZED_MASK) == CLK_NORMALIZED_COORDS_FALSE)
     sampler->ss3.non_normalized_coord = 1;
   else
@@ -781,9 +791,11 @@ intel_gpgpu_insert_sampler(intel_gpgpu_t *gpgpu, uint32_t index, uint32_t clk_sa
   }
 
   wrap_mode = translate_wrap_mode(clk_sampler & __CLK_ADDRESS_MASK, using_nearest);
-  sampler->ss3.r_wrap_mode = wrap_mode;
   sampler->ss3.s_wrap_mode = wrap_mode;
-  sampler->ss3.t_wrap_mode = wrap_mode;
+  /* XXX mesa i965 driver code point out that if the surface is a 1D surface, we may need
+   * to set t_wrap_mode to GEN_TEXCOORDMODE_WRAP. */
+  sampler->ss3.t_wrap_mode = wrap_mode; 
+  sampler->ss3.r_wrap_mode = wrap_mode;
 
   sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
   sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
@@ -801,6 +813,14 @@ intel_gpgpu_insert_sampler(intel_gpgpu_t *gpgpu, uint32_t index, uint32_t clk_sa
      sampler->ss3.address_round |= GEN_ADDRESS_ROUNDING_ENABLE_U_MAG |
                                    GEN_ADDRESS_ROUNDING_ENABLE_V_MAG |
                                    GEN_ADDRESS_ROUNDING_ENABLE_R_MAG;
+
+  dri_bo_emit_reloc(gpgpu->sampler_state_b.bo,
+                    I915_GEM_DOMAIN_SAMPLER, 0,
+                    0,
+                    index * sizeof(gen7_sampler_state_t) +
+                    offsetof(gen7_sampler_state_t, ss2),
+                    gpgpu->sampler_border_color_state_b.bo);
+
 }
 
 static void
@@ -820,6 +840,7 @@ intel_gpgpu_states_setup(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
   intel_gpgpu_map_address_space(gpgpu);
   dri_bo_unmap(gpgpu->surface_heap_b.bo);
   dri_bo_unmap(gpgpu->sampler_state_b.bo);
+  dri_bo_unmap(gpgpu->sampler_border_color_state_b.bo);
 }
 
 static void
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
index ff339c5..7c27ace 100644
--- a/src/intel/intel_structs.h
+++ b/src/intel/intel_structs.h
@@ -380,6 +380,10 @@ typedef struct gen6_sampler_state
   } ss3;
 } gen6_sampler_state_t;
 
+typedef struct gen7_sampler_border_color {
+    float r,g,b,a;
+} gen7_sampler_border_color_t;
+
 typedef struct gen7_sampler_state
 {
   struct {
-- 
1.7.9.5



More information about the Beignet mailing list