[Beignet] [PATCH] runtime: set correct state for constant buffer on hsw.

Zhigang Gong zhigang.gong at linux.intel.com
Tue Jul 29 18:11:02 PDT 2014


LGTM. Pushed with a slight change: added a _gen7 suffix to the original
intel_gpgpu_alloc_constant_buffer() routine.

Thanks.

On Tue, Jul 29, 2014 at 03:41:38PM +0800, Ruiling Song wrote:
> According to the spec, I965_SURCHAN_SELECT_XXX should be set on hsw.
> Then we can use the sampler message to read the constant surface.
> 
> This fixes the regression in the unit tests introduced by:
> 'GBE: Optimize constant load with sampler.'
> 
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
>  src/intel/intel_gpgpu.c |   41 ++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 40 insertions(+), 1 deletion(-)
> 
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index 72fe8e2..37e5b33 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -723,6 +723,44 @@ intel_gpgpu_alloc_constant_buffer(intel_gpgpu_t *gpgpu, uint32_t size)
>    return gpgpu->constant_b.bo;
>  }
>  
> +static dri_bo*
> +intel_gpgpu_alloc_constant_buffer_gen75(intel_gpgpu_t *gpgpu, uint32_t size)
> +{
> +  uint32_t s = size - 1;
> +  assert(size != 0);
> +
> +  surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
> +  gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[2];
> +  memset(ss2, 0, sizeof(gen7_surface_state_t));
> +  ss2->ss0.surface_type = I965_SURFACE_BUFFER;
> +  ss2->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_UINT;
> +  ss2->ss2.width  = s & 0x7f;            /* bits 6:0 of sz */
> +  ss2->ss2.height = (s >> 7) & 0x3fff;   /* bits 20:7 of sz */
> +  ss2->ss3.depth  = (s >> 21) & 0x3ff;   /* bits 30:21 of sz */
> +  ss2->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
> +  ss2->ss7.shader_r = I965_SURCHAN_SELECT_RED;
> +  ss2->ss7.shader_g = I965_SURCHAN_SELECT_GREEN;
> +  ss2->ss7.shader_b = I965_SURCHAN_SELECT_BLUE;
> +  ss2->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA;
> +  heap->binding_table[2] = offsetof(surface_heap_t, surface) + 2* sizeof(gen7_surface_state_t);
> +
> +  if(gpgpu->constant_b.bo)
> +    dri_bo_unreference(gpgpu->constant_b.bo);
> +  gpgpu->constant_b.bo = drm_intel_bo_alloc(gpgpu->drv->bufmgr, "CONSTANT_BUFFER", s, 64);
> +  if (gpgpu->constant_b.bo == NULL)
> +    return NULL;
> +  ss2->ss1.base_addr = gpgpu->constant_b.bo->offset;
> +  dri_bo_emit_reloc(gpgpu->aux_buf.bo,
> +                      I915_GEM_DOMAIN_RENDER,
> +                      I915_GEM_DOMAIN_RENDER,
> +                      0,
> +                      gpgpu->aux_offset.surface_heap_offset +
> +                      heap->binding_table[2] +
> +                      offsetof(gen7_surface_state_t, ss1),
> +                      gpgpu->constant_b.bo);
> +  return gpgpu->constant_b.bo;
> +}
> +
>  
>  /* Map address space with two 2GB surfaces. One surface for untyped message and
>   * one surface for byte scatters / gathers. Actually the HW does not require two
> @@ -1416,7 +1454,6 @@ intel_set_gpgpu_callbacks(int device_id)
>    cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init;
>    cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) intel_gpgpu_set_perf_counters;
>    cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes;
> -  cl_gpgpu_alloc_constant_buffer  = (cl_gpgpu_alloc_constant_buffer_cb *) intel_gpgpu_alloc_constant_buffer;
>    cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) intel_gpgpu_states_setup;
>    cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) intel_gpgpu_upload_samplers;
>    cl_gpgpu_batch_reset = (cl_gpgpu_batch_reset_cb *) intel_gpgpu_batch_reset;
> @@ -1443,6 +1480,7 @@ intel_set_gpgpu_callbacks(int device_id)
>  
>    if (IS_HASWELL(device_id)) {
>      cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75;
> +    cl_gpgpu_alloc_constant_buffer  = (cl_gpgpu_alloc_constant_buffer_cb *) intel_gpgpu_alloc_constant_buffer_gen75;
>      intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen75;
>      cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen75;
>      intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen75;
> @@ -1451,6 +1489,7 @@ intel_set_gpgpu_callbacks(int device_id)
>    }
>    else if (IS_IVYBRIDGE(device_id)) {
>      cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
> +    cl_gpgpu_alloc_constant_buffer  = (cl_gpgpu_alloc_constant_buffer_cb *) intel_gpgpu_alloc_constant_buffer;
>      if (IS_BAYTRAIL_T(device_id)) {
>        intel_gpgpu_set_L3 = intel_gpgpu_set_L3_baytrail;
>        intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_baytrail;
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list