[Beignet] [PATCH] runtime: set correct state for constant buffer on hsw.
Zhigang Gong
zhigang.gong at linux.intel.com
Tue Jul 29 18:11:02 PDT 2014
LGTM. Pushed with a slight change: added a _gen7 suffix to the original
intel_gpgpu_alloc_constant_buffer() routine.
Thanks.
On Tue, Jul 29, 2014 at 03:41:38PM +0800, Ruiling Song wrote:
> According to the spec, we should set I965_SURCHAN_SELECT_XXX on hsw.
> Then we can use sampler messages to read the constant surface.
>
> This fixes the regression in the unit test introduced by:
> 'GBE: Optimize constant load with sampler.'
>
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
> src/intel/intel_gpgpu.c | 41 ++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 40 insertions(+), 1 deletion(-)
>
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index 72fe8e2..37e5b33 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -723,6 +723,44 @@ intel_gpgpu_alloc_constant_buffer(intel_gpgpu_t *gpgpu, uint32_t size)
> return gpgpu->constant_b.bo;
> }
>
> +static dri_bo*
> +intel_gpgpu_alloc_constant_buffer_gen75(intel_gpgpu_t *gpgpu, uint32_t size)
> +{
> + uint32_t s = size - 1;
> + assert(size != 0);
> +
> + surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
> + gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[2];
> + memset(ss2, 0, sizeof(gen7_surface_state_t));
> + ss2->ss0.surface_type = I965_SURFACE_BUFFER;
> + ss2->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_UINT;
> + ss2->ss2.width = s & 0x7f; /* bits 6:0 of sz */
> + ss2->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
> + ss2->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */
> + ss2->ss5.cache_control = cl_gpgpu_get_cache_ctrl();
> + ss2->ss7.shader_r = I965_SURCHAN_SELECT_RED;
> + ss2->ss7.shader_g = I965_SURCHAN_SELECT_GREEN;
> + ss2->ss7.shader_b = I965_SURCHAN_SELECT_BLUE;
> + ss2->ss7.shader_a = I965_SURCHAN_SELECT_ALPHA;
> + heap->binding_table[2] = offsetof(surface_heap_t, surface) + 2* sizeof(gen7_surface_state_t);
> +
> + if(gpgpu->constant_b.bo)
> + dri_bo_unreference(gpgpu->constant_b.bo);
> + gpgpu->constant_b.bo = drm_intel_bo_alloc(gpgpu->drv->bufmgr, "CONSTANT_BUFFER", s, 64);
> + if (gpgpu->constant_b.bo == NULL)
> + return NULL;
> + ss2->ss1.base_addr = gpgpu->constant_b.bo->offset;
> + dri_bo_emit_reloc(gpgpu->aux_buf.bo,
> + I915_GEM_DOMAIN_RENDER,
> + I915_GEM_DOMAIN_RENDER,
> + 0,
> + gpgpu->aux_offset.surface_heap_offset +
> + heap->binding_table[2] +
> + offsetof(gen7_surface_state_t, ss1),
> + gpgpu->constant_b.bo);
> + return gpgpu->constant_b.bo;
> +}
> +
>
> /* Map address space with two 2GB surfaces. One surface for untyped message and
> * one surface for byte scatters / gathers. Actually the HW does not require two
> @@ -1416,7 +1454,6 @@ intel_set_gpgpu_callbacks(int device_id)
> cl_gpgpu_state_init = (cl_gpgpu_state_init_cb *) intel_gpgpu_state_init;
> cl_gpgpu_set_perf_counters = (cl_gpgpu_set_perf_counters_cb *) intel_gpgpu_set_perf_counters;
> cl_gpgpu_upload_curbes = (cl_gpgpu_upload_curbes_cb *) intel_gpgpu_upload_curbes;
> - cl_gpgpu_alloc_constant_buffer = (cl_gpgpu_alloc_constant_buffer_cb *) intel_gpgpu_alloc_constant_buffer;
> cl_gpgpu_states_setup = (cl_gpgpu_states_setup_cb *) intel_gpgpu_states_setup;
> cl_gpgpu_upload_samplers = (cl_gpgpu_upload_samplers_cb *) intel_gpgpu_upload_samplers;
> cl_gpgpu_batch_reset = (cl_gpgpu_batch_reset_cb *) intel_gpgpu_batch_reset;
> @@ -1443,6 +1480,7 @@ intel_set_gpgpu_callbacks(int device_id)
>
> if (IS_HASWELL(device_id)) {
> cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75;
> + cl_gpgpu_alloc_constant_buffer = (cl_gpgpu_alloc_constant_buffer_cb *) intel_gpgpu_alloc_constant_buffer_gen75;
> intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen75;
> cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen75;
> intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen75;
> @@ -1451,6 +1489,7 @@ intel_set_gpgpu_callbacks(int device_id)
> }
> else if (IS_IVYBRIDGE(device_id)) {
> cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
> + cl_gpgpu_alloc_constant_buffer = (cl_gpgpu_alloc_constant_buffer_cb *) intel_gpgpu_alloc_constant_buffer;
> if (IS_BAYTRAIL_T(device_id)) {
> intel_gpgpu_set_L3 = intel_gpgpu_set_L3_baytrail;
> intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_baytrail;
> --
> 1.7.10.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list