[Beignet] [PATCH] Separate flush and invalidate in function intel_gpgpu_pipe_control.

Zhigang Gong zhigang.gong at linux.intel.com
Sun Dec 28 18:21:52 PST 2014


LGTM, just pushed, thanks.

On Fri, Dec 26, 2014 at 02:57:58PM +0800, Yang Rong wrote:
> From: Luo Xionghu <xionghu.luo at intel.com>
> 
> HSW has a limitation when issuing a PIPECONTROL with RO Cache Invalidation:
> Prior to programming a PIPECONTROL command with any of the RO cache invalidation bit set,
> program a PIPECONTROL flush command with CS stall bit and HDC Flush bit set.
> 
> So two PIPECONTROL commands must be used to flush and invalidate the L3 cache on HSW.
> This patch fixes some random failures in cases with very heavy DC read/write on HSW.
> ---
>  src/cl_command_queue_gen7.c |  2 +-
>  src/intel/intel_gpgpu.c     | 36 +++++++++++++++++++++++++++++++++++-
>  2 files changed, 36 insertions(+), 2 deletions(-)
> 
> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
> index ba015ca..734267a 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -31,7 +31,7 @@
>  #include <string.h>
>  
>  #define MAX_GROUP_SIZE_IN_HALFSLICE   512
> -static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+128; }
> +static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+256; }
>  
>  /* "Varing" payload is the part of the curbe that changes accross threads in the
>   *  same work group. Right now, it consists in local IDs and block IPs
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index c80a11b..3471be0 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -104,6 +104,9 @@ intel_gpgpu_load_curbe_buffer_t *intel_gpgpu_load_curbe_buffer = NULL;
>  typedef void (intel_gpgpu_load_idrt_t)(intel_gpgpu_t *gpgpu);
>  intel_gpgpu_load_idrt_t *intel_gpgpu_load_idrt = NULL;
>  
> +typedef void (intel_gpgpu_pipe_control_t)(intel_gpgpu_t *gpgpu);
> +intel_gpgpu_pipe_control_t *intel_gpgpu_pipe_control = NULL;
> +
>  static void
>  intel_gpgpu_sync(void *buf)
>  {
> @@ -542,7 +545,7 @@ intel_gpgpu_write_timestamp(intel_gpgpu_t *gpgpu, int idx)
>  }
>  
>  static void
> -intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu)
> +intel_gpgpu_pipe_control_gen7(intel_gpgpu_t *gpgpu)
>  {
>    gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
>      intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
> @@ -561,6 +564,34 @@ intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu)
>  }
>  
>  static void
> +intel_gpgpu_pipe_control_gen75(intel_gpgpu_t *gpgpu)
> +{
> +  gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
> +    intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
> +  memset(pc, 0, sizeof(*pc));
> +  pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
> +  pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
> +  pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
> +  pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
> +  pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
> +  pc->dw1.cs_stall = 1;
> +  pc->dw1.dc_flush_enable = 1;
> +
> +  pc = (gen6_pipe_control_t*)
> +    intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
> +  memset(pc, 0, sizeof(*pc));
> +  pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
> +  pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
> +  pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
> +  pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
> +  pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
> +  pc->dw1.render_target_cache_flush_enable = 1;
> +  pc->dw1.texture_cache_invalidation_enable = 1;
> +  pc->dw1.cs_stall = 1;
> +  ADVANCE_BATCH(gpgpu->batch);
> +}
> +
> +static void
>  intel_gpgpu_set_L3_gen7(intel_gpgpu_t *gpgpu, uint32_t use_slm)
>  {
>    BEGIN_BATCH(gpgpu->batch, 9);
> @@ -1925,6 +1956,7 @@ intel_set_gpgpu_callbacks(int device_id)
>      intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8;
>      intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8;
>      cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8;
> +    intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7;
>      return;
>    }
>  
> @@ -1943,6 +1975,7 @@ intel_set_gpgpu_callbacks(int device_id)
>      intel_gpgpu_post_action = intel_gpgpu_post_action_gen75;
>      intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; //HSW same as ivb
>      intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen75;
> +    intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen75;
>    }
>    else if (IS_IVYBRIDGE(device_id)) {
>      cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
> @@ -1957,5 +1990,6 @@ intel_set_gpgpu_callbacks(int device_id)
>      intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen7;
>      intel_gpgpu_post_action = intel_gpgpu_post_action_gen7;
>      intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen7;
> +    intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7;
>    }
>  }
> -- 
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list