[Beignet] [PATCH] Separate flush and invalidate in function intel_gpgpu_pipe_control.

Yang Rong rong.r.yang at intel.com
Thu Dec 25 22:57:58 PST 2014


From: Luo Xionghu <xionghu.luo at intel.com>

HSW has a limitation when issuing a PIPECONTROL with RO cache invalidation:
Prior to programming a PIPECONTROL command with any of the RO cache invalidation bits set,
program a PIPECONTROL flush command with CS stall bit and HDC Flush bit set.

So two PIPECONTROL commands must be used to flush and invalidate the L3 cache on HSW.
This patch fixes some random failures in cases with very heavy DC read/write traffic on HSW.
---
 src/cl_command_queue_gen7.c |  2 +-
 src/intel/intel_gpgpu.c     | 36 +++++++++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index ba015ca..734267a 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -31,7 +31,7 @@
 #include <string.h>
 
 #define MAX_GROUP_SIZE_IN_HALFSLICE   512
-static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+128; }
+static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return 256+256; }
 
 /* "Varing" payload is the part of the curbe that changes accross threads in the
  *  same work group. Right now, it consists in local IDs and block IPs
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index c80a11b..3471be0 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -104,6 +104,9 @@ intel_gpgpu_load_curbe_buffer_t *intel_gpgpu_load_curbe_buffer = NULL;
 typedef void (intel_gpgpu_load_idrt_t)(intel_gpgpu_t *gpgpu);
 intel_gpgpu_load_idrt_t *intel_gpgpu_load_idrt = NULL;
 
+typedef void (intel_gpgpu_pipe_control_t)(intel_gpgpu_t *gpgpu);
+intel_gpgpu_pipe_control_t *intel_gpgpu_pipe_control = NULL;
+
 static void
 intel_gpgpu_sync(void *buf)
 {
@@ -542,7 +545,7 @@ intel_gpgpu_write_timestamp(intel_gpgpu_t *gpgpu, int idx)
 }
 
 static void
-intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu)
+intel_gpgpu_pipe_control_gen7(intel_gpgpu_t *gpgpu)
 {
   gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
     intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
@@ -561,6 +564,34 @@ intel_gpgpu_pipe_control(intel_gpgpu_t *gpgpu)
 }
 
 static void
+intel_gpgpu_pipe_control_gen75(intel_gpgpu_t *gpgpu)
+{
+  gen6_pipe_control_t* pc = (gen6_pipe_control_t*)
+    intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
+  memset(pc, 0, sizeof(*pc));
+  pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
+  pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
+  pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
+  pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
+  pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
+  pc->dw1.cs_stall = 1;
+  pc->dw1.dc_flush_enable = 1;
+
+  pc = (gen6_pipe_control_t*)
+    intel_batchbuffer_alloc_space(gpgpu->batch, sizeof(gen6_pipe_control_t));
+  memset(pc, 0, sizeof(*pc));
+  pc->dw0.length = SIZEOF32(gen6_pipe_control_t) - 2;
+  pc->dw0.instruction_subopcode = GEN7_PIPE_CONTROL_SUBOPCODE_3D_CONTROL;
+  pc->dw0.instruction_opcode = GEN7_PIPE_CONTROL_OPCODE_3D_CONTROL;
+  pc->dw0.instruction_pipeline = GEN7_PIPE_CONTROL_3D;
+  pc->dw0.instruction_type = GEN7_PIPE_CONTROL_INSTRUCTION_GFX;
+  pc->dw1.render_target_cache_flush_enable = 1;
+  pc->dw1.texture_cache_invalidation_enable = 1;
+  pc->dw1.cs_stall = 1;
+  ADVANCE_BATCH(gpgpu->batch);
+}
+
+static void
 intel_gpgpu_set_L3_gen7(intel_gpgpu_t *gpgpu, uint32_t use_slm)
 {
   BEGIN_BATCH(gpgpu->batch, 9);
@@ -1925,6 +1956,7 @@ intel_set_gpgpu_callbacks(int device_id)
     intel_gpgpu_load_curbe_buffer = intel_gpgpu_load_curbe_buffer_gen8;
     intel_gpgpu_load_idrt = intel_gpgpu_load_idrt_gen8;
     cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler_gen8;
+    intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7;
     return;
   }
 
@@ -1943,6 +1975,7 @@ intel_set_gpgpu_callbacks(int device_id)
     intel_gpgpu_post_action = intel_gpgpu_post_action_gen75;
     intel_gpgpu_read_ts_reg = intel_gpgpu_read_ts_reg_gen7; //HSW same as ivb
     intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen75;
+    intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen75;
   }
   else if (IS_IVYBRIDGE(device_id)) {
     cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen7;
@@ -1957,5 +1990,6 @@ intel_set_gpgpu_callbacks(int device_id)
     intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen7;
     intel_gpgpu_post_action = intel_gpgpu_post_action_gen7;
     intel_gpgpu_setup_bti = intel_gpgpu_setup_bti_gen7;
+    intel_gpgpu_pipe_control = intel_gpgpu_pipe_control_gen7;
   }
 }
-- 
1.9.1



More information about the Beignet mailing list