[Beignet] [PATCH 2/8] BDW: enable SLM in BDW.

Yang Rong rong.r.yang at intel.com
Sun Sep 28 22:38:31 PDT 2014


On BDW, the SLM control register changes to L3CNTLREG, at offset 0x7034.

Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 src/intel/intel_defines.h |  2 ++
 src/intel/intel_gpgpu.c   | 51 +++++++++++++++++++++++++++++------------------
 2 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index b424233..496a9eb 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -304,6 +304,8 @@
 #define GEN7_L3_CNTL_REG2_ADDRESS_OFFSET         (0xB020)
 #define GEN7_L3_CNTL_REG3_ADDRESS_OFFSET         (0xB024)
 
+#define GEN8_L3_CNTL_REG_ADDRESS_OFFSET          (0x7034)
+
 // To issue pipe controls (reset L3 / SLM or stall)
 #define GEN7_PIPE_CONTROL_MEDIA 0x2
 #define GEN7_PIPE_CONTROL_3D 0x3
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index b4d0846..fa7333e 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -586,6 +586,22 @@ intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)
 }
 
 static void
+intel_gpgpu_set_L3_gen8(intel_gpgpu_t *gpgpu, uint32_t use_slm)
+{
+  BEGIN_BATCH(gpgpu->batch, 3);
+  OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2 */
+  OUT_BATCH(gpgpu->batch, GEN8_L3_CNTL_REG_ADDRESS_OFFSET);
+  if(use_slm)
+    OUT_BATCH(gpgpu->batch, 0x60000121);  /* {SLM=192, URB=128, Rest=384} */
+  else
+    OUT_BATCH(gpgpu->batch, 0x80000140);  /* {SLM=0, URB=256, Rest=512, Sum=768} */
+
+  //if(use_slm)
+  //  gpgpu->batch->enable_slm = 1;
+  intel_gpgpu_pipe_control(gpgpu);
+}
+
+static void
 intel_gpgpu_batch_start(intel_gpgpu_t *gpgpu)
 {
   intel_batchbuffer_start_atomic(gpgpu->batch, 256);
@@ -1122,25 +1138,22 @@ intel_gpgpu_build_idrt_gen8(intel_gpgpu_t *gpgpu, cl_gpgpu_kernel *kernel)
   desc->desc5.curbe_read_offset = 0;
 
   /* Barriers / SLM are automatically handled on Gen7+ */
-  if (gpgpu->drv->gen_ver == 7 || gpgpu->drv->gen_ver == 75) {
-    size_t slm_sz = kernel->slm_sz;
-    desc->desc6.group_threads_num = kernel->use_slm ? kernel->thread_n : 0;
-    desc->desc6.barrier_enable = kernel->use_slm;
-    if (slm_sz <= 4*KB)
-      slm_sz = 4*KB;
-    else if (slm_sz <= 8*KB)
-      slm_sz = 8*KB;
-    else if (slm_sz <= 16*KB)
-      slm_sz = 16*KB;
-    else if (slm_sz <= 32*KB)
-      slm_sz = 32*KB;
-    else
-      slm_sz = 64*KB;
-    slm_sz = slm_sz >> 12;
-    desc->desc6.slm_sz = slm_sz;
-  }
+  size_t slm_sz = kernel->slm_sz;
+  /* Per the bspec, group_threads_num must not be set to 0 even when the barrier is disabled. */
+  desc->desc6.group_threads_num = kernel->thread_n;
+  desc->desc6.barrier_enable = kernel->use_slm;
+  if (slm_sz <= 4*KB)
+    slm_sz = 4*KB;
+  else if (slm_sz <= 8*KB)
+    slm_sz = 8*KB;
+  else if (slm_sz <= 16*KB)
+    slm_sz = 16*KB;
+  else if (slm_sz <= 32*KB)
+    slm_sz = 32*KB;
   else
-    desc->desc6.group_threads_num = kernel->barrierID; /* BarrierID on GEN6 */
+    slm_sz = 64*KB;
+  slm_sz = slm_sz >> 12;
+  desc->desc6.slm_sz = slm_sz;
 }
 
 static int
@@ -1651,7 +1664,7 @@ intel_set_gpgpu_callbacks(int device_id)
 
   if (IS_BROADWELL(device_id)) {
     cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75;
-    intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen75;
+    intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8;
     cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen8;
     intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen75;
     intel_gpgpu_post_action = intel_gpgpu_post_action_gen75;
-- 
1.8.3.2



More information about the Beignet mailing list