[Beignet] [PATCH] CL: make the scratch size as a device resource attribute.

Song, Ruiling ruiling.song at intel.com
Mon Feb 17 02:44:37 CET 2014


Yes, you are right; putting it into the device attributes is a good idea. The patch LGTM.

-----Original Message-----
From: beignet-bounces at lists.freedesktop.org [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Zhigang Gong
Sent: Friday, February 14, 2014 4:29 PM
To: beignet at lists.freedesktop.org
Cc: Gong, Zhigang
Subject: [Beignet] [PATCH] CL: make the scratch size as a device resource attribute.

Actually, the scratch size is much like the local memory size: it should be device-dependent information.

This patch puts the scratch memory size into the device attribute structure. When a kernel needs more than the maximum scratch memory, we now return an out-of-resources error rather than triggering an assertion.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 src/cl_command_queue_gen7.c |   21 ++++++++++-----------
 src/cl_device_id.h          |    1 +
 src/cl_gen75_device.h       |    1 +
 src/cl_gen7_device.h        |    1 +
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 83fe21d..348211c 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -255,15 +255,6 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
   cl_gpgpu_set_stack(gpgpu, offset, stack_sz, cc_llc_l3);
 }
 
-static void
-cl_setup_scratch(cl_gpgpu gpgpu, cl_kernel ker)
-{
-  int32_t scratch_sz = gbe_kernel_get_scratch_size(ker->opaque);
-  /* Per HW Spec, it only allows 12KB scratch memory per HW thread now */
-  assert(scratch_sz <= 12*1024);
-  cl_gpgpu_set_scratch(gpgpu, scratch_sz);
-}
-
 LOCAL cl_int
 cl_command_queue_ND_range_gen7(cl_command_queue queue,
                                cl_kernel ker,
@@ -279,6 +270,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   const uint32_t simd_sz = cl_kernel_get_simd_width(ker);
   size_t i, batch_sz = 0u, local_sz = 0u;
   size_t cst_sz = ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque);
+  int32_t scratch_sz = gbe_kernel_get_scratch_size(ker->opaque);
   size_t thread_n = 0u;
   cl_int err = CL_SUCCESS;
 
@@ -295,11 +287,17 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz;
   kernel.curbe_sz = cst_sz;
 
+  if (scratch_sz > ker->program->ctx->device->scratch_mem_size) {
+    fprintf(stderr, "Beignet: Out of scratch memory %d.\n", scratch_sz);
+    return CL_OUT_OF_RESOURCES;
+  }
   /* Curbe step 1: fill the constant urb buffer data shared by all threads */
   if (ker->curbe) {
     kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off, global_wk_sz, local_wk_sz, thread_n);
-    if (kernel.slm_sz > ker->program->ctx->device->local_mem_size)
+    if (kernel.slm_sz > ker->program->ctx->device->local_mem_size) {
+      fprintf(stderr, "Beignet: Out of shared local memory %d.\n", kernel.slm_sz);
       return CL_OUT_OF_RESOURCES;
+    }
   }
 
   /* Setup the kernel */
@@ -315,7 +313,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   /* Bind all samplers */
   cl_gpgpu_bind_sampler(gpgpu, ker->samplers, ker->sampler_sz);
 
-  cl_setup_scratch(gpgpu, ker);
+  cl_gpgpu_set_scratch(gpgpu, scratch_sz);
+
   /* Bind a stack if needed */
   cl_bind_stack(gpgpu, ker);
 
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 4ece26c..5f7c9fe 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -68,6 +68,7 @@ struct _cl_device_id {
   cl_uint  max_constant_args;
   cl_device_local_mem_type local_mem_type;
   cl_ulong local_mem_size;
+  cl_ulong scratch_mem_size;
   cl_bool  error_correction_support;
   cl_bool  host_unified_memory;
   size_t   profiling_timer_resolution;
diff --git a/src/cl_gen75_device.h b/src/cl_gen75_device.h
index 7bf662e..682ee06 100644
--- a/src/cl_gen75_device.h
+++ b/src/cl_gen75_device.h
@@ -24,6 +24,7 @@
 .global_mem_cache_size = 8 << 10, /* XXX */
 .local_mem_type = CL_GLOBAL,
 .local_mem_size = 64 << 10,
+.scratch_mem_size = 2 << 20,
 
 #include "cl_gt_device.h"
 
diff --git a/src/cl_gen7_device.h b/src/cl_gen7_device.h
index e198d6f..69cc0b9 100644
--- a/src/cl_gen7_device.h
+++ b/src/cl_gen7_device.h
@@ -23,6 +23,7 @@
 .global_mem_cache_size = 8 << 10, /* XXX */
 .local_mem_type = CL_GLOBAL,
 .local_mem_size = 64 << 10,
+.scratch_mem_size = 12 << 10,
 
 #include "cl_gt_device.h"
 
--
1.7.9.5

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list