[Beignet] [PATCH] CL: make the scratch size as a device resource attribute.
Song, Ruiling
ruiling.song at intel.com
Mon Feb 17 02:44:37 CET 2014
Yes, you are right: putting it into the device attributes is a good idea. The patch LGTM.
-----Original Message-----
From: beignet-bounces at lists.freedesktop.org [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Zhigang Gong
Sent: Friday, February 14, 2014 4:29 PM
To: beignet at lists.freedesktop.org
Cc: Gong, Zhigang
Subject: [Beignet] [PATCH] CL: make the scratch size as a device resource attribute.
Actually, the scratch size is much like the local memory size: both should be device-dependent information.
This patch puts the scratch memory size into the device attribute structure. When a kernel needs more than the maximum scratch memory, we now return an out-of-resources error rather than triggering an assertion.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
src/cl_command_queue_gen7.c | 21 ++++++++++-----------
src/cl_device_id.h | 1 +
src/cl_gen75_device.h | 1 +
src/cl_gen7_device.h | 1 +
4 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 83fe21d..348211c 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -255,15 +255,6 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
cl_gpgpu_set_stack(gpgpu, offset, stack_sz, cc_llc_l3); }
-static void
-cl_setup_scratch(cl_gpgpu gpgpu, cl_kernel ker) -{
- int32_t scratch_sz = gbe_kernel_get_scratch_size(ker->opaque);
- /* Per HW Spec, it only allows 12KB scratch memory per HW thread now */
- assert(scratch_sz <= 12*1024);
- cl_gpgpu_set_scratch(gpgpu, scratch_sz); -}
-
LOCAL cl_int
cl_command_queue_ND_range_gen7(cl_command_queue queue,
cl_kernel ker, @@ -279,6 +270,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
const uint32_t simd_sz = cl_kernel_get_simd_width(ker);
size_t i, batch_sz = 0u, local_sz = 0u;
size_t cst_sz = ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque);
+ int32_t scratch_sz = gbe_kernel_get_scratch_size(ker->opaque);
size_t thread_n = 0u;
cl_int err = CL_SUCCESS;
@@ -295,11 +287,17 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz;
kernel.curbe_sz = cst_sz;
+ if (scratch_sz > ker->program->ctx->device->scratch_mem_size) {
+ fprintf(stderr, "Beignet: Out of scratch memory %d.\n", scratch_sz);
+ return CL_OUT_OF_RESOURCES;
+ }
/* Curbe step 1: fill the constant urb buffer data shared by all threads */
if (ker->curbe) {
kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off, global_wk_sz, local_wk_sz, thread_n);
- if (kernel.slm_sz > ker->program->ctx->device->local_mem_size)
+ if (kernel.slm_sz > ker->program->ctx->device->local_mem_size) {
+ fprintf(stderr, "Beignet: Out of shared local memory %d.\n",
+ kernel.slm_sz);
return CL_OUT_OF_RESOURCES;
+ }
}
/* Setup the kernel */
@@ -315,7 +313,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
/* Bind all samplers */
cl_gpgpu_bind_sampler(gpgpu, ker->samplers, ker->sampler_sz);
- cl_setup_scratch(gpgpu, ker);
+ cl_gpgpu_set_scratch(gpgpu, scratch_sz);
+
/* Bind a stack if needed */
cl_bind_stack(gpgpu, ker);
diff --git a/src/cl_device_id.h b/src/cl_device_id.h index 4ece26c..5f7c9fe 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -68,6 +68,7 @@ struct _cl_device_id {
cl_uint max_constant_args;
cl_device_local_mem_type local_mem_type;
cl_ulong local_mem_size;
+ cl_ulong scratch_mem_size;
cl_bool error_correction_support;
cl_bool host_unified_memory;
size_t profiling_timer_resolution;
diff --git a/src/cl_gen75_device.h b/src/cl_gen75_device.h index 7bf662e..682ee06 100644
--- a/src/cl_gen75_device.h
+++ b/src/cl_gen75_device.h
@@ -24,6 +24,7 @@
.global_mem_cache_size = 8 << 10, /* XXX */ .local_mem_type = CL_GLOBAL, .local_mem_size = 64 << 10,
+.scratch_mem_size = 2 << 20,
#include "cl_gt_device.h"
diff --git a/src/cl_gen7_device.h b/src/cl_gen7_device.h index e198d6f..69cc0b9 100644
--- a/src/cl_gen7_device.h
+++ b/src/cl_gen7_device.h
@@ -23,6 +23,7 @@
.global_mem_cache_size = 8 << 10, /* XXX */ .local_mem_type = CL_GLOBAL, .local_mem_size = 64 << 10,
+.scratch_mem_size = 12 << 10,
#include "cl_gt_device.h"
--
1.7.9.5
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list