[Beignet] [PATCH] fix clGetKernelWorkGroupInfo built-in kernel fail.
Zhigang Gong
zhigang.gong at linux.intel.com
Wed Sep 10 20:56:56 PDT 2014
function cl_check_builtin_kernel_dimension() should be a static function
which only is used within cl_device_id.c.
The other part LGTM.
I will change it before push, no need for a new version.
On Wed, Sep 10, 2014 at 11:31:32AM +0800, xionghu.luo at intel.com wrote:
> From: Luo Xionghu <xionghu.luo at intel.com>
>
> add CL_KERNEL_GLOBAL_WORK_SIZE option for clGetKernelWorkGroupInfo.
>
> v2: should return the max global work size instead of current work size.
> This funtion need return CL_INVALID_VALUE if the device is not a custom
> device or kernel is not a built-in kernel.
> we have 3 kind of built-in kernels for 1d/2d/3d memories, the max global
> work size are decided by the dimension and memory type.
> the piglit fail is caused by calling NON built-in kernels, so need send
> patch to piglit later.
>
> Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> ---
> src/cl_device_id.c | 34 +++++++++++++++++++++++++++++
> src/cl_device_id.h | 3 +++
> src/cl_gt_device.h | 3 +++
> src/cl_kernel.h | 2 ++
> utests/CMakeLists.txt | 1 +
> utests/builtin_kernel_max_global_size.cpp | 30 +++++++++++++++++++++++++
> 6 files changed, 73 insertions(+)
> create mode 100644 utests/builtin_kernel_max_global_size.cpp
>
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c
> index a0f0c99..5b24fcb 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -515,6 +515,22 @@ cl_device_get_version(cl_device_id device, cl_int *ver)
>
> #include "cl_kernel.h"
> #include "cl_program.h"
> +LOCAL int
> +cl_check_builtin_kernel_dimension(cl_kernel kernel, cl_device_id device)
> +{
> + const char * n = cl_kernel_get_name(kernel);
> + const char * builtin_kernels_2d = "__cl_copy_image_2d_to_2d;__cl_copy_image_2d_to_buffer;__cl_copy_buffer_to_image_2d;__cl_fill_image_2d;__cl_fill_image_2d_array;";
> + const char * builtin_kernels_3d = "__cl_copy_image_3d_to_2d;__cl_copy_image_2d_to_3d;__cl_copy_image_3d_to_3d;__cl_copy_image_3d_to_buffer;__cl_copy_buffer_to_image_3d;__cl_fill_image_3d";
> + if (!strstr(device->built_in_kernels, n)){
> + return 0;
> + }else if(strstr(builtin_kernels_2d, n)){
> + return 2;
> + }else if(strstr(builtin_kernels_3d, n)){
> + return 3;
> + }else
> + return 1;
> +
> +}
>
> LOCAL size_t
> cl_get_kernel_max_wg_sz(cl_kernel kernel)
> @@ -543,6 +559,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
> size_t* param_value_size_ret)
> {
> int err = CL_SUCCESS;
> + int dimension = 0;
> if (UNLIKELY(device != &intel_ivb_gt1_device &&
> device != &intel_ivb_gt2_device &&
> device != &intel_baytrail_t_device &&
> @@ -573,6 +590,23 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
> }
> DECL_FIELD(COMPILE_WORK_GROUP_SIZE, kernel->compile_wg_sz)
> DECL_FIELD(PRIVATE_MEM_SIZE, kernel->stack_size)
> + case CL_KERNEL_GLOBAL_WORK_SIZE:
> + dimension = cl_check_builtin_kernel_dimension(kernel, device);
> + if ( !dimension ) return CL_INVALID_VALUE;
> + if (param_value_size_ret != NULL)
> + *param_value_size_ret = sizeof(device->max_1d_global_work_sizes);
> + if (param_value) {
> + if (dimension == 1) {
> + memcpy(param_value, device->max_1d_global_work_sizes, sizeof(device->max_1d_global_work_sizes));
> + }else if(dimension == 2){
> + memcpy(param_value, device->max_2d_global_work_sizes, sizeof(device->max_2d_global_work_sizes));
> + }else if(dimension == 3){
> + memcpy(param_value, device->max_3d_global_work_sizes, sizeof(device->max_3d_global_work_sizes));
> + }else
> + return CL_INVALID_VALUE;
> +
> + return CL_SUCCESS;
> + }
> default:
> return CL_INVALID_VALUE;
> };
> diff --git a/src/cl_device_id.h b/src/cl_device_id.h
> index c4f8227..31bce47 100644
> --- a/src/cl_device_id.h
> +++ b/src/cl_device_id.h
> @@ -30,6 +30,9 @@ struct _cl_device_id {
> cl_uint max_work_item_dimensions; // should be 3.
> size_t max_work_item_sizes[3]; // equal to maximum work group size.
> size_t max_work_group_size; // maximum work group size under simd16 mode.
> + size_t max_1d_global_work_sizes[3]; // maximum 1d global work size for builtin kernels.
> + size_t max_2d_global_work_sizes[3]; // maximum 2d global work size for builtin kernels.
> + size_t max_3d_global_work_sizes[3]; // maximum 3d global work size for builtin kernels.
> cl_uint preferred_vector_width_char;
> cl_uint preferred_vector_width_short;
> cl_uint preferred_vector_width_int;
> diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
> index 33ef1f0..3cd54eb 100644
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -21,6 +21,9 @@
> .device_type = CL_DEVICE_TYPE_GPU,
> .vendor_id = 0, /* == device_id (set when requested) */
> .max_work_item_dimensions = 3,
> +.max_1d_global_work_sizes = {1024 * 1024 * 256, 1, 1},
> +.max_2d_global_work_sizes = {8192, 8192, 1},
> +.max_3d_global_work_sizes = {8192, 8192, 2048},
> .preferred_vector_width_char = 8,
> .preferred_vector_width_short = 8,
> .preferred_vector_width_int = 4,
> diff --git a/src/cl_kernel.h b/src/cl_kernel.h
> index f4ed8d3..85a997d 100644
> --- a/src/cl_kernel.h
> +++ b/src/cl_kernel.h
> @@ -59,6 +59,8 @@ struct _cl_kernel {
> cl_ulong local_mem_sz; /* local memory size specified in kernel args. */
> size_t compile_wg_sz[3]; /* Required workgroup size by __attribute__((reqd_work_gro
> up_size(X, Y, Z))) qualifier.*/
> + size_t global_work_sz[3]; /* maximum global size that can be used to execute a kernel
> + (i.e. global_work_size argument to clEnqueueNDRangeKernel.)*/
> size_t stack_size; /* stack size per work item. */
> cl_argument *args; /* To track argument setting */
> uint32_t arg_n:31; /* Number of arguments */
> diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
> index b30e6f9..034f112 100644
> --- a/utests/CMakeLists.txt
> +++ b/utests/CMakeLists.txt
> @@ -181,6 +181,7 @@ set (utests_sources
> test_printf.cpp
> enqueue_fill_buf.cpp
> enqueue_built_in_kernels.cpp
> + builtin_kernel_max_global_size.cpp
> image_1D_buffer.cpp
> compare_image_2d_and_1d_array.cpp
> compiler_constant_expr.cpp
> diff --git a/utests/builtin_kernel_max_global_size.cpp b/utests/builtin_kernel_max_global_size.cpp
> new file mode 100644
> index 0000000..c777564
> --- /dev/null
> +++ b/utests/builtin_kernel_max_global_size.cpp
> @@ -0,0 +1,30 @@
> +#include "utest_helper.hpp"
> +
> +void builtin_kernel_max_global_size(void)
> +{
> + char* built_in_kernel_names;
> + size_t built_in_kernels_size;
> + cl_int err = CL_SUCCESS;
> + size_t ret_sz;
> +
> +
> + OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, 0, 0, &built_in_kernels_size);
> + built_in_kernel_names = (char* )malloc(built_in_kernels_size * sizeof(char) );
> + OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_BUILT_IN_KERNELS, built_in_kernels_size, (void*)built_in_kernel_names, &ret_sz);
> + OCL_ASSERT(ret_sz == built_in_kernels_size);
> + cl_program built_in_prog = clCreateProgramWithBuiltInKernels(ctx, 1, &device, built_in_kernel_names, &err);
> + OCL_ASSERT(built_in_prog != NULL);
> + cl_kernel builtin_kernel_1d = clCreateKernel(built_in_prog, "__cl_copy_region_unalign_src_offset", &err);
> + OCL_ASSERT(builtin_kernel_1d != NULL);
> + size_t param_value_size;
> + void* param_value;
> + clGetKernelWorkGroupInfo(builtin_kernel_1d, device, CL_KERNEL_GLOBAL_WORK_SIZE, 0, NULL, ¶m_value_size);
> + param_value = malloc(param_value_size);
> + clGetKernelWorkGroupInfo(builtin_kernel_1d, device, CL_KERNEL_GLOBAL_WORK_SIZE, param_value_size, param_value, 0);
> + OCL_ASSERT(*(size_t*)param_value == 256 * 1024 *1024);
> + clReleaseKernel(builtin_kernel_1d);
> + clReleaseProgram(built_in_prog);
> + free(param_value);
> +}
> +
> +MAKE_UTEST_FROM_FUNCTION(builtin_kernel_max_global_size);
> --
> 1.7.9.5
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list