[Beignet] [PATCH V2 15/17] Utest: Add workgroup broadcast tests
Xiuli Pan
xiuli.pan at intel.com
Wed Apr 13 05:52:56 UTC 2016
Hi Grigore,
I found what is wrong with broadcast 2D and 3D, see inline comments.
You are useing int for the input date!
Thanks
Xiuli
On Mon, Apr 11, 2016 at 05:40:56PM +0300, Grigore Lupescu wrote:
> From: Grigore Lupescu <grigore.lupescu at intel.com>
>
> Added the following unit tests:
> compiler_workgroup_broadcast_1D_int
> compiler_workgroup_broadcast_1D_long
> compiler_workgroup_broadcast_2D_int
> compiler_workgroup_broadcast_2D_long
> compiler_workgroup_broadcast_3D_int
> compiler_workgroup_broadcast_3D_long
>
> Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
> ---
> kernels/compiler_workgroup_broadcast.cl | 128 ++++++++++++-
> utests/compiler_workgroup_broadcast.cpp | 319 +++++++++++++++++++++++++++++---
> 2 files changed, 410 insertions(+), 37 deletions(-)
>
> diff --git a/kernels/compiler_workgroup_broadcast.cl b/kernels/compiler_workgroup_broadcast.cl
> index 4df74e3..47ff0b7 100644
> --- a/kernels/compiler_workgroup_broadcast.cl
> +++ b/kernels/compiler_workgroup_broadcast.cl
> @@ -1,9 +1,121 @@
> -kernel void compiler_workgroup_broadcast(global uint *src, global uint *dst) {
> - uint val = src[get_group_id(0)*(get_local_size(1) * get_local_size(0))
> - + get_group_id(1)*(get_local_size(1) * get_local_size(0) * get_num_groups(0))
> - + get_local_id(1)* get_local_size(0) + get_local_id(0)];
> - uint bv = work_group_broadcast(val, 8, 3);
> - dst[get_group_id(0)*(get_local_size(1) * get_local_size(0))
> - + get_group_id(1)*(get_local_size(1) * get_local_size(0) * get_num_groups(0))
> - + get_local_id(1)* get_local_size(0) + get_local_id(0)] = bv;
> +/*
> + * Workgroup broadcast 1D functions
> + */
> +
> +kernel void compiler_workgroup_broadcast_1D_int(global int *src,
> + global int *dst,
> + uint wg_local_x,
> + uint wg_local_y,
> + uint wg_local_z)
> +{
> + uint offset = 0;
> + uint index = offset + get_global_id(0);
> +
> + int val = src[index];
> + int broadcast_val = work_group_broadcast(val,
> + wg_local_x);
> + dst[index] = broadcast_val;
> +}
> +
> +kernel void compiler_workgroup_broadcast_1D_long(global long *src,
> + global long *dst,
> + uint wg_local_x,
> + uint wg_local_y,
> + uint wg_local_z)
> +{
> + uint offset = 0;
> + uint index = offset + get_global_id(0);
> +
> + long val = src[index];
> + long broadcast_val = work_group_broadcast(val,
> + wg_local_x);
> + dst[index] = broadcast_val;
> +}
> +
> +/*
> + * Workgroup broadcast 2D functions
> + */
> +kernel void compiler_workgroup_broadcast_2D_int(global int *src,
> + global int *dst,
> + uint wg_local_x,
> + uint wg_local_y,
> + uint wg_local_z)
> +{
> + uint lsize = get_local_size(0) * get_local_size(1);
> + uint offset = get_group_id(0) * lsize +
> + get_group_id(1) * get_num_groups(0) * lsize;
> + uint index = offset + get_local_id(0) +
> + get_local_id(1) * get_local_size(0);
> +
> + int val = src[index];
> + int broadcast_val = work_group_broadcast(val,
> + wg_local_x,
> + wg_local_y);
> + dst[index] = broadcast_val;
> +}
> +
> +kernel void compiler_workgroup_broadcast_2D_long(global long *src,
> + global long *dst,
> + uint wg_local_x,
> + uint wg_local_y,
> + uint wg_local_z)
> +{
> + uint lsize = get_local_size(0) * get_local_size(1);
> + uint offset = get_group_id(0) * lsize +
> + get_group_id(1) * get_num_groups(0) * lsize;
> + uint index = offset + get_local_id(0) +
> + get_local_id(1) * get_local_size(0);
> +
> + long val = src[index];
> + long broadcast_val = work_group_broadcast(val,
> + wg_local_x,
> + wg_local_y);
> + dst[index] = broadcast_val;
> +}
> +
> +/*
> + * Workgroup broadcast 3D functions
> + */
> +kernel void compiler_workgroup_broadcast_3D_int(global int *src,
> + global int *dst,
> + uint wg_local_x,
> + uint wg_local_y,
> + uint wg_local_z)
> +{
> + uint lsize = get_local_size(0) * get_local_size(1) * get_local_size(2);
> + uint offset = get_group_id(0) * lsize +
> + get_group_id(1) * get_num_groups(0) * lsize +
> + get_group_id(2) * get_num_groups(1) * get_num_groups(0) * lsize;
> + uint index = offset + get_local_id(0) +
> + get_local_id(1) * get_local_size(0) +
> + get_local_id(2) * get_local_size(1) * get_local_size(0);
> +
> + int val = src[index];
> + int broadcast_val = work_group_broadcast(val,
> + wg_local_x,
> + wg_local_y,
> + wg_local_z);
> + dst[index] = broadcast_val;
> +}
> +
> +kernel void compiler_workgroup_broadcast_3D_long(global long *src,
> + global long *dst,
> + uint wg_local_x,
> + uint wg_local_y,
> + uint wg_local_z)
> +{
> + uint lsize = get_local_size(0) * get_local_size(1) * get_local_size(2);
> + uint offset = get_group_id(0) * lsize +
> + get_group_id(1) * get_num_groups(0) * lsize +
> + get_group_id(2) * get_num_groups(0) * get_num_groups(1) * lsize;
> + uint index = offset + get_local_id(0) +
> + get_local_id(1) * get_local_size(0) +
> + get_local_id(2) * get_local_size(1) * get_local_size(0);
> +
> + long val = src[index];
> + long broadcast_val = work_group_broadcast(val,
> + wg_local_x,
> + wg_local_y,
> + wg_local_z);
> + dst[index] = broadcast_val;
> }
> diff --git a/utests/compiler_workgroup_broadcast.cpp b/utests/compiler_workgroup_broadcast.cpp
> index d45e5d8..aff95ff 100644
> --- a/utests/compiler_workgroup_broadcast.cpp
> +++ b/utests/compiler_workgroup_broadcast.cpp
> @@ -3,45 +3,306 @@
> #include <iostream>
> #include "utest_helper.hpp"
>
> -void compiler_workgroup_broadcast(void)
> +using namespace std;
> +
> +/* set to 1 for debug, output of input-expected data */
> +#define DEBUG_STDOUT 0
> +
> +/* NDRANGE */
> +#define WG_GLOBAL_SIZE_X 16
> +#define WG_GLOBAL_SIZE_Y 4
> +#define WG_GLOBAL_SIZE_Z 4
> +
> +#define WG_LOCAL_SIZE_X 16
> +#define WG_LOCAL_SIZE_Y 2
> +#define WG_LOCAL_SIZE_Z 2
> +
> +/* TODO debug bellow case, lid2 always stays 0, instead of 0 and 1
> + *
> + * #define WG_GLOBAL_SIZE_X 16
> + * #define WG_GLOBAL_SIZE_Y 1
> + * #define WG_GLOBAL_SIZE_Z 4
> + *
> + * #define WG_LOCAL_SIZE_X 16
> + * #define WG_LOCAL_SIZE_Y 1
> + * #define WG_LOCAL_SIZE_Z 2
> + */
> +
> +#define WG_LOCAL_X 5
> +#define WG_LOCAL_Y 0
> +#define WG_LOCAL_Z 0
> +
> +enum WG_BROADCAST
> +{
> + WG_BROADCAST_1D,
> + WG_BROADCAST_2D,
> + WG_BROADCAST_3D
> +};
> +
> +/*
> + * Generic compute-expected function for op BROADCAST type
> + * and any variable type
> + */
> +template<class T>
> +static void compute_expected(WG_BROADCAST wg_broadcast,
> + T* input,
> + T* expected,
> + uint32_t wg_global_size,
> + uint32_t wg_local_size)
> +{
> + if(wg_broadcast == WG_BROADCAST_1D)
> + {
> + for(uint32_t i = 0; i < wg_local_size; i++)
> + expected[i] = input[WG_LOCAL_X];
> + }
> + else if(wg_broadcast == WG_BROADCAST_2D)
> + {
> + for(uint32_t i = 0; i < wg_local_size; i++)
> + expected[i] =
> + input[WG_LOCAL_X +
> + WG_LOCAL_Y * WG_LOCAL_SIZE_X];
> + }
> + else if(wg_broadcast == WG_BROADCAST_3D)
> + {
> + for(uint32_t i = 0; i < wg_local_size; i++)
> + expected[i] =
> + input[WG_LOCAL_X +
> + WG_LOCAL_Y * WG_LOCAL_SIZE_X +
> + WG_LOCAL_Z * WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y];
> + }
> +}
> +
> +/*
> + * Generic input-expected generate function for op BROADCAST type
> + * and any variable type
> + */
> +template<class T>
> +static void generate_data(WG_BROADCAST wg_broadcast,
> + T* &input,
> + T* &expected,
> + uint32_t &wg_global_size,
> + uint32_t &wg_local_size)
> +{
> + if(wg_broadcast == WG_BROADCAST_1D)
> + {
> + wg_global_size = WG_GLOBAL_SIZE_X;
> + wg_local_size = WG_LOCAL_SIZE_X;
> + }
> + else if(wg_broadcast == WG_BROADCAST_2D)
> + {
> + wg_global_size = WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y;
> + wg_local_size = WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y;
> + }
> + else if(wg_broadcast == WG_BROADCAST_3D)
> + {
> + wg_global_size = WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y * WG_GLOBAL_SIZE_Z;
> + wg_local_size = WG_LOCAL_SIZE_X * WG_LOCAL_SIZE_Y * WG_LOCAL_SIZE_Z;
> + }
> +
> + /* allocate input and expected arrays */
> + input = new T[wg_global_size];
> + expected = new T[wg_global_size];
> +
> + /* base value for all data types */
> + T base_val = (long)7 << (sizeof(T) * 5 - 3);
> +
> + /* seed for random inputs */
> + srand (time(NULL));
> +
> + /* generate inputs and expected values */
> + for(uint32_t gid = 0; gid < wg_global_size; gid += wg_local_size)
> + {
> +#if DEBUG_STDOUT
> + cout << endl << "IN: " << endl;
> +#endif
> +
> + /* input values */
> + for(uint32_t lid = 0; lid < wg_local_size; lid++)
> + {
> + /* initially 0, augment after */
> + input[gid + lid] = 0;
> +
> + /* check all data types, test ideal for QWORD types */
> + input[gid + lid] += ((rand() % 2 - 1) * base_val);
> + /* add trailing random bits, tests GENERAL cases */
> + input[gid + lid] += (rand() % 112);
> +
> +#if DEBUG_STDOUT
> + /* output generated input */
> + cout << setw(4) << input[gid + lid] << ", " ;
> + if((lid + 1) % 8 == 0)
> + cout << endl;
> +#endif
> + }
> +
> + /* expected values */
> + compute_expected(wg_broadcast, input + gid, expected + gid, wg_global_size, wg_local_size);
> +
> +#if DEBUG_STDOUT
> + /* output expected input */
> + cout << endl << "EXP: " << endl;
> + for(uint32_t lid = 0; lid < wg_local_size; lid++){
> + cout << setw(4) << expected[gid + lid] << ", " ;
> + if((lid + 1) % 8 == 0)
> + cout << endl;
> + }
> +#endif
> +
> + }
> +}
> +
> +/*
> + * Generic workgroup utest function for op BROADCAST type
> + * and any variable type
> + */
> +template<class T>
> +static void workgroup_generic(WG_BROADCAST wg_broadcast,
> + T* input,
> + T* expected)
> {
> - const size_t n0 = 32;
> - const size_t n1 = 16;
> - const size_t n = n0 * n1;
> - uint32_t src[n];
> + uint32_t wg_global_size = 0;
> + uint32_t wg_local_size = 0;
>
> - // Setup kernel and buffers
> - OCL_CREATE_KERNEL("compiler_workgroup_broadcast");
> - OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
> - OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
> + cl_uint wg_local_x = WG_LOCAL_X;
> + cl_uint wg_local_y = WG_LOCAL_Y;
> + cl_uint wg_local_z = WG_LOCAL_Z;
> +
> + /* input and expected data */
> + generate_data(wg_broadcast, input, expected, wg_global_size, wg_local_size);
> +
> + /* prepare input for datatype */
> + OCL_CREATE_BUFFER(buf[0], 0, wg_global_size * sizeof(T), NULL);
> + OCL_CREATE_BUFFER(buf[1], 0, wg_global_size * sizeof(T), NULL);
> OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
> - globals[0] = n0;
> - globals[1] = n1;
> - locals[0] = 16;
> - locals[1] = 16;
> + OCL_SET_ARG(2, sizeof(cl_uint), &wg_local_x);
> + OCL_SET_ARG(3, sizeof(cl_uint), &wg_local_y);
> + OCL_SET_ARG(4, sizeof(cl_uint), &wg_local_z);
>
> - for (int32_t i = 0; i < (int32_t) n; ++i) {
> - src[i] = i;
> - }
> + /* set input data for GPU */
> OCL_MAP_BUFFER(0);
> - memcpy(buf_data[0], src, sizeof(src));
> + memcpy(buf_data[0], input, wg_global_size * sizeof(T));
> OCL_UNMAP_BUFFER(0);
>
> - // Run the kernel on GPU
> - OCL_NDRANGE(2);
> -
> - // Compare
> - OCL_MAP_BUFFER(1);
> - for (int32_t i = 0; i < (int32_t) n/2; ++i) {
> -// printf("%u ", ((uint32_t *)buf_data[1])[i]);
> - OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 56);
> + /* run the kernel on GPU */
> + if(wg_broadcast == WG_BROADCAST_1D)
> + {
> + globals[0] = WG_GLOBAL_SIZE_X;
> + locals[0] = WG_LOCAL_SIZE_X;
> + OCL_NDRANGE(1);
> + }
> + else if(wg_broadcast == WG_BROADCAST_2D)
> + {
> + globals[0] = WG_GLOBAL_SIZE_X;
> + locals[0] = WG_LOCAL_SIZE_X;
> + globals[1] = WG_GLOBAL_SIZE_Y;
> + locals[1] = WG_LOCAL_SIZE_Y;
> + OCL_NDRANGE(2);
> }
> - for (int32_t i = n/2; i < (int32_t) n; ++i) {
> - // printf("%u ", ((uint32_t *)buf_data[1])[i]);
> - OCL_ASSERT(((uint32_t *)buf_data[1])[i] == 312);
> + else if(wg_broadcast == WG_BROADCAST_3D)
> + {
> + globals[0] = WG_GLOBAL_SIZE_X;
> + locals[0] = WG_LOCAL_SIZE_X;
> + globals[1] = WG_GLOBAL_SIZE_Y;
> + locals[1] = WG_LOCAL_SIZE_Y;
> + globals[2] = WG_GLOBAL_SIZE_Z;
> + locals[2] = WG_LOCAL_SIZE_Y;
> + OCL_NDRANGE(3);
> }
> +
> + /* check if mismatch */
> + OCL_MAP_BUFFER(1);
> + uint32_t mismatches = 0;
> +
> + for (uint32_t i = 0; i < wg_global_size; i++)
> + if(((T *)buf_data[1])[i] != *(expected + i))
> + {
> + /* found mismatch, increment */
> + mismatches++;
> +
> +#if DEBUG_STDOUT
> + /* output mismatch */
> + cout << "Err at " << i << ", " <<
> + ((T *)buf_data[1])[i] << " != " << *(expected + i) << endl;
> +#endif
> + }
> +
> +#if DEBUG_STDOUT
> + /* output mismatch count */
> + cout << "mismatches " << mismatches << endl;
> +#endif
> +
> OCL_UNMAP_BUFFER(1);
> +
> + OCL_ASSERT(mismatches == 0);
> +}
> +
> +/*
> + * Workgroup broadcast 1D functions
> + */
> +void compiler_workgroup_broadcast_1D_int(void)
> +{
> + cl_int *input = NULL;
> + cl_int *expected = NULL;
> + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
> + "compiler_workgroup_broadcast_1D_int");
> + workgroup_generic(WG_BROADCAST_1D, input, expected);
> }
> +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_int);
>
> -MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast);
> +void compiler_workgroup_broadcast_1D_long(void)
> +{
> + cl_long *input = NULL;
> + cl_long *expected = NULL;
> + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
> + "compiler_workgroup_broadcast_1D_long");
> + workgroup_generic(WG_BROADCAST_1D, input, expected);
> +}
> +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_long);
> +
> +/*
> + * Workgroup broadcast 2D functions
> + */
> +void compiler_workgroup_broadcast_2D_int(void)
> +{
> + cl_int *input = NULL;
> + cl_int *expected = NULL;
> + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
> + "compiler_workgroup_broadcast_2D_int");
> + workgroup_generic(WG_BROADCAST_2D, input, expected);
> +}
> +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_int);
> +
> +void compiler_workgroup_broadcast_2D_long(void)
> +{
> + cl_int *input = NULL;
> + cl_int *expected = NULL;
cl_long here!
> + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
> + "compiler_workgroup_broadcast_2D_long");
> + workgroup_generic(WG_BROADCAST_2D, input, expected);
> +}
> +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_long);
> +
> +
> +/*
> + * Workgroup broadcast 3D functions
> + */
> +void compiler_workgroup_broadcast_3D_int(void)
> +{
> + cl_int *input = NULL;
> + cl_int *expected = NULL;
> + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
> + "compiler_workgroup_broadcast_3D_int");
> + workgroup_generic(WG_BROADCAST_3D, input, expected);
> +}
> +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_int);
> +
> +void compiler_workgroup_broadcast_3D_long(void)
> +{
> + cl_int *input = NULL;
> + cl_int *expected = NULL;
cl_long here, too!
> + OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
> + "compiler_workgroup_broadcast_3D_long");
> + workgroup_generic(WG_BROADCAST_3D, input, expected);
> +}
> +MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_long);
> --
> 2.5.0
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list