[Beignet] [PATCH 2/3] cl_intel_media_block_io READ related unit tests.

Pan, Xiuli xiuli.pan at intel.com
Wed Mar 8 06:11:13 UTC 2017


LGTM, except that the extension should be in the first patch.

-----Original Message-----
From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of xionghu.luo at intel.com
Sent: Friday, March 3, 2017 12:08 AM
To: beignet at lists.freedesktop.org
Cc: Luo, Xionghu <xionghu.luo at intel.com>
Subject: [Beignet] [PATCH 2/3] cl_intel_media_block_io READ related unit tests.

From: Luo Xionghu <xionghu.luo at intel.com>

Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
 kernels/compiler_subgroup_image_block_read.cl | 115 ++++++++++++++
 src/cl_extensions.h                           |   1 +
 utests/CMakeLists.txt                         |   1 +
 utests/compiler_subgroup_media_block_read.cpp | 212 ++++++++++++++++++++++++++
 utests/utest_helper.cpp                       |  20 +++
 utests/utest_helper.hpp                       |   3 +
 6 files changed, 352 insertions(+)
 create mode 100644 utests/compiler_subgroup_media_block_read.cpp

diff --git a/kernels/compiler_subgroup_image_block_read.cl b/kernels/compiler_subgroup_image_block_read.cl
index fa079b7..596265f 100644
--- a/kernels/compiler_subgroup_image_block_read.cl
+++ b/kernels/compiler_subgroup_image_block_read.cl
@@ -62,3 +62,118 @@ __kernel void compiler_subgroup_image_block_read_us8(image2d_t src, global ushor
   dst[id] = tmp;
 }
 #endif
+#ifdef MEDIA_BLOCK_IO
+/*
+ * cl_intel_media_block_io read kernels.
+ *
+ * Every kernel follows the same pattern: the x coordinate is the index of
+ * the first work-item of the sub-group (sub-group offset inside the
+ * work-group plus the work-group offset) scaled by the element size, the y
+ * coordinate is the work-item's global id in dimension 1 multiplied by the
+ * vector width, and the value returned by
+ * intel_sub_group_media_block_read_*() is stored at
+ * dst[yid * get_global_size(0) + id].
+ *
+ * NOTE(review): the two integer arguments passed to the built-ins are
+ * presumably the block width and height - confirm against the extension
+ * specification.
+ */
+__kernel void compiler_subgroup_media_block_read_ui1(image2d_t src, global uint *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(uint) + sizeof(uint) * get_group_id(0) * get_local_size(0),yid);
+  uint tmp = intel_sub_group_media_block_read_ui(coord, 16, 1, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void compiler_subgroup_media_block_read_ui2(image2d_t src, global uint2 *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(uint) + sizeof(uint) * get_group_id(0) * get_local_size(0),yid*2);
+  uint2 tmp = intel_sub_group_media_block_read_ui2(coord, 16, 2, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void compiler_subgroup_media_block_read_ui4(image2d_t src, global uint4 *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(uint) + sizeof(uint) * get_group_id(0) * get_local_size(0),yid*4);
+  uint4 tmp = intel_sub_group_media_block_read_ui4(coord, 16, 4, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void compiler_subgroup_media_block_read_ui8(image2d_t src, global uint8 *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(uint) + sizeof(uint) * get_group_id(0) * get_local_size(0),yid*8);
+  uint8 tmp = intel_sub_group_media_block_read_ui8(coord, 16, 8, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void compiler_subgroup_media_block_read_us1(image2d_t src, global ushort *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(ushort) + sizeof(ushort) * get_group_id(0) * get_local_size(0),yid);
+  ushort tmp = intel_sub_group_media_block_read_us(coord, 16, 1, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void compiler_subgroup_media_block_read_us2(image2d_t src, global ushort2 *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(ushort) + sizeof(ushort) * get_group_id(0) * get_local_size(0),yid*2);
+  ushort2 tmp = intel_sub_group_media_block_read_us2(coord, 16, 2, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void compiler_subgroup_media_block_read_us4(image2d_t src, global ushort4 *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(ushort) + sizeof(ushort) * get_group_id(0) * get_local_size(0),yid*4);
+  ushort4 tmp = intel_sub_group_media_block_read_us4(coord, 16, 4, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void compiler_subgroup_media_block_read_us8(image2d_t src, global ushort8 *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(ushort) + sizeof(ushort) * get_group_id(0) * get_local_size(0),yid*8);
+  ushort8 tmp = intel_sub_group_media_block_read_us8(coord, 16, 8, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void __attribute__((intel_reqd_sub_group_size(8)))
+compiler_subgroup_media_block_read_us16(image2d_t src, global ushort16 *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(ushort) + sizeof(ushort) * get_group_id(0) * get_local_size(0),yid*16);
+  ushort16 tmp = intel_sub_group_media_block_read_us16(coord, 8, 16, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void compiler_subgroup_media_block_read_uc1(image2d_t src, global uchar *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(char) + sizeof(char) * get_group_id(0) * get_local_size(0),yid);
+  uchar tmp = intel_sub_group_media_block_read_uc(coord, 16, 1, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void compiler_subgroup_media_block_read_uc2(image2d_t src, global uchar2 *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(char) + sizeof(char) * get_group_id(0) * get_local_size(0),yid*2);
+  uchar2 tmp = intel_sub_group_media_block_read_uc2(coord, 16, 2, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void compiler_subgroup_media_block_read_uc4(image2d_t src, global uchar4 *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(char) + sizeof(char) * get_group_id(0) * get_local_size(0),yid*4);
+  uchar4 tmp = intel_sub_group_media_block_read_uc4(coord, 16, 4, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void compiler_subgroup_media_block_read_uc8(image2d_t src, global uchar8 *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(char) + sizeof(char) * get_group_id(0) * get_local_size(0),yid*8);
+  uchar8 tmp = intel_sub_group_media_block_read_uc8(coord, 16, 8, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+__kernel void compiler_subgroup_media_block_read_uc16(image2d_t src, global uchar16 *dst)
+{
+  int id = get_global_id(0);
+  int yid = get_global_id(1);
+  int2 coord = (int2)(get_sub_group_size()*get_sub_group_id()*sizeof(char) + sizeof(char) * get_group_id(0) * get_local_size(0),yid*16);
+  uchar16 tmp = intel_sub_group_media_block_read_uc16(coord, 16, 16, src);
+  dst[yid * get_global_size(0) + id] = tmp;
+}
+#endif
diff --git a/src/cl_extensions.h b/src/cl_extensions.h index 55747a7..af0855e 100644
--- a/src/cl_extensions.h
+++ b/src/cl_extensions.h
@@ -30,6 +30,7 @@
   DECL_EXT(intel_motion_estimation) \
   DECL_EXT(intel_subgroups) \
   DECL_EXT(intel_subgroups_short) \
+  DECL_EXT(intel_media_block_io) \
   DECL_EXT(intel_planar_yuv)
 
 #define DECL_GL_EXTENSIONS \
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index f7a9046..736b21c 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -178,6 +178,7 @@ set (utests_sources
   compiler_subgroup_buffer_block_write.cpp
   compiler_subgroup_image_block_read.cpp
   compiler_subgroup_image_block_write.cpp
+  compiler_subgroup_media_block_read.cpp
   compiler_async_stride_copy.cpp
   compiler_insn_selection_min.cpp
   compiler_insn_selection_max.cpp
diff --git a/utests/compiler_subgroup_media_block_read.cpp b/utests/compiler_subgroup_media_block_read.cpp
new file mode 100644
index 0000000..41289a2
--- /dev/null
+++ b/utests/compiler_subgroup_media_block_read.cpp
@@ -0,0 +1,212 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+using namespace std;
+
+/* set to 1 for debug, output of input-expected data */
+#define DEBUG_STDOUT    0
+
+/* NDRANGE */
+#define WG_GLOBAL_SIZE_X  64
+#define WG_GLOBAL_SIZE_Y  1
+#define WG_LOCAL_SIZE_X   64
+#define WG_LOCAL_SIZE_Y   1
+/*
+ * Generic compute-expected function for media block read.
+ * For every w in [0, WG_GLOBAL_SIZE_X) the value w is written to the
+ * VEC_SIZE * WG_GLOBAL_SIZE_Y consecutive entries of `expected` starting at
+ * index w * VEC_SIZE * WG_GLOBAL_SIZE_Y.  `input` is not read here.  */ 
+template<class T> static void compute_expected_media(T* input,
+                             T* expected,
+                             size_t VEC_SIZE) {
+    for(uint32_t w = 0; w < WG_GLOBAL_SIZE_X; ++w) {
+        for (uint32_t h = 0; h < VEC_SIZE * WG_GLOBAL_SIZE_Y; ++h) {
+          expected[h + w * VEC_SIZE * WG_GLOBAL_SIZE_Y] = w;
+    }
+  }
+}
+
+/*
+ * Generic input-expected generate function for media block read.
+ * Allocates both arrays with new[] and hands them back through the
+ * `input` / `expected` reference parameters, fills the input rows, then
+ * calls compute_expected_media() to fill the expected values.
+ * VEC_SIZE scales both the number of input rows and the allocation size.  */ 
+template<class T> static void generate_data_media(T* &input,
+                          T* &expected,
+                          size_t VEC_SIZE) {
+  /* allocate input and expected arrays; the input storage is an int array
+   * that is only viewed through a T pointer, the expected array is a real
+   * T array */
+  int* input_ui = new int[WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y * 
+VEC_SIZE];
+  input = (T*)input_ui;
+  expected = new T[WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y * VEC_SIZE];
+
+  /* seed the C random generator.
+   * NOTE(review): no rand() call is visible in this function - confirm the
+   * seed is still needed */
+  srand (time(NULL));
+
+#if DEBUG_STDOUT
+    cout << endl << "IN: " << endl;
+#endif
+  uint32_t rpitch = sizeof(uint32_t) * WG_GLOBAL_SIZE_X / sizeof(T);
+  /* fill the input; rpitch (above) is the number of T elements that fit in
+   * WG_GLOBAL_SIZE_X uint32_t values and is used as the row stride */
+  for (uint32_t h = 0; h < VEC_SIZE * WG_GLOBAL_SIZE_Y; ++h) {
+    for(uint32_t w = 0; w < WG_GLOBAL_SIZE_X; ++w)
+    {
+      /* element w of every row holds its own column index w */
+      input[w + h * rpitch] = w;
+
+#if DEBUG_STDOUT
+      /* output generated input.
+       * NOTE(review): setw is declared in <iomanip>, which is not among the
+       * headers this file includes directly - confirm it is reachable */
+      cout << setw(4) << (uint32_t)input[w + h * rpitch] << ", " ;
+      if((w+ 1) % 8 == 0)
+            cout << endl;
+#endif
+    }
+  }
+  /* expected values */
+  compute_expected_media(input, expected, VEC_SIZE);
+
+#if DEBUG_STDOUT
+  /* output expected values, one parenthesised group per gid */
+  cout << endl << "EXP: " << endl;
+  for(uint32_t gid = 0; gid < WG_GLOBAL_SIZE_X; gid++)
+  {
+    cout << "(";
+    for(uint32_t vsz = 0; vsz < VEC_SIZE * WG_GLOBAL_SIZE_Y; vsz++)
+      cout << setw(4) << (uint32_t)expected[gid* VEC_SIZE * WG_GLOBAL_SIZE_Y + vsz] << ", " ;
+    cout << ")";
+    if((gid + 1) % 8 == 0)
+        cout << endl;
+    cout << endl;
+  }
+#endif
+}
+
+/*
+ * Generic subgroup utest function for media block read.
+ *
+ * Builds `kernel_name` from compiler_subgroup_image_block_read.cl with
+ * -DMEDIA_BLOCK_IO, uploads the generated input as a CL_R /
+ * CL_UNSIGNED_INT32 2D image, runs the kernel over a 2D NDRange and compares
+ * the output buffer with the expected values element by element.
+ *
+ * input, expected: overwritten with the arrays allocated by
+ *                  generate_data_media(); both are released here.
+ * VEC_SIZE:        vector width of the kernel under test; the image height
+ *                  is VEC_SIZE * WG_GLOBAL_SIZE_Y rows.
+ * kernel_name:     name of the kernel to build and run.
+ *
+ * The case is skipped with a message unless the maximum sub-group size
+ * reported for the NDRange is 16.
+ */
+template<class T>
+static void subgroup_generic_media(T* input,
+                                   T* expected,
+                                   size_t VEC_SIZE,
+                                   const char* kernel_name)
+{
+  OCL_CALL(cl_kernel_init, "compiler_subgroup_image_block_read.cl", kernel_name,
+           SOURCE, "-DMEDIA_BLOCK_IO");
+
+  cl_image_format format;
+  cl_image_desc desc;
+
+  memset(&desc, 0x0, sizeof(cl_image_desc));
+  memset(&format, 0x0, sizeof(cl_image_format));
+
+  /* get simd size */
+  globals[0] = WG_GLOBAL_SIZE_X;
+  globals[1] = WG_GLOBAL_SIZE_Y;
+  locals[0] = WG_LOCAL_SIZE_X;
+  locals[1] = WG_LOCAL_SIZE_Y;
+  size_t SIMD_SIZE = 0;
+  OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(size_t),&SIMD_SIZE,NULL);
+
+  /* nothing has been allocated yet, so returning here leaks nothing */
+  if(SIMD_SIZE != 16) {
+    printf("skip! this case could only run under simd16 mode!\n");
+    return;
+  }
+
+  size_t buf_sz = VEC_SIZE * WG_GLOBAL_SIZE_X * WG_GLOBAL_SIZE_Y;
+  /* input and expected data */
+  generate_data_media(input, expected, VEC_SIZE);
+
+  /* prepare input for datatype */
+  format.image_channel_order = CL_R;
+  format.image_channel_data_type = CL_UNSIGNED_INT32;
+  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+  desc.image_width = WG_GLOBAL_SIZE_X;
+  desc.image_height = VEC_SIZE * WG_GLOBAL_SIZE_Y;
+  desc.image_row_pitch = WG_GLOBAL_SIZE_X * sizeof(uint32_t);
+
+  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, input);
+  OCL_CREATE_BUFFER(buf[1], 0, buf_sz * sizeof(T), NULL);
+
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+
+  /* run the kernel on GPU */
+  OCL_NDRANGE(2);
+
+  /* check if mismatch */
+  OCL_MAP_BUFFER(1);
+  uint32_t mismatches = 0;
+
+  for (uint32_t i = 0; i < buf_sz; i++)
+    if(((T *)buf_data[1])[i] != *(expected + i))
+    {
+      /* found mismatch, increment */
+      mismatches++;
+
+#if DEBUG_STDOUT
+      /* output mismatch */
+      cout << "Err at " << i << ", " <<
+        (uint32_t)((T *)buf_data[1])[i] << " != " <<
+        (uint32_t)*(expected + i) << endl;
+#endif
+    }
+
+#if DEBUG_STDOUT
+  /* output mismatch count */
+  cout << "mismatches " << mismatches << endl;
+#endif
+
+  OCL_UNMAP_BUFFER(1);
+
+  /*
+   * Both arrays come from new[] in generate_data_media(), so they have to be
+   * released with delete[] rather than free().  The input storage was
+   * allocated as int[] and is only viewed through T*, hence the cast back.
+   * They are released before the assertion in case OCL_ASSERT does not
+   * return on failure.
+   */
+  delete[] reinterpret_cast<int*>(input);
+  delete[] expected;
+
+  OCL_ASSERT(mismatches == 0);
+}
+
+/*
+ * Defines a unit test `name` that runs the uint kernel `kernel` through
+ * subgroup_generic_media() with vector size `vec_size`.  The test returns
+ * without running when cl_check_media_block_io() returns 0.
+ */
+#define compiler_subgroup_media_block_read_ui(name, kernel, vec_size) \
+void name(void)  \
+{ \
+  if(!cl_check_media_block_io()) \
+    return; \
+  cl_uint *input = NULL; \
+  cl_uint *expected = NULL; \
+  subgroup_generic_media(input, expected, vec_size, #kernel); \
+}  \
+MAKE_UTEST_FROM_FUNCTION(name);
+compiler_subgroup_media_block_read_ui(compiler_subgroup_media_block_read_ui1, compiler_subgroup_media_block_read_ui1, 1)
+compiler_subgroup_media_block_read_ui(compiler_subgroup_media_block_read_ui2, compiler_subgroup_media_block_read_ui2, 2)
+compiler_subgroup_media_block_read_ui(compiler_subgroup_media_block_read_ui4, compiler_subgroup_media_block_read_ui4, 4)
+compiler_subgroup_media_block_read_ui(compiler_subgroup_media_block_read_ui8, compiler_subgroup_media_block_read_ui8, 8)
+
+/*
+ * Defines a unit test `name` that runs the ushort kernel `kernel` through
+ * subgroup_generic_media() with vector size `vec_size`.  The test returns
+ * without running when cl_check_media_block_io() returns 0.
+ */
+#define compiler_subgroup_media_block_read_us(name, kernel, vec_size) \
+void name(void)  \
+{ \
+  if(!cl_check_media_block_io()) \
+    return; \
+  cl_ushort *input = NULL; \
+  cl_ushort *expected = NULL; \
+  subgroup_generic_media(input, expected, vec_size, #kernel); \
+}  \
+MAKE_UTEST_FROM_FUNCTION(name);
+compiler_subgroup_media_block_read_us(compiler_subgroup_media_block_read_us1, compiler_subgroup_media_block_read_us1, 1)
+compiler_subgroup_media_block_read_us(compiler_subgroup_media_block_read_us2, compiler_subgroup_media_block_read_us2, 2)
+compiler_subgroup_media_block_read_us(compiler_subgroup_media_block_read_us4, compiler_subgroup_media_block_read_us4, 4)
+compiler_subgroup_media_block_read_us(compiler_subgroup_media_block_read_us8, compiler_subgroup_media_block_read_us8, 8)
+//the us16 case could only run under SIMD8 mode.
+//compiler_subgroup_media_block_read_us(compiler_subgroup_media_block_read_us16, compiler_subgroup_media_block_read_us16, 16)
+
+/*
+ * Defines a unit test `name` that runs the uchar kernel `kernel` through
+ * subgroup_generic_media() with vector size `vec_size`.  The test returns
+ * without running when cl_check_media_block_io() returns 0.
+ */
+#define compiler_subgroup_media_block_read_uc(name, kernel, vec_size) \
+void name(void)  \
+{ \
+  if(!cl_check_media_block_io()) \
+    return; \
+  cl_uchar *input = NULL; \
+  cl_uchar *expected = NULL; \
+  subgroup_generic_media(input, expected, vec_size, #kernel); \
+}  \
+MAKE_UTEST_FROM_FUNCTION(name);
+compiler_subgroup_media_block_read_uc(compiler_subgroup_media_block_read_uc1, compiler_subgroup_media_block_read_uc1, 1)
+compiler_subgroup_media_block_read_uc(compiler_subgroup_media_block_read_uc2, compiler_subgroup_media_block_read_uc2, 2)
+compiler_subgroup_media_block_read_uc(compiler_subgroup_media_block_read_uc4, compiler_subgroup_media_block_read_uc4, 4)
+compiler_subgroup_media_block_read_uc(compiler_subgroup_media_block_read_uc8, compiler_subgroup_media_block_read_uc8, 8)
+compiler_subgroup_media_block_read_uc(compiler_subgroup_media_block_read_uc16, compiler_subgroup_media_block_read_uc16, 16)
diff --git a/utests/utest_helper.cpp b/utests/utest_helper.cpp index 7052a14..f4487c1 100644
--- a/utests/utest_helper.cpp
+++ b/utests/utest_helper.cpp
@@ -939,6 +939,26 @@ int cl_check_subgroups_short(void)
   return 1;
 }
 
+int cl_check_media_block_io(void)
+{ /* Returns 1 only when cl_check_subgroups() succeeds and the device's
+   * CL_DEVICE_EXTENSIONS string contains "cl_intel_media_block_io";
+   * otherwise returns 0 (printing a skip message when the extension name
+   * is not found). */
+  if (!cl_check_subgroups())
+    return 0;
+  std::string extStr;
+  size_t param_value_size;
+  OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, 0, 0, 
+&param_value_size); /* first call passes no buffer: it only asks for the size */
+  std::vector<char> param_value(param_value_size);
+  OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, param_value_size,
+           param_value.empty() ? NULL : &param_value.front(), 
+&param_value_size); /* second call fills the buffer sized above */
+  if (!param_value.empty())
+    extStr = std::string(&param_value.front(), param_value_size-1); /* drops the last byte - presumably the terminating NUL, confirm */
+
+  if (std::strstr(extStr.c_str(), "cl_intel_media_block_io") == NULL) {
+    printf("No cl_intel_media_block_io, Skip!"); /* NOTE(review): message has no trailing newline */
+    return 0;
+  }
+  return 1;
+}
+
 int cl_check_ocl20(bool or_beignet)
 {
   size_t param_value_size;
diff --git a/utests/utest_helper.hpp b/utests/utest_helper.hpp index e2a6a88..5dc381e 100644
--- a/utests/utest_helper.hpp
+++ b/utests/utest_helper.hpp
@@ -303,6 +303,9 @@ extern int cl_check_beignet(void);
 /* Check is intel subgroups enabled. */  extern int cl_check_subgroups(void);
 
+/* Check whether intel_media_block_io is enabled. */ extern int 
+cl_check_media_block_io(void);
+
 typedef cl_int(clGetKernelSubGroupInfoKHR_cb)(cl_kernel, cl_device_id,
                                               cl_kernel_sub_group_info, size_t,
                                               const void *, size_t, void *,
--
2.5.0

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list