[Beignet] [PATCH V2 1/2] add utest to demo how to run CM kernerl via OpenCL APIs
Guo Yejun
yejun.guo at intel.com
Mon Nov 16 17:30:02 PST 2015
In this test case, the CM kernel is in VISA binary format, not in
GenX Binary format, it means that the CM jitter is needed to compile
the CM kernel from VISA format to GenX format, please refer to
cmrt_package_path/jitter/readme.txt to prepare the jitter.
v2: add comments about the CM jitter
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
kernels/cmrt_utest_genx.isa | Bin 0 -> 847 bytes
utests/CMakeLists.txt | 6 +
utests/runtime_cmrt.cpp | 274 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 280 insertions(+)
create mode 100644 kernels/cmrt_utest_genx.isa
create mode 100644 utests/runtime_cmrt.cpp
diff --git a/kernels/cmrt_utest_genx.isa b/kernels/cmrt_utest_genx.isa
new file mode 100644
index 0000000000000000000000000000000000000000..ab0781e83970c0f9867191dad43900c7c9ffc27d
GIT binary patch
literal 847
zcmZ8fZBNrs6n=WUfkWOz at r^G`5r!tDz3UwKNnjz8h{jIGmnAo27Sd$xnyoX$U+zbr
zbD?XrH|M3#dCs}#oc8p*d+a(6YSTE$Mo}{UwFjW#0)HGj#j^#>2{@i6Uj|8p?&phc
z{F`<*xgHFoAJf52gs-zao8>q;F`g(Nryu&~X&#OG7o9T`7>~#I+S+^?=V$S))<&X}
zbTBg7^8<5v+0^~vILYEsH0fW(X at 8hZax=@LX>M+!^!8wwWiaz at j9g+g;;Y%U(XF7}
zGCi$Z82|JB9=)bEKTASenrjjj2`DX!u-cSE6Q?vs=mj3dH)BrJ3pAxEc}I3K4-S at j
zwu?orVxw@^N_QRg0)13+H^A)IcD;}By>h?jC-_*no2C1-Bs?pNJ(mh^iw}ea9$=AD
zBfdiWg7T6AwM4|JP(3UhxM~60s6Z`ZhlYNGW{aJ!C}!|%{<nAI9So+4%L+c=JNWhh
zVU^WA1>Z$jqq{G+)kP`N=sMvNjfG#`!7MKDGR?Xmb_L2N)t#!6=`DRnf(nvVciNn-
z;^2*aVZVayQirQl`%+SVTG9pVC|8ii9U_5_;&_r{skXeb at s0}D7_f-lq_X{xNYvgl
ZZ{d)uWee|Yo%<Gq<yR2OAc6Q*`~^X3aQOfL
literal 0
HcmV?d00001
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index b3a051d..fda4927 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -237,6 +237,12 @@ else()
SET(UTESTS_REQUIRED_X11_LIB "")
endif (X11_FOUND)
+if (CMRT_FOUND)
+ SET(utests_sources
+ ${utests_sources}
+ runtime_cmrt.cpp)
+endif (CMRT_FOUND)
+
SET (kernel_bin ${CMAKE_CURRENT_SOURCE_DIR}/../kernels/compiler_ceil)
list (GET GBE_BIN_GENERATER -1 GBE_BIN_FILE)
diff --git a/utests/runtime_cmrt.cpp b/utests/runtime_cmrt.cpp
new file mode 100644
index 0000000..837f09a
--- /dev/null
+++ b/utests/runtime_cmrt.cpp
@@ -0,0 +1,274 @@
+/*
+this test case shows how to execute CM kernel via OpenCL APIs.
+the CM kernel source code is already compiled into file "cmrt_utest_genx.isa" with offline compiler.
+
+I also copied the CM kernel source code and CM host source code here for your reference.
+
+CM kernel source code:
+#include <cm/cm.h>
+extern "C" _GENX_MAIN_ void
+simplemov(SurfaceIndex ibuf, SurfaceIndex obuf, uint d)
+{
+ matrix<uchar, 1, 4> in;
+ matrix<uchar, 1, 4> out;
+
+ uint h_pos = get_thread_origin_x();
+ uint v_pos = get_thread_origin_y();
+
+ read(ibuf, h_pos*4, v_pos, in);
+
+ out = in / d;
+ write(obuf, h_pos*4, v_pos, out);
+}
+
+CM host source code:
+#include "cm_rt.h"
+
+int main()
+{
+ FILE* pISA = fopen("cmrt_utest_genx.isa", "rb");
+ if (pISA == NULL) {
+ perror("cmrt_utest_genx.isa");
+ return -1;
+ }
+
+ fseek (pISA, 0, SEEK_END);
+ int codeSize = ftell (pISA);
+ rewind(pISA);
+
+ if(codeSize == 0)
+ {
+ perror("cmrt_utest_genx.isa");
+ return -1;
+ }
+
+ void *pCommonISACode = (BYTE*) malloc(codeSize);
+ if( !pCommonISACode )
+ {
+ return -1;
+ }
+
+ if (fread(pCommonISACode, 1, codeSize, pISA) != codeSize) {
+ perror("cmrt_utest_genx.isa");
+ return -1;
+ }
+ fclose(pISA);
+
+ unsigned int width = 256;
+ unsigned int height = 128;
+
+ unsigned char *src;
+ unsigned char *dst;
+ src = (unsigned char*) malloc(width*height*4);
+ dst = (unsigned char*) malloc(width*height*4);
+
+ for (unsigned int i = 0; i < width*height*4; i++) {
+ src[i] = i % 256;
+ dst[i] = 0;
+ }
+
+ CmDevice* pCmDev = NULL;;
+ UINT version = 0;
+
+ int result = CreateCmDevice( pCmDev, version );
+ if (result != CM_SUCCESS ) {
+ printf("CmDevice creation error");
+ return -1;
+ }
+ if( version < CM_1_0 ){
+ printf(" The runtime API version is later than runtime DLL version");
+ return -1;
+ }
+
+ CmProgram* program = NULL;
+ result = pCmDev->LoadProgram(pCommonISACode, codeSize, program);
+ if (result != CM_SUCCESS ) {
+ perror("CM LoadProgram error");
+ return -1;
+ }
+
+ CmKernel* kernel = NULL;
+ result = pCmDev->CreateKernel(program, CM_KERNEL_FUNCTION(simplemov) , kernel);
+ if (result != CM_SUCCESS ) {
+ perror("CM CreateKernel error");
+ return -1;
+ }
+
+ CmSurface2D* pInputSurf = NULL;
+ result = pCmDev->CreateSurface2D( width, height, CM_SURFACE_FORMAT_A8R8G8B8, pInputSurf );
+ if (result != CM_SUCCESS ) {
+ printf("CM CreateSurface2D error");
+ return -1;
+ }
+
+ CmSurface2D* pOutputSurf = NULL;
+ result = pCmDev->CreateSurface2D( width, height, CM_SURFACE_FORMAT_A8R8G8B8, pOutputSurf );
+ if (result != CM_SUCCESS ) {
+ printf("CM CreateSurface2D error");
+ return -1;
+ }
+
+ result = pInputSurf->WriteSurface( src, NULL );
+ if (result != CM_SUCCESS ) {
+ printf("CM WriteSurface error");
+ return -1;
+ }
+
+ kernel->SetThreadCount( width * height );
+
+ CmThreadSpace* pTS = NULL;
+ result = pCmDev->CreateThreadSpace(width, height, pTS);
+ if (result != CM_SUCCESS ) {
+ printf("CM WriteSurface error");
+ return -1;
+ }
+
+ SurfaceIndex * index0= NULL;
+ pInputSurf->GetIndex(index0);
+ kernel->SetKernelArg(0,sizeof(SurfaceIndex),index0);
+
+ SurfaceIndex * index1= NULL;
+ pOutputSurf->GetIndex(index1);
+ kernel->SetKernelArg(1,sizeof(SurfaceIndex),index1);
+
+ unsigned int d = 3;
+ kernel->SetKernelArg(2, sizeof(unsigned int), &d);
+
+ CmQueue* pCmQueue = NULL;
+ result = pCmDev->CreateQueue( pCmQueue );
+ if (result != CM_SUCCESS ) {
+ perror("CM CreateQueue error");
+ return -1;
+ }
+
+ CmTask *pKernelArray = NULL;
+
+ result = pCmDev->CreateTask(pKernelArray);
+ if (result != CM_SUCCESS ) {
+ printf("CmDevice CreateTask error");
+ return -1;
+ }
+
+ result = pKernelArray-> AddKernel (kernel);
+ if (result != CM_SUCCESS ) {
+ printf("CmDevice AddKernel error");
+ return -1;
+ }
+
+ CmEvent* e = NULL;
+ result = pCmQueue->Enqueue(pKernelArray, e, pTS);
+ if (result != CM_SUCCESS ) {
+ printf("CmDevice enqueue error");
+ return -1;
+ }
+
+ pCmDev->DestroyTask(pKernelArray);
+ result = pOutputSurf->ReadSurface( dst, e );
+ if (result != CM_SUCCESS ) {
+ printf("CM ReadSurface error");
+ return -1;
+ }
+
+ for (unsigned int i = 0; i < width*height*4; i++) {
+ if (src[i] / d != dst[i]) {
+ printf("test failed at %d, expected %d, got %d\n", i, src[i]/d, dst[i]);
+ return -1;
+ }
+ }
+
+ printf("test passed!\n");
+
+ result = DestroyCmDevice( pCmDev );
+
+ free(pCommonISACode);
+ free(src);
+ free(dst);
+
+ return 0;
+}
+
+*/
+
+#include "utest_helper.hpp"
+#include "utest_file_map.hpp"
+#include <string.h>
+
+void runtime_cmrt(void)
+{
+ uint32_t w = 256;
+ uint32_t h = 128;
+ cl_int status;
+ cl_int binary_status;
+ char *ker_path = NULL;
+
+ cl_file_map_t *fm = cl_file_map_new();
+ ker_path = cl_do_kiss_path("cmrt_utest_genx.isa", NULL);
+ OCL_ASSERT (cl_file_map_open(fm, ker_path) == CL_FILE_MAP_SUCCESS);
+
+ const unsigned char *kbin = (const unsigned char *)cl_file_map_begin(fm);
+ const size_t sz = cl_file_map_size(fm);
+
+ program = clCreateProgramWithBinary(ctx, 1,
+ &device, &sz, &kbin, &binary_status, &status);
+
+ OCL_ASSERT(program && status == CL_SUCCESS);
+
+ /* OCL requires to build the program even if it is created from a binary */
+ OCL_ASSERT(clBuildProgram(program, 1, &device, NULL, NULL, NULL) == CL_SUCCESS);
+
+ kernel = clCreateKernel(program, "simplemov", &status);
+ OCL_ASSERT(status == CL_SUCCESS);
+
+
+ cl_image_format format;
+ cl_image_desc desc;
+
+ memset(&desc, 0x0, sizeof(cl_image_desc));
+ memset(&format, 0x0, sizeof(cl_image_format));
+
+ format.image_channel_order = CL_BGRA;
+ format.image_channel_data_type = CL_UNORM_INT8;
+ desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+ desc.image_width = w;
+ desc.image_height = h;
+ desc.image_row_pitch = 0;
+
+ OCL_CREATE_IMAGE(buf[0], 0, &format, &desc, NULL);
+ OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
+
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ uint8_t* src = (uint8_t*)buf_data[0];
+ uint8_t* dst = (uint8_t*)buf_data[1];
+ for (uint32_t j = 0; j < h; ++j)
+ for (uint32_t i = 0; i < w*4; i++) {
+ src[j * w * 4 + i] = i;
+ dst[j * w * 4 + i] = 0;
+ }
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+
+ unsigned int d = 3;
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(unsigned int), &d);
+ globals[0] = w;
+ globals[1] = h;
+
+ //if kernel uses get_origin_thread_x/y, locals must be NULL to invoke pCmQueue->Enqueue
+ //if kernel uses cm_linear_global_id, locals must be not NULL to invoke pCmQueue->EnqueueWithGroup
+ OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, globals, NULL, 0, NULL, NULL);
+
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ src = (uint8_t*)buf_data[0];
+ dst = (uint8_t*)buf_data[1];
+ for (uint32_t j = 0; j < h; ++j)
+ for (uint32_t i = 0; i < w*4; i++) {
+ OCL_ASSERT(src[j * w * 4 + i] / d == dst[j * w * 4 + i]);
+ }
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(runtime_cmrt);
--
1.9.1
More information about the Beignet
mailing list