[Beignet] [PATCH V2 1/2] make Beignet as intermedia layer of CMRT
Yang, Rong R
rong.r.yang at intel.com
Wed Dec 9 00:38:26 PST 2015
LGTM, pushed, thanks.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo, Yejun
> Sent: Thursday, December 3, 2015 16:53
> To: beignet at lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH V2 1/2] make Beignet as intermedia layer of
> CMRT
>
> Ping for review, thanks.
>
> -----Original Message-----
> From: Guo, Yejun
> Sent: Thursday, November 19, 2015 2:03 AM
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [PATCH V2 1/2] make Beignet as intermedia layer of CMRT
>
> CMRT is C for Media Runtime on Intel GPU, see
> https://github.com/01org/cmrt.
> There is a request to make Beignet as intermedia layer of CMRT, in other
> words, application programer write OpenCL APIs to execute the CM kernel
> on GPU, the following shows the key code, and please refer to the next
> patch of unit test for detail.
>
> prog = clCreateProgramWithBinary("cm kernel"); clBuildProgram(prog);
> kernel = clCreateKernel(prog, "kernel name"); image = clCreateImage();
> clSetKernelArg(kernel, image); clEnqueueNDRangeKernel(kernel);
>
> Inside Beignet, once cm kernel is invoked, the following relative APIs will be
> directly passed to CMRT library (libcmrt.so) which is loaded via dlopen only
> when necessary. Since we use this simple method to keep the code clean,
> OpenCL spec is not strictly followed, and cl_event is not supported for this
> case.
>
> v2: add comments about the cm queue in fuction cmrt_enqueue
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
> CMakeLists.txt | 6 +
> src/CMakeLists.txt | 7 ++
> src/cl_api.c | 39 ++++++-
> src/cl_cmrt.cpp | 311
> +++++++++++++++++++++++++++++++++++++++++++++++++
> src/cl_cmrt.h | 45 +++++++
> src/cl_command_queue.c | 7 ++
> src/cl_command_queue.h | 2 +
> src/cl_device_id.h | 3 +
> src/cl_gt_device.h | 1 +
> src/cl_kernel.c | 11 ++
> src/cl_kernel.h | 2 +
> src/cl_mem.c | 7 ++
> src/cl_mem.h | 3 +
> src/cl_program.c | 57 +++++++--
> src/cl_program.h | 10 +-
> 15 files changed, 497 insertions(+), 14 deletions(-) create mode 100644
> src/cl_cmrt.cpp create mode 100644 src/cl_cmrt.h
>
> diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c6c373..3411b6a 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -150,6 +150,12 @@ ELSE(DRM_INTEL_FOUND)
> MESSAGE(FATAL_ERROR "Looking for DRM Intel (>= 2.4.52) - not found")
> ENDIF(DRM_INTEL_FOUND)
>
> +# CMRT
> +pkg_check_modules(CMRT libcmrt)
> +IF(CMRT_FOUND)
> +INCLUDE_DIRECTORIES(${CMRT_INCLUDE_DIRS})
> +ENDIF(CMRT_FOUND)
> +
> # Threads
> Find_Package(Threads)
>
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c917e76..5f1872d
> 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -101,6 +101,13 @@ if (X11_FOUND)
> x11/va_dri2.c)
> endif (X11_FOUND)
>
> +if (CMRT_FOUND)
> + set(CMAKE_CXX_FLAGS "-DHAS_CMRT ${CMAKE_CXX_FLAGS}")
> + set(CMAKE_CXX_FLAGS "-
> DCMRT_PATH=${CMRT_LIBRARY_DIRS}/libcmrt.so
> +${CMAKE_CXX_FLAGS}")
> + set(CMAKE_C_FLAGS "-DHAS_CMRT ${CMAKE_C_FLAGS}")
> + set(OPENCL_SRC ${OPENCL_SRC} cl_cmrt.cpp) endif (CMRT_FOUND)
> +
> if (EGL_FOUND AND MESA_SOURCE_FOUND)
> set (OPENCL_SRC ${OPENCL_SRC} cl_mem_gl.c cl_gl_api.c
> x11/mesa_egl_extension.c x11/mesa_egl_res_share.c
> intel/intel_dri_resource_sharing.c)
> SET(CMAKE_CXX_FLAGS "-DHAS_EGL ${CMAKE_CXX_FLAGS}") diff --git
> a/src/cl_api.c b/src/cl_api.c index ddd39cf..7cedf4b 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -31,6 +31,7 @@
> #include "cl_accelerator_intel.h"
> #include "cl_alloc.h"
> #include "cl_utils.h"
> +#include "cl_cmrt.h"
>
> #include "CL/cl.h"
> #include "CL/cl_ext.h"
> @@ -276,6 +277,10 @@ clRetainDevice(cl_device_id device) cl_int
> clReleaseDevice(cl_device_id device) {
> +#ifdef HAS_CMRT
> + cmrt_destroy_device(device);
> +#endif
> +
> // XXX stub for C++ Bindings
> return CL_SUCCESS;
> }
> @@ -941,11 +946,11 @@ clBuildProgram(cl_program program,
> INVALID_DEVICE_IF (device_list[0] != program->ctx->device);
> }
>
> - /* TODO support create program from binary */
> assert(program->source_type == FROM_LLVM ||
> program->source_type == FROM_SOURCE ||
> program->source_type == FROM_LLVM_SPIR ||
> - program->source_type == FROM_BINARY);
> + program->source_type == FROM_BINARY ||
> + program->source_type == FROM_CMRT);
> if((err = cl_program_build(program, options)) != CL_SUCCESS) {
> goto error;
> }
> @@ -1244,7 +1249,13 @@ clSetKernelArg(cl_kernel kernel,
> {
> cl_int err = CL_SUCCESS;
> CHECK_KERNEL(kernel);
> - err = cl_kernel_set_arg(kernel, arg_index, arg_size, arg_value);
> +
> +#ifdef HAS_CMRT
> + if (kernel->cmrt_kernel != NULL)
> + err = cmrt_set_kernel_arg(kernel, arg_index, arg_size, arg_value);
> + else
> +#endif
> + err = cl_kernel_set_arg(kernel, arg_index, arg_size, arg_value);
> error:
> return err;
> }
> @@ -1532,6 +1543,12 @@ clFinish(cl_command_queue command_queue)
> cl_int err = CL_SUCCESS;
>
> CHECK_QUEUE (command_queue);
> +
> +#ifdef HAS_CMRT
> + if (command_queue->cmrt_event != NULL)
> + return cmrt_wait_for_task_finished(command_queue);
> +#endif
> +
> err = cl_command_queue_finish(command_queue);
>
> error:
> @@ -2655,6 +2672,11 @@ clEnqueueMapBuffer(cl_command_queue
> command_queue,
> goto error;
> }
>
> +#ifdef HAS_CMRT
> + if (command_queue->cmrt_event != NULL)
> + cmrt_wait_for_task_finished(command_queue);
> +#endif
> +
> TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, buffer->ctx);
>
> data = &no_wait_data;
> @@ -2743,6 +2765,11 @@ clEnqueueMapImage(cl_command_queue
> command_queue,
> goto error;
> }
>
> +#ifdef HAS_CMRT
> + if (command_queue->cmrt_event != NULL)
> + cmrt_wait_for_task_finished(command_queue);
> +#endif
> +
> TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, mem->ctx);
>
> data = &no_wait_data;
> @@ -2948,6 +2975,12 @@ clEnqueueNDRangeKernel(cl_command_queue
> command_queue,
> goto error;
> }
>
> +#ifdef HAS_CMRT
> + if (kernel->cmrt_kernel != NULL) {
> + err = cmrt_enqueue(command_queue, kernel, global_work_size,
> local_work_size);
> + goto error;
> + }
> +#endif
>
> /* XXX No event right now */
> //FATAL_IF(num_events_in_wait_list > 0, "Events are not supported"); diff
> --git a/src/cl_cmrt.cpp b/src/cl_cmrt.cpp new file mode 100644 index
> 0000000..25e4d82
> --- /dev/null
> +++ b/src/cl_cmrt.cpp
> @@ -0,0 +1,311 @@
> +#include "cl_cmrt.h"
> +#include "cl_device_id.h"
> +#include "intel/intel_defines.h"
> +#include "cl_command_queue.h"
> +
> +#include "cm_rt.h" //header file of libcmrt.so
> +typedef INT (*CreateCmDeviceFunc)(CmDevice * &pDevice, UINT &
> version,
> + CmDriverContext * drivercontext, UINT
> DevCreateOption); typedef
> +INT (*DestroyCmDeviceFunc)(CmDevice * &pDevice);
> +
> +#include <dlfcn.h>
> +
> +static void* dlhCMRT = NULL;
> +static CreateCmDeviceFunc pfnCreateCmDevice = NULL; static
> +DestroyCmDeviceFunc pfnDestroyCmDevice = NULL;
> +
> +#define XSTR(x) #x
> +#define STR(x) XSTR(x)
> +
> +class CmrtCleanup
> +{
> +public:
> + CmrtCleanup(){}
> + ~CmrtCleanup()
> + {
> + if (dlhCMRT != NULL)
> + dlclose(dlhCMRT);
> + }
> +};
> +
> +enum CMRT_MEM_TYPE
> +{
> + CMRT_BUFFER,
> + CMRT_SURFACE2D,
> +};
> +
> +static CmrtCleanup cmrtCleanup;
> +
> +static bool LoadCmrtLibrary()
> +{
> + if (dlhCMRT == NULL) {
> + dlhCMRT = dlopen(STR(CMRT_PATH), RTLD_LAZY | RTLD_LOCAL);
> +
> + if (dlhCMRT == NULL)
> + return false;
> +
> + pfnCreateCmDevice = (CreateCmDeviceFunc)dlsym(dlhCMRT,
> "CreateCmDevice");
> + if (pfnCreateCmDevice == NULL)
> + return false;
> +
> + pfnDestroyCmDevice = (DestroyCmDeviceFunc)dlsym(dlhCMRT,
> "DestroyCmDevice");
> + if (pfnDestroyCmDevice == NULL)
> + return false;
> + }
> + return true;
> +}
> +
> +cl_int cmrt_build_program(cl_program p, const char *options) {
> + CmDevice*& cmrt_device = (CmDevice*&)(p->ctx->device->cmrt_device);
> + int result;
> + if (cmrt_device == NULL)
> + {
> + if (!LoadCmrtLibrary())
> + return CL_DEVICE_NOT_AVAILABLE; //yes, the error is not accurate,
> but i do not find a bettere one
> +
> + CmDriverContext ctx;
> + ctx.shared_bufmgr = 1;
> + ctx.bufmgr = (drm_intel_bufmgr*)cl_context_get_bufmgr(p->ctx);
> + ctx.userptr_enabled = 0;
> + ctx.deviceid = p->ctx->device->device_id;
> + ctx.device_rev = -1;
> + UINT version = 0;
> + result = (*pfnCreateCmDevice)(cmrt_device, version, &ctx,
> CM_DEVICE_CREATE_OPTION_DEFAULT);
> + if (result != CM_SUCCESS)
> + return CL_DEVICE_NOT_AVAILABLE;
> + }
> +
> + CmProgram* cmrt_program = NULL;
> + result = cmrt_device->LoadProgram(p->binary, p->binary_sz,
> + cmrt_program, options); if (result != CM_SUCCESS)
> + return CL_COMPILE_PROGRAM_FAILURE;
> +
> + p->cmrt_program = cmrt_program;
> + cmrt_program->GetKernelCount(p->ker_n);
> + return CL_SUCCESS;
> +}
> +
> +cl_int cmrt_destroy_program(cl_program p) {
> + CmDevice* cmrt_device = (CmDevice*)(p->ctx->device->cmrt_device);
> + CmProgram*& cmrt_program = (CmProgram*&)(p->cmrt_program);
> + if (cmrt_device->DestroyProgram(cmrt_program) != CM_SUCCESS)
> + return CL_INVALID_PROGRAM;
> + return CL_SUCCESS;
> +}
> +
> +cl_int cmrt_destroy_device(cl_device_id device) {
> + CmDevice*& cmrt_device = (CmDevice*&)(device->cmrt_device);
> + if ((*pfnDestroyCmDevice)(cmrt_device) != CM_SUCCESS)
> + return CL_INVALID_DEVICE;
> + return CL_SUCCESS;
> +}
> +
> +void* cmrt_create_kernel(cl_program p, const char *name) {
> + CmDevice* cmrt_device = (CmDevice*)(p->ctx->device->cmrt_device);
> + CmKernel* cmrt_kernel = NULL;
> + int result = cmrt_device->CreateKernel((CmProgram*)(p->cmrt_program),
> +name, cmrt_kernel);
> + if (result != CM_SUCCESS)
> + return NULL;
> +
> + return cmrt_kernel;
> +}
> +
> +cl_int cmrt_destroy_kernel(cl_kernel k) {
> + CmDevice* cmrt_device =
> +(CmDevice*)(k->program->ctx->device->cmrt_device);
> + CmKernel*& cmrt_kernel = (CmKernel*&)(k->cmrt_kernel);
> + if (cmrt_device->DestroyKernel(cmrt_kernel) != CM_SUCCESS)
> + return CL_INVALID_KERNEL;
> + return CL_SUCCESS;
> +}
> +
> +cl_int cmrt_enqueue(cl_command_queue cq, cl_kernel k, const size_t*
> +global_work_size, const size_t* local_work_size) {
> + CmDevice* cmrt_device =
> +(CmDevice*)(k->program->ctx->device->cmrt_device);
> + CmKernel* cmrt_kernel = (CmKernel*)(k->cmrt_kernel);
> +
> + int result = 0;
> +
> + cmrt_kernel->SetThreadCount(global_work_size[0]*global_work_size[1]);
> +
> + //no need to destory queue explicitly, //and there is only one queue
> + instance within each device, //CreateQueue always returns the same
> + instance
> + CmQueue* pCmQueue = NULL;
> + cmrt_device->CreateQueue(pCmQueue);
> +
> + CmTask *pKernelArray = NULL;
> + cmrt_device->CreateTask(pKernelArray);
> +
> + pKernelArray->AddKernel(cmrt_kernel);
> +
> + CmEvent* e = NULL;
> +
> + if (local_work_size == NULL) {
> + CmThreadSpace* pTS = NULL;
> + cmrt_device->CreateThreadSpace(global_work_size[0],
> global_work_size[1], pTS);
> + result = pCmQueue->Enqueue(pKernelArray, e, pTS);
> + } else {
> + CmThreadGroupSpace* pTGS = NULL;
> + cmrt_device->CreateThreadGroupSpace(global_work_size[0],
> global_work_size[1], local_work_size[0], local_work_size[1], pTGS);
> + result = pCmQueue->EnqueueWithGroup(pKernelArray, e, pTGS);
> + cmrt_device->DestroyThreadGroupSpace(pTGS);
> + }
> +
> + if (result != CM_SUCCESS)
> + return CL_INVALID_OPERATION;
> +
> + cmrt_device->DestroyTask(pKernelArray);
> +
> + CmEvent*& olde = (CmEvent*&)cq->cmrt_event; if (olde != NULL)
> + pCmQueue->DestroyEvent(e);
> +
> + cq->cmrt_event = e;
> +
> + return CL_SUCCESS;
> +}
> +
> +static VA_CM_FORMAT GetCmrtFormat(_cl_mem_image* image) {
> + switch (image->intel_fmt)
> + {
> + case I965_SURFACEFORMAT_B8G8R8A8_UNORM:
> + return VA_CM_FMT_A8R8G8B8;
> + case I965_SURFACEFORMAT_B8G8R8X8_UNORM:
> + return VA_CM_FMT_X8R8G8B8;
> + case I965_SURFACEFORMAT_A8_UNORM:
> + return VA_CM_FMT_A8;
> + case I965_SURFACEFORMAT_R10G10B10A2_UNORM:
> + return VA_CM_FMT_A2B10G10R10;
> + case I965_SURFACEFORMAT_R16G16B16A16_UNORM:
> + return VA_CM_FMT_A16B16G16R16;
> + case I965_SURFACEFORMAT_L8_UNORM:
> + return VA_CM_FMT_L8;
> + case I965_SURFACEFORMAT_R16_UINT:
> + return VA_CM_FMT_R16U;
> + case I965_SURFACEFORMAT_R8_UNORM:
> + return VA_CM_FMT_R8U;
> + case I965_SURFACEFORMAT_L16_UNORM:
> + return VA_CM_FMT_L16;
> + case I965_SURFACEFORMAT_R32_FLOAT:
> + return VA_CM_FMT_R32F;
> + default:
> + return VA_CM_FMT_UNKNOWN;
> + }
> +}
> +
> +static bool CreateCmrtMemory(cl_mem mem) {
> + if (mem->cmrt_mem != NULL)
> + return true;
> +
> + CmDevice* cmrt_device = (CmDevice*)(mem->ctx->device->cmrt_device);
> + int result;
> + CmOsResource osResource;
> + osResource.bo_size = mem->size;
> + osResource.bo_flags = DRM_BO_HANDLE;
> + osResource.bo = (drm_intel_bo*)mem->bo; if (IS_IMAGE(mem)) {
> + _cl_mem_image* image = cl_mem_image(mem);
> + if (CL_MEM_OBJECT_IMAGE2D != image->image_type)
> + return CL_INVALID_ARG_VALUE;
> + osResource.format = GetCmrtFormat(image);
> + if (osResource.format == VA_CM_FMT_UNKNOWN)
> + return false;
> + osResource.aligned_width = image->row_pitch;
> + osResource.aligned_height = mem->size / image->row_pitch;
> + osResource.pitch = image->row_pitch;
> + osResource.tile_type = image->tiling;
> + osResource.orig_width = image->w;
> + osResource.orig_height = image->h;
> + CmSurface2D*& cmrt_surface2d = (CmSurface2D*&)(mem->cmrt_mem);
> + result = cmrt_device->CreateSurface2D(&osResource, cmrt_surface2d);
> + mem->cmrt_mem_type = CMRT_SURFACE2D; } else {
> + osResource.format = VA_CM_FMT_BUFFER;
> + osResource.buf_bytes = mem->size;
> + CmBuffer*& cmrt_buffer = (CmBuffer*&)(mem->cmrt_mem);
> + result = cmrt_device->CreateBuffer(&osResource, cmrt_buffer);
> + mem->cmrt_mem_type = CMRT_BUFFER;
> + }
> +
> + if (result != CM_SUCCESS)
> + return false;
> +
> + return true;
> +}
> +
> +cl_int cmrt_set_kernel_arg(cl_kernel k, cl_uint index, size_t sz, const
> +void *value) {
> + if(value == NULL)
> + return CL_INVALID_ARG_VALUE;
> +
> + CmKernel* cmrt_kernel = (CmKernel*)(k->cmrt_kernel);
> +
> + WORD argKind = -1;
> + if (cmrt_kernel->GetArgKind(index, argKind) != CM_SUCCESS)
> + return CL_INVALID_ARG_INDEX;
> +
> + int result;
> + if (argKind == ARG_KIND_GENERAL)
> + result = cmrt_kernel->SetKernelArg(index, sz, value); else {
> + cl_mem mem = *(cl_mem*)value;
> + if (mem->magic == CL_MAGIC_MEM_HEADER) {
> + if (!CreateCmrtMemory(mem))
> + return CL_INVALID_ARG_VALUE;
> +
> + SurfaceIndex * memIndex = NULL;
> + if (mem->cmrt_mem_type == CMRT_BUFFER) {
> + CmBuffer* cmrt_buffer = (CmBuffer*)(mem->cmrt_mem);
> + cmrt_buffer->GetIndex(memIndex);
> + } else {
> + CmSurface2D* cmrt_surface2d = (CmSurface2D*)(mem->cmrt_mem);
> + cmrt_surface2d->GetIndex(memIndex);
> + }
> + result = cmrt_kernel->SetKernelArg(index, sizeof(SurfaceIndex),
> memIndex);
> + } else
> + return CL_INVALID_ARG_VALUE;
> + }
> +
> + if (result != CM_SUCCESS)
> + return CL_INVALID_KERNEL_ARGS;
> +
> + return CL_SUCCESS;
> +}
> +
> +cl_int cmrt_destroy_memory(cl_mem mem)
> +{
> + CmDevice* cmrt_device = (CmDevice*)(mem->ctx->device->cmrt_device);
> + if (mem->cmrt_mem_type == CMRT_BUFFER) {
> + CmBuffer*& cmrt_buffer = (CmBuffer*&)(mem->cmrt_mem);
> + cmrt_device->DestroySurface(cmrt_buffer);
> + } else {
> + CmSurface2D*& cmrt_surface2d = (CmSurface2D*&)(mem->cmrt_mem);
> + cmrt_device->DestroySurface(cmrt_surface2d);
> + }
> + return CL_SUCCESS;
> +}
> +
> +cl_int cmrt_destroy_event(cl_command_queue cq) {
> + CmEvent*& cmrt_event = (CmEvent*&)(cq->cmrt_event);
> + CmDevice* cmrt_device = (CmDevice*)(cq->ctx->device->cmrt_device);
> + CmQueue* pCmQueue = NULL;
> + cmrt_event->WaitForTaskFinished();
> + cmrt_device->CreateQueue(pCmQueue);
> + pCmQueue->DestroyEvent(cmrt_event);
> + return CL_SUCCESS;
> +}
> +
> +cl_int cmrt_wait_for_task_finished(cl_command_queue cq) {
> + CmEvent* cmrt_event = (CmEvent*)(cq->cmrt_event);
> + cmrt_event->WaitForTaskFinished();
> + return CL_SUCCESS;
> +}
> diff --git a/src/cl_cmrt.h b/src/cl_cmrt.h new file mode 100644 index
> 0000000..316095c
> --- /dev/null
> +++ b/src/cl_cmrt.h
> @@ -0,0 +1,45 @@
> +/*
> + * Copyright @2015 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> <http://www.gnu.org/licenses/>.
> + *
> + * Author: Guo Yejun <yejun.guo at intel.com> */
> +
> +#ifndef __CL_CMRT_H__
> +#define __CL_CMRT_H__
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include "cl_kernel.h"
> +#include "cl_program.h"
> +
> +cl_int cmrt_build_program(cl_program p, const char *options); cl_int
> +cmrt_destroy_program(cl_program p); cl_int
> +cmrt_destroy_device(cl_device_id device);
> +void* cmrt_create_kernel(cl_program p, const char *name); cl_int
> +cmrt_destroy_kernel(cl_kernel k); cl_int
> cmrt_enqueue(cl_command_queue
> +cq, cl_kernel k, const size_t* global_work_size, const size_t*
> +local_work_size); cl_int cmrt_set_kernel_arg(cl_kernel k, cl_uint
> +index, size_t sz, const void *value); cl_int cmrt_destroy_memory(cl_mem
> +mem); cl_int cmrt_destroy_event(cl_command_queue cq); cl_int
> +cmrt_wait_for_task_finished(cl_command_queue cq);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index
> 033e7df..549f648 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -31,6 +31,7 @@
> #include "cl_khr_icd.h"
> #include "cl_event.h"
> #include "performance.h"
> +#include "cl_cmrt.h"
>
> #include <assert.h>
> #include <stdio.h>
> @@ -47,6 +48,7 @@ cl_command_queue_new(cl_context ctx)
> queue->magic = CL_MAGIC_QUEUE_HEADER;
> queue->ref_n = 1;
> queue->ctx = ctx;
> + queue->cmrt_event = NULL;
> if ((queue->thread_data = cl_thread_data_create()) == NULL) {
> goto error;
> }
> @@ -76,6 +78,11 @@ cl_command_queue_delete(cl_command_queue
> queue)
> assert(queue);
> if (atomic_dec(&queue->ref_n) != 1) return;
>
> +#ifdef HAS_CMRT
> + if (queue->cmrt_event != NULL)
> + cmrt_destroy_event(queue);
> +#endif
> +
> // If there is a list of valid events, we need to give them
> // a chance to call the call-back function.
> cl_event_update_last_events(queue,1);
> diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h index
> 2cd6739..d1b8c44 100644
> --- a/src/cl_command_queue.h
> +++ b/src/cl_command_queue.h
> @@ -44,6 +44,8 @@ struct _cl_command_queue {
> cl_command_queue prev, next; /* We chain the command queues
> together */
> void *thread_data; /* Used to store thread context data */
> cl_mem perf; /* Where to put the perf counters */
> +
> + void* cmrt_event; /* the latest CmEvent* of the command
> queue */
> };
>
> /* The macro to get the thread specified gpgpu struct. */ diff --git
> a/src/cl_device_id.h b/src/cl_device_id.h index e971735..619fa0a 100644
> --- a/src/cl_device_id.h
> +++ b/src/cl_device_id.h
> @@ -123,6 +123,9 @@ struct _cl_device_id {
> uint32_t atomic_test_result;
> uint32_t image_pitch_alignment;
> uint32_t image_base_address_alignment;
> +
> + //inited as NULL, created only when cmrt kernel is used
> + void* cmrt_device; //realtype: CmDevice*
> };
>
> /* Get a device from the given platform */ diff --git a/src/cl_gt_device.h
> b/src/cl_gt_device.h index d8089c2..b4c610e 100644
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -131,3 +131,4 @@ DECL_INFO_STRING(spir_versions,
> "1.2") .device_reference_count = 1, .image_pitch_alignment =
> 1, .image_base_address_alignment = 4096,
> +.cmrt_device = NULL
> diff --git a/src/cl_kernel.c b/src/cl_kernel.c index b2d1955..b380abe 100644
> --- a/src/cl_kernel.c
> +++ b/src/cl_kernel.c
> @@ -28,6 +28,7 @@
> #include "CL/cl.h"
> #include "cl_sampler.h"
> #include "cl_accelerator_intel.h"
> +#include "cl_cmrt.h"
>
> #include <stdio.h>
> #include <string.h>
> @@ -41,6 +42,15 @@ cl_kernel_delete(cl_kernel k)
> uint32_t i;
> if (k == NULL) return;
>
> +#ifdef HAS_CMRT
> + if (k->cmrt_kernel != NULL) {
> + cmrt_destroy_kernel(k);
> + k->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
> + cl_free(k);
> + return;
> + }
> +#endif
> +
> /* We are not done with the kernel */
> if (atomic_dec(&k->ref_n) > 1) return;
> /* Release one reference on all bos we own */ @@ -71,6 +81,7 @@
> cl_kernel_new(cl_program p)
> k->ref_n = 1;
> k->magic = CL_MAGIC_KERNEL_HEADER;
> k->program = p;
> + k->cmrt_kernel = NULL;
>
> exit:
> return k;
> diff --git a/src/cl_kernel.h b/src/cl_kernel.h index 7f59162..05a882e 100644
> --- a/src/cl_kernel.h
> +++ b/src/cl_kernel.h
> @@ -69,6 +69,8 @@ struct _cl_kernel {
> uint32_t arg_n:30; /* Number of arguments */
> uint32_t ref_its_program:1; /* True only for the user kernel (created by
> clCreateKernel) */
> uint32_t vme:1; /* True only if it is a built-in kernel for VME */
> +
> + void* cmrt_kernel; /* CmKernel* */
> };
>
> /* Allocate an empty kernel */
> diff --git a/src/cl_mem.c b/src/cl_mem.c index 9a6bb83..1f03c69 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -27,6 +27,7 @@
> #include "cl_khr_icd.h"
> #include "cl_kernel.h"
> #include "cl_command_queue.h"
> +#include "cl_cmrt.h"
>
> #include "CL/cl.h"
> #include "CL/cl_intel.h"
> @@ -268,6 +269,7 @@ cl_mem_allocate(enum cl_mem_type type,
> mem->flags = flags;
> mem->is_userptr = 0;
> mem->offset = 0;
> + mem->cmrt_mem = NULL;
> if (mem->type == CL_MEM_IMAGE_TYPE) {
> cl_mem_image(mem)->is_image_from_buffer = 0;
> }
> @@ -1166,6 +1168,11 @@ cl_mem_delete(cl_mem mem)
> }
> #endif
>
> +#ifdef HAS_CMRT
> + if (mem->cmrt_mem != NULL)
> + cmrt_destroy_memory(mem);
> +#endif
> +
> /* iff we are a image, delete the 1d buffer if has. */
> if (IS_IMAGE(mem)) {
> if (cl_mem_image(mem)->buffer_1d) { diff --git a/src/cl_mem.h
> b/src/cl_mem.h index fb24115..c8f256d 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -95,6 +95,9 @@ typedef struct _cl_mem {
> cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */
> uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/
> size_t offset; /* offset of host_ptr to the page beginning, only for
> CL_MEM_USE_HOST_PTR*/
> +
> + uint8_t cmrt_mem_type; /* CmBuffer, CmSurface2D, ... */
> + void* cmrt_mem;
> } _cl_mem;
>
> struct _cl_mem_image {
> diff --git a/src/cl_program.c b/src/cl_program.c index 98b6d51..1dca673
> 100644
> --- a/src/cl_program.c
> +++ b/src/cl_program.c
> @@ -25,6 +25,7 @@
> #include "cl_utils.h"
> #include "cl_khr_icd.h"
> #include "cl_gbe_loader.h"
> +#include "cl_cmrt.h"
> #include "CL/cl.h"
> #include "CL/cl_intel.h"
>
> @@ -92,10 +93,17 @@ cl_program_delete(cl_program p)
> p->ctx->programs = p->next;
> pthread_mutex_unlock(&p->ctx->program_lock);
>
> - cl_free(p->bin); /* Free the blob */
> - for (i = 0; i < p->ker_n; ++i) /* Free the kernels */
> - cl_kernel_delete(p->ker[i]);
> - cl_free(p->ker);
> +#ifdef HAS_CMRT
> + if (p->cmrt_program != NULL)
> + cmrt_destroy_program(p);
> + else
> +#endif
> + {
> + cl_free(p->bin); /* Free the blob */
> + for (i = 0; i < p->ker_n; ++i) /* Free the kernels */
> + cl_kernel_delete(p->ker[i]);
> + cl_free(p->ker);
> + }
>
> /* Program belongs to their parent context */
> cl_context_delete(p->ctx);
> @@ -123,6 +131,7 @@ cl_program_new(cl_context ctx)
> p->ref_n = 1;
> p->magic = CL_MAGIC_PROGRAM_HEADER;
> p->ctx = ctx;
> + p->cmrt_program = NULL;
> p->build_log = calloc(1000, sizeof(char));
> if (p->build_log)
> p->build_log_max_sz = 1000;
> @@ -172,12 +181,14 @@ static const unsigned char
> binary_type_header[BHI_MAX][BINARY_HEADER_LENGTH]= \
> {{'B','C', 0xC0, 0xDE},
> {1, 'B', 'C', 0xC0, 0xDE},
> {2, 'B', 'C', 0xC0, 0xDE},
> - {0, 'G','E', 'N', 'C'}};
> + {0, 'G','E', 'N', 'C'},
> + {'C','I', 'S', 'A'},
> + };
>
> LOCAL cl_bool headerCompare(const unsigned char *BufPtr,
> BINARY_HEADER_INDEX index) {
> bool matched = true;
> - int length = index == BHI_SPIR ? BINARY_HEADER_LENGTH -
> 1 :BINARY_HEADER_LENGTH;
> + int length = (index == BHI_SPIR || index == BHI_CMRT) ?
> + BINARY_HEADER_LENGTH -1 :BINARY_HEADER_LENGTH;
> int i = 0;
> for (i = 0; i < length; ++i)
> {
> @@ -190,6 +201,7 @@ LOCAL cl_bool headerCompare(const unsigned char
> *BufPtr, BINARY_HEADER_INDEX ind #define isLLVM_C_O(BufPtr)
> headerCompare(BufPtr, BHI_COMPIRED_OBJECT) #define
> isLLVM_LIB(BufPtr) headerCompare(BufPtr, BHI_LIBRARY) #define
> isGenBinary(BufPtr) headerCompare(BufPtr, BHI_GEN_BINARY)
> +#define isCMRT(BufPtr) headerCompare(BufPtr, BHI_CMRT)
>
> LOCAL cl_program
> cl_program_create_from_binary(cl_context ctx,
> @@ -236,8 +248,9 @@ cl_program_create_from_binary(cl_context ctx,
> program->binary_sz = lengths[0];
> program->source_type = FROM_BINARY;
>
> - if(isSPIR((unsigned char*)program->binary)) {
> -
> + if (isCMRT((unsigned char*)program->binary)) {
> + program->source_type = FROM_CMRT;
> + }else if(isSPIR((unsigned char*)program->binary)) {
> char* typed_binary;
> TRY_ALLOC(typed_binary, cl_calloc(lengths[0]+1, sizeof(char)));
> memcpy(typed_binary+1, binaries[0], lengths[0]); @@ -518,6 +531,20 @@
> cl_program_build(cl_program p, const char *options)
> goto error;
> }
>
> +#if HAS_CMRT
> + if (p->source_type == FROM_CMRT) {
> + //only here we begins to invoke cmrt
> + //break spec to return other errors such as CL_DEVICE_NOT_FOUND
> + err = cmrt_build_program(p, options);
> + if (err == CL_SUCCESS) {
> + p->build_status = CL_BUILD_SUCCESS;
> + p->binary_type = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
> + return CL_SUCCESS;
> + } else
> + goto error;
> + }
> +#endif
> +
> if (!check_cl_version_option(p, options)) {
> err = CL_BUILD_PROGRAM_FAILURE;
> goto error;
> @@ -833,6 +860,20 @@ cl_program_create_kernel(cl_program p, const char
> *name, cl_int *errcode_ret)
> cl_int err = CL_SUCCESS;
> uint32_t i = 0;
>
> +#ifdef HAS_CMRT
> + if (p->cmrt_program != NULL) {
> + void* cmrt_kernel = cmrt_create_kernel(p, name);
> + if (cmrt_kernel != NULL) {
> + to = cl_kernel_new(p);
> + to->cmrt_kernel = cmrt_kernel;
> + goto exit;
> + } else {
> + err = CL_INVALID_KERNEL_NAME;
> + goto error;
> + }
> + }
> +#endif
> +
> /* Find the program first */
> for (i = 0; i < p->ker_n; ++i) {
> assert(p->ker[i]);
> diff --git a/src/cl_program.h b/src/cl_program.h index 63ad16d..899a31a
> 100644
> --- a/src/cl_program.h
> +++ b/src/cl_program.h
> @@ -34,14 +34,16 @@ enum {
> FROM_SOURCE = 0,
> FROM_LLVM = 1,
> FROM_BINARY = 2,
> - FROM_LLVM_SPIR = 3
> + FROM_LLVM_SPIR = 3,
> + FROM_CMRT = 4,
> };
>
> typedef enum _BINARY_HEADER_INDEX {
> BHI_SPIR = 0,
> BHI_COMPIRED_OBJECT = 1,
> BHI_LIBRARY = 2,
> - BHI_GEN_BINARY = 3, /*remember update BHI_MAX if add option.*/
> + BHI_GEN_BINARY = 3,
> + BHI_CMRT = 4,
> BHI_MAX,
> }BINARY_HEADER_INDEX;
>
> @@ -61,13 +63,15 @@ struct _cl_program {
> size_t binary_sz; /* The binary size. */
> uint32_t binary_type; /* binary type: COMPILED_OBJECT(LLVM IR),
> LIBRARY(LLVM IR with option "-create-library"), or EXECUTABLE(GEN binary).
> */
> uint32_t ker_n; /* Number of declared kernels */
> - uint32_t source_type:2; /* Built from binary, source or LLVM */
> + uint32_t source_type:3; /* Built from binary, source, CMRT or LLVM*/
> uint32_t is_built:1; /* Did we call clBuildProgram on it? */
> int32_t build_status; /* build status. */
> char *build_opts; /* The build options for this program */
> size_t build_log_max_sz; /*build log maximum size in byte.*/
> char *build_log; /* The build log for this program. */
> size_t build_log_sz; /* The actual build log size.*/
> +
> + void* cmrt_program; /* real type: CmProgram* */
> };
>
> /* Create a empty program */
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list