[Beignet] [PATCH V2 1/2] make Beignet as intermedia layer of CMRT

Yang, Rong R rong.r.yang at intel.com
Wed Dec 9 00:38:26 PST 2015


LGTM, pushed, thanks.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo, Yejun
> Sent: Thursday, December 3, 2015 16:53
> To: beignet at lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH V2 1/2] make Beignet as intermedia layer of
> CMRT
> 
> Ping for review, thanks.
> 
> -----Original Message-----
> From: Guo, Yejun
> Sent: Thursday, November 19, 2015 2:03 AM
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [PATCH V2 1/2] make Beignet as intermedia layer of CMRT
> 
> CMRT is C for Media Runtime on Intel GPU, see
> https://github.com/01org/cmrt.
> There is a request to make Beignet as intermedia layer of CMRT, in other
> words, application programer write OpenCL APIs to execute the CM kernel
> on GPU, the following shows the key code, and please refer to the next
> patch of unit test for detail.
> 
> prog = clCreateProgramWithBinary("cm kernel"); clBuildProgram(prog);
> kernel = clCreateKernel(prog, "kernel name"); image = clCreateImage();
> clSetKernelArg(kernel, image); clEnqueueNDRangeKernel(kernel);
> 
> Inside Beignet, once cm kernel is invoked, the following relative APIs will be
> directly passed to CMRT library (libcmrt.so) which is loaded via dlopen only
> when necessary. Since we use this simple method to keep the code clean,
> OpenCL spec is not strictly followed, and cl_event is not supported for this
> case.
> 
> v2: add comments about the cm queue in fuction cmrt_enqueue
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  CMakeLists.txt         |   6 +
>  src/CMakeLists.txt     |   7 ++
>  src/cl_api.c           |  39 ++++++-
>  src/cl_cmrt.cpp        | 311
> +++++++++++++++++++++++++++++++++++++++++++++++++
>  src/cl_cmrt.h          |  45 +++++++
>  src/cl_command_queue.c |   7 ++
>  src/cl_command_queue.h |   2 +
>  src/cl_device_id.h     |   3 +
>  src/cl_gt_device.h     |   1 +
>  src/cl_kernel.c        |  11 ++
>  src/cl_kernel.h        |   2 +
>  src/cl_mem.c           |   7 ++
>  src/cl_mem.h           |   3 +
>  src/cl_program.c       |  57 +++++++--
>  src/cl_program.h       |  10 +-
>  15 files changed, 497 insertions(+), 14 deletions(-)  create mode 100644
> src/cl_cmrt.cpp  create mode 100644 src/cl_cmrt.h
> 
> diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c6c373..3411b6a 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -150,6 +150,12 @@ ELSE(DRM_INTEL_FOUND)
>    MESSAGE(FATAL_ERROR "Looking for DRM Intel (>= 2.4.52) - not found")
>  ENDIF(DRM_INTEL_FOUND)
> 
> +# CMRT
> +pkg_check_modules(CMRT libcmrt)
> +IF(CMRT_FOUND)
> +INCLUDE_DIRECTORIES(${CMRT_INCLUDE_DIRS})
> +ENDIF(CMRT_FOUND)
> +
>  # Threads
>  Find_Package(Threads)
> 
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c917e76..5f1872d
> 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -101,6 +101,13 @@ if (X11_FOUND)
>        x11/va_dri2.c)
>  endif (X11_FOUND)
> 
> +if (CMRT_FOUND)
> +  set(CMAKE_CXX_FLAGS "-DHAS_CMRT ${CMAKE_CXX_FLAGS}")
> +  set(CMAKE_CXX_FLAGS "-
> DCMRT_PATH=${CMRT_LIBRARY_DIRS}/libcmrt.so
> +${CMAKE_CXX_FLAGS}")
> +  set(CMAKE_C_FLAGS "-DHAS_CMRT ${CMAKE_C_FLAGS}")
> +  set(OPENCL_SRC ${OPENCL_SRC} cl_cmrt.cpp) endif (CMRT_FOUND)
> +
>  if (EGL_FOUND AND MESA_SOURCE_FOUND)
>  set (OPENCL_SRC ${OPENCL_SRC} cl_mem_gl.c cl_gl_api.c
> x11/mesa_egl_extension.c x11/mesa_egl_res_share.c
> intel/intel_dri_resource_sharing.c)
>  SET(CMAKE_CXX_FLAGS "-DHAS_EGL ${CMAKE_CXX_FLAGS}") diff --git
> a/src/cl_api.c b/src/cl_api.c index ddd39cf..7cedf4b 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -31,6 +31,7 @@
>  #include "cl_accelerator_intel.h"
>  #include "cl_alloc.h"
>  #include "cl_utils.h"
> +#include "cl_cmrt.h"
> 
>  #include "CL/cl.h"
>  #include "CL/cl_ext.h"
> @@ -276,6 +277,10 @@ clRetainDevice(cl_device_id device)  cl_int
> clReleaseDevice(cl_device_id device)  {
> +#ifdef HAS_CMRT
> +  cmrt_destroy_device(device);
> +#endif
> +
>    // XXX stub for C++ Bindings
>    return CL_SUCCESS;
>  }
> @@ -941,11 +946,11 @@ clBuildProgram(cl_program            program,
>      INVALID_DEVICE_IF (device_list[0] != program->ctx->device);
>    }
> 
> -  /* TODO support create program from binary */
>    assert(program->source_type == FROM_LLVM ||
>           program->source_type == FROM_SOURCE ||
>           program->source_type == FROM_LLVM_SPIR ||
> -         program->source_type == FROM_BINARY);
> +         program->source_type == FROM_BINARY ||
> +         program->source_type == FROM_CMRT);
>    if((err = cl_program_build(program, options)) != CL_SUCCESS) {
>      goto error;
>    }
> @@ -1244,7 +1249,13 @@ clSetKernelArg(cl_kernel     kernel,
>  {
>    cl_int err = CL_SUCCESS;
>    CHECK_KERNEL(kernel);
> -  err = cl_kernel_set_arg(kernel, arg_index, arg_size, arg_value);
> +
> +#ifdef HAS_CMRT
> +  if (kernel->cmrt_kernel != NULL)
> +    err = cmrt_set_kernel_arg(kernel, arg_index, arg_size, arg_value);
> +  else
> +#endif
> +    err = cl_kernel_set_arg(kernel, arg_index, arg_size, arg_value);
>  error:
>    return err;
>  }
> @@ -1532,6 +1543,12 @@ clFinish(cl_command_queue command_queue)
>    cl_int err = CL_SUCCESS;
> 
>    CHECK_QUEUE (command_queue);
> +
> +#ifdef HAS_CMRT
> +  if (command_queue->cmrt_event != NULL)
> +    return cmrt_wait_for_task_finished(command_queue);
> +#endif
> +
>    err = cl_command_queue_finish(command_queue);
> 
>  error:
> @@ -2655,6 +2672,11 @@ clEnqueueMapBuffer(cl_command_queue
> command_queue,
>      goto error;
>    }
> 
> +#ifdef HAS_CMRT
> +  if (command_queue->cmrt_event != NULL)
> +    cmrt_wait_for_task_finished(command_queue);
> +#endif
> +
>    TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, buffer->ctx);
> 
>    data = &no_wait_data;
> @@ -2743,6 +2765,11 @@ clEnqueueMapImage(cl_command_queue
> command_queue,
>      goto error;
>    }
> 
> +#ifdef HAS_CMRT
> +  if (command_queue->cmrt_event != NULL)
> +    cmrt_wait_for_task_finished(command_queue);
> +#endif
> +
>    TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, mem->ctx);
> 
>    data = &no_wait_data;
> @@ -2948,6 +2975,12 @@ clEnqueueNDRangeKernel(cl_command_queue
> command_queue,
>      goto error;
>    }
> 
> +#ifdef HAS_CMRT
> +  if (kernel->cmrt_kernel != NULL) {
> +    err = cmrt_enqueue(command_queue, kernel, global_work_size,
> local_work_size);
> +    goto error;
> +  }
> +#endif
> 
>    /* XXX No event right now */
>    //FATAL_IF(num_events_in_wait_list > 0, "Events are not supported"); diff
> --git a/src/cl_cmrt.cpp b/src/cl_cmrt.cpp new file mode 100644 index
> 0000000..25e4d82
> --- /dev/null
> +++ b/src/cl_cmrt.cpp
> @@ -0,0 +1,311 @@
> +#include "cl_cmrt.h"
> +#include "cl_device_id.h"
> +#include "intel/intel_defines.h"
> +#include "cl_command_queue.h"
> +
> +#include "cm_rt.h"      //header file of libcmrt.so
> +typedef INT (*CreateCmDeviceFunc)(CmDevice * &pDevice, UINT &
> version,
> +			    CmDriverContext * drivercontext, UINT
> DevCreateOption); typedef
> +INT (*DestroyCmDeviceFunc)(CmDevice * &pDevice);
> +
> +#include <dlfcn.h>
> +
> +static void* dlhCMRT = NULL;
> +static CreateCmDeviceFunc pfnCreateCmDevice = NULL; static
> +DestroyCmDeviceFunc pfnDestroyCmDevice = NULL;
> +
> +#define XSTR(x) #x
> +#define STR(x) XSTR(x)
> +
> +class CmrtCleanup
> +{
> +public:
> +  CmrtCleanup(){}
> +  ~CmrtCleanup()
> +  {
> +    if (dlhCMRT != NULL)
> +      dlclose(dlhCMRT);
> +  }
> +};
> +
> +enum CMRT_MEM_TYPE
> +{
> +    CMRT_BUFFER,
> +    CMRT_SURFACE2D,
> +};
> +
> +static CmrtCleanup cmrtCleanup;
> +
> +static bool LoadCmrtLibrary()
> +{
> +  if (dlhCMRT == NULL) {
> +    dlhCMRT = dlopen(STR(CMRT_PATH), RTLD_LAZY | RTLD_LOCAL);
> +
> +    if (dlhCMRT == NULL)
> +      return false;
> +
> +    pfnCreateCmDevice = (CreateCmDeviceFunc)dlsym(dlhCMRT,
> "CreateCmDevice");
> +    if (pfnCreateCmDevice == NULL)
> +      return false;
> +
> +    pfnDestroyCmDevice = (DestroyCmDeviceFunc)dlsym(dlhCMRT,
> "DestroyCmDevice");
> +    if (pfnDestroyCmDevice == NULL)
> +      return false;
> +  }
> +  return true;
> +}
> +
> +cl_int cmrt_build_program(cl_program p, const char *options) {
> +  CmDevice*& cmrt_device = (CmDevice*&)(p->ctx->device->cmrt_device);
> +  int result;
> +  if (cmrt_device == NULL)
> +  {
> +    if (!LoadCmrtLibrary())
> +      return CL_DEVICE_NOT_AVAILABLE;   //yes, the error is not accurate,
> but i do not find a bettere one
> +
> +    CmDriverContext ctx;
> +    ctx.shared_bufmgr = 1;
> +    ctx.bufmgr = (drm_intel_bufmgr*)cl_context_get_bufmgr(p->ctx);
> +    ctx.userptr_enabled = 0;
> +    ctx.deviceid = p->ctx->device->device_id;
> +    ctx.device_rev = -1;
> +    UINT version = 0;
> +    result = (*pfnCreateCmDevice)(cmrt_device, version, &ctx,
> CM_DEVICE_CREATE_OPTION_DEFAULT);
> +    if (result != CM_SUCCESS)
> +      return CL_DEVICE_NOT_AVAILABLE;
> +  }
> +
> +  CmProgram* cmrt_program = NULL;
> +  result = cmrt_device->LoadProgram(p->binary, p->binary_sz,
> + cmrt_program, options);  if (result != CM_SUCCESS)
> +    return CL_COMPILE_PROGRAM_FAILURE;
> +
> +  p->cmrt_program = cmrt_program;
> +  cmrt_program->GetKernelCount(p->ker_n);
> +  return CL_SUCCESS;
> +}
> +
> +cl_int cmrt_destroy_program(cl_program p) {
> +  CmDevice* cmrt_device = (CmDevice*)(p->ctx->device->cmrt_device);
> +  CmProgram*& cmrt_program = (CmProgram*&)(p->cmrt_program);
> +  if (cmrt_device->DestroyProgram(cmrt_program) != CM_SUCCESS)
> +    return CL_INVALID_PROGRAM;
> +  return CL_SUCCESS;
> +}
> +
> +cl_int cmrt_destroy_device(cl_device_id device) {
> +  CmDevice*& cmrt_device = (CmDevice*&)(device->cmrt_device);
> +  if ((*pfnDestroyCmDevice)(cmrt_device) != CM_SUCCESS)
> +    return CL_INVALID_DEVICE;
> +  return CL_SUCCESS;
> +}
> +
> +void* cmrt_create_kernel(cl_program p, const char *name) {
> +  CmDevice* cmrt_device = (CmDevice*)(p->ctx->device->cmrt_device);
> +  CmKernel* cmrt_kernel = NULL;
> +  int result = cmrt_device->CreateKernel((CmProgram*)(p->cmrt_program),
> +name, cmrt_kernel);
> +  if (result != CM_SUCCESS)
> +    return NULL;
> +
> +  return cmrt_kernel;
> +}
> +
> +cl_int cmrt_destroy_kernel(cl_kernel k) {
> +  CmDevice* cmrt_device =
> +(CmDevice*)(k->program->ctx->device->cmrt_device);
> +  CmKernel*& cmrt_kernel = (CmKernel*&)(k->cmrt_kernel);
> +  if (cmrt_device->DestroyKernel(cmrt_kernel) != CM_SUCCESS)
> +    return CL_INVALID_KERNEL;
> +  return CL_SUCCESS;
> +}
> +
> +cl_int cmrt_enqueue(cl_command_queue cq, cl_kernel k, const size_t*
> +global_work_size, const size_t* local_work_size) {
> +  CmDevice* cmrt_device =
> +(CmDevice*)(k->program->ctx->device->cmrt_device);
> +  CmKernel* cmrt_kernel = (CmKernel*)(k->cmrt_kernel);
> +
> +  int result = 0;
> +
> +  cmrt_kernel->SetThreadCount(global_work_size[0]*global_work_size[1]);
> +
> +  //no need to destory queue explicitly,  //and there is only one queue
> + instance within each device,  //CreateQueue always returns the same
> + instance
> +  CmQueue* pCmQueue = NULL;
> +  cmrt_device->CreateQueue(pCmQueue);
> +
> +  CmTask *pKernelArray = NULL;
> +  cmrt_device->CreateTask(pKernelArray);
> +
> +  pKernelArray->AddKernel(cmrt_kernel);
> +
> +  CmEvent* e = NULL;
> +
> +  if (local_work_size == NULL) {
> +    CmThreadSpace* pTS = NULL;
> +    cmrt_device->CreateThreadSpace(global_work_size[0],
> global_work_size[1], pTS);
> +    result = pCmQueue->Enqueue(pKernelArray, e, pTS);
> +  } else {
> +    CmThreadGroupSpace* pTGS = NULL;
> +	cmrt_device->CreateThreadGroupSpace(global_work_size[0],
> global_work_size[1], local_work_size[0], local_work_size[1], pTGS);
> +    result = pCmQueue->EnqueueWithGroup(pKernelArray, e, pTGS);
> +    cmrt_device->DestroyThreadGroupSpace(pTGS);
> +  }
> +
> +  if (result != CM_SUCCESS)
> +    return CL_INVALID_OPERATION;
> +
> +  cmrt_device->DestroyTask(pKernelArray);
> +
> +  CmEvent*& olde = (CmEvent*&)cq->cmrt_event;  if (olde != NULL)
> +    pCmQueue->DestroyEvent(e);
> +
> +  cq->cmrt_event = e;
> +
> +  return CL_SUCCESS;
> +}
> +
> +static VA_CM_FORMAT GetCmrtFormat(_cl_mem_image* image) {
> +    switch (image->intel_fmt)
> +    {
> +    case I965_SURFACEFORMAT_B8G8R8A8_UNORM:
> +      return VA_CM_FMT_A8R8G8B8;
> +    case I965_SURFACEFORMAT_B8G8R8X8_UNORM:
> +      return VA_CM_FMT_X8R8G8B8;
> +    case I965_SURFACEFORMAT_A8_UNORM:
> +      return VA_CM_FMT_A8;
> +    case I965_SURFACEFORMAT_R10G10B10A2_UNORM:
> +      return VA_CM_FMT_A2B10G10R10;
> +    case I965_SURFACEFORMAT_R16G16B16A16_UNORM:
> +      return VA_CM_FMT_A16B16G16R16;
> +    case I965_SURFACEFORMAT_L8_UNORM:
> +      return VA_CM_FMT_L8;
> +    case I965_SURFACEFORMAT_R16_UINT:
> +      return VA_CM_FMT_R16U;
> +    case I965_SURFACEFORMAT_R8_UNORM:
> +      return VA_CM_FMT_R8U;
> +    case I965_SURFACEFORMAT_L16_UNORM:
> +      return VA_CM_FMT_L16;
> +    case I965_SURFACEFORMAT_R32_FLOAT:
> +      return VA_CM_FMT_R32F;
> +    default:
> +      return VA_CM_FMT_UNKNOWN;
> +    }
> +}
> +
> +static bool CreateCmrtMemory(cl_mem mem) {
> +  if (mem->cmrt_mem != NULL)
> +    return true;
> +
> +  CmDevice* cmrt_device = (CmDevice*)(mem->ctx->device->cmrt_device);
> +  int result;
> +  CmOsResource osResource;
> +  osResource.bo_size = mem->size;
> +  osResource.bo_flags = DRM_BO_HANDLE;
> +  osResource.bo = (drm_intel_bo*)mem->bo;  if (IS_IMAGE(mem)) {
> +    _cl_mem_image* image = cl_mem_image(mem);
> +    if (CL_MEM_OBJECT_IMAGE2D != image->image_type)
> +      return CL_INVALID_ARG_VALUE;
> +    osResource.format = GetCmrtFormat(image);
> +    if (osResource.format == VA_CM_FMT_UNKNOWN)
> +      return false;
> +    osResource.aligned_width = image->row_pitch;
> +    osResource.aligned_height = mem->size / image->row_pitch;
> +    osResource.pitch = image->row_pitch;
> +    osResource.tile_type = image->tiling;
> +    osResource.orig_width = image->w;
> +    osResource.orig_height = image->h;
> +    CmSurface2D*& cmrt_surface2d = (CmSurface2D*&)(mem->cmrt_mem);
> +    result = cmrt_device->CreateSurface2D(&osResource, cmrt_surface2d);
> +    mem->cmrt_mem_type = CMRT_SURFACE2D;  } else {
> +    osResource.format = VA_CM_FMT_BUFFER;
> +    osResource.buf_bytes = mem->size;
> +    CmBuffer*& cmrt_buffer = (CmBuffer*&)(mem->cmrt_mem);
> +    result = cmrt_device->CreateBuffer(&osResource, cmrt_buffer);
> +    mem->cmrt_mem_type = CMRT_BUFFER;
> +  }
> +
> +  if (result != CM_SUCCESS)
> +    return false;
> +
> +  return true;
> +}
> +
> +cl_int cmrt_set_kernel_arg(cl_kernel k, cl_uint index, size_t sz, const
> +void *value) {
> +  if(value == NULL)
> +    return CL_INVALID_ARG_VALUE;
> +
> +  CmKernel* cmrt_kernel = (CmKernel*)(k->cmrt_kernel);
> +
> +  WORD argKind = -1;
> +  if (cmrt_kernel->GetArgKind(index, argKind) != CM_SUCCESS)
> +    return CL_INVALID_ARG_INDEX;
> +
> +  int result;
> +  if (argKind == ARG_KIND_GENERAL)
> +    result = cmrt_kernel->SetKernelArg(index, sz, value);  else {
> +    cl_mem mem = *(cl_mem*)value;
> +    if (mem->magic == CL_MAGIC_MEM_HEADER) {
> +      if (!CreateCmrtMemory(mem))
> +        return CL_INVALID_ARG_VALUE;
> +
> +      SurfaceIndex * memIndex = NULL;
> +      if (mem->cmrt_mem_type == CMRT_BUFFER) {
> +        CmBuffer* cmrt_buffer = (CmBuffer*)(mem->cmrt_mem);
> +        cmrt_buffer->GetIndex(memIndex);
> +      } else {
> +        CmSurface2D* cmrt_surface2d = (CmSurface2D*)(mem->cmrt_mem);
> +        cmrt_surface2d->GetIndex(memIndex);
> +      }
> +      result = cmrt_kernel->SetKernelArg(index, sizeof(SurfaceIndex),
> memIndex);
> +    } else
> +      return CL_INVALID_ARG_VALUE;
> +  }
> +
> +  if (result != CM_SUCCESS)
> +    return CL_INVALID_KERNEL_ARGS;
> +
> +  return CL_SUCCESS;
> +}
> +
> +cl_int cmrt_destroy_memory(cl_mem mem)
> +{
> +  CmDevice* cmrt_device = (CmDevice*)(mem->ctx->device->cmrt_device);
> +  if (mem->cmrt_mem_type == CMRT_BUFFER) {
> +    CmBuffer*& cmrt_buffer = (CmBuffer*&)(mem->cmrt_mem);
> +    cmrt_device->DestroySurface(cmrt_buffer);
> +  } else {
> +    CmSurface2D*& cmrt_surface2d = (CmSurface2D*&)(mem->cmrt_mem);
> +    cmrt_device->DestroySurface(cmrt_surface2d);
> +  }
> +  return CL_SUCCESS;
> +}
> +
> +cl_int cmrt_destroy_event(cl_command_queue cq) {
> +  CmEvent*& cmrt_event = (CmEvent*&)(cq->cmrt_event);
> +  CmDevice* cmrt_device = (CmDevice*)(cq->ctx->device->cmrt_device);
> +  CmQueue* pCmQueue = NULL;
> +  cmrt_event->WaitForTaskFinished();
> +  cmrt_device->CreateQueue(pCmQueue);
> +  pCmQueue->DestroyEvent(cmrt_event);
> +  return CL_SUCCESS;
> +}
> +
> +cl_int cmrt_wait_for_task_finished(cl_command_queue cq) {
> +  CmEvent* cmrt_event = (CmEvent*)(cq->cmrt_event);
> +  cmrt_event->WaitForTaskFinished();
> +  return CL_SUCCESS;
> +}
> diff --git a/src/cl_cmrt.h b/src/cl_cmrt.h new file mode 100644 index
> 0000000..316095c
> --- /dev/null
> +++ b/src/cl_cmrt.h
> @@ -0,0 +1,45 @@
> +/*
> + * Copyright @2015 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> <http://www.gnu.org/licenses/>.
> + *
> + * Author: Guo Yejun <yejun.guo at intel.com>  */
> +
> +#ifndef __CL_CMRT_H__
> +#define __CL_CMRT_H__
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include "cl_kernel.h"
> +#include "cl_program.h"
> +
> +cl_int cmrt_build_program(cl_program p, const char *options); cl_int
> +cmrt_destroy_program(cl_program p); cl_int
> +cmrt_destroy_device(cl_device_id device);
> +void* cmrt_create_kernel(cl_program p, const char *name); cl_int
> +cmrt_destroy_kernel(cl_kernel k); cl_int
> cmrt_enqueue(cl_command_queue
> +cq, cl_kernel k, const size_t* global_work_size, const size_t*
> +local_work_size); cl_int cmrt_set_kernel_arg(cl_kernel k, cl_uint
> +index, size_t sz, const void *value); cl_int cmrt_destroy_memory(cl_mem
> +mem); cl_int cmrt_destroy_event(cl_command_queue cq); cl_int
> +cmrt_wait_for_task_finished(cl_command_queue cq);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index
> 033e7df..549f648 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -31,6 +31,7 @@
>  #include "cl_khr_icd.h"
>  #include "cl_event.h"
>  #include "performance.h"
> +#include "cl_cmrt.h"
> 
>  #include <assert.h>
>  #include <stdio.h>
> @@ -47,6 +48,7 @@ cl_command_queue_new(cl_context ctx)
>    queue->magic = CL_MAGIC_QUEUE_HEADER;
>    queue->ref_n = 1;
>    queue->ctx = ctx;
> +  queue->cmrt_event = NULL;
>    if ((queue->thread_data = cl_thread_data_create()) == NULL) {
>      goto error;
>    }
> @@ -76,6 +78,11 @@ cl_command_queue_delete(cl_command_queue
> queue)
>    assert(queue);
>    if (atomic_dec(&queue->ref_n) != 1) return;
> 
> +#ifdef HAS_CMRT
> +  if (queue->cmrt_event != NULL)
> +    cmrt_destroy_event(queue);
> +#endif
> +
>    // If there is a list of valid events, we need to give them
>    // a chance to call the call-back function.
>    cl_event_update_last_events(queue,1);
> diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h index
> 2cd6739..d1b8c44 100644
> --- a/src/cl_command_queue.h
> +++ b/src/cl_command_queue.h
> @@ -44,6 +44,8 @@ struct _cl_command_queue {
>    cl_command_queue prev, next;         /* We chain the command queues
> together */
>    void *thread_data;                   /* Used to store thread context data */
>    cl_mem perf;                         /* Where to put the perf counters */
> +
> +  void* cmrt_event;                    /* the latest CmEvent* of the command
> queue */
>  };
> 
>  /* The macro to get the thread specified gpgpu struct. */ diff --git
> a/src/cl_device_id.h b/src/cl_device_id.h index e971735..619fa0a 100644
> --- a/src/cl_device_id.h
> +++ b/src/cl_device_id.h
> @@ -123,6 +123,9 @@ struct _cl_device_id {
>    uint32_t atomic_test_result;
>    uint32_t image_pitch_alignment;
>    uint32_t image_base_address_alignment;
> +
> +  //inited as NULL, created only when cmrt kernel is used
> +  void* cmrt_device;  //realtype: CmDevice*
>  };
> 
>  /* Get a device from the given platform */ diff --git a/src/cl_gt_device.h
> b/src/cl_gt_device.h index d8089c2..b4c610e 100644
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -131,3 +131,4 @@ DECL_INFO_STRING(spir_versions,
> "1.2")  .device_reference_count = 1,  .image_pitch_alignment =
> 1,  .image_base_address_alignment = 4096,
> +.cmrt_device = NULL
> diff --git a/src/cl_kernel.c b/src/cl_kernel.c index b2d1955..b380abe 100644
> --- a/src/cl_kernel.c
> +++ b/src/cl_kernel.c
> @@ -28,6 +28,7 @@
>  #include "CL/cl.h"
>  #include "cl_sampler.h"
>  #include "cl_accelerator_intel.h"
> +#include "cl_cmrt.h"
> 
>  #include <stdio.h>
>  #include <string.h>
> @@ -41,6 +42,15 @@ cl_kernel_delete(cl_kernel k)
>    uint32_t i;
>    if (k == NULL) return;
> 
> +#ifdef HAS_CMRT
> +  if (k->cmrt_kernel != NULL) {
> +    cmrt_destroy_kernel(k);
> +    k->magic = CL_MAGIC_DEAD_HEADER; /* For safety */
> +    cl_free(k);
> +    return;
> +  }
> +#endif
> +
>    /* We are not done with the kernel */
>    if (atomic_dec(&k->ref_n) > 1) return;
>    /* Release one reference on all bos we own */ @@ -71,6 +81,7 @@
> cl_kernel_new(cl_program p)
>    k->ref_n = 1;
>    k->magic = CL_MAGIC_KERNEL_HEADER;
>    k->program = p;
> +  k->cmrt_kernel = NULL;
> 
>  exit:
>    return k;
> diff --git a/src/cl_kernel.h b/src/cl_kernel.h index 7f59162..05a882e 100644
> --- a/src/cl_kernel.h
> +++ b/src/cl_kernel.h
> @@ -69,6 +69,8 @@ struct _cl_kernel {
>    uint32_t arg_n:30;          /* Number of arguments */
>    uint32_t ref_its_program:1; /* True only for the user kernel (created by
> clCreateKernel) */
>    uint32_t vme:1;             /* True only if it is a built-in kernel for VME */
> +
> +  void* cmrt_kernel;          /* CmKernel* */
>  };
> 
>  /* Allocate an empty kernel */
> diff --git a/src/cl_mem.c b/src/cl_mem.c index 9a6bb83..1f03c69 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -27,6 +27,7 @@
>  #include "cl_khr_icd.h"
>  #include "cl_kernel.h"
>  #include "cl_command_queue.h"
> +#include "cl_cmrt.h"
> 
>  #include "CL/cl.h"
>  #include "CL/cl_intel.h"
> @@ -268,6 +269,7 @@ cl_mem_allocate(enum cl_mem_type type,
>    mem->flags = flags;
>    mem->is_userptr = 0;
>    mem->offset = 0;
> +  mem->cmrt_mem = NULL;
>    if (mem->type == CL_MEM_IMAGE_TYPE) {
>      cl_mem_image(mem)->is_image_from_buffer = 0;
>    }
> @@ -1166,6 +1168,11 @@ cl_mem_delete(cl_mem mem)
>    }
>  #endif
> 
> +#ifdef HAS_CMRT
> +  if (mem->cmrt_mem != NULL)
> +    cmrt_destroy_memory(mem);
> +#endif
> +
>    /* iff we are a image, delete the 1d buffer if has. */
>    if (IS_IMAGE(mem)) {
>      if (cl_mem_image(mem)->buffer_1d) { diff --git a/src/cl_mem.h
> b/src/cl_mem.h index fb24115..c8f256d 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -95,6 +95,9 @@ typedef  struct _cl_mem {
>    cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
>    uint8_t is_userptr;       /* CL_MEM_USE_HOST_PTR is enabled*/
>    size_t offset;            /* offset of host_ptr to the page beginning, only for
> CL_MEM_USE_HOST_PTR*/
> +
> +  uint8_t cmrt_mem_type;    /* CmBuffer, CmSurface2D, ... */
> +  void* cmrt_mem;
>  } _cl_mem;
> 
>  struct _cl_mem_image {
> diff --git a/src/cl_program.c b/src/cl_program.c index 98b6d51..1dca673
> 100644
> --- a/src/cl_program.c
> +++ b/src/cl_program.c
> @@ -25,6 +25,7 @@
>  #include "cl_utils.h"
>  #include "cl_khr_icd.h"
>  #include "cl_gbe_loader.h"
> +#include "cl_cmrt.h"
>  #include "CL/cl.h"
>  #include "CL/cl_intel.h"
> 
> @@ -92,10 +93,17 @@ cl_program_delete(cl_program p)
>        p->ctx->programs = p->next;
>    pthread_mutex_unlock(&p->ctx->program_lock);
> 
> -  cl_free(p->bin);               /* Free the blob */
> -  for (i = 0; i < p->ker_n; ++i) /* Free the kernels */
> -    cl_kernel_delete(p->ker[i]);
> -  cl_free(p->ker);
> +#ifdef HAS_CMRT
> +  if (p->cmrt_program != NULL)
> +    cmrt_destroy_program(p);
> +  else
> +#endif
> +  {
> +    cl_free(p->bin);               /* Free the blob */
> +    for (i = 0; i < p->ker_n; ++i) /* Free the kernels */
> +      cl_kernel_delete(p->ker[i]);
> +    cl_free(p->ker);
> +  }
> 
>    /* Program belongs to their parent context */
>    cl_context_delete(p->ctx);
> @@ -123,6 +131,7 @@ cl_program_new(cl_context ctx)
>    p->ref_n = 1;
>    p->magic = CL_MAGIC_PROGRAM_HEADER;
>    p->ctx = ctx;
> +  p->cmrt_program = NULL;
>    p->build_log = calloc(1000, sizeof(char));
>    if (p->build_log)
>      p->build_log_max_sz = 1000;
> @@ -172,12 +181,14 @@ static const unsigned char
> binary_type_header[BHI_MAX][BINARY_HEADER_LENGTH]=  \
>                                                {{'B','C', 0xC0, 0xDE},
>                                                 {1, 'B', 'C', 0xC0, 0xDE},
>                                                 {2, 'B', 'C', 0xC0, 0xDE},
> -                                               {0, 'G','E', 'N', 'C'}};
> +                                               {0, 'G','E', 'N', 'C'},
> +                                               {'C','I', 'S', 'A'},
> +                                               };
> 
>  LOCAL cl_bool headerCompare(const unsigned char *BufPtr,
> BINARY_HEADER_INDEX index)  {
>    bool matched = true;
> -  int length = index == BHI_SPIR ? BINARY_HEADER_LENGTH -
> 1 :BINARY_HEADER_LENGTH;
> +  int length = (index == BHI_SPIR || index == BHI_CMRT) ?
> + BINARY_HEADER_LENGTH -1 :BINARY_HEADER_LENGTH;
>    int i = 0;
>    for (i = 0; i < length; ++i)
>    {
> @@ -190,6 +201,7 @@ LOCAL cl_bool headerCompare(const unsigned char
> *BufPtr, BINARY_HEADER_INDEX ind  #define isLLVM_C_O(BufPtr)
> headerCompare(BufPtr, BHI_COMPIRED_OBJECT)  #define
> isLLVM_LIB(BufPtr)  headerCompare(BufPtr, BHI_LIBRARY)  #define
> isGenBinary(BufPtr) headerCompare(BufPtr, BHI_GEN_BINARY)
> +#define isCMRT(BufPtr)      headerCompare(BufPtr, BHI_CMRT)
> 
>  LOCAL cl_program
>  cl_program_create_from_binary(cl_context             ctx,
> @@ -236,8 +248,9 @@ cl_program_create_from_binary(cl_context             ctx,
>    program->binary_sz = lengths[0];
>    program->source_type = FROM_BINARY;
> 
> -  if(isSPIR((unsigned char*)program->binary)) {
> -
> +  if (isCMRT((unsigned char*)program->binary)) {
> +    program->source_type = FROM_CMRT;
> +  }else if(isSPIR((unsigned char*)program->binary)) {
>      char* typed_binary;
>      TRY_ALLOC(typed_binary, cl_calloc(lengths[0]+1, sizeof(char)));
>      memcpy(typed_binary+1, binaries[0], lengths[0]); @@ -518,6 +531,20 @@
> cl_program_build(cl_program p, const char *options)
>      goto error;
>    }
> 
> +#if HAS_CMRT
> +  if (p->source_type == FROM_CMRT) {
> +    //only here we begins to invoke cmrt
> +    //break spec to return other errors such as CL_DEVICE_NOT_FOUND
> +    err = cmrt_build_program(p, options);
> +    if (err == CL_SUCCESS) {
> +      p->build_status = CL_BUILD_SUCCESS;
> +      p->binary_type = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
> +      return CL_SUCCESS;
> +    } else
> +      goto error;
> +  }
> +#endif
> +
>    if (!check_cl_version_option(p, options)) {
>      err = CL_BUILD_PROGRAM_FAILURE;
>      goto error;
> @@ -833,6 +860,20 @@ cl_program_create_kernel(cl_program p, const char
> *name, cl_int *errcode_ret)
>    cl_int err = CL_SUCCESS;
>    uint32_t i = 0;
> 
> +#ifdef HAS_CMRT
> +  if (p->cmrt_program != NULL) {
> +    void* cmrt_kernel = cmrt_create_kernel(p, name);
> +    if (cmrt_kernel != NULL) {
> +      to = cl_kernel_new(p);
> +      to->cmrt_kernel = cmrt_kernel;
> +      goto exit;
> +    } else {
> +      err = CL_INVALID_KERNEL_NAME;
> +      goto error;
> +    }
> +  }
> +#endif
> +
>    /* Find the program first */
>    for (i = 0; i < p->ker_n; ++i) {
>      assert(p->ker[i]);
> diff --git a/src/cl_program.h b/src/cl_program.h index 63ad16d..899a31a
> 100644
> --- a/src/cl_program.h
> +++ b/src/cl_program.h
> @@ -34,14 +34,16 @@ enum {
>    FROM_SOURCE = 0,
>    FROM_LLVM = 1,
>    FROM_BINARY = 2,
> -  FROM_LLVM_SPIR = 3
> +  FROM_LLVM_SPIR = 3,
> +  FROM_CMRT = 4,
>  };
> 
>  typedef enum _BINARY_HEADER_INDEX {
>    BHI_SPIR = 0,
>    BHI_COMPIRED_OBJECT = 1,
>    BHI_LIBRARY = 2,
> -  BHI_GEN_BINARY = 3, /*remember update BHI_MAX if add option.*/
> +  BHI_GEN_BINARY = 3,
> +  BHI_CMRT = 4,
>    BHI_MAX,
>  }BINARY_HEADER_INDEX;
> 
> @@ -61,13 +63,15 @@ struct _cl_program {
>    size_t binary_sz;       /* The binary size. */
>    uint32_t binary_type;   /* binary type: COMPILED_OBJECT(LLVM IR),
> LIBRARY(LLVM IR with option "-create-library"), or EXECUTABLE(GEN binary).
> */
>    uint32_t ker_n;         /* Number of declared kernels */
> -  uint32_t source_type:2; /* Built from binary, source or LLVM */
> +  uint32_t source_type:3; /* Built from binary, source, CMRT or LLVM*/
>    uint32_t is_built:1;    /* Did we call clBuildProgram on it? */
>    int32_t build_status;   /* build status. */
>    char *build_opts;       /* The build options for this program */
>    size_t build_log_max_sz; /*build log maximum size in byte.*/
>    char *build_log;         /* The build log for this program. */
>    size_t build_log_sz;    /* The actual build log size.*/
> +
> +  void* cmrt_program;      /* real type: CmProgram* */
>  };
> 
>  /* Create a empty program */
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list