[Beignet] [PATCH V3 1/3] support CL_MEM_USE_HOST_PTR with userptr for cl buffer

Zhigang Gong zhigang.gong at linux.intel.com
Fri Nov 7 00:25:38 PST 2014


Pushed with a slight change. Thanks.

On Fri, Nov 07, 2014 at 04:18:54PM +0800, Guo Yejun wrote:
> userptr wraps a page-aligned memory pointer supplied by user space
> into a buffer object that the GPU can access directly, so no extra
> copy is needed. It is supported starting with Linux kernel 3.16
> and libdrm 2.4.58.
> 
> This patch was originally written by Zhenyu Wang <zhenyuw at linux.intel.com>;
> I made a few small changes and did some code cleanup.
> 
> No regressions were found on IVB + Ubuntu 14.10 (with libdrm upgraded) when running these tests:
> beignet/utests, piglit, OpenCV/test&perf, conformance/basic&mem_host_flags&buffers
> 
> V2: add a page-alignment limit for the data size; add comments for kernels without MMU_NOTIFIER
> V3: add a runtime check of host_unified_memory; return CL_MEM_OBJECT_ALLOCATION_FAILURE on failure
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  CMakeLists.txt           | 11 +++++++++--
>  src/CMakeLists.txt       |  5 +++++
>  src/cl_api.c             | 10 +++++++---
>  src/cl_driver.h          |  3 +++
>  src/cl_driver_defs.c     |  1 +
>  src/cl_enqueue.c         | 19 ++++++++++++-------
>  src/cl_mem.c             | 37 ++++++++++++++++++++++++++++++++-----
>  src/cl_mem.h             |  2 ++
>  src/cl_mem_gl.c          |  2 +-
>  src/intel/intel_driver.c | 15 +++++++++++++++
>  10 files changed, 87 insertions(+), 18 deletions(-)
> 
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index 40cb74c..15386f9 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -108,7 +108,7 @@ ENDIF(X11_FOUND)
>  # DRM
>  pkg_check_modules(DRM REQUIRED libdrm)
>  IF(DRM_FOUND)
> -  MESSAGE(STATUS "Looking for DRM - found at ${DRM_PREFIX}")
> +  MESSAGE(STATUS "Looking for DRM - found at ${DRM_PREFIX} ${DRM_VERSION}")
>    INCLUDE_DIRECTORIES(${DRM_INCLUDE_DIRS})
>  ELSE(DRM_FOUND)
>    MESSAGE(STATUS "Looking for DRM - not found")
> @@ -118,7 +118,14 @@ ENDIF(DRM_FOUND)
>  pkg_check_modules(DRM_INTEL libdrm_intel>=2.4.52)
>  IF(DRM_INTEL_FOUND)
>    INCLUDE_DIRECTORIES(${DRM_INTEL_INCLUDE_DIRS})
> -  MESSAGE(STATUS "Looking for DRM Intel - found at ${DRM_INTEL_PREFIX}")
> +  MESSAGE(STATUS "Looking for DRM Intel - found at ${DRM_INTEL_PREFIX} ${DRM_INTEL_VERSION}")
> +  #userptr support starts from 2.4.57, but 2.4.58 is the actual stable release
> +  IF(DRM_INTEL_VERSION VERSION_GREATER 2.4.57)
> +    MESSAGE(STATUS "Enable userptr support")
> +    SET(DRM_INTEL_USERPTR "enable")
> +  ELSE(DRM_INTEL_VERSION VERSION_GREATER 2.4.57)
> +    MESSAGE(STATUS "Disable userptr support")
> +  ENDIF(DRM_INTEL_VERSION VERSION_GREATER 2.4.57)
>  ELSE(DRM_INTEL_FOUND)
>    MESSAGE(FATAL_ERROR "Looking for DRM Intel (>= 2.4.52) - not found")
>  ENDIF(DRM_INTEL_FOUND)
> diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
> index fc5de89..7182bad 100644
> --- a/src/CMakeLists.txt
> +++ b/src/CMakeLists.txt
> @@ -109,6 +109,11 @@ SET(CMAKE_CXX_FLAGS "-DHAS_OCLIcd ${CMAKE_CXX_FLAGS}")
>  SET(CMAKE_C_FLAGS "-DHAS_OCLIcd ${CMAKE_C_FLAGS}")
>  endif (OCLIcd_FOUND)
>  
> +if (DRM_INTEL_USERPTR)
> +SET(CMAKE_CXX_FLAGS "-DHAS_USERPTR ${CMAKE_CXX_FLAGS}")
> +SET(CMAKE_C_FLAGS "-DHAS_USERPTR ${CMAKE_C_FLAGS}")
> +endif (DRM_INTEL_USERPTR)
> +
>  set(GIT_SHA1 "git_sha1.h")
>  add_custom_target(${GIT_SHA1} ALL
>    COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 05d3093..1f24638 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -2665,9 +2665,13 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
>      ptr = data->ptr;
>      if(event) cl_event_set_status(*event, CL_COMPLETE);
>    } else {
> -    if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
> -      err = CL_MAP_FAILURE;
> -      goto error;
> +    if (buffer->is_userptr)
> +      ptr = buffer->host_ptr;
> +    else {
> +      if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
> +        err = CL_MAP_FAILURE;
> +        goto error;
> +      }
>      }
>    }
>    err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size, NULL, NULL);
> diff --git a/src/cl_driver.h b/src/cl_driver.h
> index 638b791..8697ff2 100644
> --- a/src/cl_driver.h
> +++ b/src/cl_driver.h
> @@ -285,6 +285,9 @@ extern cl_gpgpu_walker_cb *cl_gpgpu_walker;
>  typedef cl_buffer (cl_buffer_alloc_cb)(cl_buffer_mgr, const char*, size_t, size_t);
>  extern cl_buffer_alloc_cb *cl_buffer_alloc;
>  
> +typedef cl_buffer (cl_buffer_alloc_userptr_cb)(cl_buffer_mgr, const char*, void *, size_t, unsigned long);
> +extern cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr;
> +
>  /* Set a buffer's tiling mode */
>  typedef cl_buffer (cl_buffer_set_tiling_cb)(cl_buffer, int tiling, size_t stride);
>  extern cl_buffer_set_tiling_cb *cl_buffer_set_tiling;
> diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
> index c31b6fc..1335c20 100644
> --- a/src/cl_driver_defs.c
> +++ b/src/cl_driver_defs.c
> @@ -29,6 +29,7 @@ LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL;
>  
>  /* Buffer */
>  LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL;
> +LOCAL cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr = NULL;
>  LOCAL cl_buffer_set_tiling_cb *cl_buffer_set_tiling = NULL;
>  LOCAL cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture = NULL;
>  LOCAL cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture = NULL;
> diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
> index db0bce7..5bdb7cd 100644
> --- a/src/cl_enqueue.c
> +++ b/src/cl_enqueue.c
> @@ -234,11 +234,15 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
>           mem->type == CL_MEM_SUBBUFFER_TYPE);
>    struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
>  
> -  if(data->unsync_map == 1)
> -    //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
> -    ptr = cl_mem_map_gtt(mem);
> -  else
> -    ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
> +  if (mem->is_userptr)
> +    ptr = mem->host_ptr;
> +  else {
> +    if(data->unsync_map == 1)
> +      //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
> +      ptr = cl_mem_map_gtt(mem);
> +    else
> +      ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0);
> +  }
>  
>    if (ptr == NULL) {
>      err = CL_MAP_FAILURE;
> @@ -246,7 +250,7 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
>    }
>    data->ptr = ptr;
>  
> -  if(mem->flags & CL_MEM_USE_HOST_PTR) {
> +  if((mem->flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) {
>      assert(mem->host_ptr);
>      ptr = (char*)ptr + data->offset + buffer->sub_offset;
>      memcpy(mem->host_ptr + data->offset + buffer->sub_offset, ptr, data->size);
> @@ -331,7 +335,8 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
>        assert(mapped_ptr >= memobj->host_ptr &&
>          mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
>        /* Sync the data. */
> -      memcpy(v_ptr, mapped_ptr, mapped_size);
> +      if (!memobj->is_userptr)
> +        memcpy(v_ptr, mapped_ptr, mapped_size);
>      } else {
>        CHECK_IMAGE(memobj, image);
>  
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 16bd613..9e38670 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -33,6 +33,7 @@
>  #include <assert.h>
>  #include <stdio.h>
>  #include <string.h>
> +#include <unistd.h>
>  
>  #define FIELD_SIZE(CASE,TYPE)               \
>    case JOIN(CL_,CASE):                      \
> @@ -223,6 +224,7 @@ cl_mem_allocate(enum cl_mem_type type,
>                  cl_mem_flags flags,
>                  size_t sz,
>                  cl_int is_tiled,
> +                void *host_ptr,
>                  cl_int *errcode)
>  {
>    cl_buffer_mgr bufmgr = NULL;
> @@ -251,6 +253,7 @@ cl_mem_allocate(enum cl_mem_type type,
>    mem->ref_n = 1;
>    mem->magic = CL_MAGIC_MEM_HEADER;
>    mem->flags = flags;
> +  mem->is_userptr = 0;
>  
>    if (sz != 0) {
>      /* Pinning will require stricter alignment rules */
> @@ -260,7 +263,28 @@ cl_mem_allocate(enum cl_mem_type type,
>      /* Allocate space in memory */
>      bufmgr = cl_context_get_bufmgr(ctx);
>      assert(bufmgr);
> +
> +#ifdef HAS_USERPTR
> +    if (ctx->device->host_unified_memory) {
> +      /* currently only cl buf is supported, will add cl image support later */
> +      if ((flags & CL_MEM_USE_HOST_PTR) && host_ptr != NULL) {
> +          /* userptr not support tiling */
> +          if (!is_tiled) {
> +              int page_size = getpagesize();
> +              if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
> +                mem->is_userptr = 1;
> +                mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0);
> +              }
> +          }
> +      }
> +    }
> +
> +    if (!mem->is_userptr)
> +      mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
> +#else
>      mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
> +#endif
> +
>      if (UNLIKELY(mem->bo == NULL)) {
>        err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
>        goto error;
> @@ -387,12 +411,15 @@ cl_mem_new_buffer(cl_context ctx,
>    sz = ALIGN(sz, 4);
>  
>    /* Create the buffer in video memory */
> -  mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, &err);
> +  mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, data, &err);
>    if (mem == NULL || err != CL_SUCCESS)
>      goto error;
>  
>    /* Copy the data if required */
> -  if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR)
> +  if (flags & CL_MEM_COPY_HOST_PTR)
> +    cl_buffer_subdata(mem->bo, 0, sz, data);
> +
> +  if ((flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr)
>      cl_buffer_subdata(mem->bo, 0, sz, data);
>  
>    if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR)
> @@ -762,7 +789,7 @@ _cl_mem_new_image(cl_context ctx,
>      sz = aligned_pitch * aligned_h * depth;
>    }
>  
> -  mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, &err);
> +  mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
>    if (mem == NULL || err != CL_SUCCESS)
>      goto error;
>  
> @@ -1834,7 +1861,7 @@ LOCAL cl_mem cl_mem_new_libva_buffer(cl_context ctx,
>    cl_int err = CL_SUCCESS;
>    cl_mem mem = NULL;
>  
> -  mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, &err);
> +  mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, NULL, &err);
>    if (mem == NULL || err != CL_SUCCESS)
>      goto error;
>  
> @@ -1875,7 +1902,7 @@ LOCAL cl_mem cl_mem_new_libva_image(cl_context ctx,
>      goto error;
>    }
>  
> -  mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, &err);
> +  mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, NULL, &err);
>    if (mem == NULL || err != CL_SUCCESS) {
>      err = CL_OUT_OF_HOST_MEMORY;
>      goto error;
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index 95c5f05..2e9dd5a 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -92,6 +92,7 @@ typedef  struct _cl_mem {
>    int map_ref;              /* The mapped count. */
>    uint8_t mapped_gtt;       /* This object has mapped gtt, for unmap. */
>    cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
> +  uint8_t is_userptr;    /* CL_MEM_USE_HOST_PTR is enabled*/
>  } _cl_mem;
>  
>  struct _cl_mem_image {
> @@ -262,6 +263,7 @@ cl_mem_allocate(enum cl_mem_type type,
>                  cl_mem_flags flags,
>                  size_t sz,
>                  cl_int is_tiled,
> +                void *host_ptr,
>                  cl_int *errcode);
>  
>  void
> diff --git a/src/cl_mem_gl.c b/src/cl_mem_gl.c
> index 28d2ac6..3640908 100644
> --- a/src/cl_mem_gl.c
> +++ b/src/cl_mem_gl.c
> @@ -63,7 +63,7 @@ cl_mem_new_gl_texture(cl_context ctx,
>      goto error;
>    }
>  
> -  mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, &err);
> +  mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, NULL, &err);
>    if (mem == NULL || err != CL_SUCCESS)
>      goto error;
>  
> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
> index bb97220..fc037cc 100644
> --- a/src/intel/intel_driver.c
> +++ b/src/intel/intel_driver.c
> @@ -690,6 +690,20 @@ cl_buffer intel_share_image_from_libva(cl_context ctx,
>    return (cl_buffer)intel_bo;
>  }
>  
> +static cl_buffer intel_buffer_alloc_userptr(cl_buffer_mgr bufmgr, const char* name, void *data,size_t size, unsigned long flags)
> +{
> +#ifdef HAS_USERPTR
> +  drm_intel_bo *bo;
> +  bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags);
> +  /* Fallback to unsynchronized userptr allocation if kernel has no MMU notifier enabled. */
> +  if (bo == NULL)
> +    bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, I915_TILING_NONE, 0, size, flags | I915_USERPTR_UNSYNCHRONIZED);
> +  return (cl_buffer)bo;
> +#else
> +  return NULL;
> +#endif
> +}
> +
>  static int32_t get_intel_tiling(cl_int tiling, uint32_t *intel_tiling)
>  {
>    switch (tiling) {
> @@ -734,6 +748,7 @@ intel_setup_callbacks(void)
>    cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
>    cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
>    cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
> +  cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) intel_buffer_alloc_userptr;
>    cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling;
>  #if defined(HAS_EGL)
>    cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) intel_alloc_buffer_from_texture;
> -- 
> 2.1.0
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list