[Beignet] [PATCH 1/2] support CL_MEM_USE_HOST_PTR with userptr for cl buffer

Guo, Yejun yejun.guo at intel.com
Sun Nov 2 20:23:10 PST 2014


Yes, you are right. I will modify the patch accordingly and also do more testing.

-----Original Message-----
From: Zhenyu Wang [mailto:zhenyuw at linux.intel.com] 
Sent: Monday, November 03, 2014 11:33 AM
To: Guo, Yejun
Cc: beignet at lists.freedesktop.org
Subject: Re: [Beignet] [PATCH 1/2] support CL_MEM_USE_HOST_PTR with userptr for cl buffer


On 2014.11.03 10:30:32 +0800, Guo Yejun wrote:
> @@ -260,7 +263,29 @@ cl_mem_allocate(enum cl_mem_type type,
>      /* Allocate space in memory */
>      bufmgr = cl_context_get_bufmgr(ctx);
>      assert(bufmgr);
> +
> +#ifdef HAS_USERPTR
> +    //currently only cl buf is supported, will add cl image support later
> +    if ((flags & CL_MEM_USE_HOST_PTR) && host_ptr != NULL) {
> +        if (!is_tiled) {    //userptr not support tiling
> +            int page_size = getpagesize();
> +            int alignSZ = ALIGN(sz, page_size);
> +            if ((((unsigned long)host_ptr) & (page_size - 1)) == 0) { //page aligned
> +              mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL memory object", host_ptr, alignSZ, 0);
> +              if (mem->bo != NULL)
> +                mem->is_userptr = 1;
> +              else
> +                fprintf(stderr, "failed to alloc with host ptr %p %lu, fallback\n", host_ptr, sz);
> +            }
> +        }
> +    }

My original version did not try to round the size up to a page boundary, because userptr only supports memory that is really page aligned. I worry that you might make the kernel handle a range that extends beyond the original VMA, which would confuse the MMU notifier code.


> +
> +    if (!mem->is_userptr)
> +      mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, 
> +alignment); #else
>      mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, 
> alignment);
> +#endif
> +
>      if (UNLIKELY(mem->bo == NULL)) {
>        err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
>        goto error;
> @@ -387,12 +412,15 @@ cl_mem_new_buffer(cl_context ctx,
>    sz = ALIGN(sz, 4);
>  
>    /* Create the buffer in video memory */
> -  mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, 
> &err);
> +  mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, 
> + data, &err);
>    if (mem == NULL || err != CL_SUCCESS)
>      goto error;
>  
>    /* Copy the data if required */
> -  if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR)
> +  if (flags & CL_MEM_COPY_HOST_PTR)
> +    cl_buffer_subdata(mem->bo, 0, sz, data);
> +
> +  if ((flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr)
>      cl_buffer_subdata(mem->bo, 0, sz, data);
>  
>    if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR) @@ 
> -762,7 +790,7 @@ _cl_mem_new_image(cl_context ctx,
>      sz = aligned_pitch * aligned_h * depth;
>    }
>  
> -  mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != 
> CL_NO_TILE, &err);
> +  mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != 
> + CL_NO_TILE, NULL, &err);
>    if (mem == NULL || err != CL_SUCCESS)
>      goto error;
>  
> @@ -1834,7 +1862,7 @@ LOCAL cl_mem cl_mem_new_libva_buffer(cl_context ctx,
>    cl_int err = CL_SUCCESS;
>    cl_mem mem = NULL;
>  
> -  mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, 
> &err);
> +  mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, 
> + NULL, &err);
>    if (mem == NULL || err != CL_SUCCESS)
>      goto error;
>  
> @@ -1875,7 +1903,7 @@ LOCAL cl_mem cl_mem_new_libva_image(cl_context ctx,
>      goto error;
>    }
>  
> -  mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, &err);
> +  mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, NULL, &err);
>    if (mem == NULL || err != CL_SUCCESS) {
>      err = CL_OUT_OF_HOST_MEMORY;
>      goto error;
> diff --git a/src/cl_mem.h b/src/cl_mem.h index 95c5f05..2e9dd5a 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -92,6 +92,7 @@ typedef  struct _cl_mem {
>    int map_ref;              /* The mapped count. */
>    uint8_t mapped_gtt;       /* This object has mapped gtt, for unmap. */
>    cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
> +  uint8_t is_userptr;    /* CL_MEM_USE_HOST_PTR is enabled*/
>  } _cl_mem;
>  
>  struct _cl_mem_image {
> @@ -262,6 +263,7 @@ cl_mem_allocate(enum cl_mem_type type,
>                  cl_mem_flags flags,
>                  size_t sz,
>                  cl_int is_tiled,
> +                void *host_ptr,
>                  cl_int *errcode);
>  
>  void
> diff --git a/src/cl_mem_gl.c b/src/cl_mem_gl.c index 28d2ac6..3640908 
> 100644
> --- a/src/cl_mem_gl.c
> +++ b/src/cl_mem_gl.c
> @@ -63,7 +63,7 @@ cl_mem_new_gl_texture(cl_context ctx,
>      goto error;
>    }
>  
> -  mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, 
> &err);
> +  mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, NULL, 
> + &err);
>    if (mem == NULL || err != CL_SUCCESS)
>      goto error;
>  
> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index 
> bb97220..4f56409 100644
> --- a/src/intel/intel_driver.c
> +++ b/src/intel/intel_driver.c
> @@ -690,6 +690,19 @@ cl_buffer intel_share_image_from_libva(cl_context ctx,
>    return (cl_buffer)intel_bo;
>  }
>  
> +static cl_buffer intel_buffer_alloc_userptr(cl_buffer_mgr bufmgr, 
> +const char* name, void *data,size_t size, unsigned long flags) { 
> +#ifdef HAS_USERPTR
> +  drm_intel_bo *bo;
> +  bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, 
> +data, I915_TILING_NONE, 0, size, flags);
> +  if (bo == NULL)
> +    bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, 
> +data, I915_TILING_NONE, 0, size, flags | 
> +I915_USERPTR_UNSYNCHRONIZED);
> +  return (cl_buffer)bo;
> +#else
> +  return NULL;
> +#endif
> +}

Please fix the indentation here, and consider adding a comment explaining why we can retry with the unsynchronized flag (I915_USERPTR_UNSYNCHRONIZED).

> +
>  static int32_t get_intel_tiling(cl_int tiling, uint32_t 
> *intel_tiling)  {
>    switch (tiling) {
> @@ -734,6 +747,7 @@ intel_setup_callbacks(void)
>    cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
>    cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
>    cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc;
> +  cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) 
> + intel_buffer_alloc_userptr;
>    cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) 
> intel_buffer_set_tiling;  #if defined(HAS_EGL)
>    cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) 
> intel_alloc_buffer_from_texture;
> --
> 2.1.0
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet

--
Open Source Technology Center, Intel ltd.

$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827


More information about the Beignet mailing list