[Beignet] [PATCH 1/2] enable CL_MEM_ALLOC_HOST_PTR with user_ptr to avoid copy between GPU/CPU

Yang, Rong R rong.r.yang at intel.com
Mon Dec 1 22:43:31 PST 2014


The patchset LGTM.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo Yejun
> Sent: Tuesday, December 2, 2014 09:31
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [Beignet] [PATCH 1/2] enable CL_MEM_ALLOC_HOST_PTR with
> user_ptr to avoid copy between GPU/CPU
> 
> When userptr is enabled, allocate page-aligned system memory for
> CL_MEM_ALLOC_HOST_PTR inside the driver and wrap it as GPU memory
> to avoid copying between the GPU and CPU.
> 
> Also refine the related user_ptr code.
> 
> tests verified: beignet/utest, conformance/basic, buffers, mem_host_flags
> 
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  src/cl_device_id.c |  8 ++++----
>  src/cl_mem.c       | 37 +++++++++++++++++++++++++++----------
>  src/cl_mem.h       |  4 ++--
>  3 files changed, 33 insertions(+), 16 deletions(-)
> 
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 5ef0bde..711f8ae
> 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -27,6 +27,7 @@
>  #include "cl_thread.h"
>  #include "CL/cl.h"
>  #include "cl_gbe_loader.h"
> +#include "cl_alloc.h"
> 
>  #include <assert.h>
>  #include <stdio.h>
> @@ -407,15 +408,14 @@ brw_gt3_break:
>    cl_buffer_mgr bufmgr = cl_driver_get_bufmgr(dummy);
> 
>    const size_t sz = 4096;
> -  void* host_ptr = NULL;
> -  int err = posix_memalign(&host_ptr, 4096, sz);
> -  if (err == 0) {
> +  void* host_ptr = cl_aligned_malloc(sz, 4096);;  if (host_ptr != NULL)
> + {
>      cl_buffer bo = cl_buffer_alloc_userptr(bufmgr, "CL memory object",
> host_ptr, sz, 0);
>      if (bo == NULL)
>        ret->host_unified_memory = CL_FALSE;
>      else
>        cl_buffer_unreference(bo);
> -    free(host_ptr);
> +    cl_free(host_ptr);
>    }
>    else
>      ret->host_unified_memory = CL_FALSE; diff --git a/src/cl_mem.c
> b/src/cl_mem.c index 1f1b668..7528964 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -266,16 +266,26 @@ cl_mem_allocate(enum cl_mem_type type,
> 
>  #ifdef HAS_USERPTR
>      if (ctx->device->host_unified_memory) {
> +      int page_size = getpagesize();
>        /* currently only cl buf is supported, will add cl image support later */
> -      if ((flags & CL_MEM_USE_HOST_PTR) && host_ptr != NULL) {
> -        /* userptr not support tiling */
> -        if (!is_tiled) {
> -          int page_size = getpagesize();
> -          if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
> -            mem->is_userptr = 1;
> -            mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory
> object", host_ptr, sz, 0);
> +      if (type == CL_MEM_BUFFER_TYPE) {
> +        if (flags & CL_MEM_USE_HOST_PTR) {
> +          assert(host_ptr != NULL);
> +          /* userptr not support tiling */
> +          if (!is_tiled) {
> +            if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
> +              mem->is_userptr = 1;
> +              mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory
> object", host_ptr, sz, 0);
> +            }
>            }
>          }
> +        else if (flags & CL_MEM_ALLOC_HOST_PTR) {
> +          const size_t alignedSZ = ALIGN(sz, page_size);
> +          void* internal_host_ptr = cl_aligned_malloc(alignedSZ, page_size);
> +          mem->host_ptr = internal_host_ptr;
> +          mem->is_userptr = 1;
> +          mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory
> object", internal_host_ptr, alignedSZ, 0);
> +        }
>        }
>      }
> 
> @@ -416,13 +426,17 @@ cl_mem_new_buffer(cl_context ctx,
>      goto error;
> 
>    /* Copy the data if required */
> -  if (flags & CL_MEM_COPY_HOST_PTR)
> -    cl_buffer_subdata(mem->bo, 0, sz, data);
> +  if (flags & CL_MEM_COPY_HOST_PTR) {
> +    if (mem->is_userptr)
> +      memcpy(mem->host_ptr, data, sz);
> +    else
> +      cl_buffer_subdata(mem->bo, 0, sz, data);  }
> 
>    if ((flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr)
>      cl_buffer_subdata(mem->bo, 0, sz, data);
> 
> -  if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR)
> +  if (flags & CL_MEM_USE_HOST_PTR)
>      mem->host_ptr = data;
> 
>  exit:
> @@ -1085,6 +1099,9 @@ cl_mem_delete(cl_mem mem)
>      cl_buffer_unreference(mem->bo);
>    }
> 
> +  if (mem->is_userptr && (mem->flags & CL_MEM_ALLOC_HOST_PTR))
> +    cl_free(mem->host_ptr);
> +
>    cl_free(mem);
>  }
> 
> diff --git a/src/cl_mem.h b/src/cl_mem.h index ac1175d..1641dcc 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -86,13 +86,13 @@ typedef  struct _cl_mem {
>    size_t size;              /* original request size, not alignment size, used in
> constant buffer */
>    cl_context ctx;           /* Context it belongs to */
>    cl_mem_flags flags;       /* Flags specified at the creation time */
> -  void * host_ptr;          /* Pointer of the host mem specified by
> CL_MEM_ALLOC_HOST_PTR */
> +  void * host_ptr;          /* Pointer of the host mem specified by
> CL_MEM_ALLOC_HOST_PTR, CL_MEM_USE_HOST_PTR */
>    cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by
> caller. */
>    int mapped_ptr_sz;        /* The array size of mapped_ptr. */
>    int map_ref;              /* The mapped count. */
>    uint8_t mapped_gtt;       /* This object has mapped gtt, for unmap. */
>    cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
> -  uint8_t is_userptr;    /* CL_MEM_USE_HOST_PTR is enabled*/
> +  uint8_t is_userptr;       /* CL_MEM_USE_HOST_PTR is enabled*/
>  } _cl_mem;
> 
>  struct _cl_mem_image {
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list