[Beignet] [PATCH 1/2] enable CL_MEM_ALLOC_HOST_PTR with user_ptr to avoid copy between GPU/CPU
Yang, Rong R
rong.r.yang at intel.com
Mon Dec 1 22:43:31 PST 2014
The patchset LGTM.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo Yejun
> Sent: Tuesday, December 2, 2014 09:31
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [Beignet] [PATCH 1/2] enable CL_MEM_ALLOC_HOST_PTR with
> user_ptr to avoid copy between GPU/CPU
>
> When userptr is enabled, allocate page-aligned system memory for
> CL_MEM_ALLOC_HOST_PTR inside the driver and wrap it as GPU memory
> to avoid the copy between GPU and CPU.
>
> Also refine the related user_ptr code.
>
> Tests verified: beignet/utest; conformance basic, buffers, mem_host_flags
>
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
> src/cl_device_id.c | 8 ++++----
> src/cl_mem.c | 37 +++++++++++++++++++++++++++----------
> src/cl_mem.h | 4 ++--
> 3 files changed, 33 insertions(+), 16 deletions(-)
>
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 5ef0bde..711f8ae
> 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -27,6 +27,7 @@
> #include "cl_thread.h"
> #include "CL/cl.h"
> #include "cl_gbe_loader.h"
> +#include "cl_alloc.h"
>
> #include <assert.h>
> #include <stdio.h>
> @@ -407,15 +408,14 @@ brw_gt3_break:
> cl_buffer_mgr bufmgr = cl_driver_get_bufmgr(dummy);
>
> const size_t sz = 4096;
> - void* host_ptr = NULL;
> - int err = posix_memalign(&host_ptr, 4096, sz);
> - if (err == 0) {
> + void* host_ptr = cl_aligned_malloc(sz, 4096);; if (host_ptr != NULL)
> + {
> cl_buffer bo = cl_buffer_alloc_userptr(bufmgr, "CL memory object",
> host_ptr, sz, 0);
> if (bo == NULL)
> ret->host_unified_memory = CL_FALSE;
> else
> cl_buffer_unreference(bo);
> - free(host_ptr);
> + cl_free(host_ptr);
> }
> else
> ret->host_unified_memory = CL_FALSE; diff --git a/src/cl_mem.c
> b/src/cl_mem.c index 1f1b668..7528964 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -266,16 +266,26 @@ cl_mem_allocate(enum cl_mem_type type,
>
> #ifdef HAS_USERPTR
> if (ctx->device->host_unified_memory) {
> + int page_size = getpagesize();
> /* currently only cl buf is supported, will add cl image support later */
> - if ((flags & CL_MEM_USE_HOST_PTR) && host_ptr != NULL) {
> - /* userptr not support tiling */
> - if (!is_tiled) {
> - int page_size = getpagesize();
> - if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
> - mem->is_userptr = 1;
> - mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory
> object", host_ptr, sz, 0);
> + if (type == CL_MEM_BUFFER_TYPE) {
> + if (flags & CL_MEM_USE_HOST_PTR) {
> + assert(host_ptr != NULL);
> + /* userptr not support tiling */
> + if (!is_tiled) {
> + if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
> + mem->is_userptr = 1;
> + mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory
> object", host_ptr, sz, 0);
> + }
> }
> }
> + else if (flags & CL_MEM_ALLOC_HOST_PTR) {
> + const size_t alignedSZ = ALIGN(sz, page_size);
> + void* internal_host_ptr = cl_aligned_malloc(alignedSZ, page_size);
> + mem->host_ptr = internal_host_ptr;
> + mem->is_userptr = 1;
> + mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory
> object", internal_host_ptr, alignedSZ, 0);
> + }
> }
> }
>
> @@ -416,13 +426,17 @@ cl_mem_new_buffer(cl_context ctx,
> goto error;
>
> /* Copy the data if required */
> - if (flags & CL_MEM_COPY_HOST_PTR)
> - cl_buffer_subdata(mem->bo, 0, sz, data);
> + if (flags & CL_MEM_COPY_HOST_PTR) {
> + if (mem->is_userptr)
> + memcpy(mem->host_ptr, data, sz);
> + else
> + cl_buffer_subdata(mem->bo, 0, sz, data); }
>
> if ((flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr)
> cl_buffer_subdata(mem->bo, 0, sz, data);
>
> - if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR)
> + if (flags & CL_MEM_USE_HOST_PTR)
> mem->host_ptr = data;
>
> exit:
> @@ -1085,6 +1099,9 @@ cl_mem_delete(cl_mem mem)
> cl_buffer_unreference(mem->bo);
> }
>
> + if (mem->is_userptr && (mem->flags & CL_MEM_ALLOC_HOST_PTR))
> + cl_free(mem->host_ptr);
> +
> cl_free(mem);
> }
>
> diff --git a/src/cl_mem.h b/src/cl_mem.h index ac1175d..1641dcc 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -86,13 +86,13 @@ typedef struct _cl_mem {
> size_t size; /* original request size, not alignment size, used in
> constant buffer */
> cl_context ctx; /* Context it belongs to */
> cl_mem_flags flags; /* Flags specified at the creation time */
> - void * host_ptr; /* Pointer of the host mem specified by
> CL_MEM_ALLOC_HOST_PTR */
> + void * host_ptr; /* Pointer of the host mem specified by
> CL_MEM_ALLOC_HOST_PTR, CL_MEM_USE_HOST_PTR */
> cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by
> caller. */
> int mapped_ptr_sz; /* The array size of mapped_ptr. */
> int map_ref; /* The mapped count. */
> uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */
> cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */
> - uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/
> + uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/
> } _cl_mem;
>
> struct _cl_mem_image {
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet mailing list