[Beignet] [PATCH] remove the page align limitation for host_ptr of CL_MEM_USE_HOST_PTR

Guo, Yejun yejun.guo at intel.com
Tue Dec 16 00:08:27 PST 2014


Ok, will modify accordingly.

-----Original Message-----
From: Zhigang Gong [mailto:zhigang.gong at linux.intel.com] 
Sent: Tuesday, December 16, 2014 3:04 PM
To: Guo, Yejun
Cc: beignet at lists.freedesktop.org
Subject: Re: [Beignet] [PATCH] remove the page align limitation for host_ptr of CL_MEM_USE_HOST_PTR

Two comments.

1. It's better to split the utest patch out to another patch.
2. Now we add a new offset to the mem base object. That base object is shared by both the buffer object and the image object.
   As we currently don't support userptr for images at all, we need to add some sanity checks in the code path to make sure
   we never set a non-zero offset on a mem object which is attached to an image. We also need to add a sanity check
   in cl_mem_auto() to make sure a non-zero offset is only used for userptr memory objects.

On Thu, Dec 11, 2014 at 10:26:02AM +0800, Guo Yejun wrote:
> The current limitation is that both the address and the size of
> host_ptr must be page aligned. Remove the limitation by recording the
> offset of host_ptr from the page start address inside the driver.
> 
> tests verified: beignet/utest, beignet/benchmark and 
> conformance/basic, conformance/buffers, conformance/mem_host_flags
> 
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  benchmark/benchmark_use_host_ptr_buffer.cpp | 13 ++++++++++---
>  src/cl_command_queue.c                      |  4 ++--
>  src/cl_mem.c                                | 12 ++++++++----
>  src/cl_mem.h                                |  1 +
>  utests/runtime_use_host_ptr_buffer.cpp      | 15 +++++++++++----
>  5 files changed, 32 insertions(+), 13 deletions(-)
> 
> diff --git a/benchmark/benchmark_use_host_ptr_buffer.cpp 
> b/benchmark/benchmark_use_host_ptr_buffer.cpp
> index 7ede576..0021290 100644
> --- a/benchmark/benchmark_use_host_ptr_buffer.cpp
> +++ b/benchmark/benchmark_use_host_ptr_buffer.cpp
> @@ -5,13 +5,20 @@ int benchmark_use_host_ptr_buffer(void)
>  {
>    struct timeval start,stop;
>  
> -  const size_t n = 4096*4096;
> +  const size_t n = 4096*4096 + 256;
>  
>    // Setup kernel and buffers
>    OCL_CREATE_KERNEL("runtime_use_host_ptr_buffer");
>  
> -  int ret = posix_memalign(&buf_data[0], 4096, sizeof(uint32_t) * n);
> -  OCL_ASSERT(ret == 0);
> +  buf_data[0] = malloc(sizeof(uint32_t) * n);
> +
> +  //it does not matter if buf_data[0] is page aligned or not,
> +  //here, just to test the case that it is not page aligned.
> +  while ((unsigned long)buf_data[0] % 4096 == 0)
> +  {
> +    free(buf_data[0]);
> +    buf_data[0] = malloc(sizeof(uint32_t) * n);
> +  }
>  
>    for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = i;
>    OCL_CREATE_BUFFER(buf[0], CL_MEM_USE_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
> index 12530d7..62fd810 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -170,9 +170,9 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
>      offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
>      if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) {
>        struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem;
> -      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
> +      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset + buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
>      } else {
> -      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, 0, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
> +      cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i));
>      }
>    }
>  
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 3055bea..3b3421c 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -254,6 +254,7 @@ cl_mem_allocate(enum cl_mem_type type,
>    mem->magic = CL_MAGIC_MEM_HEADER;
>    mem->flags = flags;
>    mem->is_userptr = 0;
> +  mem->offset = 0;
>  
>    if (sz != 0) {
>      /* Pinning will require stricter alignment rules */
> @@ -273,10 +274,11 @@ cl_mem_allocate(enum cl_mem_type type,
>            assert(host_ptr != NULL);
>            /* userptr not support tiling */
>            if (!is_tiled) {
> -            if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) {
> -              mem->is_userptr = 1;
> -              mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", host_ptr, sz, 0);
> -            }
> +            void* aligned_host_ptr = (void*)(((unsigned long)host_ptr) & (~(page_size - 1)));
> +            mem->offset = host_ptr - aligned_host_ptr;
> +            mem->is_userptr = 1;
> +            size_t aligned_sz = ALIGN((mem->offset + sz), page_size);
> +            mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", aligned_host_ptr, aligned_sz, 0);
>            }
>          }
>          else if (flags & CL_MEM_ALLOC_HOST_PTR) {
> @@ -502,6 +504,8 @@ cl_mem_new_sub_buffer(cl_mem buffer,
>    mem->ref_n = 1;
>    mem->magic = CL_MAGIC_MEM_HEADER;
>    mem->flags = flags;
> +  mem->offset = buffer->offset;
> +  mem->is_userptr = buffer->is_userptr;
>    sub_buf->parent = (struct _cl_mem_buffer*)buffer;
>  
>    cl_mem_add_ref(buffer);
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index 1641dcc..ffe46a3 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -93,6 +93,7 @@ typedef  struct _cl_mem {
>    uint8_t mapped_gtt;       /* This object has mapped gtt, for unmap. */
>    cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
>    uint8_t is_userptr;       /* CL_MEM_USE_HOST_PTR is enabled*/
> +  size_t offset;            /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/
>  } _cl_mem;
>  
>  struct _cl_mem_image {
> diff --git a/utests/runtime_use_host_ptr_buffer.cpp 
> b/utests/runtime_use_host_ptr_buffer.cpp
> index 79273c3..4ae5379 100644
> --- a/utests/runtime_use_host_ptr_buffer.cpp
> +++ b/utests/runtime_use_host_ptr_buffer.cpp
> @@ -2,13 +2,20 @@
>  
>  static void runtime_use_host_ptr_buffer(void)  {
> -  const size_t n = 4096*100;
> +  const size_t n = 4096*10 + 1111;
>  
>    // Setup kernel and buffers
>    OCL_CREATE_KERNEL("runtime_use_host_ptr_buffer");
>  
> -  int ret = posix_memalign(&buf_data[0], 4096, sizeof(uint32_t) * n);
> -  OCL_ASSERT(ret == 0);
> +  buf_data[0] = malloc(sizeof(uint32_t) * n);
> +
> +  //it does not matter if buf_data[0] is page aligned or not,
> +  //here, just to test the case that it is not page aligned.
> +  while ((unsigned long)buf_data[0] % 4096 == 0)
> +  {
> +    free(buf_data[0]);
> +    buf_data[0] = malloc(sizeof(uint32_t) * n);
> +  }
>  
>    for (uint32_t i = 0; i < n; ++i) ((uint32_t*)buf_data[0])[i] = i;
>    OCL_CREATE_BUFFER(buf[0], CL_MEM_USE_HOST_PTR, n * sizeof(uint32_t), buf_data[0]);
> @@ -16,7 +23,7 @@ static void runtime_use_host_ptr_buffer(void)
>    // Run the kernel
>    OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
>    globals[0] = n;
> -  locals[0] = 256;
> +  locals[0] = 1;
>    OCL_NDRANGE(1);
>  
>    // Check result
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list