[Beignet] [PATCH v3 3/4] Add constant pointer as argument support in runtime.

Zhigang Gong zhigang.gong at linux.intel.com
Mon Apr 22 02:39:24 PDT 2013


In general, LGTM. I will apply it.

Could you consider an optimization that doesn't use a real bo to represent a
constant memory object? We upload the constant content to the payload and
never access the constant memory object itself, so we could just allocate
a normal buffer for the constant memory object and copy the data in. Then we
can optimize the sequence:
create a constant bo / map the bo / copy the constant to the mapped bo / later, map the constant bo / upload the constant to the curbe region
to:
create a constant bo / allocate a normal buffer / copy the constant to the buffer / upload the buffer to the curbe region.


On Mon, Apr 22, 2013 at 01:11:51PM +0800, Yang Rong wrote:
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  src/cl_command_queue.c      |   22 ++++++++++++++++++++--
>  src/cl_command_queue.h      |    2 ++
>  src/cl_command_queue_gen7.c |    7 +++++--
>  src/cl_kernel.c             |   16 +++++++++++++++-
>  src/cl_mem.c                |    1 +
>  src/cl_mem.h                |    1 +
>  6 files changed, 44 insertions(+), 5 deletions(-)
> 
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
> index a22884f..7d604c3 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -1,4 +1,4 @@
> -/* 
> +/*
>   * Copyright © 2012 Intel Corporation
>   *
>   * This library is free software; you can redistribute it and/or
> @@ -108,7 +108,6 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
>      uint32_t offset; // location of the address in the curbe
>      arg_type = gbe_kernel_get_arg_type(k->opaque, i);
>      if (arg_type != GBE_ARG_GLOBAL_PTR &&
> -        arg_type != GBE_ARG_CONSTANT_PTR &&
>          arg_type != GBE_ARG_IMAGE &&
>          arg_type != GBE_ARG_SAMPLER)
>        continue;
> @@ -129,6 +128,25 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
>    return CL_SUCCESS;
>  }
>  
> +LOCAL cl_int cl_command_queue_upload_constant_buffer(cl_kernel k,
> +                                                       char * dst)
> +{
> +  int i;
> +  for(i = 0; i < k->arg_n; i++) {
> +    enum gbe_arg_type arg_type = gbe_kernel_get_arg_type(k->opaque, i);
> +
> +    if(arg_type == GBE_ARG_CONSTANT_PTR) {
> +      uint32_t offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_EXTRA_ARGUMENT, i+GBE_CONSTANT_BUFFER);
> +      cl_mem mem = k->args[i].mem;
> +      cl_buffer_map(mem->bo, 1);
> +      void * addr = cl_buffer_get_virtual(mem->bo);
> +      memcpy(dst + offset, addr, mem->size);
> +      cl_buffer_unmap(mem->bo);
> +    }
> +  }
> +  return CL_SUCCESS;
> +}
> +
>  #if USE_FULSIM
>  extern void drm_intel_bufmgr_gem_stop_aubfile(cl_buffer_mgr);
>  extern void drm_intel_bufmgr_gem_set_aubfile(cl_buffer_mgr, FILE*);
> diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
> index 6387ae1..dcfc8c4 100644
> --- a/src/cl_command_queue.h
> +++ b/src/cl_command_queue.h
> @@ -70,5 +70,7 @@ extern cl_int cl_command_queue_finish(cl_command_queue);
>  /* Bind all the surfaces in the GPGPU state */
>  extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel);
>  
> +/*update constant buffer to final curbe */
> +extern cl_int cl_command_queue_upload_constant_buffer(cl_kernel k, char * dst);
>  #endif /* __CL_COMMAND_QUEUE_H__ */
>  
> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
> index 3a590bc..9402549 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -186,7 +186,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
>    char *final_curbe = NULL;  /* Includes them and one sub-buffer per group */
>    cl_gpgpu_kernel kernel;
>    const uint32_t simd_sz = cl_kernel_get_simd_width(ker);
> -  size_t i, batch_sz = 0u, local_sz = 0u, cst_sz = ker->curbe_sz;
> +  size_t i, batch_sz = 0u, local_sz = 0u;
> +  size_t cst_sz = ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque);
>    size_t thread_n = 0u;
>    cl_int err = CL_SUCCESS;
>  
> @@ -224,8 +225,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
>    if (ker->curbe) {
>      assert(cst_sz > 0);
>      TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz));
> -      for (i = 0; i < thread_n; ++i)
> +    for (i = 0; i < thread_n; ++i) {
>          memcpy(final_curbe + cst_sz * i, ker->curbe, cst_sz);
> +        cl_command_queue_upload_constant_buffer(ker, final_curbe + cst_sz * i);
> +    }
>      TRY (cl_set_varying_payload, ker, final_curbe, local_wk_sz, simd_sz, cst_sz, thread_n);
>      cl_gpgpu_upload_constants(gpgpu, final_curbe, thread_n*cst_sz);
>    }
> diff --git a/src/cl_kernel.c b/src/cl_kernel.c
> index bbd4438..ec0e2e8 100644
> --- a/src/cl_kernel.c
> +++ b/src/cl_kernel.c
> @@ -1,4 +1,4 @@
> -/* 
> +/*
>   * Copyright © 2012 Intel Corporation
>   *
>   * This library is free software; you can redistribute it and/or
> @@ -154,6 +154,17 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
>    if (UNLIKELY((arg_type == GBE_ARG_IMAGE && !mem->is_image)
>       || (arg_type != GBE_ARG_IMAGE && mem->is_image)))
>        return CL_INVALID_ARG_VALUE;
> +
> +  if(arg_type == GBE_ARG_CONSTANT_PTR) {
> +    int32_t cbOffset;
> +    cbOffset = gbe_kernel_set_const_buffer_size(k->opaque, index, mem->size);
> +    //constant ptr's curbe offset changed, update it
> +    if(cbOffset >= 0) {
> +      offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
> +      *((uint32_t *)(k->curbe + offset)) = cbOffset;  //cb offset in curbe
> +    }
> +  }
> +
>    cl_mem_add_ref(mem);
>    if (k->args[index].mem)
>      cl_mem_delete(k->args[index].mem);
> @@ -177,6 +188,9 @@ cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
>    cl_context ctx = k->program->ctx;
>    cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx);
>  
> +  if(k->bo != NULL)
> +    cl_buffer_unreference(k->bo);
> +
>    /* Allocate the gen code here */
>    const uint32_t code_sz = gbe_kernel_get_code_size(opaque);
>    const char *code = gbe_kernel_get_code(opaque);
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index e89aafa..7d070d4 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -81,6 +81,7 @@ cl_mem_allocate(cl_context ctx,
>      err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
>      goto error;
>    }
> +  mem->size = sz;
>  
>    /* Append the buffer in the context buffer list */
>    pthread_mutex_lock(&ctx->buffer_lock);
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index 8e7a2dd..a0b6164 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -36,6 +36,7 @@ struct _cl_mem {
>    uint64_t magic;           /* To identify it as a memory object */
>    volatile int ref_n;       /* This object is reference counted */
>    cl_buffer bo;             /* Data in GPU memory */
> +  size_t size;              /* original request size, not alignment size, used in constant buffer */
>    cl_mem prev, next;        /* We chain the memory buffers together */
>    cl_context ctx;           /* Context it belongs to */
>    cl_mem_flags flags;       /* Flags specified at the creation time */
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list