[Beignet] [PATCH 1/2] Backend: Refine get_enqueued_local_size and get_local_size

Yang, Rong R rong.r.yang at intel.com
Thu Mar 17 08:00:15 UTC 2016


LGTM, pushed.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Xiuli Pan
> Sent: Wednesday, March 16, 2016 7:53
> To: beignet at lists.freedesktop.org
> Cc: Pan, Xiuli <xiuli.pan at intel.com>
> Subject: [Beignet] [PATCH 1/2] Backend: Refine get_enqueued_local_size
> and get_local_size
> 
> From: Pan Xiuli <xiuli.pan at intel.com>
> 
> Use curbe register for these two size.
> 
> Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
> ---
>  backend/src/backend/gen_insn_selection.cpp |  7 +++--
>  backend/src/backend/program.h              |  3 ++
>  backend/src/ir/profile.cpp                 |  4 +++
>  backend/src/ir/profile.hpp                 | 47 ++++++++++++++++--------------
>  backend/src/libocl/src/ocl_workitem.cl     | 17 ++++-------
>  backend/src/llvm/llvm_gen_backend.cpp      |  6 ++++
>  backend/src/llvm/llvm_gen_ocl_function.hxx |  3 ++
>  src/cl_command_queue_gen7.c                |  6 +++-
>  8 files changed, 57 insertions(+), 36 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index 90e15c3..77614b6 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -3423,8 +3423,11 @@ namespace gbe
>            reg == ir::ocl::lid1 ||
>            reg == ir::ocl::lid2 ||
>            reg == ir::ocl::lsize0 ||
> -          reg == ir::ocl::lsize1||
> -          reg == ir::ocl::lsize2)
> +          reg == ir::ocl::lsize1 ||
> +          reg == ir::ocl::lsize2 ||
> +          reg == ir::ocl::enqlsize0 ||
> +          reg == ir::ocl::enqlsize1 ||
> +          reg == ir::ocl::enqlsize2)
>          return true;
>        else
>          return false;
> diff --git a/backend/src/backend/program.h
> b/backend/src/backend/program.h index 4dd3ae3..a690e3d 100644
> --- a/backend/src/backend/program.h
> +++ b/backend/src/backend/program.h
> @@ -83,6 +83,9 @@ enum gbe_curbe_type {
>    GBE_CURBE_LOCAL_SIZE_X,
>    GBE_CURBE_LOCAL_SIZE_Y,
>    GBE_CURBE_LOCAL_SIZE_Z,
> +  GBE_CURBE_ENQUEUED_LOCAL_SIZE_X,
> +  GBE_CURBE_ENQUEUED_LOCAL_SIZE_Y,
> +  GBE_CURBE_ENQUEUED_LOCAL_SIZE_Z,
>    GBE_CURBE_GLOBAL_SIZE_X,
>    GBE_CURBE_GLOBAL_SIZE_Y,
>    GBE_CURBE_GLOBAL_SIZE_Z,
> diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp index
> ce5e8e7..0907d76 100644
> --- a/backend/src/ir/profile.cpp
> +++ b/backend/src/ir/profile.cpp
> @@ -35,6 +35,7 @@ namespace ir {
>          "group_id_0", "group_id_1", "group_id_2",
>          "num_groups_0", "num_groups_1", "num_groups_2",
>          "local_size_0", "local_size_1", "local_size_2",
> +        "enqueued_local_size_0", "enqueued_local_size_1",
> + "enqueued_local_size_2",
>          "global_size_0", "global_size_1", "global_size_2",
>          "global_offset_0", "global_offset_1", "global_offset_2",
>          "stack_pointer", "stack_buffer", @@ -71,6 +72,9 @@ namespace ir {
>        DECL_NEW_REG(FAMILY_DWORD, lsize0, 1, GBE_CURBE_LOCAL_SIZE_X);
>        DECL_NEW_REG(FAMILY_DWORD, lsize1, 1, GBE_CURBE_LOCAL_SIZE_Y);
>        DECL_NEW_REG(FAMILY_DWORD, lsize2, 1, GBE_CURBE_LOCAL_SIZE_Z);
> +      DECL_NEW_REG(FAMILY_DWORD, enqlsize0, 1,
> GBE_CURBE_ENQUEUED_LOCAL_SIZE_X);
> +      DECL_NEW_REG(FAMILY_DWORD, enqlsize1, 1,
> GBE_CURBE_ENQUEUED_LOCAL_SIZE_Y);
> +      DECL_NEW_REG(FAMILY_DWORD, enqlsize2, 1,
> + GBE_CURBE_ENQUEUED_LOCAL_SIZE_Z);
>        DECL_NEW_REG(FAMILY_DWORD, gsize0, 1,
> GBE_CURBE_GLOBAL_SIZE_X);
>        DECL_NEW_REG(FAMILY_DWORD, gsize1, 1,
> GBE_CURBE_GLOBAL_SIZE_Y);
>        DECL_NEW_REG(FAMILY_DWORD, gsize2, 1,
> GBE_CURBE_GLOBAL_SIZE_Z); diff --git a/backend/src/ir/profile.hpp
> b/backend/src/ir/profile.hpp index 35b49e3..b093adf 100644
> --- a/backend/src/ir/profile.hpp
> +++ b/backend/src/ir/profile.hpp
> @@ -53,28 +53,31 @@ namespace ir {
>      static const Register lsize0 = Register(9);    // get_local_size(0)
>      static const Register lsize1 = Register(10);   // get_local_size(1)
>      static const Register lsize2 = Register(11);   // get_local_size(2)
> -    static const Register gsize0 = Register(12);   // get_global_size(0)
> -    static const Register gsize1 = Register(13);   // get_global_size(1)
> -    static const Register gsize2 = Register(14);   // get_global_size(2)
> -    static const Register goffset0 = Register(15); // get_global_offset(0)
> -    static const Register goffset1 = Register(16); // get_global_offset(1)
> -    static const Register goffset2 = Register(17); // get_global_offset(2)
> -    static const Register stackptr = Register(18); // stack pointer
> -    static const Register stackbuffer = Register(19); // stack buffer base
> address.
> -    static const Register blockip = Register(20);  // blockip
> -    static const Register barrierid = Register(21);// barrierid
> -    static const Register threadn = Register(22);  // number of threads
> -    static const Register workdim = Register(23);  // work dimention.
> -    static const Register zero = Register(24);     //  scalar register holds zero.
> -    static const Register one = Register(25);     //  scalar register holds one.
> -    static const Register retVal = Register(26);   // helper register to do data
> flow analysis.
> -    static const Register printfbptr = Register(27); // printf buffer address .
> -    static const Register printfiptr = Register(28); // printf index buffer address.
> -    static const Register dwblockip = Register(29);  // blockip
> -    static const Register threadid = Register(30); // the thread id of this thread.
> -    static const Register constant_addrspace = Register(31);  // starting
> address of program-scope constant
> -    static const Register stacksize = Register(32); // stack buffer total size
> -    static const uint32_t regNum = 33;             // number of special registers
> +    static const Register enqlsize0 = Register(12);    // get_local_size(0)
> +    static const Register enqlsize1 = Register(13);   // get_local_size(1)
> +    static const Register enqlsize2 = Register(14);   // get_local_size(2)
> +    static const Register gsize0 = Register(15);   // get_global_size(0)
> +    static const Register gsize1 = Register(16);   // get_global_size(1)
> +    static const Register gsize2 = Register(17);   // get_global_size(2)
> +    static const Register goffset0 = Register(18); // get_global_offset(0)
> +    static const Register goffset1 = Register(19); // get_global_offset(1)
> +    static const Register goffset2 = Register(20); // get_global_offset(2)
> +    static const Register stackptr = Register(21); // stack pointer
> +    static const Register stackbuffer = Register(22); // stack buffer base
> address.
> +    static const Register blockip = Register(23);  // blockip
> +    static const Register barrierid = Register(24);// barrierid
> +    static const Register threadn = Register(25);  // number of threads
> +    static const Register workdim = Register(26);  // work dimention.
> +    static const Register zero = Register(27);     //  scalar register holds zero.
> +    static const Register one = Register(28);     //  scalar register holds one.
> +    static const Register retVal = Register(29);   // helper register to do data
> flow analysis.
> +    static const Register printfbptr = Register(30); // printf buffer address .
> +    static const Register printfiptr = Register(31); // printf index buffer
> address.
> +    static const Register dwblockip = Register(32);  // blockip
> +    static const Register threadid = Register(33); // the thread id of this thread.
> +    static const Register constant_addrspace = Register(34);  // starting
> address of program-scope constant
> +    static const Register stacksize = Register(35); // stack buffer total size
> +    static const uint32_t regNum = 36;             // number of special registers
>      extern const char *specialRegMean[];           // special register name.
>    } /* namespace ocl */
> 
> diff --git a/backend/src/libocl/src/ocl_workitem.cl
> b/backend/src/libocl/src/ocl_workitem.cl
> index 235f12b..dc8fa6d 100644
> --- a/backend/src/libocl/src/ocl_workitem.cl
> +++ b/backend/src/libocl/src/ocl_workitem.cl
> @@ -30,6 +30,7 @@ PURE CONST unsigned int
> __gen_ocl_##NAME##1(void); \  PURE CONST unsigned int
> __gen_ocl_##NAME##2(void);
>  DECL_INTERNAL_WORK_ITEM_FN(get_group_id)
>  DECL_INTERNAL_WORK_ITEM_FN(get_local_id)
> +DECL_INTERNAL_WORK_ITEM_FN(get_enqueued_local_size)
>  DECL_INTERNAL_WORK_ITEM_FN(get_local_size)
>  DECL_INTERNAL_WORK_ITEM_FN(get_global_size)
>  DECL_INTERNAL_WORK_ITEM_FN(get_global_offset)
> @@ -46,6 +47,7 @@ OVERLOADABLE unsigned NAME(unsigned int dim)
> {             \
> 
>  DECL_PUBLIC_WORK_ITEM_FN(get_group_id, 0)
> DECL_PUBLIC_WORK_ITEM_FN(get_local_id, 0)
> +DECL_PUBLIC_WORK_ITEM_FN(get_enqueued_local_size, 1)
>  DECL_PUBLIC_WORK_ITEM_FN(get_local_size, 1)
> DECL_PUBLIC_WORK_ITEM_FN(get_global_size, 1)
> DECL_PUBLIC_WORK_ITEM_FN(get_global_offset, 0) @@ -53,14 +55,7 @@
> DECL_PUBLIC_WORK_ITEM_FN(get_num_groups, 1)  #undef
> DECL_PUBLIC_WORK_ITEM_FN
> 
>  OVERLOADABLE uint get_global_id(uint dim) {
> -  return get_local_id(dim) + get_local_size(dim) * get_group_id(dim) +
> get_global_offset(dim); -}
> -
> -OVERLOADABLE uint get_enqueued_local_size (uint dimindx) -{
> -  //TODO: should be different with get_local_size when support
> -  //non-uniform work-group size
> -  return get_local_size(dimindx);
> +  return get_local_id(dim) + get_enqueued_local_size(dim) *
> + get_group_id(dim) + get_global_offset(dim);
>  }
> 
>  OVERLOADABLE uint get_global_linear_id(void) @@ -80,8 +75,8 @@
> OVERLOADABLE uint get_local_linear_id(void)  {
>    uint dim = __gen_ocl_get_work_dim();
>    if (dim == 1) return get_local_id(0);
> -  else if (dim == 2) return get_local_id(1) * get_local_size (0) +
> get_local_id(0);
> -  else if (dim == 3) return (get_local_id(2) * get_local_size(1) *
> get_local_size(0)) +
> -                            (get_local_id(1) * get_local_size(0)) + get_local_id(0);
> +  else if (dim == 2) return get_local_id(1) *
> + get_enqueued_local_size(0) + get_local_id(0);  else if (dim == 3) return
> (get_local_id(2) * get_enqueued_local_size(1) * get_local_size(0)) +
> +                            (get_local_id(1) *
> + get_enqueued_local_size(0)) + get_local_id(0);
>    else return 0;
>  }
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index cca1781..4a98678 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -3507,6 +3507,12 @@ namespace gbe
>          regTranslator.newScalarProxy(ir::ocl::lsize1, dst); break;
>        case GEN_OCL_GET_LOCAL_SIZE2:
>          regTranslator.newScalarProxy(ir::ocl::lsize2, dst); break;
> +      case GEN_OCL_GET_ENQUEUED_LOCAL_SIZE0:
> +        regTranslator.newScalarProxy(ir::ocl::enqlsize0, dst); break;
> +      case GEN_OCL_GET_ENQUEUED_LOCAL_SIZE1:
> +        regTranslator.newScalarProxy(ir::ocl::enqlsize1, dst); break;
> +      case GEN_OCL_GET_ENQUEUED_LOCAL_SIZE2:
> +        regTranslator.newScalarProxy(ir::ocl::enqlsize2, dst); break;
>        case GEN_OCL_GET_GLOBAL_SIZE0:
>          regTranslator.newScalarProxy(ir::ocl::gsize0, dst); break;
>        case GEN_OCL_GET_GLOBAL_SIZE1:
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 7bd59fc..09feb1a 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -10,6 +10,9 @@ DECL_LLVM_GEN_FUNCTION(GET_NUM_GROUPS2,
> __gen_ocl_get_num_groups2)
> DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE0, __gen_ocl_get_local_size0)
> DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE1, __gen_ocl_get_local_size1)
> DECL_LLVM_GEN_FUNCTION(GET_LOCAL_SIZE2, __gen_ocl_get_local_size2)
> +DECL_LLVM_GEN_FUNCTION(GET_ENQUEUED_LOCAL_SIZE0,
> +__gen_ocl_get_enqueued_local_size0)
> +DECL_LLVM_GEN_FUNCTION(GET_ENQUEUED_LOCAL_SIZE1,
> +__gen_ocl_get_enqueued_local_size1)
> +DECL_LLVM_GEN_FUNCTION(GET_ENQUEUED_LOCAL_SIZE2,
> +__gen_ocl_get_enqueued_local_size2)
>  DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE0,
> __gen_ocl_get_global_size0)
> DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE1,
> __gen_ocl_get_global_size1)
> DECL_LLVM_GEN_FUNCTION(GET_GLOBAL_SIZE2,
> __gen_ocl_get_global_size2) diff --git a/src/cl_command_queue_gen7.c
> b/src/cl_command_queue_gen7.c index 38cf56b..6bfacbf 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -221,6 +221,7 @@ cl_curbe_fill(cl_kernel ker,
>                const size_t *global_wk_off,
>                const size_t *global_wk_sz,
>                const size_t *local_wk_sz,
> +              const size_t *enqueued_local_wk_sz,
>                size_t thread_n)
>  {
>    int32_t offset;
> @@ -230,6 +231,9 @@ cl_curbe_fill(cl_kernel ker,
>    UPLOAD(GBE_CURBE_LOCAL_SIZE_X, local_wk_sz[0]);
>    UPLOAD(GBE_CURBE_LOCAL_SIZE_Y, local_wk_sz[1]);
>    UPLOAD(GBE_CURBE_LOCAL_SIZE_Z, local_wk_sz[2]);
> +  UPLOAD(GBE_CURBE_ENQUEUED_LOCAL_SIZE_X,
> enqueued_local_wk_sz[0]);
> + UPLOAD(GBE_CURBE_ENQUEUED_LOCAL_SIZE_Y,
> enqueued_local_wk_sz[1]);
> + UPLOAD(GBE_CURBE_ENQUEUED_LOCAL_SIZE_Z,
> enqueued_local_wk_sz[2]);
>    UPLOAD(GBE_CURBE_GLOBAL_SIZE_X, global_wk_sz[0]);
>    UPLOAD(GBE_CURBE_GLOBAL_SIZE_Y, global_wk_sz[1]);
>    UPLOAD(GBE_CURBE_GLOBAL_SIZE_Z, global_wk_sz[2]); @@ -374,7
> +378,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue
> queue,
>    }
>    /* Curbe step 1: fill the constant urb buffer data shared by all threads */
>    if (ker->curbe) {
> -    kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off, global_wk_sz,
> local_wk_sz, thread_n);
> +    kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off,
> + global_wk_sz,local_wk_sz ,local_wk_sz, thread_n);
>      if (kernel.slm_sz > ker->program->ctx->device->local_mem_size) {
>        fprintf(stderr, "Beignet: Out of shared local memory %d.\n",
> kernel.slm_sz);
>        return CL_OUT_OF_RESOURCES;
> --
> 2.5.0
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list