[Mesa-dev] [RFC 4/4] gallium: Distinguish between shader IR's in get_compute_param.

Marek Olšák maraeo at gmail.com
Sun Mar 27 17:54:25 UTC 2016


The idea sounds good to me.

Marek

On Fri, Mar 25, 2016 at 2:43 AM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> For radeonsi, native and TGSI shaders use different compilers, which
> results in different limits for different IRs.
>
> Strictly speaking, radeonsi only needs the MAX_BLOCK_SIZE and
> MAX_THREADS_PER_BLOCK params, but I also marked a few others as shader
> related, since they seemed likely to depend on the compiler as well.
>
> Radeonsi needs these params because we need to restrict the number of
> registers used for blocks of more than 256 threads. For clover we do
> not know the block size in advance, and we cannot use shader variants
> because clover only provides native code.
>
> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> ---
>  src/gallium/docs/source/screen.rst                | 18 ++++++-------
>  src/gallium/drivers/ilo/ilo_screen.c              |  1 +
>  src/gallium/drivers/nouveau/nv50/nv50_screen.c    |  2 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c    |  2 +-
>  src/gallium/drivers/r600/r600_pipe.c              |  2 +-
>  src/gallium/drivers/radeon/r600_pipe_common.c     |  3 ++-
>  src/gallium/drivers/radeonsi/si_pipe.c            |  2 +-
>  src/gallium/drivers/trace/tr_screen.c             |  5 ++--
>  src/gallium/include/pipe/p_screen.h               | 13 ++++++----
>  src/gallium/state_trackers/clover/core/device.cpp | 31 +++++++++++++----------
>  src/gallium/tests/trivial/compute.c               |  4 ++-
>  src/mesa/state_tracker/st_extensions.c            | 13 +++++-----
>  12 files changed, 54 insertions(+), 42 deletions(-)
>
> diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
> index 46ec381..52e07d2 100644
> --- a/src/gallium/docs/source/screen.rst
> +++ b/src/gallium/docs/source/screen.rst
> @@ -436,26 +436,26 @@ pipe_screen::get_compute_param.
>    ``processor-arch-manufacturer-os`` that will be passed on to the compiler.
>    This CAP is only relevant for drivers that specify PIPE_SHADER_IR_LLVM
>    or PIPE_SHADER_IR_NATIVE for their preferred IR.
> -  Value type: null-terminated string.
> +  Value type: null-terminated string. Shader related.
>  * ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions
> -  for grid and block coordinates.  Value type: ``uint64_t``.
> +  for grid and block coordinates.  Value type: ``uint64_t``. Shader related.
>  * ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block
> -  units.  Value type: ``uint64_t []``.
> +  units.  Value type: ``uint64_t []``.  Shader related.
>  * ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread
> -  units.  Value type: ``uint64_t []``.
> +  units.  Value type: ``uint64_t []``. Shader related.
>  * ``PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK``: Maximum number of threads that
> -  a single block can contain.  Value type: ``uint64_t``.
> +  a single block can contain.  Value type: ``uint64_t``. Shader related.
>    This may be less than the product of the components of MAX_BLOCK_SIZE and is
>    usually limited by the number of threads that can be resident simultaneously
>    on a compute unit.
>  * ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL
> -  resource.  Value type: ``uint64_t``.
> +  resource.  Value type: ``uint64_t``. Shader related.
>  * ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL
> -  resource.  Value type: ``uint64_t``.
> +  resource.  Value type: ``uint64_t``. Shader related.
>  * ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE
> -  resource.  Value type: ``uint64_t``.
> +  resource.  Value type: ``uint64_t``. Shader related.
>  * ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT
> -  resource.  Value type: ``uint64_t``.
> +  resource.  Value type: ``uint64_t``. Shader related.
>  * ``PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE``: Maximum size of a memory object
>    allocation in bytes.  Value type: ``uint64_t``.
>  * ``PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY``: Maximum frequency of the GPU
> diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
> index 548d215..c5b5ab4 100644
> --- a/src/gallium/drivers/ilo/ilo_screen.c
> +++ b/src/gallium/drivers/ilo/ilo_screen.c
> @@ -179,6 +179,7 @@ ilo_get_video_param(struct pipe_screen *screen,
>
>  static int
>  ilo_get_compute_param(struct pipe_screen *screen,
> +                      unsigned ir_type,
>                        enum pipe_compute_cap param,
>                        void *ret)
>  {
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> index 5836bb2..5c902d4 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> @@ -358,7 +358,7 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
>  }
>
>  static int
> -nv50_screen_get_compute_param(struct pipe_screen *pscreen,
> +nv50_screen_get_compute_param(struct pipe_screen *pscreen, unsigned ir_type,
>                                enum pipe_compute_cap param, void *data)
>  {
>     struct nv50_screen *screen = nv50_screen(pscreen);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> index 553c001..b821a15 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> @@ -386,7 +386,7 @@ nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
>  }
>
>  static int
> -nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
> +nvc0_screen_get_compute_param(struct pipe_screen *pscreen, unsigned ir_type,
>                                enum pipe_compute_cap param, void *data)
>  {
>     struct nvc0_screen *screen = nvc0_screen(pscreen);
> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
> index b801191..a7d1af2 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -499,7 +499,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
>         case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
>                 if (shader == PIPE_SHADER_COMPUTE) {
>                         uint64_t max_const_buffer_size;
> -                       pscreen->get_compute_param(pscreen,
> +                       pscreen->get_compute_param(pscreen, 0,
>                                 PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
>                                 &max_const_buffer_size);
>                         return max_const_buffer_size;
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
> index eed9d83..015d575 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -603,6 +603,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
>  }
>
>  static int r600_get_compute_param(struct pipe_screen *screen,
> +        unsigned ir_type,
>          enum pipe_compute_cap param,
>          void *ret)
>  {
> @@ -669,7 +670,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
>                         uint64_t *max_global_size = ret;
>                         uint64_t max_mem_alloc_size;
>
> -                       r600_get_compute_param(screen,
> +                       r600_get_compute_param(screen, 0,
>                                 PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
>                                 &max_mem_alloc_size);
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index dd1103e..331b308 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -467,7 +467,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
>
>                 case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
>                         uint64_t max_const_buffer_size;
> -                       pscreen->get_compute_param(pscreen,
> +                       pscreen->get_compute_param(pscreen, 0,
>                                 PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
>                                 &max_const_buffer_size);
>                         return max_const_buffer_size;
> diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
> index b24e185..e19121d 100644
> --- a/src/gallium/drivers/trace/tr_screen.c
> +++ b/src/gallium/drivers/trace/tr_screen.c
> @@ -174,7 +174,7 @@ trace_screen_get_paramf(struct pipe_screen *_screen,
>
>
>  static int
> -trace_screen_get_compute_param(struct pipe_screen *_screen,
> +trace_screen_get_compute_param(struct pipe_screen *_screen, unsigned ir_type,
>                                 enum pipe_compute_cap param, void *data)
>  {
>     struct trace_screen *tr_scr = trace_screen(_screen);
> @@ -184,10 +184,11 @@ trace_screen_get_compute_param(struct pipe_screen *_screen,
>     trace_dump_call_begin("pipe_screen", "get_compute_param");
>
>     trace_dump_arg(ptr, screen);
> +   trace_dump_arg(int, ir_type);
>     trace_dump_arg(int, param);
>     trace_dump_arg(ptr, data);
>
> -   result = screen->get_compute_param(screen, param, data);
> +   result = screen->get_compute_param(screen, ir_type, param, data);
>
>     trace_dump_ret(int, result);
>
> diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
> index 4f30e75..6d44cf7 100644
> --- a/src/gallium/include/pipe/p_screen.h
> +++ b/src/gallium/include/pipe/p_screen.h
> @@ -109,13 +109,16 @@ struct pipe_screen {
>
>     /**
>      * Query a compute-specific capability/parameter/limit.
> -    * \param param  one of PIPE_COMPUTE_CAP_x
> -    * \param ret    pointer to a preallocated buffer that will be
> -    *               initialized to the parameter value, or NULL.
> -    * \return       size in bytes of the parameter value that would be
> -    *               returned.
> +    * \param ir_type shader IR type for which the param applies, or don't care
> +    *                if the param is not shader related
> +    * \param param   one of PIPE_COMPUTE_CAP_x
> +    * \param ret     pointer to a preallocated buffer that will be
> +    *                initialized to the parameter value, or NULL.
> +    * \return        size in bytes of the parameter value that would be
> +    *                returned.
>      */
>     int (*get_compute_param)(struct pipe_screen *,
> +                           unsigned ir_type,
>                             enum pipe_compute_cap param,
>                             void *ret);
>
> diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp
> index 1be2f64..97d1d92 100644
> --- a/src/gallium/state_trackers/clover/core/device.cpp
> +++ b/src/gallium/state_trackers/clover/core/device.cpp
> @@ -30,11 +30,12 @@ using namespace clover;
>  namespace {
>     template<typename T>
>     std::vector<T>
> -   get_compute_param(pipe_screen *pipe, pipe_compute_cap cap) {
> -      int sz = pipe->get_compute_param(pipe, cap, NULL);
> +   get_compute_param(pipe_screen *pipe, unsigned ir_format,
> +                     pipe_compute_cap cap) {
> +      int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL);
>        std::vector<T> v(sz / sizeof(T));
>
> -      pipe->get_compute_param(pipe, cap, &v.front());
> +      pipe->get_compute_param(pipe, ir_format, cap, &v.front());
>        return v;
>     }
>  }
> @@ -115,19 +116,19 @@ device::max_samplers() const {
>
>  cl_ulong
>  device::max_mem_global() const {
> -   return get_compute_param<uint64_t>(pipe,
> +   return get_compute_param<uint64_t>(pipe, ir_format(),
>                                        PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
>  }
>
>  cl_ulong
>  device::max_mem_local() const {
> -   return get_compute_param<uint64_t>(pipe,
> +   return get_compute_param<uint64_t>(pipe, ir_format(),
>                                        PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
>  }
>
>  cl_ulong
>  device::max_mem_input() const {
> -   return get_compute_param<uint64_t>(pipe,
> +   return get_compute_param<uint64_t>(pipe, ir_format(),
>                                        PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
>  }
>
> @@ -146,30 +147,30 @@ device::max_const_buffers() const {
>  size_t
>  device::max_threads_per_block() const {
>     return get_compute_param<uint64_t>(
> -      pipe, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
> +      pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
>  }
>
>  cl_ulong
>  device::max_mem_alloc_size() const {
> -   return get_compute_param<uint64_t>(pipe,
> +   return get_compute_param<uint64_t>(pipe, ir_format(),
>                                        PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];
>  }
>
>  cl_uint
>  device::max_clock_frequency() const {
> -   return get_compute_param<uint32_t>(pipe,
> +   return get_compute_param<uint32_t>(pipe, ir_format(),
>                                        PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
>  }
>
>  cl_uint
>  device::max_compute_units() const {
> -   return get_compute_param<uint32_t>(pipe,
> +   return get_compute_param<uint32_t>(pipe, ir_format(),
>                                        PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
>  }
>
>  bool
>  device::image_support() const {
> -   return get_compute_param<uint32_t>(pipe,
> +   return get_compute_param<uint32_t>(pipe, ir_format(),
>                                        PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
>  }
>
> @@ -181,13 +182,15 @@ device::has_doubles() const {
>
>  std::vector<size_t>
>  device::max_block_size() const {
> -   auto v = get_compute_param<uint64_t>(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
> +   auto v = get_compute_param<uint64_t>(pipe, ir_format(),
> +                                        PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
>     return { v.begin(), v.end() };
>  }
>
>  cl_uint
>  device::subgroup_size() const {
> -   return get_compute_param<uint32_t>(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
> +   return get_compute_param<uint32_t>(pipe, ir_format(),
> +                                      PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
>  }
>
>  std::string
> @@ -209,7 +212,7 @@ device::ir_format() const {
>  std::string
>  device::ir_target() const {
>     std::vector<char> target = get_compute_param<char>(
> -      pipe, PIPE_COMPUTE_CAP_IR_TARGET);
> +      pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET);
>     return { target.data() };
>  }
>
> diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c
> index af3e3aa..c94e2e1 100644
> --- a/src/gallium/tests/trivial/compute.c
> +++ b/src/gallium/tests/trivial/compute.c
> @@ -58,7 +58,9 @@ struct context {
>                  uint64_t __v[4];                                        \
>                  int __i, __n;                                           \
>                                                                          \
> -                __n = ctx->screen->get_compute_param(ctx->screen, c, __v); \
> +                __n = ctx->screen->get_compute_param(ctx->screen,       \
> +                                                     PIPE_SHADER_IR_TGSI, \
> +                                                     c, __v);           \
>                  printf("%s: {", #c);                                    \
>                                                                          \
>                  for (__i = 0; __i < __n / sizeof(*__v); ++__i)          \
> diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
> index 2fdaba0..e56cc23 100644
> --- a/src/mesa/state_tracker/st_extensions.c
> +++ b/src/mesa/state_tracker/st_extensions.c
> @@ -1105,14 +1105,15 @@ void st_init_extensions(struct pipe_screen *screen,
>        if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
>           uint64_t grid_size[3], block_size[3];
>
> -         screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE,
> -                                   grid_size);
> -         screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,
> -                                   block_size);
> -         screen->get_compute_param(screen,
> +         screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
> +                                   PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid_size);
> +         screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
> +                                   PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, block_size);
> +         screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
>                                     PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
>                                     &consts->MaxComputeWorkGroupInvocations);
> -         screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
> +         screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
> +                                   PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
>                                     &consts->MaxComputeSharedMemorySize);
>
>           for (i = 0; i < 3; i++) {
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list