[Mesa-dev] [RFC 4/4] gallium: Distinguish between shader IRs in get_compute_param.

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Fri Mar 25 01:43:50 UTC 2016


For radeonsi, native and TGSI use different compilers, which results
in different limits for different IRs.

The only params we strictly need for radeonsi are MAX_BLOCK_SIZE
and MAX_THREADS_PER_BLOCK, but I also marked a few others as shader
related, since they seemed likely to depend on the compiler as
well.

Radeonsi needs these params because we have to restrict the number
of registers used for blocks of more than 256 threads. For clover we
do not know the block size in advance, and we cannot use shader
variants because clover only provides native code.

Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
---
 src/gallium/docs/source/screen.rst                | 18 ++++++-------
 src/gallium/drivers/ilo/ilo_screen.c              |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c    |  2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c    |  2 +-
 src/gallium/drivers/r600/r600_pipe.c              |  2 +-
 src/gallium/drivers/radeon/r600_pipe_common.c     |  3 ++-
 src/gallium/drivers/radeonsi/si_pipe.c            |  2 +-
 src/gallium/drivers/trace/tr_screen.c             |  5 ++--
 src/gallium/include/pipe/p_screen.h               | 13 ++++++----
 src/gallium/state_trackers/clover/core/device.cpp | 31 +++++++++++++----------
 src/gallium/tests/trivial/compute.c               |  4 ++-
 src/mesa/state_tracker/st_extensions.c            | 13 +++++-----
 12 files changed, 54 insertions(+), 42 deletions(-)

diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 46ec381..52e07d2 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -436,26 +436,26 @@ pipe_screen::get_compute_param.
   ``processor-arch-manufacturer-os`` that will be passed on to the compiler.
   This CAP is only relevant for drivers that specify PIPE_SHADER_IR_LLVM
   or PIPE_SHADER_IR_NATIVE for their preferred IR.
-  Value type: null-terminated string.
+  Value type: null-terminated string. Shader related.
 * ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions
-  for grid and block coordinates.  Value type: ``uint64_t``.
+  for grid and block coordinates.  Value type: ``uint64_t``. Shader related.
 * ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block
-  units.  Value type: ``uint64_t []``.
+  units.  Value type: ``uint64_t []``.  Shader related.
 * ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread
-  units.  Value type: ``uint64_t []``.
+  units.  Value type: ``uint64_t []``. Shader related.
 * ``PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK``: Maximum number of threads that
-  a single block can contain.  Value type: ``uint64_t``.
+  a single block can contain.  Value type: ``uint64_t``. Shader related.
   This may be less than the product of the components of MAX_BLOCK_SIZE and is
   usually limited by the number of threads that can be resident simultaneously
   on a compute unit.
 * ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL
-  resource.  Value type: ``uint64_t``.
+  resource.  Value type: ``uint64_t``. Shader related.
 * ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL
-  resource.  Value type: ``uint64_t``.
+  resource.  Value type: ``uint64_t``. Shader related.
 * ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE
-  resource.  Value type: ``uint64_t``.
+  resource.  Value type: ``uint64_t``. Shader related.
 * ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT
-  resource.  Value type: ``uint64_t``.
+  resource.  Value type: ``uint64_t``. Shader related.
 * ``PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE``: Maximum size of a memory object
   allocation in bytes.  Value type: ``uint64_t``.
 * ``PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY``: Maximum frequency of the GPU
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 548d215..c5b5ab4 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -179,6 +179,7 @@ ilo_get_video_param(struct pipe_screen *screen,
 
 static int
 ilo_get_compute_param(struct pipe_screen *screen,
+                      unsigned ir_type,
                       enum pipe_compute_cap param,
                       void *ret)
 {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 5836bb2..5c902d4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -358,7 +358,7 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
 }
 
 static int
-nv50_screen_get_compute_param(struct pipe_screen *pscreen,
+nv50_screen_get_compute_param(struct pipe_screen *pscreen, unsigned ir_type,
                               enum pipe_compute_cap param, void *data)
 {
    struct nv50_screen *screen = nv50_screen(pscreen);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 553c001..b821a15 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -386,7 +386,7 @@ nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
 }
 
 static int
-nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
+nvc0_screen_get_compute_param(struct pipe_screen *pscreen, unsigned ir_type,
                               enum pipe_compute_cap param, void *data)
 {
    struct nvc0_screen *screen = nvc0_screen(pscreen);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index b801191..a7d1af2 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -499,7 +499,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
 	case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
 		if (shader == PIPE_SHADER_COMPUTE) {
 			uint64_t max_const_buffer_size;
-			pscreen->get_compute_param(pscreen,
+			pscreen->get_compute_param(pscreen, 0,
 				PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
 				&max_const_buffer_size);
 			return max_const_buffer_size;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index eed9d83..015d575 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -603,6 +603,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
 }
 
 static int r600_get_compute_param(struct pipe_screen *screen,
+        unsigned ir_type,
         enum pipe_compute_cap param,
         void *ret)
 {
@@ -669,7 +670,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
 			uint64_t *max_global_size = ret;
 			uint64_t max_mem_alloc_size;
 
-			r600_get_compute_param(screen,
+			r600_get_compute_param(screen, 0,
 				PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
 				&max_mem_alloc_size);
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index dd1103e..331b308 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -467,7 +467,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
 
 		case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
 			uint64_t max_const_buffer_size;
-			pscreen->get_compute_param(pscreen,
+			pscreen->get_compute_param(pscreen, 0,
 				PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
 				&max_const_buffer_size);
 			return max_const_buffer_size;
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index b24e185..e19121d 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -174,7 +174,7 @@ trace_screen_get_paramf(struct pipe_screen *_screen,
 
 
 static int
-trace_screen_get_compute_param(struct pipe_screen *_screen,
+trace_screen_get_compute_param(struct pipe_screen *_screen, unsigned ir_type,
                                enum pipe_compute_cap param, void *data)
 {
    struct trace_screen *tr_scr = trace_screen(_screen);
@@ -184,10 +184,11 @@ trace_screen_get_compute_param(struct pipe_screen *_screen,
    trace_dump_call_begin("pipe_screen", "get_compute_param");
 
    trace_dump_arg(ptr, screen);
+   trace_dump_arg(int, ir_type);
    trace_dump_arg(int, param);
    trace_dump_arg(ptr, data);
 
-   result = screen->get_compute_param(screen, param, data);
+   result = screen->get_compute_param(screen, ir_type, param, data);
 
    trace_dump_ret(int, result);
 
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index 4f30e75..6d44cf7 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -109,13 +109,16 @@ struct pipe_screen {
 
    /**
     * Query a compute-specific capability/parameter/limit.
-    * \param param  one of PIPE_COMPUTE_CAP_x
-    * \param ret    pointer to a preallocated buffer that will be
-    *               initialized to the parameter value, or NULL.
-    * \return       size in bytes of the parameter value that would be
-    *               returned.
+    * \param ir_type shader IR type for which the param applies, or don't care
+    *                if the param is not shader related
+    * \param param   one of PIPE_COMPUTE_CAP_x
+    * \param ret     pointer to a preallocated buffer that will be
+    *                initialized to the parameter value, or NULL.
+    * \return        size in bytes of the parameter value that would be
+    *                returned.
     */
    int (*get_compute_param)(struct pipe_screen *,
+			    unsigned ir_type,
 			    enum pipe_compute_cap param,
 			    void *ret);
 
diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp
index 1be2f64..97d1d92 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -30,11 +30,12 @@ using namespace clover;
 namespace {
    template<typename T>
    std::vector<T>
-   get_compute_param(pipe_screen *pipe, pipe_compute_cap cap) {
-      int sz = pipe->get_compute_param(pipe, cap, NULL);
+   get_compute_param(pipe_screen *pipe, unsigned ir_format,
+                     pipe_compute_cap cap) {
+      int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL);
       std::vector<T> v(sz / sizeof(T));
 
-      pipe->get_compute_param(pipe, cap, &v.front());
+      pipe->get_compute_param(pipe, ir_format, cap, &v.front());
       return v;
    }
 }
@@ -115,19 +116,19 @@ device::max_samplers() const {
 
 cl_ulong
 device::max_mem_global() const {
-   return get_compute_param<uint64_t>(pipe,
+   return get_compute_param<uint64_t>(pipe, ir_format(),
                                       PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
 }
 
 cl_ulong
 device::max_mem_local() const {
-   return get_compute_param<uint64_t>(pipe,
+   return get_compute_param<uint64_t>(pipe, ir_format(),
                                       PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
 }
 
 cl_ulong
 device::max_mem_input() const {
-   return get_compute_param<uint64_t>(pipe,
+   return get_compute_param<uint64_t>(pipe, ir_format(),
                                       PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
 }
 
@@ -146,30 +147,30 @@ device::max_const_buffers() const {
 size_t
 device::max_threads_per_block() const {
    return get_compute_param<uint64_t>(
-      pipe, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
+      pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
 }
 
 cl_ulong
 device::max_mem_alloc_size() const {
-   return get_compute_param<uint64_t>(pipe,
+   return get_compute_param<uint64_t>(pipe, ir_format(),
                                       PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];
 }
 
 cl_uint
 device::max_clock_frequency() const {
-   return get_compute_param<uint32_t>(pipe,
+   return get_compute_param<uint32_t>(pipe, ir_format(),
                                       PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
 }
 
 cl_uint
 device::max_compute_units() const {
-   return get_compute_param<uint32_t>(pipe,
+   return get_compute_param<uint32_t>(pipe, ir_format(),
                                       PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
 }
 
 bool
 device::image_support() const {
-   return get_compute_param<uint32_t>(pipe,
+   return get_compute_param<uint32_t>(pipe, ir_format(),
                                       PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
 }
 
@@ -181,13 +182,15 @@ device::has_doubles() const {
 
 std::vector<size_t>
 device::max_block_size() const {
-   auto v = get_compute_param<uint64_t>(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
+   auto v = get_compute_param<uint64_t>(pipe, ir_format(),
+                                        PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
    return { v.begin(), v.end() };
 }
 
 cl_uint
 device::subgroup_size() const {
-   return get_compute_param<uint32_t>(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
+   return get_compute_param<uint32_t>(pipe, ir_format(),
+                                      PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
 }
 
 std::string
@@ -209,7 +212,7 @@ device::ir_format() const {
 std::string
 device::ir_target() const {
    std::vector<char> target = get_compute_param<char>(
-      pipe, PIPE_COMPUTE_CAP_IR_TARGET);
+      pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET);
    return { target.data() };
 }
 
diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c
index af3e3aa..c94e2e1 100644
--- a/src/gallium/tests/trivial/compute.c
+++ b/src/gallium/tests/trivial/compute.c
@@ -58,7 +58,9 @@ struct context {
                 uint64_t __v[4];                                        \
                 int __i, __n;                                           \
                                                                         \
-                __n = ctx->screen->get_compute_param(ctx->screen, c, __v); \
+                __n = ctx->screen->get_compute_param(ctx->screen,       \
+                                                     PIPE_SHADER_IR_TGSI, \
+                                                     c, __v);           \
                 printf("%s: {", #c);                                    \
                                                                         \
                 for (__i = 0; __i < __n / sizeof(*__v); ++__i)          \
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 2fdaba0..e56cc23 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -1105,14 +1105,15 @@ void st_init_extensions(struct pipe_screen *screen,
       if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
          uint64_t grid_size[3], block_size[3];
 
-         screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE,
-                                   grid_size);
-         screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,
-                                   block_size);
-         screen->get_compute_param(screen,
+         screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
+                                   PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid_size);
+         screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
+                                   PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, block_size);
+         screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
                                    PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
                                    &consts->MaxComputeWorkGroupInvocations);
-         screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
+         screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
+                                   PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
                                    &consts->MaxComputeSharedMemorySize);
 
          for (i = 0; i < 3; i++) {
-- 
2.7.4



More information about the mesa-dev mailing list