Mesa (master): panfrost: Add helpers to emit indirect dispatch jobs

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Apr 22 17:19:25 UTC 2021


Module: Mesa
Branch: master
Commit: 9b22cda364d92bd50c3bed12a28080ba0252f04d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b22cda364d92bd50c3bed12a28080ba0252f04d

Author: Boris Brezillon <boris.brezillon at collabora.com>
Date:   Mon Apr 19 17:57:12 2021 +0200

panfrost: Add helpers to emit indirect dispatch jobs

Indirect dispatch is implemented using an extra compute job that patches
the header of the actual compute job to apply the final num_workgroup
values. Add helpers to simplify the emission of such jobs.

Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10332>

---

 src/panfrost/Makefile.sources            |   2 +
 src/panfrost/lib/meson.build             |   1 +
 src/panfrost/lib/pan_device.h            |   7 +
 src/panfrost/lib/pan_indirect_dispatch.c | 282 +++++++++++++++++++++++++++++++
 src/panfrost/lib/pan_indirect_dispatch.h |  48 ++++++
 5 files changed, 340 insertions(+)

diff --git a/src/panfrost/Makefile.sources b/src/panfrost/Makefile.sources
index 2f55e07db6a..9e7be1314bc 100644
--- a/src/panfrost/Makefile.sources
+++ b/src/panfrost/Makefile.sources
@@ -42,6 +42,8 @@ lib_FILES := \
         lib/pan_device.h \
         lib/pan_encoder.h \
         lib/pan_format.c \
+        lib/pan_indirect_dispatch.c \
+        lib/pan_indirect_dispatch.h \
         lib/pan_indirect_draw.c \
         lib/pan_indirect_draw.h \
         lib/pan_invocation.c \
diff --git a/src/panfrost/lib/meson.build b/src/panfrost/lib/meson.build
index 0b08f0eeafb..b0ebff32ac3 100644
--- a/src/panfrost/lib/meson.build
+++ b/src/panfrost/lib/meson.build
@@ -29,6 +29,7 @@ libpanfrost_lib_files = files(
   'pan_blitter.c',
   'pan_cs.c',
   'pan_format.c',
+  'pan_indirect_dispatch.c',
   'pan_indirect_draw.c',
   'pan_invocation.c',
   'pan_sampler.c',
diff --git a/src/panfrost/lib/pan_device.h b/src/panfrost/lib/pan_device.h
index 44ac63f6b1e..976d34ed34a 100644
--- a/src/panfrost/lib/pan_device.h
+++ b/src/panfrost/lib/pan_device.h
@@ -139,6 +139,12 @@ struct pan_indirect_draw_shaders {
         struct panfrost_bo *varying_heap;
 };
 
+struct pan_indirect_dispatch { /* per-device state filled in by pan_indirect_dispatch_init() */
+        struct panfrost_ubo_push push; /* copy of the compiled shader's pan_shader_info.push */
+        struct panfrost_bo *bin; /* BO the compiled shader binary is copied into */
+        struct panfrost_bo *descs; /* BO holding a RENDERER_STATE followed by a LOCAL_STORAGE descriptor */
+};
+
 typedef uint32_t mali_pixel_format;
 
 struct panfrost_format {
@@ -195,6 +201,7 @@ struct panfrost_device {
         struct pan_blitter blitter;
         struct pan_blend_shaders blend_shaders;
         struct pan_indirect_draw_shaders indirect_draw_shaders;
+        struct pan_indirect_dispatch indirect_dispatch;
 
         /* Tiler heap shared across all tiler jobs, allocated against the
          * device since there's only a single tiler. Since this is invisible to
diff --git a/src/panfrost/lib/pan_indirect_dispatch.c b/src/panfrost/lib/pan_indirect_dispatch.c
new file mode 100644
index 00000000000..a78a10d8418
--- /dev/null
+++ b/src/panfrost/lib/pan_indirect_dispatch.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2021 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <stdio.h>
+#include "pan_bo.h"
+#include "pan_shader.h"
+#include "pan_scoreboard.h"
+#include "pan_encoder.h"
+#include "pan_indirect_dispatch.h"
+#include "pan_pool.h"
+#include "pan_util.h"
+#include "panfrost-quirks.h"
+#include "compiler/nir/nir_builder.h"
+#include "util/u_memory.h"
+#include "util/macros.h"
+
+struct indirect_dispatch_inputs { /* inputs of the patching shader; fields are read by offset via get_input_field() */
+        mali_ptr job; /* address of the compute job that gets patched */
+        mali_ptr indirect_dim; /* address the three 32-bit workgroup counts are loaded from */
+        mali_ptr num_wg_sysval[3]; /* addresses the counts are stored to; nothing is stored when [0] is 0 */
+};
+
+static nir_ssa_def *
+get_input_data(nir_builder *b, unsigned offset, unsigned size) /* offset in bytes, size in bits */
+{
+        assert(!(offset & 0x3)); /* offset must be a multiple of 4 */
+        assert(size && !(size & 0x3)); /* size must be a non-zero multiple of 4 */
+        /* Emit a one-component, size-bit nir_load_ubo with immediates 0 and offset as its sources. */
+        return nir_load_ubo(b, 1, size,
+                            nir_imm_int(b, 0),
+                            nir_imm_int(b, offset),
+                            .align_mul = 4,
+                            .align_offset = 0,
+                            .range_base = 0,
+                            .range = ~0);
+}
+
+#define get_input_field(b, name) /* load field `name` of struct indirect_dispatch_inputs */ \
+        get_input_data(b, offsetof(struct indirect_dispatch_inputs, name), \
+                       sizeof(((struct indirect_dispatch_inputs *)0)->name) * 8) /* field size in bits */
+
+static mali_ptr
+get_rsd(const struct panfrost_device *dev) /* GPU address of the shader's renderer state descriptor */
+{
+        return dev->indirect_dispatch.descs->ptr.gpu; /* it is packed at the start of the descs BO */
+}
+
+static mali_ptr
+get_tls(const struct panfrost_device *dev) /* GPU address of the shader's local storage descriptor */
+{
+        return dev->indirect_dispatch.descs->ptr.gpu +
+               MALI_RENDERER_STATE_LENGTH; /* it is packed right after the renderer state */
+}
+
+static mali_ptr
+get_ubos(struct pan_pool *pool,
+         const struct indirect_dispatch_inputs *inputs)
+{
+        /* Upload a copy of the inputs, padded to 16 bytes, for the UBO to reference. */
+        const size_t inputs_sz = sizeof(*inputs);
+        struct panfrost_ptr data =
+                panfrost_pool_alloc_aligned(pool, ALIGN_POT(inputs_sz, 16), 16);
+        memcpy(data.cpu, inputs, inputs_sz);
+
+        /* Midgard's uniform -> UBO lowering bumps every UBO index even when
+         * no uniform moves to UBO0, so there the inputs go in the second slot. */
+        unsigned ubo_count = pan_is_bifrost(pool->dev) ? 1 : 2;
+        struct panfrost_ptr table =
+                panfrost_pool_alloc_desc_array(pool, ubo_count, UNIFORM_BUFFER);
+
+        void *slot = table.cpu;
+        if (ubo_count != 1) {
+                memset(table.cpu, 0, MALI_UNIFORM_BUFFER_LENGTH);
+                slot += MALI_UNIFORM_BUFFER_LENGTH;
+        }
+
+        pan_pack(slot, UNIFORM_BUFFER, cfg) {
+                cfg.pointer = data.gpu;
+                cfg.entries = DIV_ROUND_UP(inputs_sz, 16);
+        }
+
+        return table.gpu;
+}
+
+static mali_ptr
+get_push_uniforms(struct pan_pool *pool,
+                  const struct indirect_dispatch_inputs *inputs)
+{
+        /* Build the push-uniform buffer: one 32-bit word per push-map entry,
+         * each copied from the byte offset in *inputs that the entry names. */
+        const struct panfrost_device *dev = pool->dev;
+        size_t bytes = ALIGN(dev->indirect_dispatch.push.count * 4, 16);
+        struct panfrost_ptr buf = panfrost_pool_alloc_aligned(pool, bytes, 16);
+        const uint8_t *src = (const uint8_t *)inputs;
+        uint32_t *dst = buf.cpu;
+
+        for (unsigned w = 0; w < dev->indirect_dispatch.push.count; ++w)
+                memcpy(&dst[w], src + dev->indirect_dispatch.push.words[w].offset, 4);
+
+        return buf.gpu;
+}
+
+unsigned
+pan_indirect_dispatch_emit(struct pan_pool *pool,
+                           struct pan_scoreboard *scoreboard,
+                           const struct pan_indirect_dispatch_info *dispatch_info)
+{
+        /* Queue the compute job that runs the patching shader against the
+         * addresses in dispatch_info. The value returned is the one
+         * panfrost_add_job() produces for that job.
+         */
+        struct panfrost_device *dev = pool->dev;
+        struct panfrost_ptr job = panfrost_pool_alloc_desc(pool, COMPUTE_JOB);
+        void *invocation = pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);
+        struct indirect_dispatch_inputs inputs = {
+                .job = dispatch_info->job,
+                .indirect_dim = dispatch_info->indirect_dim,
+        };
+
+        for (unsigned axis = 0; axis < 3; ++axis)
+                inputs.num_wg_sysval[axis] = dispatch_info->num_wg_sysval[axis];
+
+        /* All six dimension arguments are 1: the patching job is minimal. */
+        panfrost_pack_work_groups_compute(invocation, 1, 1, 1, 1, 1, 1, false);
+
+        pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
+                cfg.job_task_split = 2;
+        }
+
+        /* RSD/TLS are per-device; UBOs and push uniforms are allocated per job. */
+        pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
+                cfg.draw_descriptor_is_64b = true;
+                cfg.texture_descriptor_is_64b = !pan_is_bifrost(dev);
+                cfg.state = get_rsd(dev);
+                cfg.thread_storage = get_tls(dev);
+                cfg.uniform_buffers = get_ubos(pool, &inputs);
+                cfg.push_uniforms = get_push_uniforms(pool, &inputs);
+        }
+
+        pan_section_pack(job.cpu, COMPUTE_JOB, DRAW_PADDING, cfg);
+
+        return panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
+                                false, true, 0, 0, &job, false);
+}
+
+void
+pan_indirect_dispatch_init(struct panfrost_device *dev) /* one-time setup of dev->indirect_dispatch */
+{
+        nir_builder b =
+                nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
+                                               pan_shader_get_compiler_options(dev),
+                                               "%s", "indirect_dispatch");
+        nir_variable_create(b.shader, nir_var_mem_ubo,
+                            glsl_uint_type(), "inputs");
+        b.shader->info.num_ubos++; /* account for the "inputs" UBO declared above */
+        /* Load the three 32-bit workgroup counts found at inputs.indirect_dim. */
+        nir_ssa_def *zero = nir_imm_int(&b, 0);
+        nir_ssa_def *one = nir_imm_int(&b, 1);
+        nir_ssa_def *num_wg = nir_load_global(&b, get_input_field(&b, indirect_dim), 4, 3, 32);
+        nir_ssa_def *num_wg_x = nir_channel(&b, num_wg, 0);
+        nir_ssa_def *num_wg_y = nir_channel(&b, num_wg, 1);
+        nir_ssa_def *num_wg_z = nir_channel(&b, num_wg, 2);
+        /* Counts multiply to zero: set bits 1..7 of the header word at byte 16 to MALI_JOB_TYPE_NULL. */
+        nir_ssa_def *job_hdr_ptr = get_input_field(&b, job);
+        nir_ssa_def *num_wg_flat = nir_imul(&b, num_wg_x, nir_imul(&b, num_wg_y, num_wg_z));
+        nir_push_if(&b, nir_ieq(&b, num_wg_flat, zero));
+        nir_ssa_def *job_type_ptr = nir_iadd(&b, job_hdr_ptr, nir_imm_int64(&b, 4 * 4));
+        nir_ssa_def *w4 = nir_load_global(&b, job_type_ptr, 4, 1, 32);
+        w4 = nir_iand_imm(&b, w4, ~0xfe); /* clear bits 1..7 */
+        w4 = nir_ior(&b, w4, nir_imm_int(&b, MALI_JOB_TYPE_NULL << 1));
+        nir_store_global(&b, job_type_ptr, 4, w4, 1);
+        nir_push_else(&b, NULL); /* otherwise patch the INVOCATION section of the job */
+        nir_ssa_def *job_dim_ptr =
+                nir_iadd(&b, job_hdr_ptr,
+                         nir_imm_int64(&b, pan_section_offset(COMPUTE_JOB, INVOCATION)));
+        num_wg_x = nir_isub(&b, num_wg_x, one); /* each count is encoded minus one */
+        num_wg_y = nir_isub(&b, num_wg_y, one);
+        num_wg_z = nir_isub(&b, num_wg_z, one);
+        nir_ssa_def *job_dim = nir_load_global(&b, job_dim_ptr, 8, 2, 32); /* two 32-bit words */
+        nir_ssa_def *dims = nir_channel(&b, job_dim, 0);
+        nir_ssa_def *split = nir_channel(&b, job_dim, 1); /* the x shift is read from bits 10..15 of this word */
+        nir_ssa_def *num_wg_x_split = nir_iand_imm(&b, nir_ushr_imm(&b, split, 10), 0x3f);
+        nir_ssa_def *num_wg_y_split = /* x shift + bit width of (num_wg_x - 1), width being 0 for 0 */
+                nir_iadd(&b, num_wg_x_split,
+                         nir_bcsel(&b,
+                                   nir_ieq(&b, num_wg_x, zero),
+                                   zero,
+                                   nir_iadd(&b, nir_ufind_msb(&b, num_wg_x), one)));
+        nir_ssa_def *num_wg_z_split = /* y shift + bit width of (num_wg_y - 1) */
+                nir_iadd(&b, num_wg_y_split,
+                         nir_bcsel(&b,
+                                   nir_ieq(&b, num_wg_y, zero),
+                                   zero,
+                                   nir_iadd(&b, nir_ufind_msb(&b, num_wg_y), one)));
+        split = nir_ior(&b, split, /* OR the y and z shifts in at bits 16 and 22 */
+                        nir_ior(&b,
+                                nir_ishl(&b, num_wg_y_split, nir_imm_int(&b, 16)),
+                                nir_ishl(&b, num_wg_z_split, nir_imm_int(&b, 22))));
+        dims = nir_ior(&b, dims, /* OR each (count - 1) in at its own shift */
+                       nir_ior(&b, nir_ishl(&b, num_wg_x, num_wg_x_split),
+                               nir_ior(&b, nir_ishl(&b, num_wg_y, num_wg_y_split),
+                                       nir_ishl(&b, num_wg_z, num_wg_z_split))));
+        /* Write both patched words back to the job. */
+        nir_store_global(&b, job_dim_ptr, 8, nir_vec2(&b, dims, split), 3);
+        /* If inputs.num_wg_sysval[0] is non-zero, store the unmodified counts at the three sysval addresses. */
+        nir_ssa_def *num_wg_x_ptr = get_input_field(&b, num_wg_sysval[0]);
+        nir_push_if(&b, nir_ine(&b, num_wg_x_ptr, nir_imm_int64(&b, 0)));
+        nir_store_global(&b, num_wg_x_ptr, 8, nir_channel(&b, num_wg, 0), 1);
+        nir_store_global(&b, get_input_field(&b, num_wg_sysval[1]), 8, nir_channel(&b, num_wg, 1), 1);
+        nir_store_global(&b, get_input_field(&b, num_wg_sysval[2]), 8, nir_channel(&b, num_wg, 2), 1);
+        nir_pop_if(&b, NULL);
+        nir_pop_if(&b, NULL);
+        /* Compile the shader into `binary`, then free the NIR. */
+        struct panfrost_compile_inputs inputs = { .gpu_id = dev->gpu_id };
+        struct pan_shader_info shader_info;
+        struct util_dynarray binary;
+
+        util_dynarray_init(&binary, NULL);
+        pan_shader_compile(dev, b.shader, &inputs, &binary, &shader_info);
+
+        ralloc_free(b.shader);
+        /* The compiled shader is expected to need no TLS, no WLS and no sysvals. */
+        assert(!shader_info.tls_size);
+        assert(!shader_info.wls_size);
+        assert(!shader_info.sysvals.sysval_count);
+        /* Copy the binary into an executable BO. NOTE(review): panfrost_bo_create() results are used unchecked. */
+        dev->indirect_dispatch.bin =
+                panfrost_bo_create(dev, binary.size, PAN_BO_EXECUTE);
+
+        memcpy(dev->indirect_dispatch.bin->ptr.cpu, binary.data, binary.size);
+        util_dynarray_fini(&binary);
+
+        dev->indirect_dispatch.push = shader_info.push; /* kept for get_push_uniforms() */
+        dev->indirect_dispatch.descs =
+                panfrost_bo_create(dev,
+                                   MALI_RENDERER_STATE_LENGTH +
+                                   MALI_LOCAL_STORAGE_LENGTH,
+                                   0);
+
+        mali_ptr address = dev->indirect_dispatch.bin->ptr.gpu;
+        if (!pan_is_bifrost(dev))
+                address |= shader_info.midgard.first_tag; /* non-Bifrost: OR first_tag into the shader address */
+        /* The renderer state goes first in descs, the local storage descriptor right after it. */
+        void *rsd = dev->indirect_dispatch.descs->ptr.cpu;
+        pan_pack(rsd, RENDERER_STATE, cfg) {
+                pan_shader_prepare_rsd(dev, &shader_info, address, &cfg);
+        }
+
+        void *tsd = dev->indirect_dispatch.descs->ptr.cpu +
+                    MALI_RENDERER_STATE_LENGTH;
+        pan_pack(tsd, LOCAL_STORAGE, ls) {
+                ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
+        };
+}
+
+void
+pan_indirect_dispatch_cleanup(struct panfrost_device *dev) /* drops the two BOs created by pan_indirect_dispatch_init() */
+{
+        panfrost_bo_unreference(dev->indirect_dispatch.bin); /* shader binary */
+        panfrost_bo_unreference(dev->indirect_dispatch.descs); /* renderer state + local storage descriptors */
+}
diff --git a/src/panfrost/lib/pan_indirect_dispatch.h b/src/panfrost/lib/pan_indirect_dispatch.h
new file mode 100644
index 00000000000..c48f61dd39d
--- /dev/null
+++ b/src/panfrost/lib/pan_indirect_dispatch.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2021 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __PAN_INDIRECT_DISPATCH_SHADERS_H__
+#define __PAN_INDIRECT_DISPATCH_SHADERS_H__
+
+struct panfrost_device;
+struct pan_scoreboard;
+struct pan_pool;
+/* GPU addresses describing one indirect dispatch to patch. */
+struct pan_indirect_dispatch_info {
+        mali_ptr job; /* compute job to patch */
+        mali_ptr indirect_dim; /* where the three 32-bit workgroup counts are read from */
+        mali_ptr num_wg_sysval[3]; /* where the counts are copied to; nothing is copied when [0] is 0 */
+};
+/* Queues the patching job; returns the value produced by panfrost_add_job(). */
+unsigned
+pan_indirect_dispatch_emit(struct pan_pool *pool,
+                           struct pan_scoreboard *scoreboard,
+                           const struct pan_indirect_dispatch_info *dispatch_info);
+/* Builds the patching shader and its descriptors into dev->indirect_dispatch. */
+void
+pan_indirect_dispatch_init(struct panfrost_device *dev);
+/* Drops the BOs created by pan_indirect_dispatch_init(). */
+void
+pan_indirect_dispatch_cleanup(struct panfrost_device *dev);
+
+#endif



More information about the mesa-commit mailing list