Mesa (master): panfrost: Add helpers to emit indirect dispatch jobs
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Apr 22 17:19:25 UTC 2021
Module: Mesa
Branch: master
Commit: 9b22cda364d92bd50c3bed12a28080ba0252f04d
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b22cda364d92bd50c3bed12a28080ba0252f04d
Author: Boris Brezillon <boris.brezillon at collabora.com>
Date: Mon Apr 19 17:57:12 2021 +0200
panfrost: Add helpers to emit indirect dispatch jobs
Indirect dispatch is implemented using an extra compute job patching
the compute job header to apply the final num_workgroup values. Add
helpers to simplify emission of such jobs.
Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10332>
---
src/panfrost/Makefile.sources | 2 +
src/panfrost/lib/meson.build | 1 +
src/panfrost/lib/pan_device.h | 7 +
src/panfrost/lib/pan_indirect_dispatch.c | 282 +++++++++++++++++++++++++++++++
src/panfrost/lib/pan_indirect_dispatch.h | 48 ++++++
5 files changed, 340 insertions(+)
diff --git a/src/panfrost/Makefile.sources b/src/panfrost/Makefile.sources
index 2f55e07db6a..9e7be1314bc 100644
--- a/src/panfrost/Makefile.sources
+++ b/src/panfrost/Makefile.sources
@@ -42,6 +42,8 @@ lib_FILES := \
lib/pan_device.h \
lib/pan_encoder.h \
lib/pan_format.c \
+ lib/pan_indirect_dispatch.c \
+ lib/pan_indirect_dispatch.h \
lib/pan_indirect_draw.c \
lib/pan_indirect_draw.h \
lib/pan_invocation.c \
diff --git a/src/panfrost/lib/meson.build b/src/panfrost/lib/meson.build
index 0b08f0eeafb..b0ebff32ac3 100644
--- a/src/panfrost/lib/meson.build
+++ b/src/panfrost/lib/meson.build
@@ -29,6 +29,7 @@ libpanfrost_lib_files = files(
'pan_blitter.c',
'pan_cs.c',
'pan_format.c',
+ 'pan_indirect_dispatch.c',
'pan_indirect_draw.c',
'pan_invocation.c',
'pan_sampler.c',
diff --git a/src/panfrost/lib/pan_device.h b/src/panfrost/lib/pan_device.h
index 44ac63f6b1e..976d34ed34a 100644
--- a/src/panfrost/lib/pan_device.h
+++ b/src/panfrost/lib/pan_device.h
@@ -139,6 +139,12 @@ struct pan_indirect_draw_shaders {
struct panfrost_bo *varying_heap;
};
+struct pan_indirect_dispatch { /* Per-device state of the indirect dispatch helper */
+ struct panfrost_ubo_push push; /* Push-uniform layout copied from the compiled helper shader's info */
+ struct panfrost_bo *bin; /* BO holding the compiled helper shader binary */
+ struct panfrost_bo *descs; /* BO holding the renderer state followed by the local storage descriptor */
+};
+
typedef uint32_t mali_pixel_format;
struct panfrost_format {
@@ -195,6 +201,7 @@ struct panfrost_device {
struct pan_blitter blitter;
struct pan_blend_shaders blend_shaders;
struct pan_indirect_draw_shaders indirect_draw_shaders;
+ struct pan_indirect_dispatch indirect_dispatch;
/* Tiler heap shared across all tiler jobs, allocated against the
* device since there's only a single tiler. Since this is invisible to
diff --git a/src/panfrost/lib/pan_indirect_dispatch.c b/src/panfrost/lib/pan_indirect_dispatch.c
new file mode 100644
index 00000000000..a78a10d8418
--- /dev/null
+++ b/src/panfrost/lib/pan_indirect_dispatch.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2021 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <stdio.h>
+#include "pan_bo.h"
+#include "pan_shader.h"
+#include "pan_scoreboard.h"
+#include "pan_encoder.h"
+#include "pan_indirect_dispatch.h"
+#include "pan_pool.h"
+#include "pan_util.h"
+#include "panfrost-quirks.h"
+#include "compiler/nir/nir_builder.h"
+#include "util/u_memory.h"
+#include "util/macros.h"
+
+struct indirect_dispatch_inputs { /* Inputs of the patching shader, exposed through a UBO and push uniforms */
+ mali_ptr job; /* GPU address of the compute job that the shader patches */
+ mali_ptr indirect_dim; /* GPU address the shader loads three 32-bit workgroup counts from */
+ mali_ptr num_wg_sysval[3]; /* Addresses receiving the X/Y/Z counts; all three are skipped when [0] is 0 */
+};
+
+static nir_ssa_def * /* Emits a one-component load from UBO index 0 at a constant byte offset */
+get_input_data(nir_builder *b, unsigned offset, unsigned size)
+{
+ assert(!(offset & 0x3)); /* offset must be a multiple of 4 */
+ assert(size && !(size & 0x3)); /* size must be non-zero and a multiple of 4; callers pass it in bits (sizeof * 8) */
+
+ return nir_load_ubo(b, 1, size, /* 1 component, 'size' used as the bit size */
+ nir_imm_int(b, 0), /* UBO index */
+ nir_imm_int(b, offset), /* byte offset inside the UBO */
+ .align_mul = 4,
+ .align_offset = 0,
+ .range_base = 0,
+ .range = ~0); /* no restriction on the accessed range */
+}
+
+#define get_input_field(b, name) \
+ get_input_data(b, offsetof(struct indirect_dispatch_inputs, name), \
+ sizeof(((struct indirect_dispatch_inputs *)0)->name) * 8) /* loads field 'name': offset in bytes, size in bits */
+
+static mali_ptr /* GPU address of the renderer state, which sits at the start of the descs BO */
+get_rsd(const struct panfrost_device *dev)
+{
+ return dev->indirect_dispatch.descs->ptr.gpu;
+}
+
+static mali_ptr /* GPU address of the local storage descriptor in the descs BO */
+get_tls(const struct panfrost_device *dev)
+{
+ return dev->indirect_dispatch.descs->ptr.gpu +
+ MALI_RENDERER_STATE_LENGTH; /* skip the renderer state stored first */
+}
+
+static mali_ptr /* Returns the GPU address of a UBO descriptor array exposing a copy of *inputs */
+get_ubos(struct pan_pool *pool,
+ const struct indirect_dispatch_inputs *inputs)
+{
+ struct panfrost_ptr inputs_buf = /* pool storage for the input data, size rounded up to 16 bytes */
+ panfrost_pool_alloc_aligned(pool, ALIGN_POT(sizeof(*inputs), 16), 16);
+
+ memcpy(inputs_buf.cpu, inputs, sizeof(*inputs));
+
+ /* The midgard compiler calls the uniform -> UBO lowering pass which
+ * increments UBOs index even if there's no uniform to move to UBO0.
+ */
+ unsigned num_ubos = pan_is_bifrost(pool->dev) ? 1 : 2; /* non-Bifrost needs a leading dummy descriptor */
+ struct panfrost_ptr ubos_buf =
+ panfrost_pool_alloc_desc_array(pool, num_ubos, UNIFORM_BUFFER);
+
+ void *inputs_ubo = ubos_buf.cpu; /* descriptor slot that will point at inputs_buf */
+ if (num_ubos > 1) {
+ memset(ubos_buf.cpu, 0, MALI_UNIFORM_BUFFER_LENGTH); /* zero the first descriptor */
+ inputs_ubo += MALI_UNIFORM_BUFFER_LENGTH; /* inputs go in the second slot (void * arithmetic, GNU extension) */
+ }
+
+ pan_pack(inputs_ubo, UNIFORM_BUFFER, cfg) {
+ cfg.entries = DIV_ROUND_UP(sizeof(*inputs), 16); /* size expressed in 16-byte units */
+ cfg.pointer = inputs_buf.gpu;
+ }
+
+ return ubos_buf.gpu;
+}
+
+static mali_ptr /* Returns the GPU address of a buffer holding the pushed 32-bit words of *inputs */
+get_push_uniforms(struct pan_pool *pool,
+ const struct indirect_dispatch_inputs *inputs)
+{
+ const struct panfrost_device *dev = pool->dev;
+ struct panfrost_ptr push_consts_buf = /* one 32-bit slot per pushed word, size rounded up to 16 bytes */
+ panfrost_pool_alloc_aligned(pool,
+ ALIGN(dev->indirect_dispatch.push.count * 4, 16),
+ 16);
+ uint32_t *out = push_consts_buf.cpu;
+ uint8_t *in = (uint8_t *)inputs; /* byte view so word offsets can be applied directly */
+
+ for (unsigned i = 0; i < dev->indirect_dispatch.push.count; ++i) /* NOTE(review): only .offset is used; assumes every pushed word comes from the inputs UBO - confirm */
+ memcpy(out + i, in + dev->indirect_dispatch.push.words[i].offset, 4);
+
+ return push_consts_buf.gpu;
+}
+
+unsigned /* Queues the patching compute job; returns the value panfrost_add_job() returns */
+pan_indirect_dispatch_emit(struct pan_pool *pool,
+ struct pan_scoreboard *scoreboard,
+ const struct pan_indirect_dispatch_info *dispatch_info)
+{
+ struct panfrost_device *dev = pool->dev;
+ struct panfrost_ptr job = /* descriptor of the helper job itself, allocated from the pool */
+ panfrost_pool_alloc_desc(pool, COMPUTE_JOB);
+ void *invocation =
+ pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);
+ struct indirect_dispatch_inputs inputs = { /* caller-provided addresses, laid out as the shader expects */
+ .job = dispatch_info->job,
+ .indirect_dim = dispatch_info->indirect_dim,
+ .num_wg_sysval = {
+ dispatch_info->num_wg_sysval[0],
+ dispatch_info->num_wg_sysval[1],
+ dispatch_info->num_wg_sysval[2],
+ },
+ };
+
+ panfrost_pack_work_groups_compute(invocation, /* the helper job uses 1 for every size/count argument */
+ 1, 1, 1, 1, 1, 1,
+ false);
+
+ pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
+ cfg.job_task_split = 2;
+ }
+
+ pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
+ cfg.draw_descriptor_is_64b = true;
+ cfg.texture_descriptor_is_64b = !pan_is_bifrost(dev);
+ cfg.state = get_rsd(dev); /* renderer state prepared by pan_indirect_dispatch_init() */
+ cfg.thread_storage = get_tls(pool->dev); /* pool->dev is the same pointer as dev */
+ cfg.uniform_buffers = get_ubos(pool, &inputs);
+ cfg.push_uniforms = get_push_uniforms(pool, &inputs);
+ }
+
+ pan_section_pack(job.cpu, COMPUTE_JOB, DRAW_PADDING, cfg); /* packed with no field overrides */
+
+ return panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
+ false, true, 0, 0, &job, false);
+}
+
+void /* Builds and compiles the patching shader, then uploads its binary and descriptors into dev->indirect_dispatch */
+pan_indirect_dispatch_init(struct panfrost_device *dev)
+{
+ nir_builder b = /* the helper is written directly in NIR as a compute shader */
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
+ pan_shader_get_compiler_options(dev),
+ "%s", "indirect_dispatch");
+ nir_variable_create(b.shader, nir_var_mem_ubo, /* declare the UBO the inputs are read from */
+ glsl_uint_type(), "inputs");
+ b.shader->info.num_ubos++;
+
+ nir_ssa_def *zero = nir_imm_int(&b, 0);
+ nir_ssa_def *one = nir_imm_int(&b, 1);
+ nir_ssa_def *num_wg = nir_load_global(&b, get_input_field(&b, indirect_dim), 4, 3, 32); /* three 32-bit workgroup counts */
+ nir_ssa_def *num_wg_x = nir_channel(&b, num_wg, 0);
+ nir_ssa_def *num_wg_y = nir_channel(&b, num_wg, 1);
+ nir_ssa_def *num_wg_z = nir_channel(&b, num_wg, 2);
+
+ nir_ssa_def *job_hdr_ptr = get_input_field(&b, job);
+ nir_ssa_def *num_wg_flat = nir_imul(&b, num_wg_x, nir_imul(&b, num_wg_y, num_wg_z)); /* x * y * z */
+ nir_push_if(&b, nir_ieq(&b, num_wg_flat, zero)); /* empty dispatch: turn the target job into a NULL job */
+ nir_ssa_def *job_type_ptr = nir_iadd(&b, job_hdr_ptr, nir_imm_int64(&b, 4 * 4)); /* 16 bytes into the job */
+ nir_ssa_def *w4 = nir_load_global(&b, job_type_ptr, 4, 1, 32);
+ w4 = nir_iand_imm(&b, w4, ~0xfe); /* clear bits 1..7 */
+ w4 = nir_ior(&b, w4, nir_imm_int(&b, MALI_JOB_TYPE_NULL << 1)); /* write the NULL job type at bit 1 */
+ nir_store_global(&b, job_type_ptr, 4, w4, 1);
+ nir_push_else(&b, NULL); /* non-empty dispatch: patch the invocation section */
+ nir_ssa_def *job_dim_ptr =
+ nir_iadd(&b, job_hdr_ptr,
+ nir_imm_int64(&b, pan_section_offset(COMPUTE_JOB, INVOCATION)));
+ num_wg_x = nir_isub(&b, num_wg_x, one); /* the values packed below are count - 1 */
+ num_wg_y = nir_isub(&b, num_wg_y, one);
+ num_wg_z = nir_isub(&b, num_wg_z, one);
+ nir_ssa_def *job_dim = nir_load_global(&b, job_dim_ptr, 8, 2, 32); /* two 32-bit words of the invocation section */
+ nir_ssa_def *dims = nir_channel(&b, job_dim, 0);
+ nir_ssa_def *split = nir_channel(&b, job_dim, 1);
+ nir_ssa_def *num_wg_x_split = nir_iand_imm(&b, nir_ushr_imm(&b, split, 10), 0x3f); /* 6-bit field at bit 10 of the existing split word */
+ nir_ssa_def *num_wg_y_split = /* x shift plus the number of bits needed for (num_wg_x - 1) */
+ nir_iadd(&b, num_wg_x_split,
+ nir_bcsel(&b,
+ nir_ieq(&b, num_wg_x, zero),
+ zero, /* a zero value takes no bits */
+ nir_iadd(&b, nir_ufind_msb(&b, num_wg_x), one)));
+ nir_ssa_def *num_wg_z_split = /* y shift plus the number of bits needed for (num_wg_y - 1) */
+ nir_iadd(&b, num_wg_y_split,
+ nir_bcsel(&b,
+ nir_ieq(&b, num_wg_y, zero),
+ zero,
+ nir_iadd(&b, nir_ufind_msb(&b, num_wg_y), one)));
+ split = nir_ior(&b, split, /* OR the y and z shifts in at bits 16 and 22 */
+ nir_ior(&b,
+ nir_ishl(&b, num_wg_y_split, nir_imm_int(&b, 16)),
+ nir_ishl(&b, num_wg_z_split, nir_imm_int(&b, 22))));
+ dims = nir_ior(&b, dims, /* OR each (count - 1) in at its computed shift */
+ nir_ior(&b, nir_ishl(&b, num_wg_x, num_wg_x_split),
+ nir_ior(&b, nir_ishl(&b, num_wg_y, num_wg_y_split),
+ nir_ishl(&b, num_wg_z, num_wg_z_split))));
+
+ nir_store_global(&b, job_dim_ptr, 8, nir_vec2(&b, dims, split), 3); /* write mask 3: both words */
+
+ nir_ssa_def *num_wg_x_ptr = get_input_field(&b, num_wg_sysval[0]);
+ nir_push_if(&b, nir_ine(&b, num_wg_x_ptr, nir_imm_int64(&b, 0))); /* only the first pointer is tested before writing all three */
+ nir_store_global(&b, num_wg_x_ptr, 8, nir_channel(&b, num_wg, 0), 1); /* stores the original counts, not count - 1 */
+ nir_store_global(&b, get_input_field(&b, num_wg_sysval[1]), 8, nir_channel(&b, num_wg, 1), 1);
+ nir_store_global(&b, get_input_field(&b, num_wg_sysval[2]), 8, nir_channel(&b, num_wg, 2), 1);
+ nir_pop_if(&b, NULL);
+ nir_pop_if(&b, NULL);
+
+ struct panfrost_compile_inputs inputs = { .gpu_id = dev->gpu_id };
+ struct pan_shader_info shader_info; /* filled in by pan_shader_compile() */
+ struct util_dynarray binary;
+
+ util_dynarray_init(&binary, NULL);
+ pan_shader_compile(dev, b.shader, &inputs, &binary, &shader_info);
+
+ ralloc_free(b.shader); /* the NIR shader is no longer needed once compiled */
+
+ assert(!shader_info.tls_size); /* the descriptors set up below provide no thread-local storage, */
+ assert(!shader_info.wls_size); /* no workgroup-local storage, */
+ assert(!shader_info.sysvals.sysval_count); /* and no sysvals are uploaded by the emit path */
+
+ dev->indirect_dispatch.bin =
+ panfrost_bo_create(dev, binary.size, PAN_BO_EXECUTE); /* NOTE(review): result is used without a NULL check */
+
+ memcpy(dev->indirect_dispatch.bin->ptr.cpu, binary.data, binary.size);
+ util_dynarray_fini(&binary);
+
+ dev->indirect_dispatch.push = shader_info.push; /* kept for get_push_uniforms() */
+ dev->indirect_dispatch.descs = /* one BO sized for the renderer state plus the local storage descriptor */
+ panfrost_bo_create(dev,
+ MALI_RENDERER_STATE_LENGTH +
+ MALI_LOCAL_STORAGE_LENGTH,
+ 0);
+
+ mali_ptr address = dev->indirect_dispatch.bin->ptr.gpu;
+ if (!pan_is_bifrost(dev))
+ address |= shader_info.midgard.first_tag; /* non-Bifrost: OR the first tag into the shader address */
+
+ void *rsd = dev->indirect_dispatch.descs->ptr.cpu; /* renderer state goes first in the BO */
+ pan_pack(rsd, RENDERER_STATE, cfg) {
+ pan_shader_prepare_rsd(dev, &shader_info, address, &cfg);
+ }
+
+ void *tsd = dev->indirect_dispatch.descs->ptr.cpu + /* local storage descriptor follows the renderer state */
+ MALI_RENDERER_STATE_LENGTH;
+ pan_pack(tsd, LOCAL_STORAGE, ls) {
+ ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
+ };
+}
+
+void /* Drops the references on the two BOs created by pan_indirect_dispatch_init() */
+pan_indirect_dispatch_cleanup(struct panfrost_device *dev)
+{
+ panfrost_bo_unreference(dev->indirect_dispatch.bin); /* shader binary */
+ panfrost_bo_unreference(dev->indirect_dispatch.descs); /* renderer state + local storage descriptors */
+}
diff --git a/src/panfrost/lib/pan_indirect_dispatch.h b/src/panfrost/lib/pan_indirect_dispatch.h
new file mode 100644
index 00000000000..c48f61dd39d
--- /dev/null
+++ b/src/panfrost/lib/pan_indirect_dispatch.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2021 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __PAN_INDIRECT_DISPATCH_SHADERS_H__
+#define __PAN_INDIRECT_DISPATCH_SHADERS_H__
+
+struct panfrost_device; /* device type taken by pan_indirect_dispatch_init() and pan_indirect_dispatch_cleanup() */
+struct pan_scoreboard;
+struct pan_pool;
+
+struct pan_indirect_dispatch_info { /* Caller-provided GPU addresses for one indirect dispatch */
+ mali_ptr job; /* compute job to be patched by the helper job */
+ mali_ptr indirect_dim; /* location of the three 32-bit workgroup counts */
+ mali_ptr num_wg_sysval[3]; /* where to store the X/Y/Z counts; set [0] to 0 to skip all three */
+};
+
+unsigned /* Emits the helper compute job for *dispatch_info; return value presumably identifies the queued job - confirm against panfrost_add_job() */
+pan_indirect_dispatch_emit(struct pan_pool *pool,
+ struct pan_scoreboard *scoreboard,
+ const struct pan_indirect_dispatch_info *dispatch_info);
+
+void /* Compiles the helper shader and creates the BOs stored in dev->indirect_dispatch */
+pan_indirect_dispatch_init(struct panfrost_device *dev);
+
+void /* Releases the BOs created by pan_indirect_dispatch_init() */
+pan_indirect_dispatch_cleanup(struct panfrost_device *dev);
+
+#endif
More information about the mesa-commit
mailing list