Mesa (master): panfrost: Prepare things for indirect draws

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 7 09:12:40 UTC 2021


Module: Mesa
Branch: master
Commit: 54526d8eca15a8a28e3847de155eb8c584cb286a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=54526d8eca15a8a28e3847de155eb8c584cb286a

Author: Boris Brezillon <boris.brezillon at collabora.com>
Date:   Mon Jan 25 16:45:14 2021 +0100

panfrost: Prepare things for indirect draws

Several things need to be tweaked to re-use existing helpers for
indirect draws:

* Indirect draws should always be considered as instanced draws since
  we don't know in advance how many instances will be requested. For
  each vertex attribute buffer entry we store the element divisor, which
  will be extracted by the compute shader and transformed into a HW
  divisor.

* PRIMITIVE.index_count should be initialized to its default value
  (one, or zero after the minus(1) modification) while waiting for the
  compute shader to patch it.

Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8700>

---

 src/gallium/drivers/panfrost/pan_cmdstream.c | 36 +++++++++++++++++++++++++---
 src/gallium/drivers/panfrost/pan_cmdstream.h |  1 +
 src/gallium/drivers/panfrost/pan_context.c   | 17 +++++++++----
 src/gallium/drivers/panfrost/pan_context.h   |  1 +
 4 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 570ac064284..815f5c1b5ab 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -1479,6 +1479,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
         struct panfrost_device *dev = pan_device(ctx->base.screen);
         struct panfrost_vertex_state *so = ctx->vertex;
         struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
+        bool instanced = ctx->indirect_draw || ctx->instance_count > 1;
         uint32_t image_mask = ctx->image_mask[PIPE_SHADER_VERTEX];
         unsigned nr_images = util_bitcount(image_mask);
 
@@ -1486,7 +1487,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
          * is enabled. Otherwise single record is gauranteed.
          * Also, we allocate more memory than what's needed here if either instancing
          * is enabled or images are present, this can be improved. */
-        unsigned bufs_per_attrib = (ctx->instance_count > 1 || nr_images > 0) ? 2 : 1;
+        unsigned bufs_per_attrib = (instanced || nr_images > 0) ? 2 : 1;
         unsigned nr_bufs = (vs->info.attribute_count * bufs_per_attrib) +
                            (pan_is_bifrost(dev) ? 1 : 0);
 
@@ -1550,9 +1551,32 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
                 /* When there is a divisor, the hardware-level divisor is
                  * the product of the instance divisor and the padded count */
                 unsigned divisor = elem->instance_divisor;
-                unsigned hw_divisor = ctx->padded_count * divisor;
                 unsigned stride = buf->stride;
 
+                if (ctx->indirect_draw) {
+                        /* With indirect draws we can't guess the vertex_count.
+                         * Pre-set the address, stride and size fields, the
+                         * compute shader does the rest.
+                         */
+                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
+                                cfg.pointer = addr;
+                                cfg.stride = stride;
+                                cfg.size = size;
+                        }
+
+                        /* We store the unmodified divisor in the continuation
+                         * slot so the compute shader can retrieve it.
+                         */
+                        pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
+                                cfg.divisor = divisor;
+                        }
+
+                        k += 2;
+                        continue;
+                }
+
+                unsigned hw_divisor = ctx->padded_count * divisor;
+
                 /* If there's a divisor(=1) but no instancing, we want every
                  * attribute to be the same */
 
@@ -1677,7 +1701,9 @@ panfrost_emit_varyings(struct panfrost_batch *batch,
                 unsigned stride, unsigned count)
 {
         unsigned size = stride * count;
-        mali_ptr ptr = panfrost_pool_alloc_aligned(&batch->invisible_pool, size, 64).gpu;
+        mali_ptr ptr =
+                batch->ctx->indirect_draw ? 0 :
+                panfrost_pool_alloc_aligned(&batch->invisible_pool, size, 64).gpu;
 
         pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
                 cfg.stride = stride;
@@ -2079,6 +2105,7 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
                                  mali_ptr *vs_attribs,
                                  mali_ptr *fs_attribs,
                                  mali_ptr *buffers,
+                                 unsigned *buffer_count,
                                  mali_ptr *position,
                                  mali_ptr *psiz)
 {
@@ -2157,6 +2184,9 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
         struct mali_attribute_buffer_packed *varyings =
                 (struct mali_attribute_buffer_packed *) T.cpu;
 
+        if (buffer_count)
+                *buffer_count = xfb_base + ctx->streamout.num_targets;
+
         /* Suppress prefetch on Bifrost */
         memset(varyings + (xfb_base * ctx->streamout.num_targets), 0, sizeof(*varyings));
 
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.h b/src/gallium/drivers/panfrost/pan_cmdstream.h
index eab653b4d60..cdca802615b 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.h
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.h
@@ -82,6 +82,7 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
                                  mali_ptr *vs_attribs,
                                  mali_ptr *fs_attribs,
                                  mali_ptr *buffers,
+                                 unsigned *buffer_count,
                                  mali_ptr *position,
                                  mali_ptr *psiz);
 
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index 5d738f5a1cc..34eb5b58874 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -290,7 +290,9 @@ panfrost_draw_emit_vertex(struct panfrost_batch *batch,
                           const struct pipe_draw_info *info,
                           void *invocation_template,
                           mali_ptr shared_mem, mali_ptr vs_vary,
-                          mali_ptr varyings, void *job)
+                          mali_ptr varyings,
+                          mali_ptr attribs, mali_ptr attrib_bufs,
+                          void *job)
 {
         struct panfrost_context *ctx = batch->ctx;
         struct panfrost_device *device = pan_device(ctx->base.screen);
@@ -308,7 +310,8 @@ panfrost_draw_emit_vertex(struct panfrost_batch *batch,
                 if (!pan_is_bifrost(device))
                         cfg.texture_descriptor_is_64b = true;
                 cfg.state = panfrost_emit_compute_shader_meta(batch, PIPE_SHADER_VERTEX);
-                cfg.attributes = panfrost_emit_vertex_data(batch, &cfg.attribute_buffers);
+                cfg.attributes = attribs;
+                cfg.attribute_buffers = attrib_bufs;
                 cfg.varyings = vs_vary;
                 cfg.varying_buffers = vs_vary ? varyings : 0;
                 cfg.thread_storage = shared_mem;
@@ -390,7 +393,7 @@ panfrost_draw_emit_tiler(struct panfrost_batch *batch,
 
                 cfg.job_task_split = 6;
 
-                cfg.index_count = draw->count;
+                cfg.index_count = ctx->indirect_draw ? 1 : draw->count;
                 if (info->index_size) {
                         cfg.index_type = panfrost_translate_index_size(info->index_size);
                         cfg.indices = indices;
@@ -505,6 +508,7 @@ panfrost_direct_draw(struct panfrost_context *ctx,
         panfrost_batch_set_requirements(batch);
 
         /* Take into account a negative bias */
+        ctx->indirect_draw = false;
         ctx->vertex_count = draw->count + (info->index_size ? abs(info->index_bias) : 0);
         ctx->instance_count = info->instance_count;
         ctx->active_prim = info->mode;
@@ -556,11 +560,14 @@ panfrost_direct_draw(struct panfrost_context *ctx,
                                          ctx->padded_count *
                                          ctx->instance_count,
                                          &vs_vary, &fs_vary, &varyings,
-                                         &pos, &psiz);
+                                         NULL, &pos, &psiz);
+
+        mali_ptr attribs, attrib_bufs;
+        attribs = panfrost_emit_vertex_data(batch, &attrib_bufs);
 
         /* Fire off the draw itself */
         panfrost_draw_emit_vertex(batch, info, &invocation, shared_mem,
-                                  vs_vary, varyings, vertex.cpu);
+                                  vs_vary, varyings, attribs, attrib_bufs, vertex.cpu);
         panfrost_draw_emit_tiler(batch, info, draw, &invocation, shared_mem, indices,
                                  fs_vary, varyings, pos, psiz, tiler.cpu);
         panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler);
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index 9b9856e09a4..2450ec34c04 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -133,6 +133,7 @@ struct panfrost_context {
         uint64_t tf_prims_generated;
         struct panfrost_query *occlusion_query;
 
+        bool indirect_draw;
         unsigned vertex_count;
         unsigned instance_count;
         unsigned offset_start;



More information about the mesa-commit mailing list