Mesa (main): panfrost: Eliminate reserve_* functions

Tue Jun 1 19:26:48 UTC 2021

Module: Mesa
Branch: main
Commit: 478ae974a15a729f8f3a779948603fe39e0614fc
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=478ae974a15a729f8f3a779948603fe39e0614fc

Author: Alyssa Rosenzweig <alyssa at collabora.com>
Date:   Thu May 20 17:09:14 2021 -0400

panfrost: Eliminate reserve_* functions

We always want to reserve _something_, so reserve what we need at batch
creation time and stop trying to re-reserve in a zillion places after.
This has a negligible (<128 bytes per batch) increase in memory usage for
compute-only workloads, but given the amount of simplification, that's a
fair tradeoff.

Signed-off-by: Alyssa Rosenzweig <alyssa at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11074>

---

 src/gallium/drivers/panfrost/pan_blit.c    |  2 +-
 src/gallium/drivers/panfrost/pan_compute.c |  5 --
 src/gallium/drivers/panfrost/pan_context.c | 25 +++------
 src/gallium/drivers/panfrost/pan_job.c     | 88 +++++++-----------------------
 src/gallium/drivers/panfrost/pan_job.h     |  3 -
 5 files changed, 31 insertions(+), 92 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_blit.c b/src/gallium/drivers/panfrost/pan_blit.c
index 3ebce136285..c49f8210a18 100644
--- a/src/gallium/drivers/panfrost/pan_blit.c
+++ b/src/gallium/drivers/panfrost/pan_blit.c
@@ -298,7 +298,7 @@ panfrost_blit(struct pipe_context *pipe,
                 mali_ptr tiler = pan_is_bifrost(dev) ?
                                  panfrost_batch_get_bifrost_tiler(batch, ~0) : 0;
                 pan_blit(&bctx, &batch->pool, &batch->scoreboard,
-                         panfrost_batch_reserve_tls(batch, false), tiler);
+                                batch->tls.gpu, tiler);
 
                 /* We don't want this batch to interfere with subsequent draw
                  * calls, but we want to keep it in the list of pending batches
diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index ad473b34a9d..a93a3ea37c9 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -103,11 +103,6 @@ panfrost_launch_grid(struct pipe_context *pipe,
         struct panfrost_device *dev = pan_device(pipe->screen);
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
 
-        /* Reserve a thread storage descriptor now (will be emitted at submit
-         * time).
-         */
-        panfrost_batch_reserve_tls(batch, true);
-
         ctx->compute_grid = info;
 
         struct panfrost_ptr t =
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index 238bca7d174..186b2d59251 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -227,8 +227,7 @@ static void
 panfrost_draw_emit_vertex(struct panfrost_batch *batch,
                           const struct pipe_draw_info *info,
                           void *invocation_template,
-                          mali_ptr shared_mem, mali_ptr vs_vary,
-                          mali_ptr varyings,
+                          mali_ptr vs_vary, mali_ptr varyings,
                           mali_ptr attribs, mali_ptr attrib_bufs,
                           void *job)
 {
@@ -252,7 +251,7 @@ panfrost_draw_emit_vertex(struct panfrost_batch *batch,
                 cfg.attribute_buffers = attrib_bufs;
                 cfg.varyings = vs_vary;
                 cfg.varying_buffers = vs_vary ? varyings : 0;
-                cfg.thread_storage = shared_mem;
+                cfg.thread_storage = batch->tls.gpu;
                 pan_emit_draw_descs(batch, &cfg, PIPE_SHADER_VERTEX);
         }
 
@@ -357,8 +356,7 @@ panfrost_draw_emit_tiler(struct panfrost_batch *batch,
                          const struct pipe_draw_info *info,
                          const struct pipe_draw_start_count_bias *draw,
                          void *invocation_template,
-                         mali_ptr shared_mem, mali_ptr indices,
-                         mali_ptr fs_vary, mali_ptr varyings,
+                         mali_ptr indices, mali_ptr fs_vary, mali_ptr varyings,
                          mali_ptr pos, mali_ptr psiz, void *job)
 {
         struct panfrost_context *ctx = batch->ctx;
@@ -438,7 +436,7 @@ panfrost_draw_emit_tiler(struct panfrost_batch *batch,
                 cfg.viewport = batch->viewport;
                 cfg.varyings = fs_vary;
                 cfg.varying_buffers = fs_vary ? varyings : 0;
-                cfg.thread_storage = shared_mem;
+                cfg.thread_storage = batch->tls.gpu;
 
                 /* For all primitives but lines DRAW.flat_shading_vertex must
                  * be set to 0 and the provoking vertex is selected with the
@@ -516,8 +514,6 @@ panfrost_direct_draw(struct panfrost_batch *batch,
 
         unsigned vertex_count = ctx->vertex_count;
 
-        mali_ptr shared_mem = panfrost_batch_reserve_tls(batch, false);
-
         unsigned min_index = 0, max_index = 0;
         mali_ptr indices = 0;
 
@@ -571,9 +567,9 @@ panfrost_direct_draw(struct panfrost_batch *batch,
         attribs = panfrost_emit_vertex_data(batch, &attrib_bufs);
 
         /* Fire off the draw itself */
-        panfrost_draw_emit_vertex(batch, info, &invocation, shared_mem,
+        panfrost_draw_emit_vertex(batch, info, &invocation,
                                   vs_vary, varyings, attribs, attrib_bufs, vertex.cpu);
-        panfrost_draw_emit_tiler(batch, info, draw, &invocation, shared_mem, indices,
+        panfrost_draw_emit_tiler(batch, info, draw, &invocation, indices,
                                  fs_vary, varyings, pos, psiz, tiler.cpu);
         panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler);
 
@@ -603,8 +599,6 @@ panfrost_indirect_draw(struct panfrost_batch *batch,
         ctx->drawid = drawid_offset;
         ctx->indirect_draw = true;
 
-        mali_ptr shared_mem = panfrost_batch_reserve_tls(batch, false);
-
         struct panfrost_ptr tiler =
                 panfrost_pool_alloc_aligned(&batch->pool,
                                             pan_is_bifrost(dev) ?
@@ -660,10 +654,9 @@ panfrost_indirect_draw(struct panfrost_batch *batch,
         static struct mali_invocation_packed invocation;
 
         /* Fire off the draw itself */
-        panfrost_draw_emit_vertex(batch, info, &invocation, shared_mem,
-                                  vs_vary, varyings, attribs, attrib_bufs,
-                                  vertex.cpu);
-        panfrost_draw_emit_tiler(batch, info, draw, &invocation, shared_mem,
+        panfrost_draw_emit_vertex(batch, info, &invocation, vs_vary, varyings,
+                                  attribs, attrib_bufs, vertex.cpu);
+        panfrost_draw_emit_tiler(batch, info, draw, &invocation,
                                  index_buf ? index_buf->ptr.gpu : 0,
                                  fs_vary, varyings, pos, psiz, tiler.cpu);
 
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 0bbd27ba297..ce2df7c7f72 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -94,6 +94,25 @@ panfrost_batch_init(struct panfrost_context *ctx,
                         PAN_BO_INVISIBLE, 65536, "Varyings", false, true);
 
         panfrost_batch_add_fbo_bos(batch);
+
+        /* Reserve the framebuffer and local storage descriptors */
+        batch->framebuffer =
+                (dev->quirks & MIDGARD_SFBD) ?
+                panfrost_pool_alloc_desc(&batch->pool, SINGLE_TARGET_FRAMEBUFFER) :
+                panfrost_pool_alloc_desc_aggregate(&batch->pool,
+                                                   PAN_DESC(MULTI_TARGET_FRAMEBUFFER),
+                                                   PAN_DESC(ZS_CRC_EXTENSION),
+                                                   PAN_DESC_ARRAY(MAX2(key->nr_cbufs, 1), RENDER_TARGET));
+
+        /* Add the MFBD tag now, other tags will be added at submit-time */
+        if (!(dev->quirks & MIDGARD_SFBD))
+                batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD;
+
+        /* On Midgard, the TLS is embedded in the FB descriptor */
+        if (pan_is_bifrost(dev))
+                batch->tls = panfrost_pool_alloc_desc(&batch->pool, LOCAL_STORAGE);
+        else
+                batch->tls = batch->framebuffer;
 }
 
 static void
@@ -729,66 +748,6 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
         }
 }
 
-static mali_ptr
-panfrost_batch_reserve_framebuffer(struct panfrost_batch *batch)
-{
-        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
-
-        if (batch->framebuffer.gpu)
-                return batch->framebuffer.gpu;
-
-        /* If we haven't, reserve space for a framebuffer descriptor */
-
-        struct pan_image_view rts[8];
-        struct pan_image_view zs;
-        struct pan_image_view s;
-        struct pan_fb_info fb;
-
-        panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, true);
-
-        unsigned zs_crc_count = pan_fbd_has_zs_crc_ext(dev, &fb) ? 1 : 0;
-        unsigned rt_count = MAX2(fb.rt_count, 1);
-        batch->framebuffer =
-                (dev->quirks & MIDGARD_SFBD) ?
-                panfrost_pool_alloc_desc(&batch->pool, SINGLE_TARGET_FRAMEBUFFER) :
-                panfrost_pool_alloc_desc_aggregate(&batch->pool,
-                                                   PAN_DESC(MULTI_TARGET_FRAMEBUFFER),
-                                                   PAN_DESC_ARRAY(zs_crc_count, ZS_CRC_EXTENSION),
-                                                   PAN_DESC_ARRAY(rt_count, RENDER_TARGET));
-
-        /* Add the MFBD tag now, other tags will be added when emitting the
-         * FB desc.
-         */
-        if (!(dev->quirks & MIDGARD_SFBD))
-                batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD;
-
-        return batch->framebuffer.gpu;
-}
-
-mali_ptr
-panfrost_batch_reserve_tls(struct panfrost_batch *batch, bool compute)
-{
-        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
-
-        /* If we haven't, reserve space for the thread storage descriptor */
-
-        if (batch->tls.gpu)
-                return batch->tls.gpu;
-
-        if (pan_is_bifrost(dev) || compute) {
-                batch->tls = panfrost_pool_alloc_desc(&batch->pool, LOCAL_STORAGE);
-        } else {
-                /* On Midgard, the FB descriptor contains a thread storage
-                 * descriptor, and tiler jobs need more than thread storage
-                 * info. Let's point to the FB desc in that case.
-                 */
-                panfrost_batch_reserve_framebuffer(batch);
-                batch->tls = batch->framebuffer;
-        }
-
-        return batch->tls.gpu;
-}
-
 static void
 panfrost_batch_draw_wallpaper(struct panfrost_batch *batch,
                               struct pan_fb_info *fb)
@@ -962,15 +921,11 @@ panfrost_batch_submit(struct panfrost_batch *batch,
         if (!batch->scoreboard.first_job && !batch->clear)
                 goto out;
 
-        if (batch->scoreboard.first_tiler || batch->clear)
-                panfrost_batch_reserve_framebuffer(batch);
-
         struct pan_fb_info fb;
         struct pan_image_view rts[8], zs, s;
 
         panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, false);
 
-        panfrost_batch_reserve_tls(batch, false);
         panfrost_batch_draw_wallpaper(batch, &fb);
 
 
@@ -981,13 +936,12 @@ panfrost_batch_submit(struct panfrost_batch *batch,
         }
 
         /* Now that all draws are in, we can finally prepare the
-         * FBD for the batch */
+         * FBD for the batch (if there is one). */
 
         panfrost_emit_tls(batch);
-
         panfrost_emit_tile_map(batch, &fb);
 
-        if (batch->framebuffer.gpu)
+        if (batch->scoreboard.first_tiler || batch->clear)
                 panfrost_emit_fbd(batch, &fb);
 
         ret = panfrost_batch_submit_jobs(batch, &fb, in_sync, out_sync);
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index fa920ecbd0e..1a6dc0b44ef 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -189,7 +189,4 @@ panfrost_batch_intersection_scissor(struct panfrost_batch *batch,
 mali_ptr
 panfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, unsigned vertex_count);
 
-mali_ptr
-panfrost_batch_reserve_tls(struct panfrost_batch *batch, bool compute);
-
 #endif