[Mesa-dev] [PATCH 2/5] panfrost: Allocate shaders in their own BOs

Tomeu Vizoso tomeu.vizoso at collabora.com
Mon Aug 5 15:18:33 UTC 2019


Instead of all shaders being stored in a single BO, have each shader in
its own.

This removes the need for a 16MB allocation per context, and allows us
to place transient blend shaders in BOs marked as executable (before
they were allocated in the transient pool, which shouldn't be
executable).

Signed-off-by: Tomeu Vizoso <tomeu.vizoso at collabora.com>
---
 src/gallium/drivers/panfrost/pan_allocate.h   |  2 +
 src/gallium/drivers/panfrost/pan_assemble.c   |  5 ++-
 src/gallium/drivers/panfrost/pan_blend.h      | 11 ++++--
 src/gallium/drivers/panfrost/pan_blend_cso.c  | 39 ++++++++++++++-----
 .../drivers/panfrost/pan_blend_shaders.c      |  7 +++-
 src/gallium/drivers/panfrost/pan_bo_cache.c   |  5 +--
 src/gallium/drivers/panfrost/pan_context.c    | 15 +++++--
 src/gallium/drivers/panfrost/pan_context.h    |  3 +-
 src/gallium/drivers/panfrost/pan_drm.c        |  5 ++-
 9 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/panfrost/pan_allocate.h b/src/gallium/drivers/panfrost/pan_allocate.h
index 8d925ee38a4c..0e06567d2069 100644
--- a/src/gallium/drivers/panfrost/pan_allocate.h
+++ b/src/gallium/drivers/panfrost/pan_allocate.h
@@ -59,6 +59,8 @@ struct panfrost_bo {
         size_t size;
 
         int gem_handle;
+
+        uint32_t flags;
 };
 
 struct panfrost_memory {
diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c
index 15f77585a25c..dd877056d3b5 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -43,6 +43,7 @@ panfrost_shader_compile(
                 gl_shader_stage stage,
                 struct panfrost_shader_state *state)
 {
+        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
         uint8_t *dst;
 
         nir_shader *s;
@@ -80,7 +81,9 @@ panfrost_shader_compile(
          * I bet someone just thought that would be a cute pun. At least,
          * that's how I'd do it. */
 
-        meta->shader = panfrost_upload(&ctx->shaders, dst, size) | program.first_tag;
+        state->bo = panfrost_drm_create_bo(screen, size, PAN_ALLOCATE_EXECUTE);
+        memcpy(state->bo->cpu, dst, size);
+        meta->shader = state->bo->gpu | program.first_tag;
 
         util_dynarray_fini(&program.compiled);
 
diff --git a/src/gallium/drivers/panfrost/pan_blend.h b/src/gallium/drivers/panfrost/pan_blend.h
index a881e0945f48..1269a9b0aa08 100644
--- a/src/gallium/drivers/panfrost/pan_blend.h
+++ b/src/gallium/drivers/panfrost/pan_blend.h
@@ -33,8 +33,10 @@
 /* An internal blend shader descriptor, from the compiler */
 
 struct panfrost_blend_shader {
+        struct panfrost_context *ctx;
+
         /* The compiled shader in GPU memory */
-        struct panfrost_transfer shader;
+        struct panfrost_bo *bo;
 
         /* Byte count of the shader */
         unsigned size;
@@ -53,8 +55,11 @@ struct panfrost_blend_shader {
 /* A blend shader descriptor ready for actual use */
 
 struct panfrost_blend_shader_final {
-        /* The upload, possibly to transient memory */
-        mali_ptr gpu;
+        /* The compiled shader in GPU memory, possibly patched */
+        struct panfrost_bo *bo;
+
+        /* First instruction tag (for tagging the pointer) */
+        unsigned first_tag;
 
         /* Same meaning as panfrost_blend_shader */
         unsigned work_count;
diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c
index f685b25b41b3..92b94326b346 100644
--- a/src/gallium/drivers/panfrost/pan_blend_cso.c
+++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
@@ -151,11 +151,23 @@ panfrost_bind_blend_state(struct pipe_context *pipe,
         ctx->dirty |= PAN_DIRTY_FS;
 }
 
+static void
+panfrost_delete_blend_shader(struct hash_entry *entry)
+{
+        struct panfrost_blend_shader *shader = (struct panfrost_blend_shader *)entry->data;
+        panfrost_bo_unreference(shader->ctx->base.screen, shader->bo);
+}
+
 static void
 panfrost_delete_blend_state(struct pipe_context *pipe,
-                            void *blend)
+                            void *cso)
 {
-        /* TODO: Free shader binary? */
+        struct panfrost_blend_state *blend = (struct panfrost_blend_state *) cso;
+
+        for (unsigned c = 0; c < 4; ++c) {
+                struct panfrost_blend_rt *rt = &blend->rt[c];
+                _mesa_hash_table_u64_clear(rt->shaders, panfrost_delete_blend_shader);
+        }
         ralloc_free(blend);
 }
 
@@ -208,6 +220,9 @@ panfrost_blend_constant(float *out, float *in, unsigned mask)
 struct panfrost_blend_final
 panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
 {
+        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
+        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
+
         /* Grab the format, falling back gracefully if called invalidly (which
          * has to happen for no-color-attachment FBOs, for instance)  */
         struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
@@ -241,23 +256,29 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
         struct panfrost_blend_shader *shader = panfrost_get_blend_shader(ctx, blend, fmt, rti);
         final.is_shader = true;
         final.shader.work_count = shader->work_count;
+        final.shader.first_tag = shader->first_tag;
 
         if (shader->patch_index) {
                 /* We have to specialize the blend shader to use constants, so
-                 * patch in the current constants and upload to transient
-                 * memory */
+                 * copy the shader and patch in the current constants */
 
-                float *patch = (float *) (shader->shader.cpu + shader->patch_index);
+                final.shader.bo = panfrost_drm_create_bo(screen, shader->size, PAN_ALLOCATE_EXECUTE);
+                memcpy(final.shader.bo->cpu, shader->bo->cpu, shader->size);
+
+                float *patch = (float *) (final.shader.bo->cpu + shader->patch_index);
                 memcpy(patch, ctx->blend_color.color, sizeof(float) * 4);
 
-                final.shader.gpu = panfrost_upload_transient(
-                                           ctx, shader->shader.cpu, shader->size);
+                /* Pass ownership to job */
+                panfrost_job_add_bo(job, final.shader.bo);
+                panfrost_bo_unreference(ctx->base.screen, final.shader.bo);
         } else {
                 /* No need to specialize further, use the preuploaded */
-                final.shader.gpu = shader->shader.gpu;
+                final.shader.bo = shader->bo;
+
+                /* Hold a ref to the BO for the duration of this job */
+                panfrost_job_add_bo(job, final.shader.bo);
         }
 
-        final.shader.gpu |= shader->first_tag;
         return final;
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c
index 0063290d7e77..2cf8c89008f7 100644
--- a/src/gallium/drivers/panfrost/pan_blend_shaders.c
+++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c
@@ -132,6 +132,9 @@ panfrost_compile_blend_shader(
         enum pipe_format format)
 {
         struct panfrost_blend_shader res;
+        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
+
+        res.ctx = ctx;
 
         /* Build the shader */
 
@@ -177,8 +180,8 @@ panfrost_compile_blend_shader(
         int size = program.compiled.size;
         uint8_t *dst = program.compiled.data;
 
-        res.shader.cpu = mem_dup(dst, size);
-        res.shader.gpu = panfrost_upload(&ctx->shaders, dst, size);
+        res.bo = panfrost_drm_create_bo(screen, size, PAN_ALLOCATE_EXECUTE);
+        memcpy(res.bo->cpu, dst, size);
 
         /* At least two work registers are needed due to an encoding quirk */
         res.work_count = MAX2(program.work_register_count, 2);
diff --git a/src/gallium/drivers/panfrost/pan_bo_cache.c b/src/gallium/drivers/panfrost/pan_bo_cache.c
index fba495c1dd69..7378d0a8abea 100644
--- a/src/gallium/drivers/panfrost/pan_bo_cache.c
+++ b/src/gallium/drivers/panfrost/pan_bo_cache.c
@@ -84,11 +84,10 @@ panfrost_bo_cache_fetch(
 {
         struct list_head *bucket = pan_bucket(screen, size);
 
-        /* TODO: Honour flags? */
-
         /* Iterate the bucket looking for something suitable */
         list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
-                if (entry->size >= size) {
+                if (entry->size >= size &&
+                    entry->flags == flags) {
                         /* This one works, splice it out of the cache */
                         list_del(&entry->link);
 
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index e9d18605dd02..94c2e76eff80 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -1140,7 +1140,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
 
                 if (blend.is_shader) {
                         ctx->fragment_shader_core.blend.shader =
-                                blend.shader.gpu;
+                                blend.shader.bo->gpu | blend.shader.first_tag;
                 } else {
                         ctx->fragment_shader_core.blend.shader = 0;
                 }
@@ -1215,7 +1215,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
                                 assert(!(is_srgb && blend.is_shader));
 
                                 if (blend.is_shader) {
-                                        rts[i].blend.shader = blend.shader.gpu;
+                                        rts[i].blend.shader = blend.shader.bo->gpu | blend.shader.first_tag;
                                 } else {
                                         rts[i].blend.equation = *blend.equation.equation;
                                         rts[i].blend.constant = blend.equation.constant;
@@ -1904,6 +1904,12 @@ panfrost_delete_shader_state(
                 DBG("Deleting TGSI shader leaks duplicated tokens\n");
         }
 
+        for (unsigned i = 0; i < cso->variant_count; ++i) {
+                struct panfrost_shader_state *shader_state = &cso->variants[i];
+                panfrost_bo_unreference(pctx->screen, shader_state->bo);
+                shader_state->bo = NULL;
+        }
+
         free(so);
 }
 
@@ -2035,6 +2041,7 @@ panfrost_bind_shader_state(
         enum pipe_shader_type type)
 {
         struct panfrost_context *ctx = pan_context(pctx);
+        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
 
         ctx->shader[type] = hwcso;
 
@@ -2098,6 +2105,8 @@ panfrost_bind_shader_state(
 
                 shader_state->compiled = true;
         }
+
+        panfrost_job_add_bo(job, shader_state->bo);
 }
 
 static void
@@ -2528,7 +2537,6 @@ panfrost_destroy(struct pipe_context *pipe)
                 util_blitter_destroy(panfrost->blitter_wallpaper);
 
         panfrost_drm_free_slab(screen, &panfrost->scratchpad);
-        panfrost_drm_free_slab(screen, &panfrost->shaders);
         panfrost_drm_free_slab(screen, &panfrost->tiler_heap);
         panfrost_drm_free_slab(screen, &panfrost->tiler_dummy);
 
@@ -2673,7 +2681,6 @@ panfrost_setup_hardware(struct panfrost_context *ctx)
         struct panfrost_screen *screen = pan_screen(gallium->screen);
 
         panfrost_drm_allocate_slab(screen, &ctx->scratchpad, 64*4, false, 0, 0, 0);
-        panfrost_drm_allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0);
         panfrost_drm_allocate_slab(screen, &ctx->tiler_heap, 4096, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
         panfrost_drm_allocate_slab(screen, &ctx->tiler_dummy, 1, false, PAN_ALLOCATE_INVISIBLE, 0, 0);
 }
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index e8d2417e0cb2..9a34e243b74b 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -108,7 +108,6 @@ struct panfrost_context {
         struct pipe_framebuffer_state pipe_framebuffer;
 
         struct panfrost_memory cmdstream_persistent;
-        struct panfrost_memory shaders;
         struct panfrost_memory scratchpad;
         struct panfrost_memory tiler_heap;
         struct panfrost_memory tiler_dummy;
@@ -224,6 +223,8 @@ struct panfrost_shader_state {
 
         /* Should we enable helper invocations */
         bool helper_invocations;
+
+        struct panfrost_bo *bo;
 };
 
 /* A collection of varyings (the CSO) */
diff --git a/src/gallium/drivers/panfrost/pan_drm.c b/src/gallium/drivers/panfrost/pan_drm.c
index 42cf17503344..8ae541ae11b6 100644
--- a/src/gallium/drivers/panfrost/pan_drm.c
+++ b/src/gallium/drivers/panfrost/pan_drm.c
@@ -88,6 +88,9 @@ panfrost_drm_create_bo(struct panfrost_screen *screen, size_t size,
         /* Kernel will fail (confusingly) with EPERM otherwise */
         assert(size > 0);
 
+        /* To maximize BO cache usage, don't allocate tiny BOs */
+        size = MAX2(size, 4096);
+
         unsigned translated_flags = 0;
 
         /* TODO: translate flags to kernel flags, if the kernel supports */
@@ -120,6 +123,7 @@ panfrost_drm_create_bo(struct panfrost_screen *screen, size_t size,
                 bo->size = create_bo.size;
                 bo->gpu = create_bo.offset;
                 bo->gem_handle = create_bo.handle;
+                bo->flags = flags;
         }
 
         /* Only mmap now if we know we need to. For CPU-invisible buffers, we
@@ -285,7 +289,6 @@ panfrost_drm_submit_vs_fs_job(struct panfrost_context *ctx, bool has_draws, bool
         struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
 
         /* TODO: Add here the transient pools */
-        panfrost_job_add_bo(job, ctx->shaders.bo);
         panfrost_job_add_bo(job, ctx->scratchpad.bo);
         panfrost_job_add_bo(job, ctx->tiler_heap.bo);
         panfrost_job_add_bo(job, job->polygon_list);
-- 
2.20.1



More information about the mesa-dev mailing list