[Mesa-dev] [PATCH v2 2/5] panfrost: Allocate shaders in their own BOs

Alyssa Rosenzweig alyssa.rosenzweig at collabora.com
Wed Aug 7 14:00:51 UTC 2019


Patches 2-5 have my R-b, looks good! :)

On Wed, Aug 07, 2019 at 10:36:54AM +0200, Tomeu Vizoso wrote:
> Instead of all shaders being stored in a single BO, have each shader in
> its own.
> 
> This removes the need for a 16MB allocation per context, and allows us
> to place transient blend shaders in BOs marked as executable (before
> they were allocated in the transient pool, which shouldn't be
> executable).
> 
> v2: - Store compiled blend shaders in a malloc'ed buffer, to avoid
>       reading from GPU-accessible memory when patching (Alyssa).
>     - Free struct panfrost_blend_shader (Alyssa).
>     - Give the job a reference to regular shaders when emitting
>       (Alyssa).
> 
> Signed-off-by: Tomeu Vizoso <tomeu.vizoso at collabora.com>
> ---
>  src/gallium/drivers/panfrost/pan_allocate.h   |  2 +
>  src/gallium/drivers/panfrost/pan_assemble.c   |  5 ++-
>  src/gallium/drivers/panfrost/pan_blend.h      | 13 ++++--
>  src/gallium/drivers/panfrost/pan_blend_cso.c  | 41 +++++++++++++------
>  .../drivers/panfrost/pan_blend_shaders.c      | 13 ++----
>  src/gallium/drivers/panfrost/pan_bo_cache.c   |  5 +--
>  src/gallium/drivers/panfrost/pan_context.c    | 14 +++++--
>  src/gallium/drivers/panfrost/pan_context.h    |  3 +-
>  src/gallium/drivers/panfrost/pan_drm.c        |  5 ++-
>  9 files changed, 66 insertions(+), 35 deletions(-)
> 
> diff --git a/src/gallium/drivers/panfrost/pan_allocate.h b/src/gallium/drivers/panfrost/pan_allocate.h
> index 8d925ee38a4c..0e06567d2069 100644
> --- a/src/gallium/drivers/panfrost/pan_allocate.h
> +++ b/src/gallium/drivers/panfrost/pan_allocate.h
> @@ -59,6 +59,8 @@ struct panfrost_bo {
>          size_t size;
>  
>          int gem_handle;
> +
> +        uint32_t flags;
>  };
>  
>  struct panfrost_memory {
> diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c
> index 15f77585a25c..dd877056d3b5 100644
> --- a/src/gallium/drivers/panfrost/pan_assemble.c
> +++ b/src/gallium/drivers/panfrost/pan_assemble.c
> @@ -43,6 +43,7 @@ panfrost_shader_compile(
>                  gl_shader_stage stage,
>                  struct panfrost_shader_state *state)
>  {
> +        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
>          uint8_t *dst;
>  
>          nir_shader *s;
> @@ -80,7 +81,9 @@ panfrost_shader_compile(
>           * I bet someone just thought that would be a cute pun. At least,
>           * that's how I'd do it. */
>  
> -        meta->shader = panfrost_upload(&ctx->shaders, dst, size) | program.first_tag;
> +        state->bo = panfrost_drm_create_bo(screen, size, PAN_ALLOCATE_EXECUTE);
> +        memcpy(state->bo->cpu, dst, size);
> +        meta->shader = state->bo->gpu | program.first_tag;
>  
>          util_dynarray_fini(&program.compiled);
>  
> diff --git a/src/gallium/drivers/panfrost/pan_blend.h b/src/gallium/drivers/panfrost/pan_blend.h
> index a881e0945f48..a29353ff0014 100644
> --- a/src/gallium/drivers/panfrost/pan_blend.h
> +++ b/src/gallium/drivers/panfrost/pan_blend.h
> @@ -33,8 +33,10 @@
>  /* An internal blend shader descriptor, from the compiler */
>  
>  struct panfrost_blend_shader {
> -        /* The compiled shader in GPU memory */
> -        struct panfrost_transfer shader;
> +        struct panfrost_context *ctx;
> +
> +        /* The compiled shader */
> +        void *buffer;
>  
>          /* Byte count of the shader */
>          unsigned size;
> @@ -53,8 +55,11 @@ struct panfrost_blend_shader {
>  /* A blend shader descriptor ready for actual use */
>  
>  struct panfrost_blend_shader_final {
> -        /* The upload, possibly to transient memory */
> -        mali_ptr gpu;
> +        /* The compiled shader in GPU memory, possibly patched */
> +        struct panfrost_bo *bo;
> +
> +        /* First instruction tag (for tagging the pointer) */
> +        unsigned first_tag;
>  
>          /* Same meaning as panfrost_blend_shader */
>          unsigned work_count;
> diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c
> index f685b25b41b3..08a86980ca88 100644
> --- a/src/gallium/drivers/panfrost/pan_blend_cso.c
> +++ b/src/gallium/drivers/panfrost/pan_blend_cso.c
> @@ -151,11 +151,24 @@ panfrost_bind_blend_state(struct pipe_context *pipe,
>          ctx->dirty |= PAN_DIRTY_FS;
>  }
>  
> +static void
> +panfrost_delete_blend_shader(struct hash_entry *entry)
> +{
> +        struct panfrost_blend_shader *shader = (struct panfrost_blend_shader *)entry->data;
> +        free(shader->buffer);
> +        free(shader);
> +}
> +
>  static void
>  panfrost_delete_blend_state(struct pipe_context *pipe,
> -                            void *blend)
> +                            void *cso)
>  {
> -        /* TODO: Free shader binary? */
> +        struct panfrost_blend_state *blend = (struct panfrost_blend_state *) cso;
> +
> +        for (unsigned c = 0; c < 4; ++c) {
> +                struct panfrost_blend_rt *rt = &blend->rt[c];
> +                _mesa_hash_table_u64_clear(rt->shaders, panfrost_delete_blend_shader);
> +        }
>          ralloc_free(blend);
>  }
>  
> @@ -208,6 +221,9 @@ panfrost_blend_constant(float *out, float *in, unsigned mask)
>  struct panfrost_blend_final
>  panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
>  {
> +        struct panfrost_screen *screen = pan_screen(ctx->base.screen);
> +        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
> +
>          /* Grab the format, falling back gracefully if called invalidly (which
>           * has to happen for no-color-attachment FBOs, for instance)  */
>          struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
> @@ -241,23 +257,24 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti)
>          struct panfrost_blend_shader *shader = panfrost_get_blend_shader(ctx, blend, fmt, rti);
>          final.is_shader = true;
>          final.shader.work_count = shader->work_count;
> +        final.shader.first_tag = shader->first_tag;
> +
> +        /* Upload the shader */
> +        final.shader.bo = panfrost_drm_create_bo(screen, shader->size, PAN_ALLOCATE_EXECUTE);
> +        memcpy(final.shader.bo->cpu, shader->buffer, shader->size);
> +
> +        /* Pass BO ownership to job */
> +        panfrost_job_add_bo(job, final.shader.bo);
> +        panfrost_bo_unreference(ctx->base.screen, final.shader.bo);
>  
>          if (shader->patch_index) {
>                  /* We have to specialize the blend shader to use constants, so
> -                 * patch in the current constants and upload to transient
> -                 * memory */
> +                 * patch in the current constants */
>  
> -                float *patch = (float *) (shader->shader.cpu + shader->patch_index);
> +                float *patch = (float *) (final.shader.bo->cpu + shader->patch_index);
>                  memcpy(patch, ctx->blend_color.color, sizeof(float) * 4);
> -
> -                final.shader.gpu = panfrost_upload_transient(
> -                                           ctx, shader->shader.cpu, shader->size);
> -        } else {
> -                /* No need to specialize further, use the preuploaded */
> -                final.shader.gpu = shader->shader.gpu;
>          }
>  
> -        final.shader.gpu |= shader->first_tag;
>          return final;
>  }
>  
> diff --git a/src/gallium/drivers/panfrost/pan_blend_shaders.c b/src/gallium/drivers/panfrost/pan_blend_shaders.c
> index 0063290d7e77..2ee86b4e7db3 100644
> --- a/src/gallium/drivers/panfrost/pan_blend_shaders.c
> +++ b/src/gallium/drivers/panfrost/pan_blend_shaders.c
> @@ -133,6 +133,8 @@ panfrost_compile_blend_shader(
>  {
>          struct panfrost_blend_shader res;
>  
> +        res.ctx = ctx;
> +
>          /* Build the shader */
>  
>          nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options, NULL);
> @@ -172,21 +174,14 @@ panfrost_compile_blend_shader(
>          midgard_program program;
>          midgard_compile_shader_nir(&ctx->compiler, shader, &program, true);
>  
> -        /* Upload the shader */
> -
> -        int size = program.compiled.size;
> -        uint8_t *dst = program.compiled.data;
> -
> -        res.shader.cpu = mem_dup(dst, size);
> -        res.shader.gpu = panfrost_upload(&ctx->shaders, dst, size);
> -
>          /* At least two work registers are needed due to an encoding quirk */
>          res.work_count = MAX2(program.work_register_count, 2);
>  
>          /* Allow us to patch later */
>          res.patch_index = program.blend_patch_offset;
>          res.first_tag = program.first_tag;
> -        res.size = size;
> +        res.size = program.compiled.size;
> +        res.buffer = program.compiled.data;
>  
>          return res;
>  }
> diff --git a/src/gallium/drivers/panfrost/pan_bo_cache.c b/src/gallium/drivers/panfrost/pan_bo_cache.c
> index fba495c1dd69..7378d0a8abea 100644
> --- a/src/gallium/drivers/panfrost/pan_bo_cache.c
> +++ b/src/gallium/drivers/panfrost/pan_bo_cache.c
> @@ -84,11 +84,10 @@ panfrost_bo_cache_fetch(
>  {
>          struct list_head *bucket = pan_bucket(screen, size);
>  
> -        /* TODO: Honour flags? */
> -
>          /* Iterate the bucket looking for something suitable */
>          list_for_each_entry_safe(struct panfrost_bo, entry, bucket, link) {
> -                if (entry->size >= size) {
> +                if (entry->size >= size &&
> +                    entry->flags == flags) {
>                          /* This one works, splice it out of the cache */
>                          list_del(&entry->link);
>  
> diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
> index 284c246677df..2fc5ac361979 100644
> --- a/src/gallium/drivers/panfrost/pan_context.c
> +++ b/src/gallium/drivers/panfrost/pan_context.c
> @@ -1062,6 +1062,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
>  
>                  panfrost_patch_shader_state(ctx, variant, PIPE_SHADER_FRAGMENT, false);
>  
> +                panfrost_job_add_bo(job, variant->bo);
> +
>  #define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name
>  
>                  COPY(shader);
> @@ -1140,7 +1142,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
>  
>                  if (blend.is_shader) {
>                          ctx->fragment_shader_core.blend.shader =
> -                                blend.shader.gpu;
> +                                blend.shader.bo->gpu | blend.shader.first_tag;
>                  } else {
>                          ctx->fragment_shader_core.blend.shader = 0;
>                  }
> @@ -1215,7 +1217,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
>                                  assert(!(is_srgb && blend.is_shader));
>  
>                                  if (blend.is_shader) {
> -                                        rts[i].blend.shader = blend.shader.gpu;
> +                                        rts[i].blend.shader = blend.shader.bo->gpu | blend.shader.first_tag;
>                                  } else {
>                                          rts[i].blend.equation = *blend.equation.equation;
>                                          rts[i].blend.constant = blend.equation.constant;
> @@ -1904,6 +1906,12 @@ panfrost_delete_shader_state(
>                  DBG("Deleting TGSI shader leaks duplicated tokens\n");
>          }
>  
> +        for (unsigned i = 0; i < cso->variant_count; ++i) {
> +                struct panfrost_shader_state *shader_state = &cso->variants[i];
> +                panfrost_bo_unreference(pctx->screen, shader_state->bo);
> +                shader_state->bo = NULL;
> +        }
> +
>          free(so);
>  }
>  
> @@ -2528,7 +2536,6 @@ panfrost_destroy(struct pipe_context *pipe)
>                  util_blitter_destroy(panfrost->blitter_wallpaper);
>  
>          panfrost_drm_free_slab(screen, &panfrost->scratchpad);
> -        panfrost_drm_free_slab(screen, &panfrost->shaders);
>          panfrost_drm_free_slab(screen, &panfrost->tiler_heap);
>          panfrost_drm_free_slab(screen, &panfrost->tiler_dummy);
>  
> @@ -2673,7 +2680,6 @@ panfrost_setup_hardware(struct panfrost_context *ctx)
>          struct panfrost_screen *screen = pan_screen(gallium->screen);
>  
>          panfrost_drm_allocate_slab(screen, &ctx->scratchpad, 64*4, false, 0, 0, 0);
> -        panfrost_drm_allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0);
>          panfrost_drm_allocate_slab(screen, &ctx->tiler_heap, 4096, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
>          panfrost_drm_allocate_slab(screen, &ctx->tiler_dummy, 1, false, PAN_ALLOCATE_INVISIBLE, 0, 0);
>  }
> diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
> index e8d2417e0cb2..9a34e243b74b 100644
> --- a/src/gallium/drivers/panfrost/pan_context.h
> +++ b/src/gallium/drivers/panfrost/pan_context.h
> @@ -108,7 +108,6 @@ struct panfrost_context {
>          struct pipe_framebuffer_state pipe_framebuffer;
>  
>          struct panfrost_memory cmdstream_persistent;
> -        struct panfrost_memory shaders;
>          struct panfrost_memory scratchpad;
>          struct panfrost_memory tiler_heap;
>          struct panfrost_memory tiler_dummy;
> @@ -224,6 +223,8 @@ struct panfrost_shader_state {
>  
>          /* Should we enable helper invocations */
>          bool helper_invocations;
> +
> +        struct panfrost_bo *bo;
>  };
>  
>  /* A collection of varyings (the CSO) */
> diff --git a/src/gallium/drivers/panfrost/pan_drm.c b/src/gallium/drivers/panfrost/pan_drm.c
> index 42cf17503344..8ae541ae11b6 100644
> --- a/src/gallium/drivers/panfrost/pan_drm.c
> +++ b/src/gallium/drivers/panfrost/pan_drm.c
> @@ -88,6 +88,9 @@ panfrost_drm_create_bo(struct panfrost_screen *screen, size_t size,
>          /* Kernel will fail (confusingly) with EPERM otherwise */
>          assert(size > 0);
>  
> +        /* To maximize BO cache usage, don't allocate tiny BOs */
> +        size = MAX2(size, 4096);
> +
>          unsigned translated_flags = 0;
>  
>          /* TODO: translate flags to kernel flags, if the kernel supports */
> @@ -120,6 +123,7 @@ panfrost_drm_create_bo(struct panfrost_screen *screen, size_t size,
>                  bo->size = create_bo.size;
>                  bo->gpu = create_bo.offset;
>                  bo->gem_handle = create_bo.handle;
> +                bo->flags = flags;
>          }
>  
>          /* Only mmap now if we know we need to. For CPU-invisible buffers, we
> @@ -285,7 +289,6 @@ panfrost_drm_submit_vs_fs_job(struct panfrost_context *ctx, bool has_draws, bool
>          struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);
>  
>          /* TODO: Add here the transient pools */
> -        panfrost_job_add_bo(job, ctx->shaders.bo);
>          panfrost_job_add_bo(job, ctx->scratchpad.bo);
>          panfrost_job_add_bo(job, ctx->tiler_heap.bo);
>          panfrost_job_add_bo(job, job->polygon_list);
> -- 
> 2.20.1
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190807/7869cf6e/attachment-0001.sig>


More information about the mesa-dev mailing list