[Mesa-dev] [PATCH 3/3] etnaviv: Implement ICACHE

Christian Gmeiner christian.gmeiner at gmail.com
Sat Aug 5 16:13:30 UTC 2017


2017-07-24 10:28 GMT+02:00 Wladimir J. van der Laan <laanwj at gmail.com>:
> This patch adds support for large shaders on GC3000. For example, the "terrain"
> glmark benchmark with a large fragment shader will work after this.
>
> If the GPU supports ICACHE, shaders larger than the available state area will
> be uploaded to a bo of their own, and the GPU will be instructed to load them
> from memory on demand. Small shaders will be uploaded in the usual way. This
> mimics the behavior of the blob.
>
> On GPUs that don't support ICACHE, this patch should make no difference.
>
> Signed-off-by: Wladimir J. van der Laan <laanwj at gmail.com>

Reviewed-by: Christian Gmeiner <christian.gmeiner at gmail.com>

> ---
>  src/gallium/drivers/etnaviv/etnaviv_compiler.c |  3 +-
>  src/gallium/drivers/etnaviv/etnaviv_compiler.h |  5 +++
>  src/gallium/drivers/etnaviv/etnaviv_emit.c     | 52 ++++++++++++++++++--------
>  src/gallium/drivers/etnaviv/etnaviv_internal.h |  4 ++
>  src/gallium/drivers/etnaviv/etnaviv_screen.c   |  4 +-
>  src/gallium/drivers/etnaviv/etnaviv_shader.c   | 45 +++++++++++++++++++++-
>  6 files changed, 95 insertions(+), 18 deletions(-)
>
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
> index fbe66d0..0664d52 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
> @@ -2277,7 +2277,7 @@ etna_compile_check_limits(struct etna_compile *c)
>     /* round up number of uniforms, including immediates, in units of four */
>     int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
>
> -   if (c->inst_ptr > c->specs->max_instructions) {
> +   if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
>        DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
>            c->specs->max_instructions);
>        return false;
> @@ -2501,6 +2501,7 @@ etna_compile_shader(struct etna_shader_variant *v)
>     v->vs_pointsize_out_reg = -1;
>     v->ps_color_out_reg = -1;
>     v->ps_depth_out_reg = -1;
> +   v->needs_icache = c->inst_ptr > c->specs->max_instructions;
>     copy_uniform_state_to_shader(c, v);
>
>     if (c->info.processor == PIPE_SHADER_VERTEX) {
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.h b/src/gallium/drivers/etnaviv/etnaviv_compiler.h
> index 88a093f..f5c1689 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.h
> +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.h
> @@ -94,12 +94,17 @@ struct etna_shader_variant {
>     /* unknown input property (XX_INPUT_COUNT, field UNK8) */
>     uint32_t input_count_unk8;
>
> +   /* shader is larger than GPU instruction limit, thus needs icache */
> +   bool needs_icache;
> +
>     /* shader variants form a linked list */
>     struct etna_shader_variant *next;
>
>     /* replicated here to avoid passing extra ptrs everywhere */
>     struct etna_shader *shader;
>     struct etna_shader_key key;
> +
> +   struct etna_bo *bo; /* cached code memory bo handle (for icache) */
>  };
>
>  struct etna_varying {
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c b/src/gallium/drivers/etnaviv/etnaviv_emit.c
> index 273b3d0..c2117d5 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_emit.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c
> @@ -421,9 +421,6 @@ etna_emit_state(struct etna_context *ctx)
>     if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
>        /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
>        /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
> -      if (ctx->specs.has_shader_range_registers) {
> -         /*0085C*/ EMIT_STATE(VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
> -      }
>     }
>     if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
>        /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
> @@ -534,10 +531,6 @@ etna_emit_state(struct etna_context *ctx)
>                                : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
>        /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
>        /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
> -      if (ctx->specs.has_shader_range_registers) {
> -         /*0101C*/ EMIT_STATE(PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
> -                                        0x100);
> -      }
>     }
>     if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
>        uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
> @@ -739,14 +732,43 @@ etna_emit_state(struct etna_context *ctx)
>     if (dirty & (ETNA_DIRTY_SHADER)) {
>        /* Special case: a new shader was loaded; simply re-load all uniforms and
>         * shader code at once */
> -      /*04000 or 0C000*/
> -      etna_set_state_multi(stream, ctx->specs.vs_offset,
> -                           ctx->shader_state.vs_inst_mem_size,
> -                           ctx->shader_state.VS_INST_MEM);
> -      /*06000 or 0D000*/
> -      etna_set_state_multi(stream, ctx->specs.ps_offset,
> -                           ctx->shader_state.ps_inst_mem_size,
> -                           ctx->shader_state.PS_INST_MEM);
> +      if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
> +         assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers);
> +         /* Set icache (VS) */
> +         etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
> +         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
> +               VIVS_VS_ICACHE_CONTROL_ENABLE |
> +               VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
> +         assert(ctx->shader_state.VS_INST_ADDR.bo);
> +         etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
> +
> +         /* Set icache (PS) */
> +         etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);
> +         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
> +               VIVS_VS_ICACHE_CONTROL_ENABLE |
> +               VIVS_VS_ICACHE_CONTROL_FLUSH_PS);
> +         assert(ctx->shader_state.PS_INST_ADDR.bo);
> +         etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
> +      } else {
> +         /* Upload shader directly, first flushing and disabling icache if
> +          * supported on this hw */
> +         if (ctx->specs.has_icache) {
> +            etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
> +                  VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
> +                  VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
> +         }
> +         if (ctx->specs.has_shader_range_registers) {
> +            etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
> +            etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
> +                                        0x100);
> +         }
> +         etna_set_state_multi(stream, ctx->specs.vs_offset,
> +                              ctx->shader_state.vs_inst_mem_size,
> +                              ctx->shader_state.VS_INST_MEM);
> +         etna_set_state_multi(stream, ctx->specs.ps_offset,
> +                              ctx->shader_state.ps_inst_mem_size,
> +                              ctx->shader_state.PS_INST_MEM);
> +      }
>
>        if (ctx->specs.has_unified_uniforms) {
>           etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_internal.h b/src/gallium/drivers/etnaviv/etnaviv_internal.h
> index 5c13f23..a6544f6 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_internal.h
> +++ b/src/gallium/drivers/etnaviv/etnaviv_internal.h
> @@ -76,6 +76,8 @@ struct etna_specs {
>     unsigned single_buffer : 1;
>     /* has unified uniforms memory */
>     unsigned has_unified_uniforms : 1;
> +   /* can load shader instructions from memory */
> +   unsigned has_icache : 1;
>     /* can use any kind of wrapping mode on npot textures */
>     unsigned npot_tex_any_wrap;
>     /* number of bits per TS tile */
> @@ -250,6 +252,8 @@ struct compiled_shader_state {
>     uint32_t VS_UNIFORMS[ETNA_MAX_UNIFORMS * 4];
>     uint32_t *PS_INST_MEM;
>     uint32_t PS_UNIFORMS[ETNA_MAX_UNIFORMS * 4];
> +   struct etna_reloc PS_INST_ADDR;
> +   struct etna_reloc VS_INST_ADDR;
>  };
>
>  /* state of some 3d and common registers relevant to etna driver */
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> index 81480e9..4fcbe87 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> @@ -665,7 +665,8 @@ etna_get_specs(struct etna_screen *screen)
>         * same.
>         */
>        screen->specs.ps_offset = 0x8000 + 0x1000;
> -      screen->specs.max_instructions = 256;
> +      screen->specs.max_instructions = 256; /* maximum number of instructions for non-icache use */
> +      screen->specs.has_icache = true;
>     } else {
>        if (instruction_count > 256) { /* unified instruction memory? */
>           screen->specs.vs_offset = 0xC000;
> @@ -676,6 +677,7 @@ etna_get_specs(struct etna_screen *screen)
>           screen->specs.ps_offset = 0x6000;
>           screen->specs.max_instructions = instruction_count / 2;
>        }
> +      screen->specs.has_icache = false;
>     }
>
>     if (VIV_FEATURE(screen, chipMinorFeatures1, HALTI0)) {
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_shader.c b/src/gallium/drivers/etnaviv/etnaviv_shader.c
> index b5256e4..6012680 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_shader.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_shader.c
> @@ -29,12 +29,30 @@
>  #include "etnaviv_compiler.h"
>  #include "etnaviv_context.h"
>  #include "etnaviv_debug.h"
> +#include "etnaviv_screen.h"
>  #include "etnaviv_util.h"
>
>  #include "tgsi/tgsi_parse.h"
>  #include "util/u_math.h"
>  #include "util/u_memory.h"
>
> +/* Upload shader code to bo, if not already done */
> +static bool etna_icache_upload_shader(struct etna_context *ctx, struct etna_shader_variant *v)
> +{
> +   if (v->bo)
> +      return true;
> +   v->bo = etna_bo_new(ctx->screen->dev, v->code_size*4, DRM_ETNA_GEM_CACHE_UNCACHED);
> +   if (!v->bo)
> +      return false;
> +
> +   void *buf = etna_bo_map(v->bo);
> +   etna_bo_cpu_prep(v->bo, DRM_ETNA_PREP_WRITE);
> +   memcpy(buf, v->code, v->code_size*4);
> +   etna_bo_cpu_fini(v->bo);
> +   DBG("Uploaded %s of %u words to bo %p", v->processor == PIPE_SHADER_FRAGMENT ? "fs":"vs", v->code_size, v->bo);
> +   return true;
> +}
> +
>  /* Link vs and fs together: fill in shader_state from vs and fs
>   * as this function is called every time a new fs or vs is bound, the goal is to
>   * do little processing as possible here, and to precompute as much as possible in
> @@ -45,7 +63,7 @@
>   */
>  static bool
>  etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs,
> -                  const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
> +                  struct etna_shader_variant *vs, struct etna_shader_variant *fs)
>  {
>     struct etna_shader_link_info link = { };
>
> @@ -164,9 +182,32 @@ etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs,
>     /* reference instruction memory */
>     cs->vs_inst_mem_size = vs->code_size;
>     cs->VS_INST_MEM = vs->code;
> +
>     cs->ps_inst_mem_size = fs->code_size;
>     cs->PS_INST_MEM = fs->code;
>
> +   if (vs->needs_icache | fs->needs_icache) {
> +      /* If either of the shaders needs ICACHE, we use it for both. It is
> +       * either switched on or off for the entire shader processor.
> +       */
> +      if (!etna_icache_upload_shader(ctx, vs) ||
> +          !etna_icache_upload_shader(ctx, fs)) {
> +         assert(0);
> +         return false;
> +      }
> +
> +      cs->VS_INST_ADDR.bo = vs->bo;
> +      cs->VS_INST_ADDR.offset = 0;
> +      cs->VS_INST_ADDR.flags = ETNA_RELOC_READ;
> +      cs->PS_INST_ADDR.bo = fs->bo;
> +      cs->PS_INST_ADDR.offset = 0;
> +      cs->PS_INST_ADDR.flags = ETNA_RELOC_READ;
> +   } else {
> +      /* clear relocs */
> +      memset(&cs->VS_INST_ADDR, 0, sizeof(cs->VS_INST_ADDR));
> +      memset(&cs->PS_INST_ADDR, 0, sizeof(cs->PS_INST_ADDR));
> +   }
> +
>     return true;
>  }
>
> @@ -352,6 +393,8 @@ etna_delete_shader_state(struct pipe_context *pctx, void *ss)
>     while (v) {
>        t = v;
>        v = v->next;
> +      if (t->bo)
> +         etna_bo_del(t->bo);
>        etna_destroy_shader(t);
>     }
>
> --
> 2.7.4
>

greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info


More information about the mesa-dev mailing list