[Mesa-dev] [PATCH 3/3] etnaviv: Implement ICACHE
Christian Gmeiner
christian.gmeiner at gmail.com
Sat Aug 5 16:13:30 UTC 2017
2017-07-24 10:28 GMT+02:00 Wladimir J. van der Laan <laanwj at gmail.com>:
> This patch adds support for large shaders on GC3000. For example the "terrain"
> glmark benchmark with a large fragment shader will work after this.
>
> If the GPU supports ICACHE, shaders larger than the available state area will
> be uploaded to a bo of their own and instructed to be loaded from memory on
> demand. Small shaders will be uploaded in the usual way. This mimics the
> behavior of the blob.
>
> On GPUs that don't support ICACHE, this patch should make no difference.
>
> Signed-off-by: Wladimir J. van der Laan <laanwj at gmail.com>
Reviewed-by: Christian Gmeiner <christian.gmeiner at gmail.com>
> ---
> src/gallium/drivers/etnaviv/etnaviv_compiler.c | 3 +-
> src/gallium/drivers/etnaviv/etnaviv_compiler.h | 5 +++
> src/gallium/drivers/etnaviv/etnaviv_emit.c | 52 ++++++++++++++++++--------
> src/gallium/drivers/etnaviv/etnaviv_internal.h | 4 ++
> src/gallium/drivers/etnaviv/etnaviv_screen.c | 4 +-
> src/gallium/drivers/etnaviv/etnaviv_shader.c | 45 +++++++++++++++++++++-
> 6 files changed, 95 insertions(+), 18 deletions(-)
>
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
> index fbe66d0..0664d52 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
> @@ -2277,7 +2277,7 @@ etna_compile_check_limits(struct etna_compile *c)
> /* round up number of uniforms, including immediates, in units of four */
> int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
>
> - if (c->inst_ptr > c->specs->max_instructions) {
> + if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
> DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
> c->specs->max_instructions);
> return false;
> @@ -2501,6 +2501,7 @@ etna_compile_shader(struct etna_shader_variant *v)
> v->vs_pointsize_out_reg = -1;
> v->ps_color_out_reg = -1;
> v->ps_depth_out_reg = -1;
> + v->needs_icache = c->inst_ptr > c->specs->max_instructions;
> copy_uniform_state_to_shader(c, v);
>
> if (c->info.processor == PIPE_SHADER_VERTEX) {
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.h b/src/gallium/drivers/etnaviv/etnaviv_compiler.h
> index 88a093f..f5c1689 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_compiler.h
> +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.h
> @@ -94,12 +94,17 @@ struct etna_shader_variant {
> /* unknown input property (XX_INPUT_COUNT, field UNK8) */
> uint32_t input_count_unk8;
>
> + /* shader is larger than GPU instruction limit, thus needs icache */
> + bool needs_icache;
> +
> /* shader variants form a linked list */
> struct etna_shader_variant *next;
>
> /* replicated here to avoid passing extra ptrs everywhere */
> struct etna_shader *shader;
> struct etna_shader_key key;
> +
> + struct etna_bo *bo; /* cached code memory bo handle (for icache) */
> };
>
> struct etna_varying {
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c b/src/gallium/drivers/etnaviv/etnaviv_emit.c
> index 273b3d0..c2117d5 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_emit.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c
> @@ -421,9 +421,6 @@ etna_emit_state(struct etna_context *ctx)
> if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
> /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
> /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
> - if (ctx->specs.has_shader_range_registers) {
> - /*0085C*/ EMIT_STATE(VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
> - }
> }
> if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
> /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
> @@ -534,10 +531,6 @@ etna_emit_state(struct etna_context *ctx)
> : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
> /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
> /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
> - if (ctx->specs.has_shader_range_registers) {
> - /*0101C*/ EMIT_STATE(PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
> - 0x100);
> - }
> }
> if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
> uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
> @@ -739,14 +732,43 @@ etna_emit_state(struct etna_context *ctx)
> if (dirty & (ETNA_DIRTY_SHADER)) {
> /* Special case: a new shader was loaded; simply re-load all uniforms and
> * shader code at once */
> - /*04000 or 0C000*/
> - etna_set_state_multi(stream, ctx->specs.vs_offset,
> - ctx->shader_state.vs_inst_mem_size,
> - ctx->shader_state.VS_INST_MEM);
> - /*06000 or 0D000*/
> - etna_set_state_multi(stream, ctx->specs.ps_offset,
> - ctx->shader_state.ps_inst_mem_size,
> - ctx->shader_state.PS_INST_MEM);
> + if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
> + assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers);
> + /* Set icache (VS) */
> + etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
> + etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
> + VIVS_VS_ICACHE_CONTROL_ENABLE |
> + VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
> + assert(ctx->shader_state.VS_INST_ADDR.bo);
> + etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
> +
> + /* Set icache (PS) */
> + etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);
> + etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
> + VIVS_VS_ICACHE_CONTROL_ENABLE |
> + VIVS_VS_ICACHE_CONTROL_FLUSH_PS);
> + assert(ctx->shader_state.PS_INST_ADDR.bo);
> + etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
> + } else {
> + /* Upload shader directly, first flushing and disabling icache if
> + * supported on this hw */
> + if (ctx->specs.has_icache) {
> + etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
> + VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
> + VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
> + }
> + if (ctx->specs.has_shader_range_registers) {
> + etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
> + etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
> + 0x100);
> + }
> + etna_set_state_multi(stream, ctx->specs.vs_offset,
> + ctx->shader_state.vs_inst_mem_size,
> + ctx->shader_state.VS_INST_MEM);
> + etna_set_state_multi(stream, ctx->specs.ps_offset,
> + ctx->shader_state.ps_inst_mem_size,
> + ctx->shader_state.PS_INST_MEM);
> + }
>
> if (ctx->specs.has_unified_uniforms) {
> etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_internal.h b/src/gallium/drivers/etnaviv/etnaviv_internal.h
> index 5c13f23..a6544f6 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_internal.h
> +++ b/src/gallium/drivers/etnaviv/etnaviv_internal.h
> @@ -76,6 +76,8 @@ struct etna_specs {
> unsigned single_buffer : 1;
> /* has unified uniforms memory */
> unsigned has_unified_uniforms : 1;
> + /* can load shader instructions from memory */
> + unsigned has_icache : 1;
> /* can use any kind of wrapping mode on npot textures */
> unsigned npot_tex_any_wrap;
> /* number of bits per TS tile */
> @@ -250,6 +252,8 @@ struct compiled_shader_state {
> uint32_t VS_UNIFORMS[ETNA_MAX_UNIFORMS * 4];
> uint32_t *PS_INST_MEM;
> uint32_t PS_UNIFORMS[ETNA_MAX_UNIFORMS * 4];
> + struct etna_reloc PS_INST_ADDR;
> + struct etna_reloc VS_INST_ADDR;
> };
>
> /* state of some 3d and common registers relevant to etna driver */
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> index 81480e9..4fcbe87 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> @@ -665,7 +665,8 @@ etna_get_specs(struct etna_screen *screen)
> * same.
> */
> screen->specs.ps_offset = 0x8000 + 0x1000;
> - screen->specs.max_instructions = 256;
> + screen->specs.max_instructions = 256; /* maximum number instructions for non-icache use */
> + screen->specs.has_icache = true;
> } else {
> if (instruction_count > 256) { /* unified instruction memory? */
> screen->specs.vs_offset = 0xC000;
> @@ -676,6 +677,7 @@ etna_get_specs(struct etna_screen *screen)
> screen->specs.ps_offset = 0x6000;
> screen->specs.max_instructions = instruction_count / 2;
> }
> + screen->specs.has_icache = false;
> }
>
> if (VIV_FEATURE(screen, chipMinorFeatures1, HALTI0)) {
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_shader.c b/src/gallium/drivers/etnaviv/etnaviv_shader.c
> index b5256e4..6012680 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_shader.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_shader.c
> @@ -29,12 +29,30 @@
> #include "etnaviv_compiler.h"
> #include "etnaviv_context.h"
> #include "etnaviv_debug.h"
> +#include "etnaviv_screen.h"
> #include "etnaviv_util.h"
>
> #include "tgsi/tgsi_parse.h"
> #include "util/u_math.h"
> #include "util/u_memory.h"
>
> +/* Upload shader code to bo, if not already done */
> +static bool etna_icache_upload_shader(struct etna_context *ctx, struct etna_shader_variant *v)
> +{
> + if (v->bo)
> + return true;
> + v->bo = etna_bo_new(ctx->screen->dev, v->code_size*4, DRM_ETNA_GEM_CACHE_UNCACHED);
> + if (!v->bo)
> + return false;
> +
> + void *buf = etna_bo_map(v->bo);
> + etna_bo_cpu_prep(v->bo, DRM_ETNA_PREP_WRITE);
> + memcpy(buf, v->code, v->code_size*4);
> + etna_bo_cpu_fini(v->bo);
> + DBG("Uploaded %s of %u words to bo %p", v->processor == PIPE_SHADER_FRAGMENT ? "fs":"vs", v->code_size, v->bo);
> + return true;
> +}
> +
> /* Link vs and fs together: fill in shader_state from vs and fs
> * as this function is called every time a new fs or vs is bound, the goal is to
> * do little processing as possible here, and to precompute as much as possible in
> @@ -45,7 +63,7 @@
> */
> static bool
> etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs,
> - const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
> + struct etna_shader_variant *vs, struct etna_shader_variant *fs)
> {
> struct etna_shader_link_info link = { };
>
> @@ -164,9 +182,32 @@ etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs,
> /* reference instruction memory */
> cs->vs_inst_mem_size = vs->code_size;
> cs->VS_INST_MEM = vs->code;
> +
> cs->ps_inst_mem_size = fs->code_size;
> cs->PS_INST_MEM = fs->code;
>
> + if (vs->needs_icache | fs->needs_icache) {
> + /* If either of the shaders needs ICACHE, we use it for both. It is
> + * either switched on or off for the entire shader processor.
> + */
> + if (!etna_icache_upload_shader(ctx, vs) ||
> + !etna_icache_upload_shader(ctx, fs)) {
> + assert(0);
> + return false;
> + }
> +
> + cs->VS_INST_ADDR.bo = vs->bo;
> + cs->VS_INST_ADDR.offset = 0;
> + cs->VS_INST_ADDR.flags = ETNA_RELOC_READ;
> + cs->PS_INST_ADDR.bo = fs->bo;
> + cs->PS_INST_ADDR.offset = 0;
> + cs->PS_INST_ADDR.flags = ETNA_RELOC_READ;
> + } else {
> + /* clear relocs */
> + memset(&cs->VS_INST_ADDR, 0, sizeof(cs->VS_INST_ADDR));
> + memset(&cs->PS_INST_ADDR, 0, sizeof(cs->PS_INST_ADDR));
> + }
> +
> return true;
> }
>
> @@ -352,6 +393,8 @@ etna_delete_shader_state(struct pipe_context *pctx, void *ss)
> while (v) {
> t = v;
> v = v->next;
> + if (t->bo)
> + etna_bo_del(t->bo);
> etna_destroy_shader(t);
> }
>
> --
> 2.7.4
>
greets
--
Christian Gmeiner, MSc
https://christian-gmeiner.info
More information about the mesa-dev mailing list