[PATCH 13/19] etnaviv: GC7000: State changes for HALTI3..5

Christian Gmeiner christian.gmeiner at gmail.com
Sun Nov 5 15:16:51 UTC 2017


2017-10-30 17:16 GMT+01:00 Wladimir J. van der Laan <laanwj at gmail.com>:
> Update the state objects to hold the new state, and extend the emit
> function to emit it.
>
> Signed-off-by: Wladimir J. van der Laan <laanwj at gmail.com>

Reviewed-by: Christian Gmeiner <christian.gmeiner at gmail.com>

> ---
>  src/gallium/drivers/etnaviv/etnaviv_emit.c     | 247 +++++++++++++++++++------
>  src/gallium/drivers/etnaviv/etnaviv_internal.h |   4 +
>  src/gallium/drivers/etnaviv/etnaviv_state.c    |  35 +++-
>  src/gallium/drivers/etnaviv/etnaviv_zsa.c      |   3 +-
>  4 files changed, 217 insertions(+), 72 deletions(-)
>
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c b/src/gallium/drivers/etnaviv/etnaviv_emit.c
> index f388a89..692275a 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_emit.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c
> @@ -291,6 +291,91 @@ required_stream_size(struct etna_context *ctx)
>     return size;
>  }
>
> +/* Emit state that only exists on HALTI5+ */
> +static void
> +emit_halti5_only_state(struct etna_context *ctx, int vs_output_count)
> +{
> +   struct etna_cmd_stream *stream = ctx->stream;
> +   uint32_t dirty = ctx->dirty;
> +   struct etna_coalesce coalesce;
> +
> +   etna_coalesce_start(stream, &coalesce);
> +   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
> +      /* Magic states (load balancing, inter-unit sync, buffers) */
> +      /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count | ((vs_output_count * 0x10) << 8));
> +      /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e | ((0x110/vs_output_count) << 20));
> +      for (int x = 0; x < 4; ++x) {
> +         /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
> +      }
> +   }
> +   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
> +      for (int x = 0; x < 4; ++x) {
> +         /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x), ctx->shader_state.VS_INPUT[x]);
> +      }
> +   }
> +   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
> +      /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
> +      /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count);
> +      /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
> +      /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS, ctx->shader_state.GL_HALTI5_SH_SPECIALS);
> +   }
> +   etna_coalesce_end(stream, &coalesce);
> +}
> +
> +/* Emit state that no longer exists on HALTI5 */
> +static void
> +emit_pre_halti5_state(struct etna_context *ctx)
> +{
> +   struct etna_cmd_stream *stream = ctx->stream;
> +   uint32_t dirty = ctx->dirty;
> +   struct etna_coalesce coalesce;
> +
> +   etna_coalesce_start(stream, &coalesce);
> +   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
> +      /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
> +   }
> +   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
> +      for (int x = 0; x < 4; ++x) {
> +        /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
> +      }
> +   }
> +   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
> +      for (int x = 0; x < 4; ++x) {
> +        /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
> +      }
> +   }
> +   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
> +      /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
> +   }
> +   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
> +      for (int x = 0; x < 10; ++x) {
> +         /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
> +      }
> +   }
> +   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
> +      /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
> +      for (int x = 0; x < 4; ++x) {
> +         /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
> +      }
> +      for (int x = 0; x < 16; ++x) {
> +         /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
> +      }
> +   }
> +   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
> +      /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
> +   }
> +   if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
> +      /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
> +   }
> +   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
> +      /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
> +      for (int x = 0; x < 2; ++x) {
> +         /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
> +      }
> +   }
> +   etna_coalesce_end(stream, &coalesce);
> +}
> +
>  /* Weave state before draw operation. This function merges all the compiled
>   * state blocks under the context into one device register state. Parts of
>   * this state that are changed since last call (dirty) will be uploaded as
> @@ -350,16 +435,31 @@ etna_emit_state(struct etna_context *ctx)
>      * a) the number of vertex elements written matters: so write only active ones
>      * b) the vertex element states must all be written: do not skip entries that stay the same */
>     if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
> -      /* Special case: vertex elements must always be sent in full if changed */
> -      /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
> -         ctx->vertex_elements->num_elements,
> -         ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
> -      if (ctx->specs.halti >= 2) {
> -         /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
> +      if (ctx->specs.halti >= 5) {
> +         /*17800*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
> +            ctx->vertex_elements->num_elements,
> +            ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0);
> +         /*17A00*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
>              ctx->vertex_elements->num_elements,
>              ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
> +         /*17A80*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
> +            ctx->vertex_elements->num_elements,
> +            ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1);
> +      } else {
> +         /* Special case: vertex elements must always be sent in full if changed */
> +         /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
> +            ctx->vertex_elements->num_elements,
> +            ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
> +         if (ctx->specs.halti >= 2) {
> +            /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
> +               ctx->vertex_elements->num_elements,
> +               ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
> +         }
>        }
>     }
> +   unsigned vs_output_count = etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex
> +                           ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
> +                           : ctx->shader_state.VS_OUTPUT_COUNT;
>
>     /* The following code is originally generated by gen_merge_state.py, to
>      * emit state in increasing order of address (this makes it possible to merge
> @@ -400,52 +500,47 @@ etna_emit_state(struct etna_context *ctx)
>        /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
>        /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
>     }
> -   if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
> -      /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
> -      /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
> -   }
>     if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
>        /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
>     }
>     if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
> -      for (int x = 1; x < ctx->vertex_buffer.count; ++x) {
> -         /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
> -      }
> -      for (int x = 1; x < ctx->vertex_buffer.count; ++x) {
> -         if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
> -            /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
> +      if (ctx->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
> +         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
> +            /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
> +         }
> +         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
> +            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
> +               /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
> +            }
> +         }
> +         for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
> +            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
> +               /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_UNK14680(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_UNK14680);
> +            }
> +         }
> +      } else {
> +         /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
> +         /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
> +         for (int x = 1; x < ctx->vertex_buffer.count; ++x) {
> +            /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
> +         }
> +         for (int x = 1; x < ctx->vertex_buffer.count; ++x) {
> +            if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
> +               /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
> +            }
>           }
>        }
>     }
> -   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
> -      /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
> -   }
>     if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {
> -      bool point_size_per_vertex =
> -         etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex;
>
> -      /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT,
> -                           point_size_per_vertex
> -                              ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
> -                              : ctx->shader_state.VS_OUTPUT_COUNT);
> +      /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count);
>     }
>     if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
>        /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
>        /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
>     }
>     if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
> -      for (int x = 0; x < 4; ++x) {
> -         /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
> -      }
> -   }
> -   if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
> -      for (int x = 0; x < 4; ++x) {
> -         /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
> -      }
> -   }
> -   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
>        /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
> -      /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
>     }
>     if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
>        /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
> @@ -474,11 +569,6 @@ etna_emit_state(struct etna_context *ctx)
>        /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
>        /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
>     }
> -   if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
> -      for (int x = 0; x < 10; ++x) {
> -         /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
> -      }
> -   }
>     if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
>                           ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
>        /* this is a bit of a mess: rasterizer.scissor determines whether to use
> @@ -534,17 +624,7 @@ etna_emit_state(struct etna_context *ctx)
>     if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
>        /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
>     }
> -   if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
> -      /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
> -      for (int x = 0; x < 4; ++x) {
> -         /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
> -      }
> -      for (int x = 0; x < 16; ++x) {
> -         /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
> -      }
> -   }
>     if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
> -      /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
>        /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
>        /*01008*/ EMIT_STATE(PS_INPUT_COUNT,
>                             ctx->framebuffer.msaa_mode
> @@ -555,7 +635,6 @@ etna_emit_state(struct etna_context *ctx)
>                                ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
>                                : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
>        /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
> -      /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
>     }
>     if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
>        uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
> @@ -641,6 +720,7 @@ etna_emit_state(struct etna_context *ctx)
>        /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
>        /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
>     }
> +
>     if (unlikely(dirty & (ETNA_DIRTY_SAMPLER_VIEWS | ETNA_DIRTY_SAMPLERS))) {
>        for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
>           uint32_t val = 0; /* 0 == sampler inactive */
> @@ -710,16 +790,19 @@ etna_emit_state(struct etna_context *ctx)
>           }
>        }
>     }
> +
>     if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
>        /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
> -      /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
> -      for (int x = 0; x < 2; ++x) {
> -         /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
> -      }
>     }
>     etna_coalesce_end(stream, &coalesce);
>     /* end only EMIT_STATE */
>
> +   /* Emit strongly architecture-specific state */
> +   if (ctx->specs.halti >= 5)
> +      emit_halti5_only_state(ctx, vs_output_count);
> +   else
> +      emit_pre_halti5_state(ctx);
> +
>     /* Insert a FE/PE stall as changing the shader instructions (and maybe
>      * the uniforms) can corrupt the previous in-progress draw operation.
>      * Observed with amoeba on GC2000 during the right-to-left rendering
> @@ -754,10 +837,40 @@ etna_emit_state(struct etna_context *ctx)
>           ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size);
>
>     /**** Large dynamically-sized state ****/
> +   bool do_uniform_flush = ctx->specs.halti < 5;
>     if (dirty & (ETNA_DIRTY_SHADER)) {
>        /* Special case: a new shader was loaded; simply re-load all uniforms and
>         * shader code at once */
> -      if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
> +      /* This sequence is special, do not change ordering unless necessary. According to comment
> +         snippets in the Vivante kernel driver a process called "steering" goes on while programming
> +         shader state. This (as I understand it) means certain unified states are "steered"
> +         toward a specific shader unit (VS/PS/...) based on either explicit flags in register
> +         00860, or what other state is written before "auto-steering". So this means some
> +         state can legitimately be programmed multiple times.
> +       */
> +
> +      if (ctx->specs.halti >= 5) { /* ICACHE (HALTI5) */
> +         assert(ctx->shader_state.VS_INST_ADDR.bo && ctx->shader_state.PS_INST_ADDR.bo);
> +         /* Set icache (VS) */
> +         etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0);
> +         etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH, ctx->shader_state.vs_inst_mem_size / 4);
> +         assert(ctx->shader_state.VS_INST_ADDR.bo);
> +         etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
> +         etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
> +         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
> +         etna_set_state(stream, VIVS_VS_ICACHE_COUNT, ctx->shader_state.vs_inst_mem_size / 4 - 1);
> +
> +         /* Set icache (PS) */
> +         etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0);
> +         etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH, ctx->shader_state.ps_inst_mem_size / 4);
> +         assert(ctx->shader_state.PS_INST_ADDR.bo);
> +         etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
> +         etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
> +         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
> +         etna_set_state(stream, VIVS_PS_ICACHE_COUNT, ctx->shader_state.ps_inst_mem_size / 4 - 1);
> +
> +      } else if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
> +         /* ICACHE (pre-HALTI5) */
>           assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers);
>           /* Set icache (VS) */
>           etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
> @@ -799,11 +912,14 @@ etna_emit_state(struct etna_context *ctx)
>           etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
>           etna_set_state(stream, VIVS_PS_UNIFORM_BASE, ctx->specs.max_vs_uniforms);
>        }
> -      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
> +
> +      if (do_uniform_flush)
> +         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
>        etna_set_state_multi(stream, ctx->specs.vs_uniforms_offset,
>                                       ctx->shader_state.vs_uniforms_size,
>                                       ctx->shader_state.VS_UNIFORMS);
> -      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
> +      if (do_uniform_flush)
> +         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
>        etna_set_state_multi(stream, ctx->specs.ps_uniforms_offset,
>                                       ctx->shader_state.ps_uniforms_size,
>                                       ctx->shader_state.PS_UNIFORMS);
> @@ -817,9 +933,17 @@ etna_emit_state(struct etna_context *ctx)
>               ctx->shader_state.vs_uniforms_size * 4);
>        memcpy(ctx->gpu3d.PS_UNIFORMS, ctx->shader_state.PS_UNIFORMS,
>               ctx->shader_state.ps_uniforms_size * 4);
> +
> +      if (ctx->specs.halti >= 5) {
> +         /* HALTI5 needs to be prompted to pre-fetch shaders */
> +         etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000);
> +         etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000);
> +         etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
> +      }
>     } else {
>        /* ideally this cache would only be flushed if there are VS uniform changes */
> -      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
> +      if (do_uniform_flush)
> +         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
>        etna_coalesce_start(stream, &coalesce);
>        for (int x = 0; x < ctx->shader.vs->uniforms.const_count; ++x) {
>           if (ctx->gpu3d.VS_UNIFORMS[x] != ctx->shader_state.VS_UNIFORMS[x]) {
> @@ -830,7 +954,8 @@ etna_emit_state(struct etna_context *ctx)
>        etna_coalesce_end(stream, &coalesce);
>
>        /* ideally this cache would only be flushed if there are PS uniform changes */
> -      etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
> +      if (do_uniform_flush)
> +         etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
>        etna_coalesce_start(stream, &coalesce);
>        for (int x = 0; x < ctx->shader.fs->uniforms.const_count; ++x) {
>           if (ctx->gpu3d.PS_UNIFORMS[x] != ctx->shader_state.PS_UNIFORMS[x]) {
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_internal.h b/src/gallium/drivers/etnaviv/etnaviv_internal.h
> index e3d9e12..5a405d2 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_internal.h
> +++ b/src/gallium/drivers/etnaviv/etnaviv_internal.h
> @@ -216,12 +216,15 @@ struct compiled_framebuffer_state {
>  struct compiled_vertex_elements_state {
>     unsigned num_elements;
>     uint32_t FE_VERTEX_ELEMENT_CONFIG[VIVS_FE_VERTEX_ELEMENT_CONFIG__LEN];
> +   uint32_t NFE_GENERIC_ATTRIB_CONFIG0[VIVS_NFE_GENERIC_ATTRIB__LEN];
>     uint32_t NFE_GENERIC_ATTRIB_SCALE[VIVS_NFE_GENERIC_ATTRIB__LEN];
> +   uint32_t NFE_GENERIC_ATTRIB_CONFIG1[VIVS_NFE_GENERIC_ATTRIB__LEN];
>  };
>
>  /* Compiled context->set_vertex_buffer result */
>  struct compiled_set_vertex_buffer {
>     uint32_t FE_VERTEX_STREAM_CONTROL;
> +   uint32_t FE_VERTEX_STREAM_UNK14680;
>     struct etna_reloc FE_VERTEX_STREAM_BASE_ADDR;
>  };
>
> @@ -251,6 +254,7 @@ struct compiled_shader_state {
>     uint32_t GL_VARYING_TOTAL_COMPONENTS;
>     uint32_t GL_VARYING_NUM_COMPONENTS;
>     uint32_t GL_VARYING_COMPONENT_USE[2];
> +   uint32_t GL_HALTI5_SH_SPECIALS;
>     unsigned vs_inst_mem_size;
>     unsigned vs_uniforms_size;
>     unsigned ps_inst_mem_size;
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_state.c b/src/gallium/drivers/etnaviv/etnaviv_state.c
> index b2feb32..5447e8f 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_state.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_state.c
> @@ -135,7 +135,8 @@ etna_set_framebuffer_state(struct pipe_context *pctx,
>           VIVS_PE_COLOR_FORMAT_FORMAT(translate_rs_format(cbuf->base.format)) |
>           VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK |
>           VIVS_PE_COLOR_FORMAT_OVERWRITE |
> -         COND(color_supertiled, VIVS_PE_COLOR_FORMAT_SUPER_TILED);
> +         COND(color_supertiled, VIVS_PE_COLOR_FORMAT_SUPER_TILED) |
> +         COND(color_supertiled && ctx->specs.halti >= 5, VIVS_PE_COLOR_FORMAT_SUPER_TILED_NEW);
>        /* VIVS_PE_COLOR_FORMAT_COMPONENTS() and
>         * VIVS_PE_COLOR_FORMAT_OVERWRITE comes from blend_state
>         * but only if we set the bits above. */
> @@ -211,7 +212,9 @@ etna_set_framebuffer_state(struct pipe_context *pctx,
>        cs->PE_DEPTH_CONFIG =
>           depth_format |
>           COND(depth_supertiled, VIVS_PE_DEPTH_CONFIG_SUPER_TILED) |
> -         VIVS_PE_DEPTH_CONFIG_DEPTH_MODE_Z;
> +         VIVS_PE_DEPTH_CONFIG_DEPTH_MODE_Z |
> +         COND(ctx->specs.halti >= 5, VIVS_PE_DEPTH_CONFIG_DISABLE_ZS) /* Needs to be enabled on GC7000, otherwise depth writes hang w/ TS - apparently it does something else now */
> +         ;
>        /* VIVS_PE_DEPTH_CONFIG_ONLY_DEPTH */
>        /* merged with depth_stencil_alpha */
>
> @@ -542,14 +545,26 @@ etna_vertex_elements_state_create(struct pipe_context *pctx,
>        assert(format_type != ETNA_NO_MATCH);
>        assert(normalize != ETNA_NO_MATCH);
>
> -      cs->FE_VERTEX_ELEMENT_CONFIG[idx] =
> -         COND(nonconsecutive, VIVS_FE_VERTEX_ELEMENT_CONFIG_NONCONSECUTIVE) |
> -         format_type |
> -         VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM(util_format_get_nr_components(elements[idx].src_format)) |
> -         normalize | VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN(ENDIAN_MODE_NO_SWAP) |
> -         VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(elements[idx].vertex_buffer_index) |
> -         VIVS_FE_VERTEX_ELEMENT_CONFIG_START(elements[idx].src_offset) |
> -         VIVS_FE_VERTEX_ELEMENT_CONFIG_END(end_offset - start_offset);
> +      if (ctx->specs.halti < 5) {
> +         cs->FE_VERTEX_ELEMENT_CONFIG[idx] =
> +            COND(nonconsecutive, VIVS_FE_VERTEX_ELEMENT_CONFIG_NONCONSECUTIVE) |
> +            format_type |
> +            VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM(util_format_get_nr_components(elements[idx].src_format)) |
> +            normalize | VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN(ENDIAN_MODE_NO_SWAP) |
> +            VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(elements[idx].vertex_buffer_index) |
> +            VIVS_FE_VERTEX_ELEMENT_CONFIG_START(elements[idx].src_offset) |
> +            VIVS_FE_VERTEX_ELEMENT_CONFIG_END(end_offset - start_offset);
> +      } else { /* HALTI5 spread vertex attrib config over two registers */
> +         cs->NFE_GENERIC_ATTRIB_CONFIG0[idx] =
> +            format_type |
> +            VIVS_NFE_GENERIC_ATTRIB_CONFIG0_NUM(util_format_get_nr_components(elements[idx].src_format)) |
> +            normalize | VIVS_NFE_GENERIC_ATTRIB_CONFIG0_ENDIAN(ENDIAN_MODE_NO_SWAP) |
> +            VIVS_NFE_GENERIC_ATTRIB_CONFIG0_STREAM(elements[idx].vertex_buffer_index) |
> +            VIVS_NFE_GENERIC_ATTRIB_CONFIG0_START(elements[idx].src_offset);
> +         cs->NFE_GENERIC_ATTRIB_CONFIG1[idx] =
> +            COND(nonconsecutive, VIVS_NFE_GENERIC_ATTRIB_CONFIG1_NONCONSECUTIVE) |
> +            VIVS_NFE_GENERIC_ATTRIB_CONFIG1_END(end_offset - start_offset);
> +      }
>        cs->NFE_GENERIC_ATTRIB_SCALE[idx] = 0x3f800000; /* 1 for integer, 1.0 for float */
>     }
>
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_zsa.c b/src/gallium/drivers/etnaviv/etnaviv_zsa.c
> index 22c2020..4e72cd2 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_zsa.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_zsa.c
> @@ -98,7 +98,8 @@ etna_zsa_state_create(struct pipe_context *pctx,
>                                                          : PIPE_FUNC_ALWAYS) |
>        COND(so->depth.writemask, VIVS_PE_DEPTH_CONFIG_WRITE_ENABLE) |
>        COND(early_z, VIVS_PE_DEPTH_CONFIG_EARLY_Z) |
> -      COND(disable_zs, VIVS_PE_DEPTH_CONFIG_DISABLE_ZS);
> +      /* this bit changed meaning with HALTI5: */
> +      COND(disable_zs && ctx->specs.halti < 5, VIVS_PE_DEPTH_CONFIG_DISABLE_ZS);
>     cs->PE_ALPHA_OP =
>        COND(so->alpha.enabled, VIVS_PE_ALPHA_OP_ALPHA_TEST) |
>        VIVS_PE_ALPHA_OP_ALPHA_FUNC(so->alpha.func) |
> --
> 2.7.4
>



-- 
greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info


More information about the etnaviv mailing list