[PATCH v2 12/17] etnaviv: GC7000: State changes for HALTI3..5
Christian Gmeiner
christian.gmeiner at gmail.com
Sat Nov 25 16:10:36 UTC 2017
2017-11-18 10:44 GMT+01:00 Wladimir J. van der Laan <laanwj at gmail.com>:
> Update state objects to add new state, and emit function to emit new
> state.
>
> Signed-off-by: Wladimir J. van der Laan <laanwj at gmail.com>
> Reviewed-by: Christian Gmeiner <christian.gmeiner at gmail.com>
> ---
> src/gallium/drivers/etnaviv/etnaviv_emit.c | 246 +++++++++++++++++++------
> src/gallium/drivers/etnaviv/etnaviv_internal.h | 4 +
> src/gallium/drivers/etnaviv/etnaviv_state.c | 35 +++-
> src/gallium/drivers/etnaviv/etnaviv_zsa.c | 3 +-
> 4 files changed, 216 insertions(+), 72 deletions(-)
>
> Conceptually unchanged since v1, but needed serious rebase.
>
I had to fix a conflict during git am in etnaviv_emit.c regarding
ETNA_DIRTY_VERTEX_BUFFERS.
-----------8<-----------
diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c
b/src/gallium/drivers/etnaviv/etnaviv_emit.c
index 5cb1415..79ba83c 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_emit.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c
@@ -133,6 +133,91 @@ required_stream_size(struct etna_context *ctx)
return size;
}
+/* Emit state that only exists on HALTI5+ */
+static void
+emit_halti5_only_state(struct etna_context *ctx, int vs_output_count)
+{
+ struct etna_cmd_stream *stream = ctx->stream;
+ uint32_t dirty = ctx->dirty;
+ struct etna_coalesce coalesce;
+
+ etna_coalesce_start(stream, &coalesce);
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ /* Magic states (load balancing, inter-unit sync, buffers) */
+ /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count |
((vs_output_count * 0x10) << 8));
+ /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e |
((0x110/vs_output_count) << 20));
+ for (int x = 0; x < 4; ++x) {
+ /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x),
ctx->shader_state.VS_OUTPUT[x]);
+ }
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
+ for (int x = 0; x < 4; ++x) {
+ /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x),
ctx->shader_state.VS_INPUT[x]);
+ }
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0),
ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
+ /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count);
+ /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0),
ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
+ /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS,
ctx->shader_state.GL_HALTI5_SH_SPECIALS);
+ }
+ etna_coalesce_end(stream, &coalesce);
+}
+
+/* Emit state that no longer exists on HALTI5 */
+static void
+emit_pre_halti5_state(struct etna_context *ctx)
+{
+ struct etna_cmd_stream *stream = ctx->stream;
+ uint32_t dirty = ctx->dirty;
+ struct etna_coalesce coalesce;
+
+ etna_coalesce_start(stream, &coalesce);
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ for (int x = 0; x < 4; ++x) {
+ /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
+ }
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
+ for (int x = 0; x < 4; ++x) {
+ /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
+ }
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ for (int x = 0; x < 10; ++x) {
+ /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x),
ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
+ }
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
+ /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04,
ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
+ for (int x = 0; x < 4; ++x) {
+ /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x),
ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
+ }
+ for (int x = 0; x < 16; ++x) {
+ /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x),
ctx->framebuffer.RA_CENTROID_TABLE[x]);
+ }
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
+ /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
+ /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
+ }
+ if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
+ /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS,
ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
+ for (int x = 0; x < 2; ++x) {
+ /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x),
ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
+ }
+ }
+ etna_coalesce_end(stream, &coalesce);
+}
+
/* Weave state before draw operation. This function merges all the compiled
* state blocks under the context into one device register state. Parts of
* this state that are changed since last call (dirty) will be uploaded as
@@ -197,16 +282,31 @@ etna_emit_state(struct etna_context *ctx)
* a) the number of vertex elements written matters: so write only
active ones
* b) the vertex element states must all be written: do not skip
entries that stay the same */
if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
- /* Special case: vertex elements must always be sent in full if
changed */
- /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
- ctx->vertex_elements->num_elements,
- ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
- if (ctx->specs.halti >= 2) {
- /*00780*/ etna_set_state_multi(stream,
VIVS_FE_GENERIC_ATTRIB_SCALE(0),
+ if (ctx->specs.halti >= 5) {
+ /*17800*/ etna_set_state_multi(stream,
VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
+ ctx->vertex_elements->num_elements,
+ ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0);
+ /*17A00*/ etna_set_state_multi(stream,
VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
ctx->vertex_elements->num_elements,
ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
+ /*17A80*/ etna_set_state_multi(stream,
VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
+ ctx->vertex_elements->num_elements,
+ ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1);
+ } else {
+ /* Special case: vertex elements must always be sent in full
if changed */
+ /*00600*/ etna_set_state_multi(stream,
VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
+ ctx->vertex_elements->num_elements,
+ ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
+ if (ctx->specs.halti >= 2) {
+ /*00780*/ etna_set_state_multi(stream,
VIVS_FE_GENERIC_ATTRIB_SCALE(0),
+ ctx->vertex_elements->num_elements,
+ ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
+ }
}
}
+ unsigned vs_output_count =
etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex
+ ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
+ : ctx->shader_state.VS_OUTPUT_COUNT;
/* The following code is originally generated by gen_merge_state.py, to
* emit state in increasing order of address (this makes it
possible to merge
@@ -247,52 +347,47 @@ etna_emit_state(struct etna_context *ctx)
/*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR,
&ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
/*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL,
ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
}
- if (likely((dirty & (ETNA_DIRTY_VERTEX_BUFFERS) &&
ctx->specs.stream_count == 1))) {
- /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR,
&ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
- /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL,
ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
- }
if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
/*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX,
ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
}
if (likely((dirty & (ETNA_DIRTY_VERTEX_BUFFERS)) &&
ctx->specs.stream_count > 1)) {
- for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
- /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x),
&ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
- }
- for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
- if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
- /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x),
ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
+ if (ctx->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
+ for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+ /*14600*/
EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x),
&ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
+ }
+ for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+ if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
+ /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x),
ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
+ }
+ }
+ for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+ if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
+ /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_UNK14680(x),
ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_UNK14680);
+ }
+ }
+ } else {
+ /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR,
&ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
+ /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL,
ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL);
+ for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+ /*00680*/
EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x),
&ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
+ }
+ for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
+ if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
+ /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x),
ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL);
+ }
}
}
}
- if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
- /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
- }
if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {
- bool point_size_per_vertex =
- etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex;
- /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT,
- point_size_per_vertex
- ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
- : ctx->shader_state.VS_OUTPUT_COUNT);
+ /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count);
}
if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
/*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
/*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL,
ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
}
if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
- for (int x = 0; x < 4; ++x) {
- /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
- }
- }
- if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
- for (int x = 0; x < 4; ++x) {
- /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
- }
- }
- if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
/*00830*/ EMIT_STATE(VS_LOAD_BALANCING,
ctx->shader_state.VS_LOAD_BALANCING);
- /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
}
if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
/*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X,
ctx->viewport.PA_VIEWPORT_SCALE_X);
@@ -321,11 +416,6 @@ etna_emit_state(struct etna_context *ctx)
/*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
/*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
}
- if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
- for (int x = 0; x < 10; ++x) {
- /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x),
ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
- }
- }
if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
/* this is a bit of a mess: rasterizer.scissor determines whether to use
@@ -381,17 +471,7 @@ etna_emit_state(struct etna_context *ctx)
if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
/*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
}
- if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
- /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04,
ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
- for (int x = 0; x < 4; ++x) {
- /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x),
ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
- }
- for (int x = 0; x < 16; ++x) {
- /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x),
ctx->framebuffer.RA_CENTROID_TABLE[x]);
- }
- }
if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
- /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
/*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
/*01008*/ EMIT_STATE(PS_INPUT_COUNT,
ctx->framebuffer.msaa_mode
@@ -402,7 +482,6 @@ etna_emit_state(struct etna_context *ctx)
? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
: ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
/*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
- /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
}
if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
@@ -583,12 +662,9 @@ etna_emit_state(struct etna_context *ctx)
}
}
}
+
if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
/*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS,
ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
- /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS,
ctx->shader_state.GL_VARYING_NUM_COMPONENTS);
- for (int x = 0; x < 2; ++x) {
- /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x),
ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
- }
}
if (unlikely(ctx->specs.tex_astc && (dirty & (ETNA_DIRTY_SAMPLER_VIEWS)))) {
for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) {
@@ -601,6 +677,12 @@ etna_emit_state(struct etna_context *ctx)
etna_coalesce_end(stream, &coalesce);
/* end only EMIT_STATE */
+ /* Emit strongly architecture-specific state */
+ if (ctx->specs.halti >= 5)
+ emit_halti5_only_state(ctx, vs_output_count);
+ else
+ emit_pre_halti5_state(ctx);
+
/* Insert a FE/PE stall as changing the shader instructions (and maybe
* the uniforms) can corrupt the previous in-progress draw operation.
* Observed with amoeba on GC2000 during the right-to-left rendering
@@ -635,10 +717,40 @@ etna_emit_state(struct etna_context *ctx)
ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size);
/**** Large dynamically-sized state ****/
+ bool do_uniform_flush = ctx->specs.halti < 5;
if (dirty & (ETNA_DIRTY_SHADER)) {
/* Special case: a new shader was loaded; simply re-load all uniforms and
* shader code at once */
- if (ctx->shader_state.VS_INST_ADDR.bo ||
ctx->shader_state.PS_INST_ADDR.bo) {
+ /* This sequence is special, do not change ordering unless
necessary. According to comment
+ snippets in the Vivante kernel driver a process called
"steering" goes on while programming
+ shader state. This (as I understand it) means certain
unified states are "steered"
+ toward a specific shader unit (VS/PS/...) based on either
explicit flags in register
+ 00860, or what other state is written before
"auto-steering". So this means some
+ state can legitimately be programmed multiple times.
+ */
+
+ if (ctx->specs.halti >= 5) { /* ICACHE (HALTI5) */
+ assert(ctx->shader_state.VS_INST_ADDR.bo &&
ctx->shader_state.PS_INST_ADDR.bo);
+ /* Set icache (VS) */
+ etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0);
+ etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH,
ctx->shader_state.vs_inst_mem_size / 4);
+ assert(ctx->shader_state.VS_INST_ADDR.bo);
+ etna_set_state_reloc(stream, VIVS_VS_INST_ADDR,
&ctx->shader_state.VS_INST_ADDR);
+ etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
+ etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
VIVS_VS_ICACHE_CONTROL_ENABLE);
+ etna_set_state(stream, VIVS_VS_ICACHE_COUNT,
ctx->shader_state.vs_inst_mem_size / 4 - 1);
+
+ /* Set icache (PS) */
+ etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0);
+ etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH,
ctx->shader_state.ps_inst_mem_size / 4);
+ assert(ctx->shader_state.PS_INST_ADDR.bo);
+ etna_set_state_reloc(stream, VIVS_PS_INST_ADDR,
&ctx->shader_state.PS_INST_ADDR);
+ etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
+ etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
VIVS_VS_ICACHE_CONTROL_ENABLE);
+ etna_set_state(stream, VIVS_PS_ICACHE_COUNT,
ctx->shader_state.ps_inst_mem_size / 4 - 1);
+
+ } else if (ctx->shader_state.VS_INST_ADDR.bo ||
ctx->shader_state.PS_INST_ADDR.bo) {
+ /* ICACHE (pre-HALTI5) */
assert(ctx->specs.has_icache &&
ctx->specs.has_shader_range_registers);
/* Set icache (VS) */
etna_set_state(stream, VIVS_VS_RANGE,
(ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
@@ -680,11 +792,14 @@ etna_emit_state(struct etna_context *ctx)
etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
etna_set_state(stream, VIVS_PS_UNIFORM_BASE,
ctx->specs.max_vs_uniforms);
}
- etna_set_state(stream, VIVS_VS_UNIFORM_CACHE,
VIVS_VS_UNIFORM_CACHE_FLUSH);
+
+ if (do_uniform_flush)
+ etna_set_state(stream, VIVS_VS_UNIFORM_CACHE,
VIVS_VS_UNIFORM_CACHE_FLUSH);
etna_set_state_multi(stream, ctx->specs.vs_uniforms_offset,
ctx->shader_state.vs_uniforms_size,
ctx->shader_state.VS_UNIFORMS);
- etna_set_state(stream, VIVS_VS_UNIFORM_CACHE,
VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
+ if (do_uniform_flush)
+ etna_set_state(stream, VIVS_VS_UNIFORM_CACHE,
VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
etna_set_state_multi(stream, ctx->specs.ps_uniforms_offset,
ctx->shader_state.ps_uniforms_size,
ctx->shader_state.PS_UNIFORMS);
@@ -698,9 +813,17 @@ etna_emit_state(struct etna_context *ctx)
ctx->shader_state.vs_uniforms_size * 4);
memcpy(ctx->gpu3d.PS_UNIFORMS, ctx->shader_state.PS_UNIFORMS,
ctx->shader_state.ps_uniforms_size * 4);
+
+ if (ctx->specs.halti >= 5) {
+ /* HALTI5 needs to be prompted to pre-fetch shaders */
+ etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000);
+ etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000);
+ etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
+ }
} else {
/* ideally this cache would only be flushed if there are VS
uniform changes */
- etna_set_state(stream, VIVS_VS_UNIFORM_CACHE,
VIVS_VS_UNIFORM_CACHE_FLUSH);
+ if (do_uniform_flush)
+ etna_set_state(stream, VIVS_VS_UNIFORM_CACHE,
VIVS_VS_UNIFORM_CACHE_FLUSH);
etna_coalesce_start(stream, &coalesce);
for (int x = 0; x < ctx->shader.vs->uniforms.const_count; ++x) {
if (ctx->gpu3d.VS_UNIFORMS[x] != ctx->shader_state.VS_UNIFORMS[x]) {
@@ -711,7 +834,8 @@ etna_emit_state(struct etna_context *ctx)
etna_coalesce_end(stream, &coalesce);
/* ideally this cache would only be flushed if there are PS
uniform changes */
- etna_set_state(stream, VIVS_VS_UNIFORM_CACHE,
VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
+ if (do_uniform_flush)
+ etna_set_state(stream, VIVS_VS_UNIFORM_CACHE,
VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
etna_coalesce_start(stream, &coalesce);
for (int x = 0; x < ctx->shader.fs->uniforms.const_count; ++x) {
if (ctx->gpu3d.PS_UNIFORMS[x] != ctx->shader_state.PS_UNIFORMS[x]) {
diff --git a/src/gallium/drivers/etnaviv/etnaviv_internal.h
b/src/gallium/drivers/etnaviv/etnaviv_internal.h
index e65332b..b8d2186 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_internal.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_internal.h
@@ -218,12 +218,15 @@ struct compiled_framebuffer_state {
struct compiled_vertex_elements_state {
unsigned num_elements;
uint32_t FE_VERTEX_ELEMENT_CONFIG[VIVS_FE_VERTEX_ELEMENT_CONFIG__LEN];
+ uint32_t NFE_GENERIC_ATTRIB_CONFIG0[VIVS_NFE_GENERIC_ATTRIB__LEN];
uint32_t NFE_GENERIC_ATTRIB_SCALE[VIVS_NFE_GENERIC_ATTRIB__LEN];
+ uint32_t NFE_GENERIC_ATTRIB_CONFIG1[VIVS_NFE_GENERIC_ATTRIB__LEN];
};
/* Compiled context->set_vertex_buffer result */
struct compiled_set_vertex_buffer {
uint32_t FE_VERTEX_STREAM_CONTROL;
+ uint32_t FE_VERTEX_STREAM_UNK14680;
struct etna_reloc FE_VERTEX_STREAM_BASE_ADDR;
};
@@ -253,6 +256,7 @@ struct compiled_shader_state {
uint32_t GL_VARYING_TOTAL_COMPONENTS;
uint32_t GL_VARYING_NUM_COMPONENTS;
uint32_t GL_VARYING_COMPONENT_USE[2];
+ uint32_t GL_HALTI5_SH_SPECIALS;
unsigned vs_inst_mem_size;
unsigned vs_uniforms_size;
unsigned ps_inst_mem_size;
-----------8<-----------
Wladimir, if it looks okay to you, I will push everything.
--
greets
--
Christian Gmeiner, MSc
https://christian-gmeiner.info
More information about the etnaviv
mailing list