[Mesa-dev] [PATCH] r600: implement callstack workaround for evergreen.
Elie Tournier
tournier.elie at gmail.com
Fri Mar 9 14:11:14 UTC 2018
On Fri, Mar 09, 2018 at 04:09:43PM +1000, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This is ported from the sb backend. There are some issues with
> evergreen stacks on the boundary between entries and ALU_PUSH_BEFORE
> instructions.
>
> Whenever we are going to use a push before, we check the stack
> usage and if we have to use the workaround, then we switch to
> a separate push.
>
> I noticed this problem while dealing with some of the soft fp64 shaders;
> in nosb mode, they are quite stack happy.
More than happy actually!
On my system, I get:
[1375/1375] skip: 4, pass: 1368, fail: 3
So thanks a lot.
>
> This fixes all the glitches and inconsistencies I've seen with them.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
Tested-by: Elie Tournier <elie.tournier at collabora.com>
> ---
> src/gallium/drivers/r600/r600_shader.c | 39 +++++++++++++++++++++++++++-------
> 1 file changed, 31 insertions(+), 8 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index 48750fb..3ca7890 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -377,7 +377,7 @@ struct r600_shader_tgsi_instruction {
> static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
> static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
> static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
> -static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
> +static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
> static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
> static int tgsi_else(struct r600_shader_ctx *ctx);
> static int tgsi_endif(struct r600_shader_ctx *ctx);
> @@ -393,6 +393,15 @@ static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
> static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
> unsigned dst_reg, unsigned mask);
>
> +static bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx)
> +{
> + if (ctx->bc->family == CHIP_HEMLOCK ||
> + ctx->bc->family == CHIP_CYPRESS ||
> + ctx->bc->family == CHIP_JUNIPER)
> + return false;
> + return true;
> +}
> +
> static bool ctx_has_doubles(struct r600_shader_ctx *ctx)
> {
> if (ctx->bc->family == CHIP_ARUBA ||
> @@ -10182,7 +10191,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
> return 0;
> }
>
> -static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
> +static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx,
> unsigned reason)
> {
> struct r600_stack_info *stack = &ctx->bc->stack;
> @@ -10200,7 +10209,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
> /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
> * the stack must be reserved to hold the current active/continue
> * masks */
> - if (reason == FC_PUSH_VPM) {
> + if (reason == FC_PUSH_VPM || stack->push > 0) {
> elements += 2;
> }
> break;
> @@ -10226,7 +10235,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
> * NOTE: it seems we also need to reserve additional element in some
> * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader,
> * then STACK_SIZE should be 2 instead of 1 */
> - if (reason == FC_PUSH_VPM) {
> + if (reason == FC_PUSH_VPM || stack->push > 0) {
> elements += 1;
> }
> break;
> @@ -10245,6 +10254,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
>
> if (entries > stack->max_entries)
> stack->max_entries = entries;
> + return elements;
> }
>
> static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
> @@ -10268,7 +10278,7 @@ static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
> }
> }
>
> -static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
> +static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
> {
> switch (reason) {
> case FC_PUSH_VPM:
> @@ -10276,6 +10286,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
> break;
> case FC_PUSH_WQM:
> ++ctx->bc->stack.push_wqm;
> + break;
> case FC_LOOP:
> ++ctx->bc->stack.loop;
> break;
> @@ -10283,7 +10294,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
> assert(0);
> }
>
> - callstack_update_max_depth(ctx, reason);
> + return callstack_update_max_depth(ctx, reason);
> }
>
> static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
> @@ -10367,12 +10378,25 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode,
> struct r600_bytecode_alu_src *src)
> {
> int alu_type = CF_OP_ALU_PUSH_BEFORE;
> + bool needs_workaround = false;
> + int elems = callstack_push(ctx, FC_PUSH_VPM);
> +
> + if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1)
> + needs_workaround = true;
> +
> + if (ctx->bc->chip_class == EVERGREEN && ctx_needs_stack_workaround_8xx(ctx)) {
> + unsigned dmod1 = (elems - 1) % ctx->bc->stack.entry_size;
> + unsigned dmod2 = (elems) % ctx->bc->stack.entry_size;
> +
> + if (elems && (!dmod1 || !dmod2))
> + needs_workaround = true;
> + }
>
> /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
> * LOOP_STARTxxx for nested loops may put the branch stack into a state
> * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this
> * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
> - if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
> + if (needs_workaround) {
> r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
> ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
> alu_type = CF_OP_ALU;
> @@ -10384,7 +10408,6 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode,
>
> fc_pushlevel(ctx, FC_IF);
>
> - callstack_push(ctx, FC_PUSH_VPM);
> return 0;
> }
>
> --
> 2.9.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list