[Mesa-dev] [PATCH] r600: implement callstack workaround for evergreen.

Elie Tournier tournier.elie at gmail.com
Fri Mar 9 14:11:14 UTC 2018


On Fri, Mar 09, 2018 at 04:09:43PM +1000, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
> 
> This is ported from the sb backend; there are some issues with
> evergreen stacks on the boundary between entries and ALU_PUSH_BEFORE
> instructions.
> 
> Whenever we are about to use an ALU_PUSH_BEFORE, we check the stack
> usage, and if the workaround is needed, we switch to a separate
> PUSH followed by a plain ALU clause.
> 
> I noticed this problem when dealing with some of the soft fp64 shaders;
> in nosb mode, they are quite stack happy.
More than happy actually!
On my system, I get:
[1375/1375] skip: 4, pass: 1368, fail: 3
So thanks a lot.
> 
> This fixes all the glitches and inconsistencies I've seen with them.
> 
> Signed-off-by: Dave Airlie <airlied at redhat.com>
Tested-by: Elie Tournier <elie.tournier at collabora.com>
> ---
>  src/gallium/drivers/r600/r600_shader.c | 39 +++++++++++++++++++++++++++-------
>  1 file changed, 31 insertions(+), 8 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index 48750fb..3ca7890 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -377,7 +377,7 @@ struct r600_shader_tgsi_instruction {
>  static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
>  static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
>  static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
> -static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
> +static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
>  static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
>  static int tgsi_else(struct r600_shader_ctx *ctx);
>  static int tgsi_endif(struct r600_shader_ctx *ctx);
> @@ -393,6 +393,15 @@ static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
>  static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
>  			       unsigned dst_reg, unsigned mask);
>  
> +static bool ctx_needs_stack_workaround_8xx(struct r600_shader_ctx *ctx)
> +{
> +	if (ctx->bc->family == CHIP_HEMLOCK ||
> +	    ctx->bc->family == CHIP_CYPRESS ||
> +	    ctx->bc->family == CHIP_JUNIPER)
> +		return false;
> +	return true;
> +}
> +
>  static bool ctx_has_doubles(struct r600_shader_ctx *ctx)
>  {
>  	if (ctx->bc->family == CHIP_ARUBA ||
> @@ -10182,7 +10191,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
>  	return 0;
>  }
>  
> -static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
> +static inline int callstack_update_max_depth(struct r600_shader_ctx *ctx,
>                                                unsigned reason)
>  {
>  	struct r600_stack_info *stack = &ctx->bc->stack;
> @@ -10200,7 +10209,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
>  		/* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
>  		 * the stack must be reserved to hold the current active/continue
>  		 * masks */
> -		if (reason == FC_PUSH_VPM) {
> +		if (reason == FC_PUSH_VPM || stack->push > 0) {
>  			elements += 2;
>  		}
>  		break;
> @@ -10226,7 +10235,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
>  		 *    NOTE: it seems we also need to reserve additional element in some
>  		 *    other cases, e.g. when we have 4 levels of PUSH_VPM in the shader,
>  		 *    then STACK_SIZE should be 2 instead of 1 */
> -		if (reason == FC_PUSH_VPM) {
> +		if (reason == FC_PUSH_VPM || stack->push > 0) {
>  			elements += 1;
>  		}
>  		break;
> @@ -10245,6 +10254,7 @@ static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
>  
>  	if (entries > stack->max_entries)
>  		stack->max_entries = entries;
> +	return elements;
>  }
>  
>  static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
> @@ -10268,7 +10278,7 @@ static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason)
>  	}
>  }
>  
> -static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
> +static inline int callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
>  {
>  	switch (reason) {
>  	case FC_PUSH_VPM:
> @@ -10276,6 +10286,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
>  		break;
>  	case FC_PUSH_WQM:
>  		++ctx->bc->stack.push_wqm;
> +		break;
>  	case FC_LOOP:
>  		++ctx->bc->stack.loop;
>  		break;
> @@ -10283,7 +10294,7 @@ static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason)
>  		assert(0);
>  	}
>  
> -	callstack_update_max_depth(ctx, reason);
> +	return callstack_update_max_depth(ctx, reason);
>  }
>  
>  static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
> @@ -10367,12 +10378,25 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode,
>  		   struct r600_bytecode_alu_src *src)
>  {
>  	int alu_type = CF_OP_ALU_PUSH_BEFORE;
> +	bool needs_workaround = false;
> +	int elems = callstack_push(ctx, FC_PUSH_VPM);
> +
> +	if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1)
> +		needs_workaround = true;
> +
> +	if (ctx->bc->chip_class == EVERGREEN && ctx_needs_stack_workaround_8xx(ctx)) {
> +		unsigned dmod1 = (elems - 1) % ctx->bc->stack.entry_size;
> +		unsigned dmod2 = (elems) % ctx->bc->stack.entry_size;
> +
> +		if (elems && (!dmod1 || !dmod2))
> +			needs_workaround = true;
> +	}
>  
>  	/* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
>  	 * LOOP_STARTxxx for nested loops may put the branch stack into a state
>  	 * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this
>  	 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
> -	if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
> +	if (needs_workaround) {
>  		r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
>  		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
>  		alu_type = CF_OP_ALU;
> @@ -10384,7 +10408,6 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode,
>  
>  	fc_pushlevel(ctx, FC_IF);
>  
> -	callstack_push(ctx, FC_PUSH_VPM);
>  	return 0;
>  }
>  
> -- 
> 2.9.5
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list