[Mesa-dev] [PATCH] r600g/sb: implement r600 gpr index workaround. (v3)

Glenn Kennard glenn.kennard at gmail.com
Tue Dec 9 17:53:21 PST 2014


On Wed, 10 Dec 2014 02:25:23 +0100, Dave Airlie <airlied at gmail.com> wrote:

> From: Dave Airlie <airlied at redhat.com>
>
> r600, rv610 and rv630 all have a bug in their GPR indexing
> and how the hw inserts access to PV.
>
> If the base index of a relatively addressed (indexed) src GPR is the
> same as a dst GPR written in the previous ALU group, the hw will read
> PV instead of using the indexed GPR correctly.
>
> The workaround is to insert a NOP group before the affected group when this is detected.
>
> v2: add the second part of the fix: detect relatively addressed DST
> writes followed by reads of a src with the same base index.
>
> v3: drop the idea of adding fields to structs; just iterate over the
> previous node group again, which makes the check more obvious.
>
> Fixes ~200 piglit regressions on rv635 since SB was introduced.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/gallium/drivers/r600/sb/sb_bc.h            |  2 +
>  src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 61 ++++++++++++++++++++++----
>  src/gallium/drivers/r600/sb/sb_context.cpp     |  2 +
>  src/gallium/drivers/r600/sb/sb_pass.h          |  5 ++-
>  4 files changed, 60 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/sb/sb_bc.h  
> b/src/gallium/drivers/r600/sb/sb_bc.h
> index d03da98..6d3dc4d 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc.h
> +++ b/src/gallium/drivers/r600/sb/sb_bc.h
> @@ -616,6 +616,8 @@ public:
>  	unsigned num_slots;
>  	bool uses_mova_gpr;
> +	bool r6xx_gpr_index_workaround;
> +
>  	bool stack_workaround_8xx;
>  	bool stack_workaround_9xx;
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp  
> b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
> index 3f362c4..9b59260 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
> @@ -38,6 +38,18 @@
> namespace r600_sb {
> +void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {
> +
> +	alu_group_node *g = sh.create_alu_group();
> +	alu_node *a = sh.create_alu();
> +
> +	a->bc.set_op(ALU_OP0_NOP);
> +	a->bc.last = 1;
> +
> +	g->push_back(a);
> +	b4->insert_before(g);
> +}
> +
>  int bc_finalizer::run() {
> 	run_on(sh.root);
> @@ -211,12 +223,12 @@ void bc_finalizer::finalize_if(region_node* r) {
>  }
> void bc_finalizer::run_on(container_node* c) {
> -
> +	node *prev_node = NULL;
>  	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
>  		node *n = *I;
> 		if (n->is_alu_group()) {
> -			finalize_alu_group(static_cast<alu_group_node*>(n));
> +			finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
>  		} else {
>  			if (n->is_alu_clause()) {
>  				cf_node *c = static_cast<cf_node*>(n);
> @@ -251,19 +263,24 @@ void bc_finalizer::run_on(container_node* c) {
>  			if (n->is_container())
>  				run_on(static_cast<container_node*>(n));
>  		}
> +		prev_node = n;
>  	}
>  }
> -void bc_finalizer::finalize_alu_group(alu_group_node* g) {
> +void bc_finalizer::finalize_alu_group(alu_group_node* g, node  
> *prev_node) {
> 	alu_node *last = NULL;
> +	alu_group_node *prev_g = NULL;
> +	bool add_nop = false;
> +	if (prev_node && prev_node->is_alu_group()) {
> +		prev_g = static_cast<alu_group_node*>(prev_node);
> +	}
> 	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
>  		alu_node *n = static_cast<alu_node*>(*I);
>  		unsigned slot = n->bc.slot;
> -
>  		value *d = n->dst.empty() ? NULL : n->dst[0];
> -
> +		bool local_nop;
>  		if (d && d->is_special_reg()) {
>  			assert(n->bc.op_ptr->flags & AF_MOVA);
>  			d = NULL;
> @@ -299,17 +316,24 @@ void  
> bc_finalizer::finalize_alu_group(alu_group_node* g) {
> 		update_ngpr(n->bc.dst_gpr);
> -		finalize_alu_src(g, n);
> +		local_nop = finalize_alu_src(g, n, prev_g);
> +		if (local_nop)
> +			add_nop = true;
> 		last = n;
>  	}
> +	if (add_nop) {
> +		if (sh.get_ctx().r6xx_gpr_index_workaround) {
> +			insert_rv6xx_load_ar_workaround(g);
> +		}
> +	}
>  	last->bc.last = 1;
>  }
> -void bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
> +bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a,  
> alu_group_node *prev) {
>  	vvec &sv = a->src;
> -
> +	bool add_nop = false;
>  	FBC_DUMP(
>  		sblog << "finalize_alu_src: ";
>  		dump::dump_op(a);
> @@ -336,6 +360,15 @@ void bc_finalizer::finalize_alu_src(alu_group_node*  
> g, alu_node* a) {
>  			if (!v->rel->is_const()) {
>  				src.rel = 1;
>  				update_ngpr(v->array->gpr.sel() + v->array->array_size -1);
> +				if (prev && !add_nop) {
> +					for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE;  
> ++pI) {
> +						alu_node *pn = static_cast<alu_node*>(*pI);
> +						if (pn->bc.dst_gpr == src.sel) {
> +							add_nop = true;
> +							break;
> +						}
> +					}
> +				}
>  			} else
>  				src.rel = 0;
> @@ -393,11 +426,23 @@ void  
> bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a) {
>  			assert(!"unknown value kind");
>  			break;
>  		}
> +		if (prev && !add_nop) {
> +			for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE;  
> ++pI) {
> +				alu_node *pn = static_cast<alu_node*>(*pI);
> +				if (pn->bc.dst_rel) {
> +					if (pn->bc.dst_gpr == src.sel) {
> +						add_nop = true;
> +						break;
> +					}
> +				}
> +			}
> +		}
>  	}
> 	while (si < 3) {
>  		a->bc.src[si++].sel = 0;
>  	}
> +	return add_nop;
>  }
> void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src,  
> unsigned arg_start)
> diff --git a/src/gallium/drivers/r600/sb/sb_context.cpp  
> b/src/gallium/drivers/r600/sb/sb_context.cpp
> index 8e11428..5dba85b 100644
> --- a/src/gallium/drivers/r600/sb/sb_context.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_context.cpp
> @@ -61,6 +61,8 @@ int sb_context::init(r600_isa *isa, sb_hw_chip chip,  
> sb_hw_class cclass) {
> 	uses_mova_gpr = is_r600() && chip != HW_CHIP_RV670;
> +	r6xx_gpr_index_workaround = is_r600() && chip != HW_CHIP_RV670 && chip  
> != HW_CHIP_RS780 && chip != HW_CHIP_RS880;
> +
>  	switch (chip) {
>  	case HW_CHIP_RV610:
>  	case HW_CHIP_RS780:
> diff --git a/src/gallium/drivers/r600/sb/sb_pass.h  
> b/src/gallium/drivers/r600/sb/sb_pass.h
> index 812d14a..0346df1 100644
> --- a/src/gallium/drivers/r600/sb/sb_pass.h
> +++ b/src/gallium/drivers/r600/sb/sb_pass.h
> @@ -695,8 +695,9 @@ public:
> 	void run_on(container_node *c);
> -	void finalize_alu_group(alu_group_node *g);
> -	void finalize_alu_src(alu_group_node *g, alu_node *a);
> +	void insert_rv6xx_load_ar_workaround(alu_group_node *b4);
> +	void finalize_alu_group(alu_group_node *g, node *prev_node);
> +	bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node  
> *prev_node);
> 	void emit_set_grad(fetch_node* f);
>  	void finalize_fetch(fetch_node *f);

Reviewed-By: Glenn Kennard <glenn.kennard at gmail.com>


More information about the mesa-dev mailing list