[Mesa-dev] [PATCH] r600g/sb: Support gs5 sampler indexing

Edward O'Callaghan edward.ocallaghan at koparo.com
Mon Sep 21 16:48:42 PDT 2015


Reviewed-by: Edward O'Callaghan <eocallaghan at alterapraxis.com>

-- 
  Edward O'Callaghan
  edward.ocallaghan at koparo.com

On Tue, Sep 22, 2015, at 12:21 AM, Glenn Kennard wrote:
> Signed-off-by: Glenn Kennard <glenn.kennard at gmail.com>
> ---
> Just UBO support left before gs5 can be enabled.
> Could improve how the two index registers are set/used to reduce
> the number of clauses, but as is, it's about as good as what the blob
> emits.
> 
>  src/gallium/drivers/r600/r600_shader.c       |  12 ++-
>  src/gallium/drivers/r600/r600_shader.h       |   4 +-
>  src/gallium/drivers/r600/sb/sb_bc.h          |  10 ++-
>  src/gallium/drivers/r600/sb/sb_bc_dump.cpp   |  17 +++-
>  src/gallium/drivers/r600/sb/sb_bc_parser.cpp |  50 +++++++++++-
>  src/gallium/drivers/r600/sb/sb_gcm.cpp       |  11 ++-
>  src/gallium/drivers/r600/sb/sb_sched.cpp     | 118
>  +++++++++++++++++++++++++--
>  src/gallium/drivers/r600/sb/sb_sched.h       |   5 +-
>  8 files changed, 201 insertions(+), 26 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_shader.c
> b/src/gallium/drivers/r600/r600_shader.c
> index 1d90582..24c3d43 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -166,8 +166,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
>      if (rctx->b.chip_class <= R700) {
>  	    use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
>      }
> -       /* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array
> indexing) as it doesn't handle those currently */
> -       use_sb &= !shader->shader.uses_index_registers;
> +       /* disable SB for shaders using ubo array indexing as it doesn't
> handle those currently */
> +       use_sb &= !shader->shader.uses_ubo_indexing;
>  	/* disable SB for shaders using doubles */
>  	use_sb &= !shader->shader.uses_doubles;
>  
> @@ -1251,7 +1251,7 @@ static int tgsi_split_constant(struct
> r600_shader_ctx *ctx)
>  		}
>  
>  		if (ctx->src[i].kc_rel)
> -                       ctx->shader->uses_index_registers = true;
> +                       ctx->shader->uses_ubo_indexing = true;
>  
>  		if (ctx->src[i].rel) {
>  			int chan = inst->Src[i].Indirect.Swizzle;
> @@ -1912,7 +1912,7 @@ static int r600_shader_from_tgsi(struct
> r600_context *rctx,
>  
>  	shader->uses_doubles = ctx.info.uses_doubles;
>  
> -       indirect_gprs = ctx.info.indirect_files & ~(1 <<
> TGSI_FILE_CONSTANT);
> +       indirect_gprs = ctx.info.indirect_files & ~((1 <<
> TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER));
>  	tgsi_parse_init(&ctx.parse, tokens);
>  	ctx.type = ctx.info.processor;
>  	shader->processor_type = ctx.type;
> @@ -1936,7 +1936,7 @@ static int r600_shader_from_tgsi(struct
> r600_context *rctx,
>  	ctx.gs_next_vertex = 0;
>  	ctx.gs_stream_output_info = &so;
>  
> -       shader->uses_index_registers = false;
> +       shader->uses_ubo_indexing = false;
>  	ctx.face_gpr = -1;
>  	ctx.fixed_pt_position_gpr = -1;
>  	ctx.fragcoord_input = -1;
> @@ -5703,8 +5703,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
>  		sampler_src_reg = 3;
>  
>  	sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
> -       if (sampler_index_mode)
> -               ctx->shader->uses_index_registers = true;
>  
>  	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
>  
> diff --git a/src/gallium/drivers/r600/r600_shader.h
> b/src/gallium/drivers/r600/r600_shader.h
> index 48de9cd..8ba32ae 100644
> --- a/src/gallium/drivers/r600/r600_shader.h
> +++ b/src/gallium/drivers/r600/r600_shader.h
> @@ -75,8 +75,8 @@ struct r600_shader {
>  	boolean			has_txq_cube_array_z_comp;
>  	boolean			uses_tex_buffers;
>  	boolean                 gs_prim_id_input;
> -       /* Temporarily workaround SB not handling CF_INDEX_[01] index
> registers */
> -       boolean                 uses_index_registers;
> +       /* Temporarily workaround SB not handling ubo indexing */
> +       boolean                 uses_ubo_indexing;
>  
>  	/* Size in bytes of a data item in the ring(s) (single vertex data).
>  	   Stages with only one ring items 123 will be set to 0. */
> diff --git a/src/gallium/drivers/r600/sb/sb_bc.h
> b/src/gallium/drivers/r600/sb/sb_bc.h
> index ab988f8..126750d 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc.h
> +++ b/src/gallium/drivers/r600/sb/sb_bc.h
> @@ -48,6 +48,7 @@ class fetch_node;
>  class alu_group_node;
>  class region_node;
>  class shader;
> +class value;
>  
>  class sb_ostream {
>  public:
> @@ -818,13 +819,16 @@ class bc_parser {
>  
>  	bool gpr_reladdr;
>  
> +       // Note: currently relies on input emitting SET_CF in same basic
> block as uses
> +       value *cf_index_value[2];
> +       alu_node *mova;
>  public:
>  
>  	bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
>  		ctx(sctx), dec(), bc(bc), pshader(pshader),
>  		dw(), bc_ndw(), max_cf(),
>  		sh(), error(), slots(), cgroup(),
> -               cf_map(), loop_stack(), gpr_reladdr() { }
> +               cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(),
> mova() { }
>  
>  	int decode();
>  	int prepare();
> @@ -852,6 +856,10 @@ private:
>  	int prepare_loop(cf_node *c);
>  	int prepare_if(cf_node *c);
>  
> +       void save_set_cf_index(value *val, unsigned idx);
> +       value *get_cf_index_value(unsigned idx);
> +       void save_mova(alu_node *mova);
> +       alu_node *get_mova();
>  };
>  
>  
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
> b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
> index 0fc73c4..3c70ea7 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
> @@ -27,6 +27,7 @@
>  #include "sb_bc.h"
>  #include "sb_shader.h"
>  #include "sb_pass.h"
> +#include "eg_sq.h" // V_SQ_CF_INDEX_0/1
>  
>  namespace r600_sb {
>  
> @@ -354,6 +355,14 @@ void bc_dump::dump(alu_node& n) {
>  			s << "  " << vec_bs[n.bc.bank_swizzle];
>  	}
>  
> +       if (ctx.is_cayman()) {
> +               if (n.bc.op == ALU_OP1_MOVA_INT) {
> +                       static const char *mova_str[] = { " AR_X", " PC",
> " CF_IDX0", " CF_IDX1",
> +                               " Unknown MOVA_INT dest" };
> +                       s << mova_str[std::min(n.bc.dst_gpr, 4u)];  //
> CM_V_SQ_MOVA_DST_AR_*
> +               }
> +       }
> +
>  	sblog << s.str() << "\n";
>  }
>  
> @@ -450,9 +459,9 @@ void bc_dump::dump(fetch_node& n) {
>  		if (n.bc.fetch_whole_quad)
>  			s << " FWQ";
>  		if (ctx.is_egcm() && n.bc.resource_index_mode)
> -                       s << " RIM:SQ_CF_INDEX_" <<
> n.bc.resource_index_mode;
> +                       s << " RIM:SQ_CF_INDEX_" <<
> (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
>  		if (ctx.is_egcm() && n.bc.sampler_index_mode)
> -                       s << " SID:SQ_CF_INDEX_" <<
> n.bc.sampler_index_mode;
> +                       s << " SID:SQ_CF_INDEX_" <<
> (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
>  
>  		s << " UCF:" << n.bc.use_const_fields
>  				<< " FMT(DTA:" << n.bc.data_format
> @@ -470,9 +479,9 @@ void bc_dump::dump(fetch_node& n) {
>  			if (n.bc.offset[k])
>  				s << " O" << chans[k] << ":" << n.bc.offset[k];
>  		if (ctx.is_egcm() && n.bc.resource_index_mode)
> -                       s << " RIM:SQ_CF_INDEX_" <<
> n.bc.resource_index_mode;
> +                       s << " RIM:SQ_CF_INDEX_" <<
> (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
>  		if (ctx.is_egcm() && n.bc.sampler_index_mode)
> -                       s << " SID:SQ_CF_INDEX_" <<
> n.bc.sampler_index_mode;
> +                       s << " SID:SQ_CF_INDEX_" <<
> (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
>  	}
>  
>  	sblog << s.str() << "\n";
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> index 19bd078..eb43670 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
> @@ -34,6 +34,7 @@
>  
>  #include "r600_pipe.h"
>  #include "r600_shader.h"
> +#include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1
>  
>  #include <stack>
>  
> @@ -121,7 +122,7 @@ int bc_parser::parse_decls() {
>  		return 0;
>  	}
>  
> -       if (pshader->indirect_files & ~(1 << TGSI_FILE_CONSTANT)) {
> +       if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 <<
> TGSI_FILE_SAMPLER))) {
>  
>  		assert(pshader->num_arrays);
>  
> @@ -328,6 +329,28 @@ int bc_parser::prepare_alu_clause(cf_node* cf) {
>  	return 0;
>  }
>  
> +void bc_parser::save_set_cf_index(value *val, unsigned idx)
> +{
> +       assert(idx <= 1);
> +       assert(val);
> +       cf_index_value[idx] = val;
> +}
> +value *bc_parser::get_cf_index_value(unsigned idx)
> +{
> +       assert(idx <= 1);
> +       return cf_index_value[idx];
> +}
> +void bc_parser::save_mova(alu_node *mova)
> +{
> +       assert(mova);
> +       this->mova = mova;
> +}
> +alu_node *bc_parser::get_mova()
> +{
> +       assert(mova);
> +       return mova;
> +}
> +
>  int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
>  
>  	alu_node *n;
> @@ -375,9 +398,24 @@ int bc_parser::prepare_alu_group(cf_node* cf,
> alu_group_node *g) {
>  			n->dst.resize(1);
>  		}
>  
> -               if (flags & AF_MOVA) {
> -
> -                       n->dst[0] = sh->get_special_value(SV_AR_INDEX);
> +               if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op ==
> ALU_OP0_SET_CF_IDX1) {
> +                       // Move CF_IDX value into tex instruction
> operands, scheduler will later re-emit setting of CF_IDX
> +                       // DCE will kill this op
> +                       save_set_cf_index(get_mova()->src[0], n->bc.op ==
> ALU_OP0_SET_CF_IDX1);
> +               } else if (flags & AF_MOVA) {
> +
> +                       if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 ||
> +                               n->bc.dst_gpr ==
> CM_V_SQ_MOVA_DST_CF_IDX1) &&
> +                               ctx.is_cayman())
> +                       {
> +                               // Move CF_IDX value into tex instruction
> operands, scheduler will later re-emit setting of CF_IDX
> +                               save_set_cf_index(n->src[0],
> n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1);
> +                               n->dst.resize(0);
> +                       }
> +                       else {
> +                               n->dst[0] =
> sh->get_special_value(SV_AR_INDEX);
> +                               save_mova(n);
> +                       }
>  
>  			n->flags |= NF_DONT_HOIST;
>  
> @@ -608,6 +646,10 @@ int bc_parser::prepare_fetch_clause(cf_node *cf) {
>  					                              n->bc.src_sel[s], false);
>  			}
>  
> +                       // Scheduler will emit the appropriate
> instructions to set CF_IDX0/1
> +                       if (n->bc.sampler_index_mode !=
> V_SQ_CF_INDEX_NONE) {
> +                              
> n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode ==
> V_SQ_CF_INDEX_1));
> +                       }
>  		}
>  	}
>  
> diff --git a/src/gallium/drivers/r600/sb/sb_gcm.cpp
> b/src/gallium/drivers/r600/sb/sb_gcm.cpp
> index bccb671..236b2ea 100644
> --- a/src/gallium/drivers/r600/sb/sb_gcm.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_gcm.cpp
> @@ -37,6 +37,7 @@
>  #include "sb_bc.h"
>  #include "sb_shader.h"
>  #include "sb_pass.h"
> +#include "eg_sq.h" // V_SQ_CF_INDEX_NONE
>  
>  namespace r600_sb {
>  
> @@ -406,6 +407,14 @@ void gcm::bu_sched_bb(bb_node* bb) {
>  					ncnt = 3;
>  				}
>  
> +                               bool sampler_indexing = false;
> +                               if (n->is_fetch_inst() &&
> +                                       static_cast<fetch_node
> *>(n)->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE)
> +                               {
> +                                       sampler_indexing = true; // Give
> sampler indexed ops their own clause
> +                                       ncnt = sh.get_ctx().is_cayman() ?
> 2 : 3; // MOVA + SET_CF_IDX0/1
> +                               }
> +
>  				if ((sq == SQ_TEX || sq == SQ_VTX) &&
>  						((last_count >= ctx.max_fetch/2 &&
>  						check_alu_ready_count(24)) ||
> @@ -418,7 +427,7 @@ void gcm::bu_sched_bb(bb_node* bb) {
>  				bu_ready[sq].pop_front();
>  
>  				if (sq != SQ_CF) {
> -                                       if (!clause) {
> +                                       if (!clause || sampler_indexing)
> {
>  						clause = sh.create_clause(sq == SQ_ALU ?
>  								NST_ALU_CLAUSE :
>  									sq == SQ_TEX ? NST_TEX_CLAUSE :
> diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp
> b/src/gallium/drivers/r600/sb/sb_sched.cpp
> index c98b8ff..601445f 100644
> --- a/src/gallium/drivers/r600/sb/sb_sched.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
> @@ -36,6 +36,7 @@
>  #include "sb_shader.h"
>  #include "sb_pass.h"
>  #include "sb_sched.h"
> +#include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1
>  
>  namespace r600_sb {
>  
> @@ -781,7 +782,14 @@ void post_scheduler::schedule_bb(bb_node* bb) {
>  			sblog << "\n";
>  		);
>  
> -               if (n->subtype == NST_ALU_CLAUSE) {
> +               // May require emitting ALU ops to load index registers
> +               if (n->is_fetch_clause()) {
> +                       n->remove();
> +                       process_fetch(static_cast<container_node *>(n));
> +                       continue;
> +               }
> +
> +               if (n->is_alu_clause()) {
>  			n->remove();
>  			process_alu(static_cast<container_node*>(n));
>  			continue;
> @@ -823,6 +831,102 @@ void post_scheduler::init_regmap() {
>  	}
>  }
>  
> +static alu_node *create_set_idx(shader &sh, unsigned ar_idx) {
> +       alu_node *a = sh.create_alu();
> +
> +       assert(ar_idx == V_SQ_CF_INDEX_0 || ar_idx == V_SQ_CF_INDEX_1);
> +       if (ar_idx == V_SQ_CF_INDEX_0)
> +               a->bc.set_op(ALU_OP0_SET_CF_IDX0);
> +       else
> +               a->bc.set_op(ALU_OP0_SET_CF_IDX1);
> +       a->bc.slot = SLOT_X;
> +       a->dst.resize(1); // Dummy needed for recolor
> +
> +       PSC_DUMP(
> +               sblog << "created IDX load: "
> +               dump::dump_op(a);
> +               sblog << "\n";
> +       );
> +
> +       return a;
> +}
> +
> +void post_scheduler::load_index_register(value *v, unsigned ar_idx)
> +{
> +       alu.reset();
> +
> +       if (!sh.get_ctx().is_cayman()) {
> +               // Evergreen has to first load address register, then use
> CF_SET_IDX0/1
> +               alu_group_tracker &rt = alu.grp();
> +               alu_node *set_idx = create_set_idx(sh, ar_idx);
> +               if (!rt.try_reserve(set_idx)) {
> +                       sblog << "can't emit SET_CF_IDX";
> +                       dump::dump_op(set_idx);
> +                       sblog << "\n";
> +               }
> +               process_group();
> +
> +               if (!alu.check_clause_limits()) {
> +                       // Can't happen since clause only contains
> MOVA/CF_SET_IDX0/1
> +               }
> +               alu.emit_group();
> +       }
> +
> +       alu_group_tracker &rt = alu.grp();
> +       alu_node *a = alu.create_ar_load(v, ar_idx == V_SQ_CF_INDEX_1 ?
> SEL_Z : SEL_Y);
> +
> +       if (!rt.try_reserve(a)) {
> +               sblog << "can't emit AR load : ";
> +               dump::dump_op(a);
> +               sblog << "\n";
> +       }
> +
> +       process_group();
> +
> +       if (!alu.check_clause_limits()) {
> +               // Can't happen since clause only contains
> MOVA/CF_SET_IDX0/1
> +       }
> +
> +       alu.emit_group();
> +       alu.emit_clause(cur_bb);
> +}
> +
> +void post_scheduler::process_fetch(container_node *c) {
> +       if (c->empty())
> +               return;
> +
> +       for (node_iterator N, I = c->begin(), E = c->end(); I != E; I =
> N) {
> +               N = I;
> +               ++N;
> +
> +               node *n = *I;
> +
> +               fetch_node *f = static_cast<fetch_node*>(n);
> +
> +               PSC_DUMP(
> +                       sblog << "process_tex ";
> +                       dump::dump_op(n);
> +                       sblog << "  ";
> +               );
> +
> +               if (f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
> +                       // Currently require prior opt passes to use one
> TEX per indexed op
> +                       assert(f->parent->count() == 1);
> +
> +                       value *v = f->src.back(); // Last src is index
> offset
> +
> +                       cur_bb->push_front(c);
> +
> +                       load_index_register(v, f->bc.sampler_index_mode);
> +                       f->src.pop_back(); // Don't need index value any
> more
> +
> +                       return;
> +               }
> +       }
> +
> +       cur_bb->push_front(c);
> +}
> +
>  void post_scheduler::process_alu(container_node *c) {
>  
>  	if (c->empty())
> @@ -1180,7 +1284,7 @@ void post_scheduler::emit_load_ar() {
>  	alu.discard_current_group();
>  
>  	alu_group_tracker &rt = alu.grp();
> -       alu_node *a = alu.create_ar_load();
> +       alu_node *a = alu.create_ar_load(alu.current_ar, SEL_X);
>  
>  	if (!rt.try_reserve(a)) {
>  		sblog << "can't emit AR load : ";
> @@ -1936,11 +2040,9 @@ bool alu_kcache_tracker::update_kc() {
>  	return true;
>  }
>  
> -alu_node* alu_clause_tracker::create_ar_load() {
> +alu_node* alu_clause_tracker::create_ar_load(value *v, chan_select
> ar_channel) {
>  	alu_node *a = sh.create_alu();
>  
> -       // FIXME use MOVA_GPR on R6xx
> -
>  	if (sh.get_ctx().uses_mova_gpr) {
>  		a->bc.set_op(ALU_OP1_MOVA_GPR_INT);
>  		a->bc.slot = SLOT_TRANS;
> @@ -1948,9 +2050,13 @@ alu_node* alu_clause_tracker::create_ar_load() {
>  		a->bc.set_op(ALU_OP1_MOVA_INT);
>  		a->bc.slot = SLOT_X;
>  	}
> +       a->bc.dst_chan = ar_channel;
> +       if (ar_channel != SEL_X && sh.get_ctx().is_cayman()) {
> +               a->bc.dst_gpr = ar_channel == SEL_Y ?
> CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
> +       }
>  
>  	a->dst.resize(1);
> -       a->src.push_back(current_ar);
> +       a->src.push_back(v);
>  
>  	PSC_DUMP(
>  		sblog << "created AR load: ";
> diff --git a/src/gallium/drivers/r600/sb/sb_sched.h
> b/src/gallium/drivers/r600/sb/sb_sched.h
> index 87c4586..2ca7146 100644
> --- a/src/gallium/drivers/r600/sb/sb_sched.h
> +++ b/src/gallium/drivers/r600/sb/sb_sched.h
> @@ -235,7 +235,7 @@ public:
>  	void new_group();
>  	bool is_empty();
>  
> -       alu_node* create_ar_load();
> +       alu_node* create_ar_load(value *v, chan_select ar_channel);
>  
>  	void discard_current_group();
>  
> @@ -266,6 +266,9 @@ public:
>  	void run_on(container_node *n);
>  	void schedule_bb(bb_node *bb);
>  
> +       void load_index_register(value *v, unsigned idx);
> +       void process_fetch(container_node *c);
> +
>  	void process_alu(container_node *c);
>  	void schedule_alu(container_node *c);
>  	bool prepare_alu_group();
> -- 
> 1.9.1
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list