[Mesa-dev] [PATCH 11/53] r600/sb: add support for GDS to the sb decoder/dump.

Glenn Kennard glenn.kennard at gmail.com
Mon Nov 30 04:02:37 PST 2015


On Mon, 30 Nov 2015 07:20:20 +0100, Dave Airlie <airlied at gmail.com> wrote:

> From: Dave Airlie <airlied at redhat.com>
>
> This just adds support to the decoder, not actual SB support.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/gallium/drivers/r600/sb/sb_bc.h           |  6 ++--
>  src/gallium/drivers/r600/sb/sb_bc_decoder.cpp | 43 ++++++++++++++++++++++++++-
>  src/gallium/drivers/r600/sb/sb_bc_dump.cpp    | 24 +++++++++------
>  src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc | 28 +++++++++++++++++
>  4 files changed, 89 insertions(+), 12 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
> index 9c2a917..5b1bbbd 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc.h
> +++ b/src/gallium/drivers/r600/sb/sb_bc.h
> @@ -535,11 +535,11 @@ struct bc_fetch {
>  	unsigned resource_id:8;
> 	unsigned src_gpr:7;
> -	unsigned src_rel:1;
> +	unsigned src_rel:2; /* GDS expands to 2 bits */

SB interprets a set src_rel bit as meaning that src_gpr uses relative indexing, which
isn't true for REL_GLOBAL, so I think we want separate bits for this. The only modes we
will use for GDS ops are REL_NONE and REL_GLOBAL, since we don't use the loop register in
the driver, so one bit should be enough.

>  	unsigned src_sel[4];
> 	unsigned dst_gpr:7;
> -	unsigned dst_rel:1;
> +	unsigned dst_rel:2; /* GDS expands to 2 bits */

Same as for src_rel.

>  	unsigned dst_sel[4];
> 	unsigned alt_const:1;
> @@ -573,6 +573,7 @@ struct bc_fetch {
>  	unsigned endian_swap:2;
>  	unsigned mega_fetch:1;
>+	unsigned src2_gpr:7; /* for GDS */
>  	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
>  };
>@@ -739,6 +740,7 @@ private:
>  	int decode_cf_mem(unsigned &i, bc_cf &bc);
> 	int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
> +	int decode_fetch_gds(unsigned &i, bc_fetch &bc);
>  };
> // bytecode format definition
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
> index 5fe8f50..7626920 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
> @@ -373,7 +373,20 @@ int bc_decoder::decode_fetch(unsigned & i, bc_fetch& bc) {
> 	unsigned fetch_opcode = dw0 & 0x1F;
>-	bc.set_op(r600_isa_fetch_by_opcode(ctx.isa, fetch_opcode));
> +	if (fetch_opcode == 2) { // MEM_INST_MEM
> +		unsigned mem_op = (dw0 >> 8) & 0x7;
> +		unsigned gds_op;
> +		if (mem_op == 4) {
> +			gds_op = (dw1 >> 9) & 0x1f;
> +			fetch_opcode = FETCH_OP_GDS_ADD + gds_op;
> +		} else if (mem_op == 5)
> +			fetch_opcode = FETCH_OP_TF_WRITE;
> +		bc.set_op(fetch_opcode);
> +	} else
> +		bc.set_op(r600_isa_fetch_by_opcode(ctx.isa, fetch_opcode));
> +
> +	if (bc.op_ptr->flags & FF_GDS)
> +		return decode_fetch_gds(i, bc);
> 	if (bc.op_ptr->flags & FF_VTX)
>  		return decode_fetch_vtx(i, bc);
> @@ -439,6 +452,34 @@ int bc_decoder::decode_fetch(unsigned & i, bc_fetch& bc) {
>  	return r;
>  }
>+int bc_decoder::decode_fetch_gds(unsigned & i, bc_fetch& bc) {
> +	int r = 0;
> +	uint32_t dw0 = dw[i];
> +	uint32_t dw1 = dw[i+1];
> +	uint32_t dw2 = dw[i+2];
> +	i+= 4;

I'd probably add a note that the instruction is padded to 128 bits (only three dwords are decoded, the fourth is padding), since the "i += 4" looks like a typo otherwise.

> +	assert(i <= ndw);
> +
> +	MEM_GDS_WORD0_EGCM w0(dw0);
> +	bc.src_gpr = w0.get_SRC_GPR();
> +	bc.src_rel = w0.get_SRC_REL();
> +	bc.src_sel[0] = w0.get_SRC_SEL_X();
> +	bc.src_sel[1] = w0.get_SRC_SEL_Y();
> +	bc.src_sel[2] = w0.get_SRC_SEL_Z();
> +
> +	MEM_GDS_WORD1_EGCM w1(dw1);
> +	bc.dst_gpr = w1.get_DST_GPR();
> +	bc.dst_rel = w1.get_DST_REL();
> +	bc.src2_gpr = w1.get_SRC_GPR();
> +
> +	MEM_GDS_WORD2_EGCM w2(dw2);
> +	bc.dst_sel[0] = w2.get_DST_SEL_X();
> +	bc.dst_sel[1] = w2.get_DST_SEL_Y();
> +	bc.dst_sel[2] = w2.get_DST_SEL_Z();
> +	bc.dst_sel[3] = w2.get_DST_SEL_W();
> +	return r;
> +}
> +
>  int bc_decoder::decode_fetch_vtx(unsigned & i, bc_fetch& bc) {
>  	int r = 0;
>  	uint32_t dw0 = dw[i];
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
> index 3c70ea7..3c051ad 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
> @@ -425,23 +425,26 @@ bc_dump::bc_dump(shader& s, bytecode* bc)  :
>  void bc_dump::dump(fetch_node& n) {
>  	sb_ostringstream s;
>  	static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
> +	unsigned gds = n.bc.op_ptr->flags & FF_GDS;
> 	s << n.bc.op_ptr->name;
>  	fill_to(s, 20);
>-	s << "R";
> -	print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0);
> -	s << ".";
> -	for (int k = 0; k < 4; ++k)
> -		s << chans[n.bc.dst_sel[k]];
> -	s << ", ";
> +	if (!gds) {
> +		s << "R";
> +		print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0);
> +		s << ".";
> +		for (int k = 0; k < 4; ++k)
> +			s << chans[n.bc.dst_sel[k]];
> +		s << ", ";
> +	}
> 	s << "R";
>  	print_sel(s, n.bc.src_gpr, n.bc.src_rel, INDEX_LOOP, 0);
>  	s << ".";
> 	unsigned vtx = n.bc.op_ptr->flags & FF_VTX;
> -	unsigned num_src_comp = vtx ? ctx.is_cayman() ? 2 : 1 : 4;
> +	unsigned num_src_comp = gds ? 3 : vtx ? ctx.is_cayman() ? 2 : 1 : 4;
> 	for (unsigned k = 0; k < num_src_comp; ++k)
>  		s << chans[n.bc.src_sel[k]];
> @@ -450,9 +453,12 @@ void bc_dump::dump(fetch_node& n) {
>  		s << " + " << n.bc.offset[0] << "b ";
>  	}
>-	s << ",   RID:" << n.bc.resource_id;
> +	if (!gds)
> +		s << ",   RID:" << n.bc.resource_id;
> +
> +	if (gds) {
>-	if (vtx) {
> +	} else if (vtx) {
>  		s << "  " << fetch_type[n.bc.fetch_type];
>  		if (!ctx.is_cayman() && n.bc.mega_fetch_count)
>  			s << " MFC:" << n.bc.mega_fetch_count;
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc b/src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc
> index 50f73d7..e775499 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc
> +++ b/src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc
> @@ -541,3 +541,31 @@ BC_FIELD(TEX_WORD2,     SRC_SEL_Y,              SSY,        25, 23)
>  BC_FIELD(TEX_WORD2,     SRC_SEL_Z,              SSZ,        28, 26)
>  BC_FIELD(TEX_WORD2,     SRC_SEL_W,              SSW,        31, 29)
>  BC_FORMAT_END(TEX_WORD2)
> +
> +BC_FORMAT_BEGIN_HW(MEM_GDS_WORD0, EGCM)
> +BC_FIELD(MEM_GDS_WORD0,  MEM_INST,               M_INST,     4, 0)
> +BC_FIELD(MEM_GDS_WORD0,  MEM_OP,  		 M_OP,      10, 8)
> +BC_FIELD(MEM_GDS_WORD0,  SRC_GPR,                S_GPR,     17, 11)
> +BC_FIELD(MEM_GDS_WORD0,  SRC_REL,                SR,        19, 18)
> +BC_FIELD(MEM_GDS_WORD0,  SRC_SEL_X,              SSX,       22, 20)
> +BC_FIELD(MEM_GDS_WORD0,  SRC_SEL_Y,              SSY,       25, 23)
> +BC_FIELD(MEM_GDS_WORD0,  SRC_SEL_Z,              SSZ,       28, 26)
> +BC_FORMAT_END(MEM_GDS_WORD0)
> +
> +BC_FORMAT_BEGIN_HW(MEM_GDS_WORD1, EGCM)
> +BC_FIELD(MEM_GDS_WORD1,     DST_GPR,                D_GPR,      6,  0)
> +BC_FIELD(MEM_GDS_WORD1,     DST_REL,                DR,         8,  7)
> +BC_FIELD(MEM_GDS_WORD1,     GDS_OP,                 G_OP,      14,  9)
> +BC_FIELD(MEM_GDS_WORD1,     SRC_GPR,                S_GPR,     22, 16)
> +BC_FIELD(MEM_GDS_WORD1,     UAV_INDEX_MODE,         U_IM,      25, 24)
> +BC_FIELD(MEM_GDS_WORD1,     UAV_ID,                 U_ID,      29, 26)
> +BC_FIELD(MEM_GDS_WORD1,     ALLOC_CONSUME,          AC,        30, 30)
> +BC_FIELD(MEM_GDS_WORD1,     BCARD_FIRST_REQ,        BFR,       31, 31)
> +BC_FORMAT_END(MEM_GDS_WORD1)
> +
> +BC_FORMAT_BEGIN_HW(MEM_GDS_WORD2, EGCM)
> +BC_FIELD(MEM_GDS_WORD2,     DST_SEL_X,              DSX,        2, 0)
> +BC_FIELD(MEM_GDS_WORD2,     DST_SEL_Y,              DSY,        5, 3)
> +BC_FIELD(MEM_GDS_WORD2,     DST_SEL_Z,              DSZ,        8, 6)
> +BC_FIELD(MEM_GDS_WORD2,     DST_SEL_W,              DSW,       11, 9)
> +BC_FORMAT_END(MEM_GDS_WORD2)
> \ No newline at end of file

With src_rel/dst_rel dealt with as suggested above,

Reviewed-by: Glenn Kennard <glenn.kennard at gmail.com>


More information about the mesa-dev mailing list