[Mesa-dev] [PATCH 11/53] r600/sb: add support for GDS to the sb decoder/dump.
Glenn Kennard
glenn.kennard at gmail.com
Mon Nov 30 04:02:37 PST 2015
On Mon, 30 Nov 2015 07:20:20 +0100, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This just adds support to the decoder, not actual SB support.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/gallium/drivers/r600/sb/sb_bc.h | 6 ++--
> src/gallium/drivers/r600/sb/sb_bc_decoder.cpp | 43 ++++++++++++++++++++++++++-
> src/gallium/drivers/r600/sb/sb_bc_dump.cpp | 24 +++++++++------
> src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc | 28 +++++++++++++++++
> 4 files changed, 89 insertions(+), 12 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h
> index 9c2a917..5b1bbbd 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc.h
> +++ b/src/gallium/drivers/r600/sb/sb_bc.h
> @@ -535,11 +535,11 @@ struct bc_fetch {
> unsigned resource_id:8;
> unsigned src_gpr:7;
> - unsigned src_rel:1;
> + unsigned src_rel:2; /* GDS expands to 2 bits */
SB interprets a set src_rel bit as meaning that src_gpr uses relative indexing, which isn't
true for REL_GLOBAL, so I think we want separate bits for this. The only modes we will use
for GDS ops are REL_NONE and REL_GLOBAL, since we don't use the loop register in the
driver, so one bit should be enough.
> unsigned src_sel[4];
> unsigned dst_gpr:7;
> - unsigned dst_rel:1;
> + unsigned dst_rel:2; /* GDS expands to 2 bits */
Same as for src_rel.
> unsigned dst_sel[4];
> unsigned alt_const:1;
> @@ -573,6 +573,7 @@ struct bc_fetch {
> unsigned endian_swap:2;
> unsigned mega_fetch:1;
>+ unsigned src2_gpr:7; /* for GDS */
> void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
> };
>@@ -739,6 +740,7 @@ private:
> int decode_cf_mem(unsigned &i, bc_cf &bc);
> int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
> + int decode_fetch_gds(unsigned &i, bc_fetch &bc);
> };
> // bytecode format definition
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
> index 5fe8f50..7626920 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
> @@ -373,7 +373,20 @@ int bc_decoder::decode_fetch(unsigned & i, bc_fetch& bc) {
> unsigned fetch_opcode = dw0 & 0x1F;
>- bc.set_op(r600_isa_fetch_by_opcode(ctx.isa, fetch_opcode));
> + if (fetch_opcode == 2) { // MEM_INST_MEM
> + unsigned mem_op = (dw0 >> 8) & 0x7;
> + unsigned gds_op;
> + if (mem_op == 4) {
> + gds_op = (dw1 >> 9) & 0x1f;
> + fetch_opcode = FETCH_OP_GDS_ADD + gds_op;
> + } else if (mem_op == 5)
> + fetch_opcode = FETCH_OP_TF_WRITE;
> + bc.set_op(fetch_opcode);
> + } else
> + bc.set_op(r600_isa_fetch_by_opcode(ctx.isa, fetch_opcode));
> +
> + if (bc.op_ptr->flags & FF_GDS)
> + return decode_fetch_gds(i, bc);
> if (bc.op_ptr->flags & FF_VTX)
> return decode_fetch_vtx(i, bc);
> @@ -439,6 +452,34 @@ int bc_decoder::decode_fetch(unsigned & i, bc_fetch& bc) {
> return r;
> }
>+int bc_decoder::decode_fetch_gds(unsigned & i, bc_fetch& bc) {
> + int r = 0;
> + uint32_t dw0 = dw[i];
> + uint32_t dw1 = dw[i+1];
> + uint32_t dw2 = dw[i+2];
> + i+= 4;
I'd probably add a note that the instruction is padded to 128 bits, since the += 4 looks like a typo otherwise (only three dwords are actually read).
> + assert(i <= ndw);
> +
> + MEM_GDS_WORD0_EGCM w0(dw0);
> + bc.src_gpr = w0.get_SRC_GPR();
> + bc.src_rel = w0.get_SRC_REL();
> + bc.src_sel[0] = w0.get_SRC_SEL_X();
> + bc.src_sel[1] = w0.get_SRC_SEL_Y();
> + bc.src_sel[2] = w0.get_SRC_SEL_Z();
> +
> + MEM_GDS_WORD1_EGCM w1(dw1);
> + bc.dst_gpr = w1.get_DST_GPR();
> + bc.dst_rel = w1.get_DST_REL();
> + bc.src2_gpr = w1.get_SRC_GPR();
> +
> + MEM_GDS_WORD2_EGCM w2(dw2);
> + bc.dst_sel[0] = w2.get_DST_SEL_X();
> + bc.dst_sel[1] = w2.get_DST_SEL_Y();
> + bc.dst_sel[2] = w2.get_DST_SEL_Z();
> + bc.dst_sel[3] = w2.get_DST_SEL_W();
> + return r;
> +}
> +
> int bc_decoder::decode_fetch_vtx(unsigned & i, bc_fetch& bc) {
> int r = 0;
> uint32_t dw0 = dw[i];
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
> index 3c70ea7..3c051ad 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
> +++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp
> @@ -425,23 +425,26 @@ bc_dump::bc_dump(shader& s, bytecode* bc) :
> void bc_dump::dump(fetch_node& n) {
> sb_ostringstream s;
> static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
> + unsigned gds = n.bc.op_ptr->flags & FF_GDS;
> s << n.bc.op_ptr->name;
> fill_to(s, 20);
>- s << "R";
> - print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0);
> - s << ".";
> - for (int k = 0; k < 4; ++k)
> - s << chans[n.bc.dst_sel[k]];
> - s << ", ";
> + if (!gds) {
> + s << "R";
> + print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0);
> + s << ".";
> + for (int k = 0; k < 4; ++k)
> + s << chans[n.bc.dst_sel[k]];
> + s << ", ";
> + }
> s << "R";
> print_sel(s, n.bc.src_gpr, n.bc.src_rel, INDEX_LOOP, 0);
> s << ".";
> unsigned vtx = n.bc.op_ptr->flags & FF_VTX;
> - unsigned num_src_comp = vtx ? ctx.is_cayman() ? 2 : 1 : 4;
> + unsigned num_src_comp = gds ? 3 : vtx ? ctx.is_cayman() ? 2 : 1 : 4;
> for (unsigned k = 0; k < num_src_comp; ++k)
> s << chans[n.bc.src_sel[k]];
> @@ -450,9 +453,12 @@ void bc_dump::dump(fetch_node& n) {
> s << " + " << n.bc.offset[0] << "b ";
> }
>- s << ", RID:" << n.bc.resource_id;
> + if (!gds)
> + s << ", RID:" << n.bc.resource_id;
> +
> + if (gds) {
>- if (vtx) {
> + } else if (vtx) {
> s << " " << fetch_type[n.bc.fetch_type];
> if (!ctx.is_cayman() && n.bc.mega_fetch_count)
> s << " MFC:" << n.bc.mega_fetch_count;
> diff --git a/src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc b/src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc
> index 50f73d7..e775499 100644
> --- a/src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc
> +++ b/src/gallium/drivers/r600/sb/sb_bc_fmt_def.inc
> @@ -541,3 +541,31 @@ BC_FIELD(TEX_WORD2, SRC_SEL_Y, SSY, 25, 23)
> BC_FIELD(TEX_WORD2, SRC_SEL_Z, SSZ, 28, 26)
> BC_FIELD(TEX_WORD2, SRC_SEL_W, SSW, 31, 29)
> BC_FORMAT_END(TEX_WORD2)
> +
> +BC_FORMAT_BEGIN_HW(MEM_GDS_WORD0, EGCM)
> +BC_FIELD(MEM_GDS_WORD0, MEM_INST, M_INST, 4, 0)
> +BC_FIELD(MEM_GDS_WORD0, MEM_OP, M_OP, 10, 8)
> +BC_FIELD(MEM_GDS_WORD0, SRC_GPR, S_GPR, 17, 11)
> +BC_FIELD(MEM_GDS_WORD0, SRC_REL, SR, 19, 18)
> +BC_FIELD(MEM_GDS_WORD0, SRC_SEL_X, SSX, 22, 20)
> +BC_FIELD(MEM_GDS_WORD0, SRC_SEL_Y, SSY, 25, 23)
> +BC_FIELD(MEM_GDS_WORD0, SRC_SEL_Z, SSZ, 28, 26)
> +BC_FORMAT_END(MEM_GDS_WORD0)
> +
> +BC_FORMAT_BEGIN_HW(MEM_GDS_WORD1, EGCM)
> +BC_FIELD(MEM_GDS_WORD1, DST_GPR, D_GPR, 6, 0)
> +BC_FIELD(MEM_GDS_WORD1, DST_REL, DR, 8, 7)
> +BC_FIELD(MEM_GDS_WORD1, GDS_OP, G_OP, 14, 9)
> +BC_FIELD(MEM_GDS_WORD1, SRC_GPR, S_GPR, 22, 16)
> +BC_FIELD(MEM_GDS_WORD1, UAV_INDEX_MODE, U_IM, 25, 24)
> +BC_FIELD(MEM_GDS_WORD1, UAV_ID, U_ID, 29, 26)
> +BC_FIELD(MEM_GDS_WORD1, ALLOC_CONSUME, AC, 30, 30)
> +BC_FIELD(MEM_GDS_WORD1, BCARD_FIRST_REQ, BFR, 31, 31)
> +BC_FORMAT_END(MEM_GDS_WORD1)
> +
> +BC_FORMAT_BEGIN_HW(MEM_GDS_WORD2, EGCM)
> +BC_FIELD(MEM_GDS_WORD2, DST_SEL_X, DSX, 2, 0)
> +BC_FIELD(MEM_GDS_WORD2, DST_SEL_Y, DSY, 5, 3)
> +BC_FIELD(MEM_GDS_WORD2, DST_SEL_Z, DSZ, 8, 6)
> +BC_FIELD(MEM_GDS_WORD2, DST_SEL_W, DSW, 11, 9)
> +BC_FORMAT_END(MEM_GDS_WORD2)
> \ No newline at end of file
With src_rel/dst_rel dealt with as suggested above,
Reviewed-by: Glenn Kennard <glenn.kennard at gmail.com>
More information about the mesa-dev mailing list