[Mesa-dev] [PATCH 3/4] r600: add ARB_query_buffer_object support

Fri Jan 26 11:49:05 UTC 2018

On 26 Jan. 2018 09:09, "Roland Scheidegger" <sroland at vmware.com> wrote:

Am 25.01.2018 um 01:40 schrieb Dave Airlie:
> From: Dave Airlie <airlied at redhat.com>
>
> This uses a different shader than radeonsi, as we can't address non-256
> aligned ssbos, which the radeonsi code does. This passes some extra
> offsets into the shader.
Couldn't you just require the query buffers to have sufficient alignment
in the first place, hence simplifying this? ssbo's need to have 256B
alignment as well, as do UBOs.

The compute shader is used to write results at various dword locations
inside the ssbo, so the buffer is aligned but one run of this shader needs
to write results at non zero offsets into the buffer.

Albeit I can't really see what GL would require, buffer object alignment
is quite a mystery to me in general...

>
> It also contains a set of u64 instruction implementation that may
> or may not be complete (at least the u64div is definitely not something
> that works outside this use-case). If r600 grows 64-bit integers,
> it will use the GLSL lowering for divmod.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---

...
> +static int emit_u64add(struct r600_shader_ctx *ctx, int op,
> +                    int treg,
> +                    int src0_sel, int src0_chan,
> +                    int src1_sel, int src1_chan)
> +{
> +     struct r600_bytecode_alu alu;
> +     int r;
> +     int opc;
> +
> +     if (op == ALU_OP2_ADD_INT)
> +             opc = ALU_OP2_ADDC_UINT;
> +     else
> +             opc = ALU_OP2_SUBB_UINT;
> +
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = op;            ;
> +     alu.dst.sel = treg;
> +     alu.dst.chan = 0;
> +     alu.dst.write = 1;
> +     alu.src[0].sel = src0_sel;
> +     alu.src[0].chan = src0_chan + 0;
> +     alu.src[1].sel = src1_sel;
> +     alu.src[1].chan = src1_chan + 0;
> +     alu.src[1].neg = 0;
> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
> +     if (r)
> +             return r;
> +
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = op;
> +     alu.dst.sel = treg;
> +     alu.dst.chan = 1;
> +     alu.dst.write = 1;
> +     alu.src[0].sel = src0_sel;
> +     alu.src[0].chan = src0_chan + 1;
> +     alu.src[1].sel = src1_sel;
> +     alu.src[1].chan = src1_chan + 1;
> +     alu.src[1].neg = 0;
> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
> +     if (r)
> +             return r;
> +
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = opc;
> +     alu.dst.sel = treg;
> +     alu.dst.chan = 2;
> +     alu.dst.write = 1;
> +     alu.last = 1;
> +     alu.src[0].sel = src0_sel;
> +     alu.src[0].chan = src0_chan + 0;
> +     alu.src[1].sel = src1_sel;
> +     alu.src[1].chan = src1_chan + 0;
> +     alu.src[1].neg = 0;
> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
> +     if (r)
> +             return r;
> +
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = op;
> +     alu.dst.sel = treg;
> +     alu.dst.chan = 1;
> +     alu.dst.write = 1;
> +     alu.src[0].sel = treg;
> +     alu.src[0].chan = 1;
> +     alu.src[1].sel = treg;
> +     alu.src[1].chan = 2;
> +     alu.last = 1;
> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
> +     if (r)
> +             return r;
> +     return 0;
> +}
> +
> +static int egcm_u64add(struct r600_shader_ctx *ctx)
Couldn't you call into emit_u64add for performing the actual add?
Or maybe it wouldn't really be simpler...

Unfortunately not, since it takes srcs from a tgsi inst, i could probably
make it call in here later.

> +{
> +     struct tgsi_full_instruction *inst = &ctx->parse.FullToken.
FullInstruction;
> +     struct r600_bytecode_alu alu;
> +     int r;
> +     int treg = ctx->temp_reg;
> +     int op = ALU_OP2_ADD_INT, opc = ALU_OP2_ADDC_UINT;
> +
> +     if (ctx->src[1].neg) {
> +             op = ALU_OP2_SUB_INT;
> +             opc = ALU_OP2_SUBB_UINT;
> +     }
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = op;            ;
> +     alu.dst.sel = treg;
> +     alu.dst.chan = 0;
> +     alu.dst.write = 1;
> +     r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
> +     r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
> +     alu.src[1].neg = 0;
> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
> +     if (r)
> +             return r;
> +
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = op;
> +     alu.dst.sel = treg;
> +     alu.dst.chan = 1;
> +     alu.dst.write = 1;
> +     r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
> +     r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
> +     alu.src[1].neg = 0;
> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
> +     if (r)
> +             return r;
> +
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = opc              ;
> +     alu.dst.sel = treg;
> +     alu.dst.chan = 2;
> +     alu.dst.write = 1;
> +     alu.last = 1;
> +     r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
> +     r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
> +     alu.src[1].neg = 0;
> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
> +     if (r)
> +             return r;
> +
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = op;
> +     tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
> +     alu.src[0].sel = treg;
> +     alu.src[0].chan = 1;
> +     alu.src[1].sel = treg;
> +     alu.src[1].chan = 2;
> +     alu.last = 1;
> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
> +     if (r)
> +             return r;
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = ALU_OP1_MOV;
> +     tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
> +     alu.src[0].sel = treg;
> +     alu.src[0].chan = 0;
> +     alu.last = 1;
> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
> +     if (r)
> +             return r;
> +     return 0;
> +}

But in any case, looks great to me (albeit I didn't check the math
really - that udiv is complex...)

Thanks,
Dave.

For the series:
Reviewed-by: Roland Scheidegger <sroland at vmware.com>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180126/d1428aee/attachment-0001.html>