[Mesa-dev] [PATCH] r600/fp64: fix integer->double conversion
Roland Scheidegger
sroland at vmware.com
Fri Feb 2 16:38:04 UTC 2018
Am 02.02.2018 um 05:56 schrieb Dave Airlie:
> From: Dave Airlie <airlied at redhat.com>
>
> Doing a straight uint/int->fp32->fp64 conversion causes
> some precision issues, Roland suggested splitting the
> integer into two portions and doing two separate
> int->fp32->fp64 conversions then adding the results.
>
> This passes the tests in CTS and piglit.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/gallium/drivers/r600/r600_shader.c | 118 +++++++++++++++++++++++++--------
> 1 file changed, 90 insertions(+), 28 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index 13aa681049..22f2736b03 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -4490,44 +4490,106 @@ static int egcm_int_to_double(struct r600_shader_ctx *ctx)
> {
> struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
> struct r600_bytecode_alu alu;
> - int i, r;
> - int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
> + int i, c, r;
> + int write_mask = inst->Dst[0].Register.WriteMask;
> + int temp_reg = r600_get_temp(ctx);
>
> assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D ||
> inst->Instruction.Opcode == TGSI_OPCODE_U2D);
>
> - for (i = 0; i <= (lasti+1)/2; i++) {
> - memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> - alu.op = ctx->inst_info->op;
> -
> - r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
> - alu.dst.sel = ctx->temp_reg;
> - alu.dst.chan = i;
> - alu.dst.write = 1;
> - alu.last = 1;
> + for (c = 0; c < 2; c++) {
> + int dchan = c * 2;
> + if (write_mask & (0x3 << dchan)) {
> + /* split into 24-bit int and 8-bit int */
> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> + alu.op = ALU_OP2_AND_INT;
> + alu.dst.sel = temp_reg;
> + alu.dst.chan = dchan;
> + r600_bytecode_src(&alu.src[0], &ctx->src[0], c);
> + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
> + alu.src[1].value = 0xffffff00;
> + alu.dst.write = 1;
> + r = r600_bytecode_add_alu(ctx->bc, &alu);
> + if (r)
> + return r;
>
> - r = r600_bytecode_add_alu(ctx->bc, &alu);
> - if (r)
> - return r;
> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> + alu.op = ALU_OP2_AND_INT;
> + alu.dst.sel = temp_reg;
> + alu.dst.chan = dchan + 1;
> + r600_bytecode_src(&alu.src[0], &ctx->src[0], c);
> + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
> + alu.src[1].value = 0xff;
> + alu.dst.write = 1;
> + alu.last = 1;
> + r = r600_bytecode_add_alu(ctx->bc, &alu);
> + if (r)
> + return r;
> + }
> }
>
> - for (i = 0; i <= lasti; i++) {
> - memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> - alu.op = ALU_OP1_FLT32_TO_FLT64;
> + for (c = 0; c < 2; c++) {
> + int dchan = c * 2;
> + if (write_mask & (0x3 << dchan)) {
> + for (i = dchan; i <= dchan + 1; i++) {
> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> + alu.op = i == dchan ? ctx->inst_info->op : ALU_OP1_UINT_TO_FLT;
>
> - alu.src[0].chan = i/2;
> - if (i%2 == 0)
> - alu.src[0].sel = ctx->temp_reg;
> - else {
> - alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
> - alu.src[0].value = 0x0;
> + alu.src[0].sel = temp_reg;
> + alu.src[0].chan = i;
> + alu.dst.sel = temp_reg;
> + alu.dst.chan = i;
> + alu.dst.write = 1;
> + alu.last = i == dchan + 1;
> +
> + r = r600_bytecode_add_alu(ctx->bc, &alu);
> + if (r)
> + return r;
> + }
> }
That'll still work on Evergreen (Cypress), where UINT_TO_FLT is scalar, right?
I just realized that for the low 8 bits you could actually skip the
masking and use UBYTE0_FLT instead, if that instruction does what the
docs say :-). I guess on Cayman that won't be much of an
improvement, but it might shave off another instruction or two on Cypress
(as UBYTE0_FLT is a vector instruction)...
In any case,
Reviewed-by: Roland Scheidegger <sroland at vmware.com>
> - tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
> - alu.last = i == lasti;
> + }
>
> - r = r600_bytecode_add_alu(ctx->bc, &alu);
> - if (r)
> - return r;
> + for (c = 0; c < 2; c++) {
> + int dchan = c * 2;
> + if (write_mask & (0x3 << dchan)) {
> + for (i = 0; i < 4; i++) {
> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> + alu.op = ALU_OP1_FLT32_TO_FLT64;
> +
> + alu.src[0].chan = dchan + (i / 2);
> + if (i == 0 || i == 2)
> + alu.src[0].sel = temp_reg;
> + else {
> + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
> + alu.src[0].value = 0x0;
> + }
> + alu.dst.sel = ctx->temp_reg;
> + alu.dst.chan = i;
> + alu.last = i == 3;
> + alu.dst.write = 1;
> +
> + r = r600_bytecode_add_alu(ctx->bc, &alu);
> + if (r)
> + return r;
> + }
> +
> + for (i = 0; i <= 1; i++) {
> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> + alu.op = ALU_OP2_ADD_64;
> +
> + alu.src[0].chan = fp64_switch(i);
> + alu.src[0].sel = ctx->temp_reg;
> +
> + alu.src[1].chan = fp64_switch(i + 2);
> + alu.src[1].sel = ctx->temp_reg;
> + tgsi_dst(ctx, &inst->Dst[0], dchan + i, &alu.dst);
> + alu.last = i == 1;
> +
> + r = r600_bytecode_add_alu(ctx->bc, &alu);
> + if (r)
> + return r;
> + }
> + }
> }
>
> return 0;
>
More information about the mesa-dev mailing list