[Mesa-dev] [PATCH v2 3/5] r600: implement DDIV

Roland Scheidegger sroland at vmware.com
Thu Jan 19 17:06:07 UTC 2017


Oh, in this case a cap bit would have been fine then :-).

I was just thinking since d3d11 requires (if doubles are supported) real
ddiv (unlike fdiv), hw would have some way to do this without too much
trouble. But I guess it wasn't deemed important enough then.

Roland

Am 19.01.2017 um 17:50 schrieb Ilia Mirkin:
> NVIDIA can't do division either (double or otherwise - but double is
> even worse since the rcp is only half there). It has some fancy
> functions which get things like division, sqrt, etc.
> 
> 2017-01-19 11:39 GMT-05:00 Roland Scheidegger <sroland at vmware.com>:
>> Double-capable Evergreen/NI can't do ddiv? Interesting. I wonder how
>> it's made d3d11 double conformant...
>>
>> Roland
>>
>> Am 19.01.2017 um 14:59 schrieb Nicolai Hähnle:
>>> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>>>
>>> ---
>>>  src/gallium/drivers/r600/r600_shader.c | 59 ++++++++++++++++++++++++++++++++++
>>>  1 file changed, 59 insertions(+)
>>>
>>> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
>>> index 5c4bc91..eaabb04 100644
>>> --- a/src/gallium/drivers/r600/r600_shader.c
>>> +++ b/src/gallium/drivers/r600/r600_shader.c
>>> @@ -4384,20 +4384,77 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
>>>                       alu.last = 1;
>>>               r = r600_bytecode_add_alu(ctx->bc, &alu);
>>>               if (r)
>>>                       return r;
>>>       }
>>>
>>>       return 0;
>>>  }
>>>
>>>  /*
>>> + * Emit RECIP_64 + MUL_64 to implement division.
>>> + */
>>> +static int cayman_ddiv_instr(struct r600_shader_ctx *ctx)
>>> +{
>>> +     struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
>>> +     int r;
>>> +     struct r600_bytecode_alu alu;
>>> +     int t1 = ctx->temp_reg;
>>> +     int k;
>>> +
>>> +     /* Only support one double at a time. This is the same constraint as
>>> +      * in DMUL lowering. */
>>> +     assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
>>> +            inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
>>> +
>>> +     k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
>>> +
>>> +     r = cayman_emit_unary_double_raw(ctx->bc, ALU_OP2_RECIP_64, t1, &ctx->src[1], false);
>>> +     if (r)
>>> +             return r;
>>> +
>>> +     for (int i = 0; i < 4; i++) {
>>> +             memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>>> +             alu.op = ALU_OP2_MUL_64;
>>> +
>>> +             r600_bytecode_src(&alu.src[0], &ctx->src[0], k * 2 + ((i == 3) ? 0 : 1));
>>> +
>>> +             alu.src[1].sel = t1;
>>> +             alu.src[1].chan = (i == 3) ? 0 : 1;
>>> +
>>> +             alu.dst.sel = t1;
>>> +             alu.dst.chan = i;
>>> +             alu.dst.write = 1;
>>> +             if (i == 3)
>>> +                     alu.last = 1;
>>> +             r = r600_bytecode_add_alu(ctx->bc, &alu);
>>> +             if (r)
>>> +                     return r;
>>> +     }
>>> +
>>> +     for (int i = 0; i < 2; i++) {
>>> +             memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>>> +             alu.op = ALU_OP1_MOV;
>>> +             alu.src[0].sel = t1;
>>> +             alu.src[0].chan = i;
>>> +             tgsi_dst(ctx, &inst->Dst[0], k * 2 + i, &alu.dst);
>>> +             alu.dst.write = 1;
>>> +             if (i == 1)
>>> +                     alu.last = 1;
>>> +             r = r600_bytecode_add_alu(ctx->bc, &alu);
>>> +             if (r)
>>> +                     return r;
>>> +     }
>>> +     return 0;
>>> +}
>>> +
>>> +/*
>>>   * r600 - trunc to -PI..PI range
>>>   * r700 - normalize by dividing by 2PI
>>>   * see fdo bug 27901
>>>   */
>>>  static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
>>>  {
>>>       int r;
>>>       struct r600_bytecode_alu alu;
>>>
>>>       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>>> @@ -9393,20 +9450,21 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
>>>       [TGSI_OPCODE_UMSB]      = { ALU_OP1_FFBH_UINT, tgsi_msb},
>>>       [TGSI_OPCODE_INTERP_CENTROID]   = { ALU_OP0_NOP, tgsi_interp_egcm},
>>>       [TGSI_OPCODE_INTERP_SAMPLE]     = { ALU_OP0_NOP, tgsi_interp_egcm},
>>>       [TGSI_OPCODE_INTERP_OFFSET]     = { ALU_OP0_NOP, tgsi_interp_egcm},
>>>       [TGSI_OPCODE_F2D]       = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
>>>       [TGSI_OPCODE_D2F]       = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
>>>       [TGSI_OPCODE_DABS]      = { ALU_OP1_MOV, tgsi_op2_64},
>>>       [TGSI_OPCODE_DNEG]      = { ALU_OP2_ADD_64, tgsi_dneg},
>>>       [TGSI_OPCODE_DADD]      = { ALU_OP2_ADD_64, tgsi_op2_64},
>>>       [TGSI_OPCODE_DMUL]      = { ALU_OP2_MUL_64, cayman_mul_double_instr},
>>> +     [TGSI_OPCODE_DDIV]      = { 0, cayman_ddiv_instr },
>>>       [TGSI_OPCODE_DMAX]      = { ALU_OP2_MAX_64, tgsi_op2_64},
>>>       [TGSI_OPCODE_DMIN]      = { ALU_OP2_MIN_64, tgsi_op2_64},
>>>       [TGSI_OPCODE_DSLT]      = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
>>>       [TGSI_OPCODE_DSGE]      = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
>>>       [TGSI_OPCODE_DSEQ]      = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
>>>       [TGSI_OPCODE_DSNE]      = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
>>>       [TGSI_OPCODE_DRCP]      = { ALU_OP2_RECIP_64, cayman_emit_double_instr},
>>>       [TGSI_OPCODE_DSQRT]     = { ALU_OP2_SQRT_64, cayman_emit_double_instr},
>>>       [TGSI_OPCODE_DMAD]      = { ALU_OP3_FMA_64, tgsi_op3_64},
>>>       [TGSI_OPCODE_DFMA]      = { ALU_OP3_FMA_64, tgsi_op3_64},
>>> @@ -9615,20 +9673,21 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
>>>       [TGSI_OPCODE_UMSB]      = { ALU_OP1_FFBH_UINT, tgsi_msb},
>>>       [TGSI_OPCODE_INTERP_CENTROID]   = { ALU_OP0_NOP, tgsi_interp_egcm},
>>>       [TGSI_OPCODE_INTERP_SAMPLE]     = { ALU_OP0_NOP, tgsi_interp_egcm},
>>>       [TGSI_OPCODE_INTERP_OFFSET]     = { ALU_OP0_NOP, tgsi_interp_egcm},
>>>       [TGSI_OPCODE_F2D]       = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
>>>       [TGSI_OPCODE_D2F]       = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
>>>       [TGSI_OPCODE_DABS]      = { ALU_OP1_MOV, tgsi_op2_64},
>>>       [TGSI_OPCODE_DNEG]      = { ALU_OP2_ADD_64, tgsi_dneg},
>>>       [TGSI_OPCODE_DADD]      = { ALU_OP2_ADD_64, tgsi_op2_64},
>>>       [TGSI_OPCODE_DMUL]      = { ALU_OP2_MUL_64, cayman_mul_double_instr},
>>> +     [TGSI_OPCODE_DDIV]      = { 0, cayman_ddiv_instr },
>>>       [TGSI_OPCODE_DMAX]      = { ALU_OP2_MAX_64, tgsi_op2_64},
>>>       [TGSI_OPCODE_DMIN]      = { ALU_OP2_MIN_64, tgsi_op2_64},
>>>       [TGSI_OPCODE_DSLT]      = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
>>>       [TGSI_OPCODE_DSGE]      = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
>>>       [TGSI_OPCODE_DSEQ]      = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
>>>       [TGSI_OPCODE_DSNE]      = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
>>>       [TGSI_OPCODE_DRCP]      = { ALU_OP2_RECIP_64, cayman_emit_double_instr},
>>>       [TGSI_OPCODE_DSQRT]     = { ALU_OP2_SQRT_64, cayman_emit_double_instr},
>>>       [TGSI_OPCODE_DMAD]      = { ALU_OP3_FMA_64, tgsi_op3_64},
>>>       [TGSI_OPCODE_DFMA]      = { ALU_OP3_FMA_64, tgsi_op3_64},
>>>
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list