[Mesa-dev] [PATCH 3/4] r600/radeonsi: implement new float comparison instructions

Sun Nov 23 16:41:23 PST 2014

Am 23.11.2014 um 22:34 schrieb Marek Olšák:
> Hi Roland,
> 
> What's the reason for not using an ordered comparison for SNE?
Well, I guess for consistency with the new Fxxx versions (essentially
dx10). All ordered except not equal which is unordered (I have no idea
if that's what the r600 does with the non-dx10 version of this opcode).
I have to say though for the old opcodes this behavior is probably not
really required. GL most likely allows you to do whatever you want
anyway. These should be used mostly for non-integer-capable hardware and
not everybody might do the same (d3d9 and hardware didn't really support
NaNs so it didn't matter). Behavior with NaNs in general (or Infs for
that matter) is not really that well defined in gallium/tgsi, you can't
actually distinguish a shader which needs to honor dx10 (or opencl or
whatever) rules for accuracy / non-normal number treatment from one
which does not have to (at least older GL versions allowed just about
everything wrt float math).

Roland


> 
> Marek
> 
> On Tue, Aug 13, 2013 at 7:04 PM,  <sroland at vmware.com> wrote:
>> From: Roland Scheidegger <sroland at vmware.com>
>>
>> Also use ordered comparisons for old cmp instructions. Untested.
>> ---
>>  src/gallium/drivers/r600/r600_shader.c             |   18 ++++---
>>  .../drivers/radeon/radeon_setup_tgsi_llvm.c        |   49 ++++++++++++++++----
>>  2 files changed, 48 insertions(+), 19 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
>> index 37298cc..fb766c4 100644
>> --- a/src/gallium/drivers/r600/r600_shader.c
>> +++ b/src/gallium/drivers/r600/r600_shader.c
>> @@ -5743,11 +5743,10 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
>>         {105,                   0, ALU_OP0_NOP, tgsi_unsupported},
>>         {106,                   0, ALU_OP0_NOP, tgsi_unsupported},
>>         {TGSI_OPCODE_NOP,       0, ALU_OP0_NOP, tgsi_unsupported},
>> -       /* gap */
>> -       {108,                   0, ALU_OP0_NOP, tgsi_unsupported},
>> -       {109,                   0, ALU_OP0_NOP, tgsi_unsupported},
>> -       {110,                   0, ALU_OP0_NOP, tgsi_unsupported},
>> -       {111,                   0, ALU_OP0_NOP, tgsi_unsupported},
>> +       {TGSI_OPCODE_FSEQ,      0, ALU_OP2_SETE_DX10, tgsi_op2},
>> +       {TGSI_OPCODE_FSGE,      0, ALU_OP2_SETGE_DX10, tgsi_op2},
>> +       {TGSI_OPCODE_FSLT,      0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
>> +       {TGSI_OPCODE_FSNE,      0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
>>         {TGSI_OPCODE_NRM4,      0, ALU_OP0_NOP, tgsi_unsupported},
>>         {TGSI_OPCODE_CALLNZ,    0, ALU_OP0_NOP, tgsi_unsupported},
>>         /* gap */
>> @@ -5936,11 +5935,10 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
>>         {105,                   0, ALU_OP0_NOP, tgsi_unsupported},
>>         {106,                   0, ALU_OP0_NOP, tgsi_unsupported},
>>         {TGSI_OPCODE_NOP,       0, ALU_OP0_NOP, tgsi_unsupported},
>> -       /* gap */
>> -       {108,                   0, ALU_OP0_NOP, tgsi_unsupported},
>> -       {109,                   0, ALU_OP0_NOP, tgsi_unsupported},
>> -       {110,                   0, ALU_OP0_NOP, tgsi_unsupported},
>> -       {111,                   0, ALU_OP0_NOP, tgsi_unsupported},
>> +       {TGSI_OPCODE_FSEQ,      0, ALU_OP2_SETE_DX10, tgsi_op2},
>> +       {TGSI_OPCODE_FSGE,      0, ALU_OP2_SETGE_DX10, tgsi_op2},
>> +       {TGSI_OPCODE_FSLT,      0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
>> +       {TGSI_OPCODE_FSNE,      0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
>>         {TGSI_OPCODE_NRM4,      0, ALU_OP0_NOP, tgsi_unsupported},
>>         {TGSI_OPCODE_CALLNZ,    0, ALU_OP0_NOP, tgsi_unsupported},
>>         /* gap */
>> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> index 7a47746..8ff9abd 100644
>> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
>> @@ -850,18 +850,16 @@ static void emit_cmp(
>>         LLVMRealPredicate pred;
>>         LLVMValueRef cond;
>>
>> -       /* XXX I'm not sure whether to do unordered or ordered comparisons,
>> -        * but llvmpipe uses unordered comparisons, so for consistency we use
>> -        * unordered.  (The authors of llvmpipe aren't sure about using
>> -        * unordered vs ordered comparisons either.
>> +       /* Use ordered for everything but NE (which is usual for
>> +        * float comparisons)
>>          */
>>         switch (emit_data->inst->Instruction.Opcode) {
>> -       case TGSI_OPCODE_SGE: pred = LLVMRealUGE; break;
>> -       case TGSI_OPCODE_SEQ: pred = LLVMRealUEQ; break;
>> -       case TGSI_OPCODE_SLE: pred = LLVMRealULE; break;
>> -       case TGSI_OPCODE_SLT: pred = LLVMRealULT; break;
>> +       case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
>> +       case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
>> +       case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
>> +       case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
>>         case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
>> -       case TGSI_OPCODE_SGT: pred = LLVMRealUGT; break;
>> +       case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
>>         default: assert(!"unknown instruction"); pred = 0; break;
>>         }
>>
>> @@ -872,6 +870,35 @@ static void emit_cmp(
>>                 cond, bld_base->base.one, bld_base->base.zero, "");
>>  }
>>
>> +static void emit_fcmp(
>> +               const struct lp_build_tgsi_action *action,
>> +               struct lp_build_tgsi_context * bld_base,
>> +               struct lp_build_emit_data * emit_data)
>> +{
>> +       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
>> +       LLVMContextRef context = bld_base->base.gallivm->context;
>> +       LLVMRealPredicate pred;
>> +
>> +       /* Use ordered for everything but NE (which is usual for
>> +        * float comparisons)
>> +        */
>> +       switch (emit_data->inst->Instruction.Opcode) {
>> +       case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
>> +       case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
>> +       case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
>> +       case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
>> +       default: assert(!"unknown instruction"); pred = 0; break;
>> +       }
>> +
>> +       LLVMValueRef v = LLVMBuildFCmp(builder, pred,
>> +                       emit_data->args[0], emit_data->args[1],"");
>> +
>> +       v = LLVMBuildSExtOrBitCast(builder, v,
>> +                       LLVMInt32TypeInContext(context), "");
>> +
>> +       emit_data->output[emit_data->chan] = v;
>> +}
>> +
>>  static void emit_not(
>>                 const struct lp_build_tgsi_action * action,
>>                 struct lp_build_tgsi_context * bld_base,
>> @@ -1236,6 +1263,10 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>>         bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
>>         bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
>>         bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
>> +       bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
>> +       bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
>> +       bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
>> +       bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
>>         bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem;
>>         bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
>>         bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
>> --
>> 1.7.9.5
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=AAIBaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Vjtt0vs_iqoI31UfJxBl7yv9I2FeiaeAYgMTLKRBc_I&m=CUj8HmqjJ9d3n9C7KwZVi8QPq2HM8oyttmm2OTLqPxM&s=QeHyY6qHoK29EFubbeH32OzFG_fo-7ehi4U7_U-hMNY&e=