[Mesa-dev] [PATCH v2 10.1/11] nir: Add a fdot instruction that replicates the result to a vec4

Fri Sep 11 09:07:33 PDT 2015

On Fri, Sep 11, 2015 at 9:00 AM, Connor Abbott <cwabbott0 at gmail.com> wrote:
> On Fri, Sep 11, 2015 at 11:52 AM, Jason Ekstrand <jason at jlekstrand.net> wrote:
>> Fortunately, nir_constant_expr already auto-splats if "dst" never shows up
>> in the constant expression field so we don't need to do anything there.
>>
>> Cc: Connor Abbott <cwabbott0 at gmail.com>
>> ---
>>  src/glsl/nir/nir.h                | 6 ++++++
>>  src/glsl/nir/nir_opcodes.py       | 3 +++
>>  src/glsl/nir/nir_opt_algebraic.py | 3 +++
>>  3 files changed, 12 insertions(+)
>>
>> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
>> index 3f693b1..4e4543a 100644
>> --- a/src/glsl/nir/nir.h
>> +++ b/src/glsl/nir/nir.h
>> @@ -1434,6 +1434,12 @@ typedef struct nir_shader_compiler_options {
>>     /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
>>     bool lower_scmp;
>>
>> +   /* Does the native fdot instruction replicate its result for four
>> +    * components?  If so, then opt_algebraic_late will turn all fdotN
>> +    * instructions into fdot_replicatedN instructions.
>> +    */
>> +   bool fdot_replicates;
>> +
>>     /**
>>      * Does the driver support real 32-bit integers?  (Otherwise, integers
>>      * are simulated by floats.)
>> diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
>> index df5b7e2..495d109 100644
>> --- a/src/glsl/nir/nir_opcodes.py
>> +++ b/src/glsl/nir/nir_opcodes.py
>> @@ -453,6 +453,9 @@ binop("fxor", tfloat, commutative,
>>  binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
>>               "{src}")
>>
>> +binop_reduce("fdot_replicated", 4, tfloat, tfloat,
>> +             "{src0} * {src1}", "{src0} + {src1}", "{src}")
>> +
>
> The {}'s are a relic of when the constant-folding stuff was
> implemented using Python string formatting. We shouldn't add more of
> them, and we should probably fix the ones above too. Other than that,
> these 2 patches have my R-b.

As I said on IRC, the {}'s are still needed for binop_reduce.  It uses
them as placeholders to reconstruct the actual expression, which then
no longer contains any {}'s.

--Jason

>>  binop("fmin", tfloat, "", "fminf(src0, src1)")
>>  binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
>>  binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
>> diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
>> index 226e0a8..acc3b04 100644
>> --- a/src/glsl/nir/nir_opt_algebraic.py
>> +++ b/src/glsl/nir/nir_opt_algebraic.py
>> @@ -240,6 +240,9 @@ late_optimizations = [
>>     (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
>>     (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
>>     (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
>> +   (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
>> +   (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
>> +   (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
>>  ]
>>
>>  print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
>> --
>> 2.5.0.400.gff86faf
>>