[Mesa-dev] Mesa (master): nir/flrp: Lower flrp(±1, b, c) and flrp(a, ±1, c) differently

Ian Romanick idr at freedesktop.org
Tue May 7 15:30:36 UTC 2019


On 5/7/19 8:20 AM, Samuel Pitoiset wrote:
> This introduces glitches with Talos and Serious Sam 2017 with RADV...
> 
> Are you able to reproduce the problem with ANV?

Probably not very easily.  If you can figure out which shader it is, it
should be easy to figure out the problem from before / after NIR.

> On 5/7/19 8:01 AM, GitLab Mirror wrote:
>> Module: Mesa
>> Branch: master
>> Commit: 5b908db604b2f47bb8382047533e556db8d5f52b
>> URL:   
>> http://cgit.freedesktop.org/mesa/mesa/commit/?id=5b908db604b2f47bb8382047533e556db8d5f52b
>>
>>
>> Author: Ian Romanick <ian.d.romanick at intel.com>
>> Date:   Tue Aug 21 17:17:24 2018 -0700
>>
>> nir/flrp: Lower flrp(±1, b, c) and flrp(a, ±1, c) differently
>>
>> No changes on any other Intel platforms.
>>
>> v2: Rebase on 424372e5dd5 ("nir: Use the flrp lowering pass instead of
>> nir_opt_algebraic")
>>
>> Iron Lake and GM45 had similar results. (Iron Lake shown)
>> total instructions in shared programs: 8189888 -> 8153912 (-0.44%)
>> instructions in affected programs: 1199037 -> 1163061 (-3.00%)
>> helped: 4124
>> HURT: 10
>> helped stats (abs) min: 1 max: 40 x̄: 8.73 x̃: 9
>> helped stats (rel) min: 0.20% max: 86.96% x̄: 4.96% x̃: 3.02%
>> HURT stats (abs)   min: 1 max: 2 x̄: 1.20 x̃: 1
>> HURT stats (rel)   min: 1.06% max: 3.92% x̄: 1.62% x̃: 1.06%
>> 95% mean confidence interval for instructions value: -8.84 -8.56
>> 95% mean confidence interval for instructions %-change: -5.12% -4.77%
>> Instructions are helped.
>>
>> total cycles in shared programs: 188606710 -> 188426964 (-0.10%)
>> cycles in affected programs: 27505596 -> 27325850 (-0.65%)
>> helped: 4026
>> HURT: 77
>> helped stats (abs) min: 2 max: 646 x̄: 44.99 x̃: 46
>> helped stats (rel) min: <.01% max: 94.58% x̄: 2.35% x̃: 0.85%
>> HURT stats (abs)   min: 2 max: 376 x̄: 17.79 x̃: 6
>> HURT stats (rel)   min: <.01% max: 2.60% x̄: 0.22% x̃: 0.04%
>> 95% mean confidence interval for cycles value: -44.75 -42.87
>> 95% mean confidence interval for cycles %-change: -2.44% -2.17%
>> Cycles are helped.
>>
>> LOST:   3
>> GAINED: 35
>>
>> Reviewed-by: Matt Turner <mattst88 at gmail.com>
>>
>> ---
>>
>>   src/compiler/nir/nir_lower_flrp.c | 134
>> ++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 134 insertions(+)
>>
>> diff --git a/src/compiler/nir/nir_lower_flrp.c
>> b/src/compiler/nir/nir_lower_flrp.c
>> index 952068ec9cc..c041fefc52b 100644
>> --- a/src/compiler/nir/nir_lower_flrp.c
>> +++ b/src/compiler/nir/nir_lower_flrp.c
>> @@ -137,6 +137,89 @@ replace_with_fast(struct nir_builder *bld, struct
>> u_vector *dead_flrp,
>>      append_flrp_to_dead_list(dead_flrp, alu);
>>   }
>>   +/**
>> + * Replace flrp(a, b, c) with (b*c ± c) + a
>> + */
>> +static void
>> +replace_with_expanded_ffma_and_add(struct nir_builder *bld,
>> +                                   struct u_vector *dead_flrp,
>> +                                   struct nir_alu_instr *alu, bool
>> subtract_c)
>> +{
>> +   nir_ssa_def *const a = nir_ssa_for_alu_src(bld, alu, 0);
>> +   nir_ssa_def *const b = nir_ssa_for_alu_src(bld, alu, 1);
>> +   nir_ssa_def *const c = nir_ssa_for_alu_src(bld, alu, 2);
>> +
>> +   nir_ssa_def *const b_times_c = nir_fadd(bld, b, c);
>> +   nir_instr_as_alu(b_times_c->parent_instr)->exact = alu->exact;
>> +
>> +   nir_ssa_def *inner_sum;
>> +
>> +   if (subtract_c) {
>> +      nir_ssa_def *const neg_c = nir_fneg(bld, c);
>> +      nir_instr_as_alu(neg_c->parent_instr)->exact = alu->exact;
>> +
>> +      inner_sum = nir_fadd(bld, b_times_c, neg_c);
>> +   } else {
>> +      inner_sum = nir_fadd(bld, b_times_c, c);
>> +   }
>> +
>> +   nir_instr_as_alu(inner_sum->parent_instr)->exact = alu->exact;
>> +
>> +   nir_ssa_def *const outer_sum = nir_fadd(bld, inner_sum, a);
>> +   nir_instr_as_alu(outer_sum->parent_instr)->exact = alu->exact;
>> +
>> +   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
>> nir_src_for_ssa(outer_sum));
>> +
>> +   /* DO NOT REMOVE the original flrp yet.  Many of the lowering
>> choices are
>> +    * based on other uses of the sources.  Removing the flrp may
>> cause the
>> +    * last flrp in a sequence to make a different, incorrect choice.
>> +    */
>> +   append_flrp_to_dead_list(dead_flrp, alu);
>> +}
>> +
>> +/**
>> + * Determines whether a swizzled source is constant w/ all components
>> the same.
>> + *
>> + * The value of the constant is stored in \c result.
>> + *
>> + * \return
>> + * True if all components of the swizzled source are the same constant.
>> + * Otherwise false is returned.
>> + */
>> +static bool
>> +all_same_constant(const nir_alu_instr *instr, unsigned src, double
>> *result)
>> +{
>> +   nir_const_value *val = nir_src_as_const_value(instr->src[src].src);
>> +
>> +   if (!val)
>> +      return false;
>> +
>> +   const uint8_t *const swizzle = instr->src[src].swizzle;
>> +   const unsigned num_components =
>> nir_dest_num_components(instr->dest.dest);
>> +
>> +   if (instr->dest.dest.ssa.bit_size == 32) {
>> +      const float first = val[swizzle[0]].f32;
>> +
>> +      for (unsigned i = 1; i < num_components; i++) {
>> +         if (val[swizzle[i]].f32 != first)
>> +            return false;
>> +      }
>> +
>> +      *result = first;
>> +   } else {
>> +      const double first = val[swizzle[0]].f64;
>> +
>> +      for (unsigned i = 1; i < num_components; i++) {
>> +         if (val[swizzle[i]].f64 != first)
>> +            return false;
>> +      }
>> +
>> +      *result = first;
>> +   }
>> +
>> +   return true;
>> +}
>> +
>>   static bool
>>   sources_are_constants_with_similar_magnitudes(const nir_alu_instr
>> *instr)
>>   {
>> @@ -265,6 +348,57 @@ convert_flrp_instruction(nir_builder *bld,
>>         return;
>>      }
>>   +   /*
>> +    * - If x = 1:
>> +    *
>> +    *        (yt + -t) + 1
>> +    *
>> +    * - If x = -1:
>> +    *
>> +    *        (yt + t) - 1
>> +    *
>> +    *   In both cases, x is used in place of ±1 for simplicity.  Both
>> forms
>> +    *   lend to ffma generation on platforms that support ffma.
>> +    */
>> +   double src0_as_constant;
>> +   if (all_same_constant(alu, 0, &src0_as_constant)) {
>> +      if (src0_as_constant == 1.0) {
>> +         replace_with_expanded_ffma_and_add(bld, dead_flrp, alu,
>> +                                            true /* subtract t */);
>> +         return;
>> +      } else if (src0_as_constant == -1.0) {
>> +         replace_with_expanded_ffma_and_add(bld, dead_flrp, alu,
>> +                                            false /* add t */);
>> +         return;
>> +      }
>> +   }
>> +
>> +   /*
>> +    * - If y = ±1:
>> +    *
>> +    *        x(1 - t) + yt
>> +    *
>> +    *   In this case either the multiply in yt will be eliminated by
>> +    *   nir_opt_algebraic.  If FMA is supported, this results in
>> fma(x, (1 -
>> +    *   t), ±t) for two instructions.  If FMA is not supported, then
>> the cost
>> +    *   is 3 instructions.  We rely on nir_opt_algebraic to generate
>> the FMA
>> +    *   instructions as well.
>> +    *
>> +    *   Another possible replacement is
>> +    *
>> +    *        -xt + x ± t
>> +    *
>> +    *   Some groupings of this may be better on some platforms in some
>> +    *   circumstances, bit it is probably dependent on scheduling. 
>> Futher
>> +    *   investigation may be required.
>> +    */
>> +   double src1_as_constant;
>> +   if ((all_same_constant(alu, 1, &src1_as_constant) &&
>> +        (src1_as_constant == -1.0 || src1_as_constant == 1.0))) {
>> +      replace_with_strict(bld, dead_flrp, alu);
>> +      return;
>> +   }
>> +
>>      if (have_ffma) {
>>         if (always_precise) {
>>            replace_with_strict_ffma(bld, dead_flrp, alu);
>>
>> _______________________________________________
>> mesa-commit mailing list
>> mesa-commit at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-commit
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list