[Mesa-dev] [PATCH 04/28] nir: add support for flushing to zero denorm constants

Mon Dec 10 10:09:40 UTC 2018

On 05/12/2018 18:32, Connor Abbott wrote:
> Given that other places call nir_eval_const_opcode(), and they'll be
> broken unless they also flush denorms, it's probably a good idea to
> move all this into nir_eval_const_opcode() itself.
> 

Thanks for the feedback. I will do this and fix the rest of the things you
mentioned here.

Sam

> On Wed, Dec 5, 2018 at 4:56 PM Samuel Iglesias Gonsálvez
> <siglesias at igalia.com> wrote:
>>
>> Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
>> ---
>>  src/compiler/nir/nir_opt_constant_folding.c | 74 +++++++++++++++++++--
>>  1 file changed, 68 insertions(+), 6 deletions(-)
>>
>> diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
>> index 1fca530af24..a6df8284e17 100644
>> --- a/src/compiler/nir/nir_opt_constant_folding.c
>> +++ b/src/compiler/nir/nir_opt_constant_folding.c
>> @@ -39,7 +39,7 @@ struct constant_fold_state {
>>  };
>>
>>  static bool
>> -constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
>> +constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx, unsigned execution_mode)
>>  {
>>     nir_const_value src[NIR_MAX_VEC_COMPONENTS];
>>
>> @@ -77,12 +77,39 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
>>           switch(load_const->def.bit_size) {
>>           case 64:
>>              src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]];
>> +            if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP64 &&
>> +                (nir_op_infos[instr->op].input_types[i] == nir_type_float ||
>> +                 nir_op_infos[instr->op].input_types[i] == nir_type_float64)) {
>> +               if (src[i].u64[j] < 0x0010000000000000)
>> +                  src[i].u64[j] = 0;
>> +               if (src[i].u64[j] & 0x8000000000000000 &&
>> +                   !(src[i].u64[j] & 0x7ff0000000000000))
>> +                  src[i].u64[j] = 0x8000000000000000;
>> +            }
> 
> Given that this code is duplicated for inputs and outputs in this
> patch, maybe refactor to a shared helper?
> 
>>              break;
>>           case 32:
>>              src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]];
>> +            if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP32 &&
>> +                (nir_op_infos[instr->op].input_types[i] == nir_type_float ||
>> +                 nir_op_infos[instr->op].input_types[i] == nir_type_float32)) {
>> +                   if (src[i].u32[j] < 0x00800000)
>> +                      src[i].u32[j] = 0;
>> +                   if (src[i].u32[j] & 0x80000000 &&
>> +                       !(src[i].u32[j] & 0x7f800000))
>> +                      src[i].u32[j] = 0x80000000;
>> +                }
>>              break;
>>           case 16:
>>              src[i].u16[j] = load_const->value.u16[instr->src[i].swizzle[j]];
>> +            if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP16 &&
>> +                (nir_op_infos[instr->op].input_types[i] == nir_type_float ||
>> +                 nir_op_infos[instr->op].input_types[i] == nir_type_float16)) {
>> +                   if (src[i].u16[j] < 0x0400)
>> +                      src[i].u16[j] = 0;
>> +                   if (src[i].u16[j] & 0x8000 &&
>> +                       !(src[i].u16[j] & 0x7c00))
>> +                      src[i].u16[j] = 0x8000;
>> +                }
>>              break;
>>           case 8:
>>              src[i].u8[j] = load_const->value.u8[instr->src[i].swizzle[j]];
>> @@ -106,6 +133,40 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
>>        nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components,
>>                              bit_size, src);
>>
>> +   for (unsigned j = 0; j < instr->dest.dest.ssa.num_components; j++) {
>> +      if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP64 &&
>> +          bit_size == 64 &&
>> +          (nir_op_infos[instr->op].output_type == nir_type_float ||
>> +           nir_op_infos[instr->op].output_type == nir_type_float64)) {
> 
> The bit_size doesn't have to equal the destination bitsize; it's the
> bitsize for inputs and outputs which are unsized (e.g. output_type ==
> nir_type_float instead of nir_type_float32). This should be
> (output_type == nir_type_float && bit_size == 64) || output_type ==
> nir_type_float64, and that goes for the other bitsizes too.
> 
>> +         if (dest.u64[j] < 0x0010000000000000)
>> +            dest.u64[j] = 0;
>> +         if (dest.u64[j] & 0x8000000000000000 &&
>> +             !(dest.u64[j] & 0x7ff0000000000000))
>> +            dest.u64[j] = 0x8000000000000000;
>> +      }
>> +      if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP32 &&
>> +          bit_size == 32 &&
>> +          (nir_op_infos[instr->op].output_type == nir_type_float ||
>> +           nir_op_infos[instr->op].output_type == nir_type_float32)) {
>> +         if (dest.u32[j] < 0x00800000)
>> +            dest.u32[j] = 0;
>> +         if (dest.u32[j] & 0x80000000 &&
>> +             !(dest.u32[j] & 0x7f800000))
>> +            dest.u32[j] = 0x80000000;
>> +      }
>> +
>> +      if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP16 &&
>> +          bit_size == 16 &&
>> +          (nir_op_infos[instr->op].output_type == nir_type_float ||
>> +           nir_op_infos[instr->op].output_type == nir_type_float16)) {
>> +         if (dest.u16[j] < 0x0400)
>> +            dest.u16[j] = 0;
>> +         if (dest.u16[j] & 0x8000 &&
>> +             !(dest.u16[j] & 0x7c00))
>> +            dest.u16[j] = 0x8000;
>> +      }
>> +   }
>> +
>>     nir_load_const_instr *new_instr =
>>        nir_load_const_instr_create(mem_ctx,
>>                                    instr->dest.dest.ssa.num_components,
>> @@ -157,14 +218,14 @@ constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
>>  }
>>
>>  static bool
>> -constant_fold_block(nir_block *block, void *mem_ctx)
>> +constant_fold_block(nir_block *block, void *mem_ctx, unsigned execution_mode)
>>  {
>>     bool progress = false;
>>
>>     nir_foreach_instr_safe(instr, block) {
>>        switch (instr->type) {
>>        case nir_instr_type_alu:
>> -         progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx);
>> +         progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx, execution_mode);
>>           break;
>>        case nir_instr_type_intrinsic:
>>           progress |=
>> @@ -180,13 +241,13 @@ constant_fold_block(nir_block *block, void *mem_ctx)
>>  }
>>
>>  static bool
>> -nir_opt_constant_folding_impl(nir_function_impl *impl)
>> +nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned execution_mode)
>>  {
>>     void *mem_ctx = ralloc_parent(impl);
>>     bool progress = false;
>>
>>     nir_foreach_block(block, impl) {
>> -      progress |= constant_fold_block(block, mem_ctx);
>> +      progress |= constant_fold_block(block, mem_ctx, execution_mode);
>>     }
>>
>>     if (progress)
>> @@ -200,10 +261,11 @@ bool
>>  nir_opt_constant_folding(nir_shader *shader)
>>  {
>>     bool progress = false;
>> +   unsigned execution_mode = shader->info.shader_float_controls_execution_mode;
>>
>>     nir_foreach_function(function, shader) {
>>        if (function->impl)
>> -         progress |= nir_opt_constant_folding_impl(function->impl);
>> +         progress |= nir_opt_constant_folding_impl(function->impl, execution_mode);
>>     }
>>
>>     return progress;
>> --
>> 2.19.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20181210/1062d9ff/attachment-0001.sig>