[Mesa-dev] [PATCH 04/28] nir: add support for flushing to zero denorm constants

Wed Dec 5 17:56:31 UTC 2018

On Wed, Dec 05, 2018 at 04:55:19PM +0100, Samuel Iglesias Gonsálvez wrote:
> Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
> ---
>  src/compiler/nir/nir_opt_constant_folding.c | 74 +++++++++++++++++++--
>  1 file changed, 68 insertions(+), 6 deletions(-)
> 
> diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
> index 1fca530af24..a6df8284e17 100644
> --- a/src/compiler/nir/nir_opt_constant_folding.c
> +++ b/src/compiler/nir/nir_opt_constant_folding.c
> @@ -39,7 +39,7 @@ struct constant_fold_state {
>  };
>  
>  static bool
> -constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
> +constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx, unsigned execution_mode)
>  {
>     nir_const_value src[NIR_MAX_VEC_COMPONENTS];
>  
> @@ -77,12 +77,39 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
>           switch(load_const->def.bit_size) {
>           case 64:
>              src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]];
> +            if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP64 &&
> +                (nir_op_infos[instr->op].input_types[i] == nir_type_float ||
> +                 nir_op_infos[instr->op].input_types[i] == nir_type_float64)) {
It may be a stupid question, but should we really test that the input type is a float?
Isn't that already included in the float64 check?

> +               if (src[i].u64[j] < 0x0010000000000000)
> +                  src[i].u64[j] = 0;
> +               if (src[i].u64[j] & 0x8000000000000000 &&
> +                   !(src[i].u64[j] & 0x7ff0000000000000))
> +                  src[i].u64[j] = 0x8000000000000000;
We can maybe do something like:

if ((src[i].u64[j] & 0x000fffffffffffff) &&
    !(src[i].u64[j] & 0x7ff0000000000000))
   src[i].u64[j] &= 0x8000000000000000;
> +            }
>              break;
>           case 32:
>              src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]];
> +            if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP32 &&
> +                (nir_op_infos[instr->op].input_types[i] == nir_type_float ||
> +                 nir_op_infos[instr->op].input_types[i] == nir_type_float32)) {
> +                   if (src[i].u32[j] < 0x00800000)
> +                      src[i].u32[j] = 0;
> +                   if (src[i].u32[j] & 0x80000000 &&
> +                       !(src[i].u32[j] & 0x7f800000))
> +                      src[i].u32[j] = 0x80000000;
> +                }
>              break;
>           case 16:
>              src[i].u16[j] = load_const->value.u16[instr->src[i].swizzle[j]];
> +            if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP16 &&
> +                (nir_op_infos[instr->op].input_types[i] == nir_type_float ||
> +                 nir_op_infos[instr->op].input_types[i] == nir_type_float16)) {
> +                   if (src[i].u16[j] < 0x0400)
> +                      src[i].u16[j] = 0;
> +                   if (src[i].u16[j] & 0x8000 &&
> +                       !(src[i].u16[j] & 0x7c00))
> +                      src[i].u16[j] = 0x8000;
> +                }
>              break;
>           case 8:
>              src[i].u8[j] = load_const->value.u8[instr->src[i].swizzle[j]];
> @@ -106,6 +133,40 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
>        nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components,
>                              bit_size, src);
>  
> +   for (unsigned j = 0; j < instr->dest.dest.ssa.num_components; j++) {
> +      if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP64 &&
> +          bit_size == 64 &&
> +          (nir_op_infos[instr->op].output_type == nir_type_float ||
> +           nir_op_infos[instr->op].output_type == nir_type_float64)) {
Kind of the same question as before.
Doesn't looking at the output type already tell us that we have a 64-bit float?
If so, we can drop the bit_size and float checks.

Once again, I don't have much knowledge of NIR; I know more about fp64.
So it may be a stupid question. ;)

> +         if (dest.u64[j] < 0x0010000000000000)
> +            dest.u64[j] = 0;
> +         if (dest.u64[j] & 0x8000000000000000 &&
> +             !(dest.u64[j] & 0x7ff0000000000000))
> +            dest.u64[j] = 0x8000000000000000;
> +      }
> +      if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP32 &&
> +          bit_size == 32 &&
> +          (nir_op_infos[instr->op].output_type == nir_type_float ||
> +           nir_op_infos[instr->op].output_type == nir_type_float32)) {
> +         if (dest.u32[j] < 0x00800000)
> +            dest.u32[j] = 0;
> +         if (dest.u32[j] & 0x80000000 &&
> +             !(dest.u32[j] & 0x7f800000))
> +            dest.u32[j] = 0x80000000;
> +      }
> +
> +      if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP16 &&
> +          bit_size == 16 &&
> +          (nir_op_infos[instr->op].output_type == nir_type_float ||
> +           nir_op_infos[instr->op].output_type == nir_type_float16)) {
> +         if (dest.u16[j] < 0x0400)
> +            dest.u16[j] = 0;
> +         if (dest.u16[j] & 0x8000 &&
> +             !(dest.u16[j] & 0x7c00))
> +            dest.u16[j] = 0x8000;
> +      }
> +   }
> +
>     nir_load_const_instr *new_instr =
>        nir_load_const_instr_create(mem_ctx,
>                                    instr->dest.dest.ssa.num_components,
> @@ -157,14 +218,14 @@ constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
>  }
>  
>  static bool
> -constant_fold_block(nir_block *block, void *mem_ctx)
> +constant_fold_block(nir_block *block, void *mem_ctx, unsigned execution_mode)
>  {
>     bool progress = false;
>  
>     nir_foreach_instr_safe(instr, block) {
>        switch (instr->type) {
>        case nir_instr_type_alu:
> -         progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx);
> +         progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx, execution_mode);
>           break;
>        case nir_instr_type_intrinsic:
>           progress |=
> @@ -180,13 +241,13 @@ constant_fold_block(nir_block *block, void *mem_ctx)
>  }
>  
>  static bool
> -nir_opt_constant_folding_impl(nir_function_impl *impl)
> +nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned execution_mode)
>  {
>     void *mem_ctx = ralloc_parent(impl);
>     bool progress = false;
>  
>     nir_foreach_block(block, impl) {
> -      progress |= constant_fold_block(block, mem_ctx);
> +      progress |= constant_fold_block(block, mem_ctx, execution_mode);
>     }
>  
>     if (progress)
> @@ -200,10 +261,11 @@ bool
>  nir_opt_constant_folding(nir_shader *shader)
>  {
>     bool progress = false;
> +   unsigned execution_mode = shader->info.shader_float_controls_execution_mode;
>  
>     nir_foreach_function(function, shader) {
>        if (function->impl)
> -         progress |= nir_opt_constant_folding_impl(function->impl);
> +         progress |= nir_opt_constant_folding_impl(function->impl, execution_mode);
>     }
>  
>     return progress;
> -- 
> 2.19.1
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev