[Mesa-dev] [PATCH v2 04/29] nir: add support for flushing to zero denorm constants
Samuel Iglesias Gonsálvez
siglesias at igalia.com
Thu Jan 31 13:07:31 UTC 2019
On 30/01/2019 16:18, Connor Abbott wrote:
>
>
> On Tue, Dec 18, 2018 at 11:35 AM Samuel Iglesias Gonsálvez
> <siglesias at igalia.com <mailto:siglesias at igalia.com>> wrote:
>
> v2:
> - Refactor conditions and shared function (Connor)
> - Move code to nir_eval_const_opcode() (Connor)
> - Don't flush to zero on fquantize2f16
> From Vulkan spec, VK_KHR_shader_float_controls section:
>
> "3) Do denorm and rounding mode controls apply to OpSpecConstantOp?
>
> RESOLVED: Yes, except when the opcode is OpQuantizeToF16."
>
> Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com
> <mailto:siglesias at igalia.com>>
> ---
> src/compiler/nir/nir_constant_expressions.h | 3 +-
> src/compiler/nir/nir_constant_expressions.py | 65 ++++++++++++++++++--
> src/compiler/nir/nir_loop_analyze.c | 7 ++-
> src/compiler/nir/nir_opt_constant_folding.c | 15 ++---
> src/compiler/spirv/spirv_to_nir.c | 3 +-
> 5 files changed, 75 insertions(+), 18 deletions(-)
>
> diff --git a/src/compiler/nir/nir_constant_expressions.h
> b/src/compiler/nir/nir_constant_expressions.h
> index 1d6bbbc25d3..a2d416abc45 100644
> --- a/src/compiler/nir/nir_constant_expressions.h
> +++ b/src/compiler/nir/nir_constant_expressions.h
> @@ -31,6 +31,7 @@
> #include "nir.h"
>
> nir_const_value nir_eval_const_opcode(nir_op op, unsigned
> num_components,
> - unsigned bit_size,
> nir_const_value *src);
> + unsigned bit_size,
> nir_const_value *src,
> + unsigned
> float_controls_execution_mode);
>
> #endif /* NIR_CONSTANT_EXPRESSIONS_H */
> diff --git a/src/compiler/nir/nir_constant_expressions.py
> b/src/compiler/nir/nir_constant_expressions.py
> index 505cdd8baae..dc2132df0d0 100644
> --- a/src/compiler/nir/nir_constant_expressions.py
> +++ b/src/compiler/nir/nir_constant_expressions.py
> @@ -66,6 +66,37 @@ template = """\
> #include "util/bigmath.h"
> #include "nir_constant_expressions.h"
>
> +/**
> + * Checks if the provided value is a denorm and flushes it to zero.
> +*/
> +static nir_const_value
> +constant_denorm_flush_to_zero(nir_const_value value, unsigned
> index, unsigned bit_size)
> +{
> + switch(bit_size) {
> + case 64:
> + if (value.u64[index] < 0x0010000000000000)
> + value.u64[index] = 0;
> + if (value.u64[index] & 0x8000000000000000 &&
> + !(value.u64[index] & 0x7ff0000000000000))
> + value.u64[index] = 0x8000000000000000;
> + break;
> + case 32:
> + if (value.u32[index] < 0x00800000)
> + value.u32[index] = 0;
> + if (value.u32[index] & 0x80000000 &&
> + !(value.u32[index] & 0x7f800000))
> + value.u32[index] = 0x80000000;
> + break;
> + case 16:
> + if (value.u16[index] < 0x0400)
> + value.u16[index] = 0;
> + if (value.u16[index] & 0x8000 &&
> + !(value.u16[index] & 0x7c00))
> + value.u16[index] = 0x8000;
> + }
> + return value;
> +}
> +
> /**
> * Evaluate one component of packSnorm4x8.
> */
> @@ -260,7 +291,7 @@ struct ${type}${width}_vec {
> % endfor
> % endfor
>
> -<%def name="evaluate_op(op, bit_size)">
> +<%def name="evaluate_op(op, bit_size, execution_mode)">
> <%
> output_type = type_add_size(op.output_type, bit_size)
> input_types = [type_add_size(type_, bit_size) for type_ in
> op.input_types]
> @@ -277,6 +308,14 @@ struct ${type}${width}_vec {
> <% continue %>
> %endif
>
> + % for k in range(op.input_sizes[j]):
> + % if op.name <http://op.name> != "fquantize2f16" and
> bit_size > 8 and op.input_types[j] == "float":
>
>
> This condition doesn't seem right. It may happen to work, but it isn't
> following NIR principles on ALU instructions. Each NIR opcode has an
> output type (given by op.output_type) and input types for each source
> (given by op.input_types). Each type may be sized, like "float32", or
> unsized like "float". All unsized inputs/outputs must have the same bit
> size at runtime. The bit_size here is the common bitsize for
> inputs/outputs with unsized types like "float" in the opcode definition.
> Even though in general it's only known at runtime, we're switching over
> all bitsizes in the generated code, so here we do know what it is at
> compile time. In order to handle sized types, we have to drop all
> references to bit_size and stop comparing op.input_types[j] directly
> with "float" since it may be a sized type. Also, if this source has a
> float type, we already know that its bit size is at least 16, so the
> second check should be useless. I think what you actually want to do is
> extract the base type, i.e. something like: op.name <http://op.name> !=
> "fquantize16" and type_base_name(input_types[j]) == "float"
>
Right. Thanks for the explanation.
>
> + if (execution_mode &
> SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
>
>
> bit_size here isn't the right bit size for sized sources. You want
> ${type_size(input_types[i])} which is the actual size of the source at
> runtime. This will be bit_size if the op.input_types[j] is unsized or
> the input size otherwise.
>
> + _src[${j}] =
> constant_denorm_flush_to_zero(_src[${j}], ${k}, bit_size);
>
>
> same as above with bit_size.
>
OK, I will do them for the dst ones.
> Finally, I think you missed the equivalent hunk for non-per-component
> sources.
>
Actually, I realized that this is not mandatory for the sources, just
the destination, so I will remove the source part all together and do
the suggested fixes into the destination part.
>
> + }
> + % endif
> + % endfor
> +
> const struct ${input_types[j]}_vec src${j} = {
> % for k in range(op.input_sizes[j]):
> % if input_types[j] == "int1":
> @@ -343,6 +382,12 @@ struct ${type}${width}_vec {
> % else:
> _dst_val.${get_const_field(output_type)}[_i] = dst;
> % endif
> +
> + % if op.name <http://op.name> != "fquantize2f16" and
> bit_size > 8 and op.output_type == "float":
> + if (execution_mode &
> SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
> + _dst_val = constant_denorm_flush_to_zero(_dst_val,
> _i, bit_size);
> + }
> + % endif
>
> }
> % else:
> ## In the non-per-component case, create a struct dst with
> @@ -375,6 +420,12 @@ struct ${type}${width}_vec {
> % else:
> _dst_val.${get_const_field(output_type)}[${k}] =
> dst.${"xyzw"[k]};
> % endif
> +
> + % if op.name <http://op.name> != "fquantize2f16" and
> bit_size > 8 and op.output_type == "float":
> + if (execution_mode &
> SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
> + _dst_val = constant_denorm_flush_to_zero(_dst_val,
> ${k}, bit_size);
> + }
> + % endif
> % endfor
> % endif
> </%def>
> @@ -383,7 +434,8 @@ struct ${type}${width}_vec {
> static nir_const_value
> evaluate_${name}(MAYBE_UNUSED unsigned num_components,
> ${"UNUSED" if op_bit_sizes(op) is None else ""}
> unsigned bit_size,
> - MAYBE_UNUSED nir_const_value *_src)
> + MAYBE_UNUSED nir_const_value *_src,
> + MAYBE_UNUSED unsigned execution_mode)
> {
> nir_const_value _dst_val = { {0, } };
>
> @@ -391,7 +443,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned
> num_components,
> switch (bit_size) {
> % for bit_size in op_bit_sizes(op):
> case ${bit_size}: {
> - ${evaluate_op(op, bit_size)}
> + ${evaluate_op(op, bit_size, execution_mode)}
> break;
> }
> % endfor
> @@ -400,7 +452,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned
> num_components,
> unreachable("unknown bit width");
> }
> % else:
> - ${evaluate_op(op, 0)}
> + ${evaluate_op(op, 0, execution_mode)}
> % endif
>
> return _dst_val;
> @@ -409,12 +461,13 @@ evaluate_${name}(MAYBE_UNUSED unsigned
> num_components,
>
> nir_const_value
> nir_eval_const_opcode(nir_op op, unsigned num_components,
> - unsigned bit_width, nir_const_value *src)
> + unsigned bit_width, nir_const_value *src,
> + unsigned float_controls_execution_mode)
> {
> switch (op) {
> % for name in sorted(opcodes.keys()):
> case nir_op_${name}:
> - return evaluate_${name}(num_components, bit_width, src);
> + return evaluate_${name}(num_components, bit_width, src,
> float_controls_execution_mode);
> % endfor
> default:
> unreachable("shouldn't get here");
> diff --git a/src/compiler/nir/nir_loop_analyze.c
> b/src/compiler/nir/nir_loop_analyze.c
> index 259f02a854e..c9fba8649db 100644
> --- a/src/compiler/nir/nir_loop_analyze.c
> +++ b/src/compiler/nir/nir_loop_analyze.c
> @@ -497,19 +497,20 @@ test_iterations(int32_t iter_int,
> nir_const_value *step,
> */
> nir_const_value mul_src[2] = { iter_src, *step };
> nir_const_value mul_result =
> - nir_eval_const_opcode(mul_op, 1, bit_size, mul_src);
> + nir_eval_const_opcode(mul_op, 1, bit_size, mul_src,
> SHADER_DEFAULT_FLOAT_CONTROL_MODE);
>
>
> Shouldn't we be using the actual float control mode for the shader? I
> don't know much about this code, but presumably it's trying to simulate
> something calculated by the shader.
>
Good catch! I'll fix it.
Sam
>
>
> /* Add the initial value to the accumulated induction variable
> total */
> nir_const_value add_src[2] = { mul_result, *initial };
> nir_const_value add_result =
> - nir_eval_const_opcode(add_op, 1, bit_size, add_src);
> + nir_eval_const_opcode(add_op, 1, bit_size, add_src,
> SHADER_DEFAULT_FLOAT_CONTROL_MODE);
>
> nir_const_value src[2] = { { {0, } }, { {0, } } };
> src[limit_rhs ? 0 : 1] = add_result;
> src[limit_rhs ? 1 : 0] = *limit;
>
> /* Evaluate the loop exit condition */
> - nir_const_value result = nir_eval_const_opcode(cond_op, 1,
> bit_size, src);
> + nir_const_value result = nir_eval_const_opcode(cond_op, 1,
> bit_size, src,
> +
> SHADER_DEFAULT_FLOAT_CONTROL_MODE);
>
> return invert_cond ? (result.u32[0] == 0) : (result.u32[0] != 0);
> }
> diff --git a/src/compiler/nir/nir_opt_constant_folding.c
> b/src/compiler/nir/nir_opt_constant_folding.c
> index 5097a3bcc36..bd6130d5b33 100644
> --- a/src/compiler/nir/nir_opt_constant_folding.c
> +++ b/src/compiler/nir/nir_opt_constant_folding.c
> @@ -39,7 +39,7 @@ struct constant_fold_state {
> };
>
> static bool
> -constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
> +constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx,
> unsigned execution_mode)
> {
> nir_const_value src[NIR_MAX_VEC_COMPONENTS];
>
> @@ -108,7 +108,7 @@ constant_fold_alu_instr(nir_alu_instr *instr,
> void *mem_ctx)
>
> nir_const_value dest =
> nir_eval_const_opcode(instr->op,
> instr->dest.dest.ssa.num_components,
> - bit_size, src);
> + bit_size, src, execution_mode);
>
> nir_load_const_instr *new_instr =
> nir_load_const_instr_create(mem_ctx,
> @@ -161,14 +161,14 @@
> constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
> }
>
> static bool
> -constant_fold_block(nir_block *block, void *mem_ctx)
> +constant_fold_block(nir_block *block, void *mem_ctx, unsigned
> execution_mode)
> {
> bool progress = false;
>
> nir_foreach_instr_safe(instr, block) {
> switch (instr->type) {
> case nir_instr_type_alu:
> - progress |=
> constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx);
> + progress |=
> constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx,
> execution_mode);
> break;
> case nir_instr_type_intrinsic:
> progress |=
> @@ -184,13 +184,13 @@ constant_fold_block(nir_block *block, void
> *mem_ctx)
> }
>
> static bool
> -nir_opt_constant_folding_impl(nir_function_impl *impl)
> +nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned
> execution_mode)
> {
> void *mem_ctx = ralloc_parent(impl);
> bool progress = false;
>
> nir_foreach_block(block, impl) {
> - progress |= constant_fold_block(block, mem_ctx);
> + progress |= constant_fold_block(block, mem_ctx, execution_mode);
> }
>
> if (progress)
> @@ -204,10 +204,11 @@ bool
> nir_opt_constant_folding(nir_shader *shader)
> {
> bool progress = false;
> + unsigned execution_mode =
> shader->info.shader_float_controls_execution_mode;
>
> nir_foreach_function(function, shader) {
> if (function->impl)
> - progress |= nir_opt_constant_folding_impl(function->impl);
> + progress |= nir_opt_constant_folding_impl(function->impl,
> execution_mode);
> }
>
> return progress;
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index 96d4d80970f..7578a83e2bb 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -1842,7 +1842,8 @@ vtn_handle_constant(struct vtn_builder *b,
> SpvOp opcode,
> }
>
> val->constant->values[0] =
> - nir_eval_const_opcode(op, num_components, bit_size, src);
> + nir_eval_const_opcode(op, num_components, bit_size, src,
> +
> b->shader->info.shader_float_controls_execution_mode);
> break;
> } /* default */
> }
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org <mailto:mesa-dev at lists.freedesktop.org>
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190131/8ae9e193/attachment-0001.sig>
More information about the mesa-dev
mailing list