[Mesa-dev] [PATCH v3 04/44] nir: add support for flushing to zero denorm constants
Samuel Iglesias Gonsálvez
siglesias at igalia.com
Wed Feb 6 10:44:33 UTC 2019
v2:
- Refactor conditions and shared function (Connor)
- Move code to nir_eval_const_opcode() (Connor)
- Don't flush to zero on fquantize2f16
From Vulkan spec, VK_KHR_shader_float_controls section:
"3) Do denorm and rounding mode controls apply to OpSpecConstantOp?
RESOLVED: Yes, except when the opcode is OpQuantizeToF16."
v3:
- Fix bit size (Connor)
- Fix execution mode on nir_loop_analyze (Connor)
Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---
src/compiler/nir/nir_constant_expressions.h | 3 +-
src/compiler/nir/nir_constant_expressions.py | 71 ++++++++++++++++++--
src/compiler/nir/nir_loop_analyze.c | 22 +++---
src/compiler/nir/nir_opt_constant_folding.c | 15 +++--
src/compiler/spirv/spirv_to_nir.c | 3 +-
5 files changed, 90 insertions(+), 24 deletions(-)
diff --git a/src/compiler/nir/nir_constant_expressions.h b/src/compiler/nir/nir_constant_expressions.h
index 1d6bbbc25d3..a2d416abc45 100644
--- a/src/compiler/nir/nir_constant_expressions.h
+++ b/src/compiler/nir/nir_constant_expressions.h
@@ -31,6 +31,7 @@
#include "nir.h"
nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
- unsigned bit_size, nir_const_value *src);
+ unsigned bit_size, nir_const_value *src,
+ unsigned float_controls_execution_mode);
#endif /* NIR_CONSTANT_EXPRESSIONS_H */
diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py
index 505cdd8baae..e79590f8359 100644
--- a/src/compiler/nir/nir_constant_expressions.py
+++ b/src/compiler/nir/nir_constant_expressions.py
@@ -66,6 +66,37 @@ template = """\
#include "util/bigmath.h"
#include "nir_constant_expressions.h"
+/**
+ * Checks if the provided value is a denorm and flushes it to zero.
+*/
+static nir_const_value
+constant_denorm_flush_to_zero(nir_const_value value, unsigned index, unsigned bit_size)
+{
+ switch(bit_size) {
+ case 64:
+ if (value.u64[index] < 0x0010000000000000)
+ value.u64[index] = 0;
+ if (value.u64[index] & 0x8000000000000000 &&
+ !(value.u64[index] & 0x7ff0000000000000))
+ value.u64[index] = 0x8000000000000000;
+ break;
+ case 32:
+ if (value.u32[index] < 0x00800000)
+ value.u32[index] = 0;
+ if (value.u32[index] & 0x80000000 &&
+ !(value.u32[index] & 0x7f800000))
+ value.u32[index] = 0x80000000;
+ break;
+ case 16:
+ if (value.u16[index] < 0x0400)
+ value.u16[index] = 0;
+ if (value.u16[index] & 0x8000 &&
+ !(value.u16[index] & 0x7c00))
+ value.u16[index] = 0x8000;
+ }
+ return value;
+}
+
/**
* Evaluate one component of packSnorm4x8.
*/
@@ -260,7 +291,7 @@ struct ${type}${width}_vec {
% endfor
% endfor
-<%def name="evaluate_op(op, bit_size)">
+<%def name="evaluate_op(op, bit_size, execution_mode)">
<%
output_type = type_add_size(op.output_type, bit_size)
input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
@@ -343,6 +374,18 @@ struct ${type}${width}_vec {
% else:
_dst_val.${get_const_field(output_type)}[_i] = dst;
% endif
+
+ % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
+ % if type_has_size(output_type):
+ if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${type_size(output_type)}) {
+ _dst_val = constant_denorm_flush_to_zero(_dst_val, _i, ${type_size(output_type)});
+ }
+ % else:
+ if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
+ _dst_val = constant_denorm_flush_to_zero(_dst_val, _i, bit_size);
+ }
+ %endif
+ % endif
}
% else:
## In the non-per-component case, create a struct dst with
@@ -375,6 +418,18 @@ struct ${type}${width}_vec {
% else:
_dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
% endif
+
+ % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
+ % if type_has_size(output_type):
+ if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${type_size(output_type)}) {
+ _dst_val = constant_denorm_flush_to_zero(_dst_val, ${k}, ${type_size(output_type)});
+ }
+ % else:
+ if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {
+ _dst_val = constant_denorm_flush_to_zero(_dst_val, ${k}, bit_size);
+ }
+ % endif
+ % endif
% endfor
% endif
</%def>
@@ -383,7 +438,8 @@ struct ${type}${width}_vec {
static nir_const_value
evaluate_${name}(MAYBE_UNUSED unsigned num_components,
${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
- MAYBE_UNUSED nir_const_value *_src)
+ MAYBE_UNUSED nir_const_value *_src,
+ MAYBE_UNUSED unsigned execution_mode)
{
nir_const_value _dst_val = { {0, } };
@@ -391,7 +447,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components,
switch (bit_size) {
% for bit_size in op_bit_sizes(op):
case ${bit_size}: {
- ${evaluate_op(op, bit_size)}
+ ${evaluate_op(op, bit_size, execution_mode)}
break;
}
% endfor
@@ -400,7 +456,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components,
unreachable("unknown bit width");
}
% else:
- ${evaluate_op(op, 0)}
+ ${evaluate_op(op, 0, execution_mode)}
% endif
return _dst_val;
@@ -409,12 +465,13 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components,
nir_const_value
nir_eval_const_opcode(nir_op op, unsigned num_components,
- unsigned bit_width, nir_const_value *src)
+ unsigned bit_width, nir_const_value *src,
+ unsigned float_controls_execution_mode)
{
switch (op) {
% for name in sorted(opcodes.keys()):
case nir_op_${name}:
- return evaluate_${name}(num_components, bit_width, src);
+ return evaluate_${name}(num_components, bit_width, src, float_controls_execution_mode);
% endfor
default:
unreachable("shouldn't get here");
@@ -424,6 +481,8 @@ nir_eval_const_opcode(nir_op op, unsigned num_components,
from mako.template import Template
print(Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
+ type_base_type=type_base_type,
+ type_size=type_size,
type_has_size=type_has_size,
type_add_size=type_add_size,
op_bit_sizes=op_bit_sizes,
diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index 6deb6cb9627..9026a4f406e 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -469,7 +469,8 @@ static bool
test_iterations(int32_t iter_int, nir_const_value *step,
nir_const_value *limit, nir_op cond_op, unsigned bit_size,
nir_alu_type induction_base_type,
- nir_const_value *initial, bool limit_rhs, bool invert_cond)
+ nir_const_value *initial, bool limit_rhs, bool invert_cond,
+ unsigned execution_mode)
{
assert(nir_op_infos[cond_op].num_inputs == 2);
@@ -497,19 +498,20 @@ test_iterations(int32_t iter_int, nir_const_value *step,
*/
nir_const_value mul_src[2] = { iter_src, *step };
nir_const_value mul_result =
- nir_eval_const_opcode(mul_op, 1, bit_size, mul_src);
+ nir_eval_const_opcode(mul_op, 1, bit_size, mul_src, execution_mode);
/* Add the initial value to the accumulated induction variable total */
nir_const_value add_src[2] = { mul_result, *initial };
nir_const_value add_result =
- nir_eval_const_opcode(add_op, 1, bit_size, add_src);
+ nir_eval_const_opcode(add_op, 1, bit_size, add_src, execution_mode);
nir_const_value src[2] = { { {0, } }, { {0, } } };
src[limit_rhs ? 0 : 1] = add_result;
src[limit_rhs ? 1 : 0] = *limit;
/* Evaluate the loop exit condition */
- nir_const_value result = nir_eval_const_opcode(cond_op, 1, bit_size, src);
+ nir_const_value result = nir_eval_const_opcode(cond_op, 1, bit_size, src,
+ execution_mode);
return invert_cond ? (result.u32[0] == 0) : (result.u32[0] != 0);
}
@@ -517,7 +519,8 @@ test_iterations(int32_t iter_int, nir_const_value *step,
static int
calculate_iterations(nir_const_value *initial, nir_const_value *step,
nir_const_value *limit, nir_loop_variable *alu_def,
- nir_alu_instr *cond_alu, bool limit_rhs, bool invert_cond)
+ nir_alu_instr *cond_alu, bool limit_rhs, bool invert_cond,
+ unsigned execution_mode)
{
assert(initial != NULL && step != NULL && limit != NULL);
@@ -584,7 +587,7 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step,
if (test_iterations(iter_bias, step, limit, cond_alu->op, bit_size,
induction_base_type, initial,
- limit_rhs, invert_cond)) {
+ limit_rhs, invert_cond, execution_mode)) {
return iter_bias > 0 ? iter_bias - trip_offset : iter_bias;
}
}
@@ -599,7 +602,7 @@ calculate_iterations(nir_const_value *initial, nir_const_value *step,
* loop.
*/
static void
-find_trip_count(loop_info_state *state)
+find_trip_count(loop_info_state *state, unsigned execution_mode)
{
bool trip_count_known = true;
nir_loop_terminator *limiting_terminator = NULL;
@@ -670,7 +673,8 @@ find_trip_count(loop_info_state *state)
&limit_val,
basic_ind->ind->alu_def, alu,
limit_rhs,
- terminator->continue_from_then);
+ terminator->continue_from_then,
+ execution_mode);
/* Where we not able to calculate the iteration count */
if (iterations == -1) {
@@ -801,7 +805,7 @@ get_loop_info(loop_info_state *state, nir_function_impl *impl)
return;
/* Run through each of the terminators and try to compute a trip-count */
- find_trip_count(state);
+ find_trip_count(state, impl->function->shader->info.shader_float_controls_execution_mode);
nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
if (force_unroll_heuristics(state, block)) {
diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
index 83be0d78dbd..10bbf553d45 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -39,7 +39,7 @@ struct constant_fold_state {
};
static bool
-constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
+constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx, unsigned execution_mode)
{
nir_const_value src[NIR_MAX_VEC_COMPONENTS];
@@ -108,7 +108,7 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
nir_const_value dest =
nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components,
- bit_size, src);
+ bit_size, src, execution_mode);
nir_load_const_instr *new_instr =
nir_load_const_instr_create(mem_ctx,
@@ -161,14 +161,14 @@ constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
}
static bool
-constant_fold_block(nir_block *block, void *mem_ctx)
+constant_fold_block(nir_block *block, void *mem_ctx, unsigned execution_mode)
{
bool progress = false;
nir_foreach_instr_safe(instr, block) {
switch (instr->type) {
case nir_instr_type_alu:
- progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx);
+ progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx, execution_mode);
break;
case nir_instr_type_intrinsic:
progress |=
@@ -184,13 +184,13 @@ constant_fold_block(nir_block *block, void *mem_ctx)
}
static bool
-nir_opt_constant_folding_impl(nir_function_impl *impl)
+nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned execution_mode)
{
void *mem_ctx = ralloc_parent(impl);
bool progress = false;
nir_foreach_block(block, impl) {
- progress |= constant_fold_block(block, mem_ctx);
+ progress |= constant_fold_block(block, mem_ctx, execution_mode);
}
if (progress) {
@@ -209,10 +209,11 @@ bool
nir_opt_constant_folding(nir_shader *shader)
{
bool progress = false;
+ unsigned execution_mode = shader->info.shader_float_controls_execution_mode;
nir_foreach_function(function, shader) {
if (function->impl)
- progress |= nir_opt_constant_folding_impl(function->impl);
+ progress |= nir_opt_constant_folding_impl(function->impl, execution_mode);
}
return progress;
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index 3f23e799431..c1703d98bc1 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1966,7 +1966,8 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
}
val->constant->values[0] =
- nir_eval_const_opcode(op, num_components, bit_size, src);
+ nir_eval_const_opcode(op, num_components, bit_size, src,
+ b->shader->info.shader_float_controls_execution_mode);
break;
} /* default */
}
--
2.19.1
More information about the mesa-dev
mailing list