<div dir="ltr"><div dir="ltr"><br></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, Dec 18, 2018 at 11:35 AM Samuel Iglesias Gonsálvez <<a href="mailto:siglesias@igalia.com">siglesias@igalia.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">v2:<br>
- Refactor conditions and shared function (Connor)<br>
- Move code to nir_eval_const_opcode() (Connor)<br>
- Don't flush to zero on fquantize2f16<br>
From Vulkan spec, VK_KHR_shader_float_controls section:<br>
<br>
"3) Do denorm and rounding mode controls apply to OpSpecConstantOp?<br>
<br>
RESOLVED: Yes, except when the opcode is OpQuantizeToF16."<br>
<br>
Signed-off-by: Samuel Iglesias Gonsálvez <<a href="mailto:siglesias@igalia.com" target="_blank">siglesias@igalia.com</a>><br>
---<br>
src/compiler/nir/nir_constant_expressions.h | 3 +-<br>
src/compiler/nir/nir_constant_expressions.py | 65 ++++++++++++++++++--<br>
src/compiler/nir/nir_loop_analyze.c | 7 ++-<br>
src/compiler/nir/nir_opt_constant_folding.c | 15 ++---<br>
src/compiler/spirv/spirv_to_nir.c | 3 +-<br>
5 files changed, 75 insertions(+), 18 deletions(-)<br>
<br>
diff --git a/src/compiler/nir/nir_constant_expressions.h b/src/compiler/nir/nir_constant_expressions.h<br>
index 1d6bbbc25d3..a2d416abc45 100644<br>
--- a/src/compiler/nir/nir_constant_expressions.h<br>
+++ b/src/compiler/nir/nir_constant_expressions.h<br>
@@ -31,6 +31,7 @@<br>
#include "nir.h"<br>
<br>
nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,<br>
- unsigned bit_size, nir_const_value *src);<br>
+ unsigned bit_size, nir_const_value *src,<br>
+ unsigned float_controls_execution_mode);<br>
<br>
#endif /* NIR_CONSTANT_EXPRESSIONS_H */<br>
diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py<br>
index 505cdd8baae..dc2132df0d0 100644<br>
--- a/src/compiler/nir/nir_constant_expressions.py<br>
+++ b/src/compiler/nir/nir_constant_expressions.py<br>
@@ -66,6 +66,37 @@ template = """\<br>
#include "util/bigmath.h"<br>
#include "nir_constant_expressions.h"<br>
<br>
+/**<br>
+ * Checks if the provided value is a denorm and flushes it to zero.<br>
+*/<br>
+static nir_const_value<br>
+constant_denorm_flush_to_zero(nir_const_value value, unsigned index, unsigned bit_size)<br>
+{<br>
+ switch(bit_size) {<br>
+ case 64:<br>
+ if (value.u64[index] < 0x0010000000000000)<br>
+ value.u64[index] = 0;<br>
+ if (value.u64[index] & 0x8000000000000000 &&<br>
+ !(value.u64[index] & 0x7ff0000000000000))<br>
+ value.u64[index] = 0x8000000000000000;<br>
+ break;<br>
+ case 32:<br>
+ if (value.u32[index] < 0x00800000)<br>
+ value.u32[index] = 0;<br>
+ if (value.u32[index] & 0x80000000 &&<br>
+ !(value.u32[index] & 0x7f800000))<br>
+ value.u32[index] = 0x80000000;<br>
+ break;<br>
+ case 16:<br>
+ if (value.u16[index] < 0x0400)<br>
+ value.u16[index] = 0;<br>
+ if (value.u16[index] & 0x8000 &&<br>
+ !(value.u16[index] & 0x7c00))<br>
+ value.u16[index] = 0x8000;<br>
+ }<br>
+ return value;<br>
+}<br>
+<br>
/**<br>
* Evaluate one component of packSnorm4x8.<br>
*/<br>
@@ -260,7 +291,7 @@ struct ${type}${width}_vec {<br>
% endfor<br>
% endfor<br>
<br>
-<%def name="evaluate_op(op, bit_size)"><br>
+<%def name="evaluate_op(op, bit_size, execution_mode)"><br>
<%<br>
output_type = type_add_size(op.output_type, bit_size)<br>
input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]<br>
@@ -277,6 +308,14 @@ struct ${type}${width}_vec {<br>
<% continue %><br>
%endif<br>
<br>
+ % for k in range(op.input_sizes[j]):<br>
+ % if <a href="http://op.name" rel="noreferrer" target="_blank">op.name</a> != "fquantize2f16" and bit_size > 8 and op.input_types[j] == "float":<br></blockquote><div><br></div>This condition doesn't seem right. It may happen to work, but it isn't following NIR principles on ALU instructions. Each NIR opcode has an output type (given by op.output_type) and input types for each source (given by op.input_types). Each type may be sized, like "float32", or unsized like "float". All unsized inputs/outputs must have the same bit size at runtime. The bit_size here is the common bitsize for inputs/outputs with unsized types like "float" in the opcode definition. Even though in general it's only known at runtime, we're switching over all bitsizes in the generated code, so here we do know what it is at compile time. In order to handle sized types, we have to drop all references to bit_size and stop comparing op.input_types[j] directly with "float" since it may be a sized type. Also, if this source has a float type, we already know that its bit size is at least 16, so the second check should be useless. I think what you actually want to do is extract the base type, i.e. something like: <a href="http://op.name">op.name</a> != "fquantize16" and type_base_name(input_types[j]) == "float"<br><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {<br></blockquote><div> </div><div>bit_size here isn't the right bit size for sized sources. You want ${type_size(input_types[i])} which is the actual size of the source at runtime. This will be bit_size if the op.input_types[j] is unsized or the input size otherwise.<br></div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ _src[${j}] = constant_denorm_flush_to_zero(_src[${j}], ${k}, bit_size);<br></blockquote><div><br></div><div>same as above with bit_size.</div><div><br></div><div>Finally, I think you missed the equivalent hunk for non-per-component sources.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ }<br>
+ % endif<br>
+ % endfor<br>
+<br>
const struct ${input_types[j]}_vec src${j} = {<br>
% for k in range(op.input_sizes[j]):<br>
% if input_types[j] == "int1":<br>
@@ -343,6 +382,12 @@ struct ${type}${width}_vec {<br>
% else:<br>
_dst_val.${get_const_field(output_type)}[_i] = dst;<br>
% endif<br>
+<br>
+ % if <a href="http://op.name" rel="noreferrer" target="_blank">op.name</a> != "fquantize2f16" and bit_size > 8 and op.output_type == "float":<br>
+ if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {<br>
+ _dst_val = constant_denorm_flush_to_zero(_dst_val, _i, bit_size);<br>
+ }<br>
+ % endif<br></blockquote><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
}<br>
% else:<br>
## In the non-per-component case, create a struct dst with<br>
@@ -375,6 +420,12 @@ struct ${type}${width}_vec {<br>
% else:<br>
_dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};<br>
% endif<br>
+<br>
+ % if <a href="http://op.name" rel="noreferrer" target="_blank">op.name</a> != "fquantize2f16" and bit_size > 8 and op.output_type == "float":<br>
+ if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) {<br>
+ _dst_val = constant_denorm_flush_to_zero(_dst_val, ${k}, bit_size);<br>
+ }<br>
+ % endif<br>
% endfor<br>
% endif<br>
</%def><br>
@@ -383,7 +434,8 @@ struct ${type}${width}_vec {<br>
static nir_const_value<br>
evaluate_${name}(MAYBE_UNUSED unsigned num_components,<br>
${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,<br>
- MAYBE_UNUSED nir_const_value *_src)<br>
+ MAYBE_UNUSED nir_const_value *_src,<br>
+ MAYBE_UNUSED unsigned execution_mode)<br>
{<br>
nir_const_value _dst_val = { {0, } };<br>
<br>
@@ -391,7 +443,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components,<br>
switch (bit_size) {<br>
% for bit_size in op_bit_sizes(op):<br>
case ${bit_size}: {<br>
- ${evaluate_op(op, bit_size)}<br>
+ ${evaluate_op(op, bit_size, execution_mode)}<br>
break;<br>
}<br>
% endfor<br>
@@ -400,7 +452,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components,<br>
unreachable("unknown bit width");<br>
}<br>
% else:<br>
- ${evaluate_op(op, 0)}<br>
+ ${evaluate_op(op, 0, execution_mode)}<br>
% endif<br>
<br>
return _dst_val;<br>
@@ -409,12 +461,13 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components,<br>
<br>
nir_const_value<br>
nir_eval_const_opcode(nir_op op, unsigned num_components,<br>
- unsigned bit_width, nir_const_value *src)<br>
+ unsigned bit_width, nir_const_value *src,<br>
+ unsigned float_controls_execution_mode)<br>
{<br>
switch (op) {<br>
% for name in sorted(opcodes.keys()):<br>
case nir_op_${name}:<br>
- return evaluate_${name}(num_components, bit_width, src);<br>
+ return evaluate_${name}(num_components, bit_width, src, float_controls_execution_mode);<br>
% endfor<br>
default:<br>
unreachable("shouldn't get here");<br>
diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c<br>
index 259f02a854e..c9fba8649db 100644<br>
--- a/src/compiler/nir/nir_loop_analyze.c<br>
+++ b/src/compiler/nir/nir_loop_analyze.c<br>
@@ -497,19 +497,20 @@ test_iterations(int32_t iter_int, nir_const_value *step,<br>
*/<br>
nir_const_value mul_src[2] = { iter_src, *step };<br>
nir_const_value mul_result =<br>
- nir_eval_const_opcode(mul_op, 1, bit_size, mul_src);<br>
+ nir_eval_const_opcode(mul_op, 1, bit_size, mul_src, SHADER_DEFAULT_FLOAT_CONTROL_MODE);<br></blockquote><div><br></div><div>Shouldn't we be using the actual float control mode for the shader? I don't know much about this code, but presumably it's trying to simulate something calculated by the shader.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<br>
/* Add the initial value to the accumulated induction variable total */<br>
nir_const_value add_src[2] = { mul_result, *initial };<br>
nir_const_value add_result =<br>
- nir_eval_const_opcode(add_op, 1, bit_size, add_src);<br>
+ nir_eval_const_opcode(add_op, 1, bit_size, add_src, SHADER_DEFAULT_FLOAT_CONTROL_MODE);<br>
<br>
nir_const_value src[2] = { { {0, } }, { {0, } } };<br>
src[limit_rhs ? 0 : 1] = add_result;<br>
src[limit_rhs ? 1 : 0] = *limit;<br>
<br>
/* Evaluate the loop exit condition */<br>
- nir_const_value result = nir_eval_const_opcode(cond_op, 1, bit_size, src);<br>
+ nir_const_value result = nir_eval_const_opcode(cond_op, 1, bit_size, src,<br>
+ SHADER_DEFAULT_FLOAT_CONTROL_MODE);<br>
<br>
return invert_cond ? (result.u32[0] == 0) : (result.u32[0] != 0);<br>
}<br>
diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c<br>
index 5097a3bcc36..bd6130d5b33 100644<br>
--- a/src/compiler/nir/nir_opt_constant_folding.c<br>
+++ b/src/compiler/nir/nir_opt_constant_folding.c<br>
@@ -39,7 +39,7 @@ struct constant_fold_state {<br>
};<br>
<br>
static bool<br>
-constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)<br>
+constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx, unsigned execution_mode)<br>
{<br>
nir_const_value src[NIR_MAX_VEC_COMPONENTS];<br>
<br>
@@ -108,7 +108,7 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)<br>
<br>
nir_const_value dest =<br>
nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components,<br>
- bit_size, src);<br>
+ bit_size, src, execution_mode);<br>
<br>
nir_load_const_instr *new_instr =<br>
nir_load_const_instr_create(mem_ctx,<br>
@@ -161,14 +161,14 @@ constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)<br>
}<br>
<br>
static bool<br>
-constant_fold_block(nir_block *block, void *mem_ctx)<br>
+constant_fold_block(nir_block *block, void *mem_ctx, unsigned execution_mode)<br>
{<br>
bool progress = false;<br>
<br>
nir_foreach_instr_safe(instr, block) {<br>
switch (instr->type) {<br>
case nir_instr_type_alu:<br>
- progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx);<br>
+ progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx, execution_mode);<br>
break;<br>
case nir_instr_type_intrinsic:<br>
progress |=<br>
@@ -184,13 +184,13 @@ constant_fold_block(nir_block *block, void *mem_ctx)<br>
}<br>
<br>
static bool<br>
-nir_opt_constant_folding_impl(nir_function_impl *impl)<br>
+nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned execution_mode)<br>
{<br>
void *mem_ctx = ralloc_parent(impl);<br>
bool progress = false;<br>
<br>
nir_foreach_block(block, impl) {<br>
- progress |= constant_fold_block(block, mem_ctx);<br>
+ progress |= constant_fold_block(block, mem_ctx, execution_mode);<br>
}<br>
<br>
if (progress)<br>
@@ -204,10 +204,11 @@ bool<br>
nir_opt_constant_folding(nir_shader *shader)<br>
{<br>
bool progress = false;<br>
+ unsigned execution_mode = shader->info.shader_float_controls_execution_mode;<br>
<br>
nir_foreach_function(function, shader) {<br>
if (function->impl)<br>
- progress |= nir_opt_constant_folding_impl(function->impl);<br>
+ progress |= nir_opt_constant_folding_impl(function->impl, execution_mode);<br>
}<br>
<br>
return progress;<br>
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c<br>
index 96d4d80970f..7578a83e2bb 100644<br>
--- a/src/compiler/spirv/spirv_to_nir.c<br>
+++ b/src/compiler/spirv/spirv_to_nir.c<br>
@@ -1842,7 +1842,8 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,<br>
}<br>
<br>
val->constant->values[0] =<br>
- nir_eval_const_opcode(op, num_components, bit_size, src);<br>
+ nir_eval_const_opcode(op, num_components, bit_size, src,<br>
+ b->shader->info.shader_float_controls_execution_mode);<br>
break;<br>
} /* default */<br>
}<br>
-- <br>
2.19.1<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div></div>