<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Mar 7, 2016 at 12:46 AM, Samuel Iglesias Gonsálvez <span dir="ltr"><<a href="mailto:siglesias@igalia.com" target="_blank">siglesias@igalia.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">From: Connor Abbott <<a href="mailto:connor.w.abbott@intel.com" target="_blank">connor.w.abbott@intel.com</a>><br>
<br>
When we replace an expresion we have to compute bitsize information for the<br>
replacement. We do this in two passes to validate that bitsize information<br>
is consistent and correct: first we propagate bitsize from child nodes to<br>
parent, then we do it the other way around, starting from the original's<br>
instruction destination bitsize.<br>
<br>
v2 (Iago):<br>
- Always use nir_type_bool32 instead of nir_type_bool when generating<br>
algebraic optimizations. Before we used nir_type_bool32 with constants<br>
and nir_type_bool with variables.<br>
- Fix bool comparisons in nir_search.c to account for bitsized types.<br>
<br>
v3 (Sam):<br>
- Unpack the double constant value as unsigned long long (8 bytes) in<br>
nir_algrebraic.py.<br>
<br>
Signed-off-by: Iago Toral Quiroga <<a href="mailto:itoral@igalia.com" target="_blank">itoral@igalia.com</a>><br>
Signed-off-by: Samuel Iglesias Gonsálvez <<a href="mailto:siglesias@igalia.com" target="_blank">siglesias@igalia.com</a>><br>
---<br>
src/compiler/nir/nir_algebraic.py | 22 +++-<br>
src/compiler/nir/nir_search.c | 244 ++++++++++++++++++++++++++++++++++----<br>
src/compiler/nir/nir_search.h | 8 +-<br>
3 files changed, 247 insertions(+), 27 deletions(-)<br>
<br>
diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py<br>
index 2357b57..1818877 100644<br>
--- a/src/compiler/nir/nir_algebraic.py<br>
+++ b/src/compiler/nir/nir_algebraic.py<br>
@@ -63,11 +63,11 @@ class Value(object):<br>
static const ${val.c_type} ${<a href="http://val.name" rel="noreferrer" target="_blank">val.name</a>} = {<br>
{ ${val.type_enum} },<br>
% if isinstance(val, Constant):<br>
- { ${hex(val)} /* ${val.value} */ },<br>
+ ${val.type()}, { ${hex(val)} /* ${val.value} */ },<br>
% elif isinstance(val, Variable):<br>
${val.index}, /* ${val.var_name} */<br>
${'true' if val.is_constant else 'false'},<br>
- nir_type_${ val.required_type or 'invalid' },<br>
+ ${val.type() or 'nir_type_invalid' },<br>
% elif isinstance(val, Expression):<br>
nir_op_${val.opcode},<br>
{ ${', '.join(src.c_ptr for src in val.sources)} },<br>
@@ -107,10 +107,18 @@ class Constant(Value):<br>
if isinstance(self.value, (int, long)):<br>
return hex(self.value)<br>
elif isinstance(self.value, float):<br>
- return hex(struct.unpack('I', struct.pack('f', self.value))[0])<br>
+ return hex(struct.unpack('Q', struct.pack('d', self.value))[0])<br>
else:<br>
assert False<br>
<br>
+ def type(self):<br>
+ if isinstance(self.value, (bool)):<br>
+ return "nir_type_bool32"<br>
+ elif isinstance(self.value, (int, long)):<br>
+ return "nir_type_int"<br>
+ elif isinstance(self.value, float):<br>
+ return "nir_type_float"<br>
+<br>
_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?")<br>
<br>
class Variable(Value):<br>
@@ -129,6 +137,14 @@ class Variable(Value):<br>
<br>
self.index = varset[self.var_name]<br>
<br>
+ def type(self):<br>
+ if self.required_type == 'bool':<br>
+ return "nir_type_bool32"<br>
+ elif self.required_type in ('int', 'unsigned'):<br>
+ return "nir_type_int"<br>
+ elif self.required_type == 'float':<br>
+ return "nir_type_float"<br>
+<br>
class Expression(Value):<br>
def __init__(self, expr, name_base, varset):<br>
Value.__init__(self, name_base, "expression")<br>
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c<br>
index f509ce6..e874c79 100644<br>
--- a/src/compiler/nir/nir_search.c<br>
+++ b/src/compiler/nir/nir_search.c<br>
@@ -62,7 +62,8 @@ alu_instr_is_bool(nir_alu_instr *instr)<br>
case nir_op_inot:<br>
return src_is_bool(instr->src[0].src);<br>
default:<br>
- return nir_op_infos[instr->op].output_type == nir_type_bool;<br>
+ return (nir_op_infos[instr->op].output_type & NIR_ALU_TYPE_BASE_TYPE_MASK)<br>
+ == nir_type_bool;<br>
}<br>
}<br>
<br>
@@ -125,8 +126,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
nir_alu_instr *src_alu =<br>
nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);<br>
<br>
- if (nir_op_infos[src_alu->op].output_type != var->type &&<br>
- !(var->type == nir_type_bool && alu_instr_is_bool(src_alu)))<br>
+ if ((nir_op_infos[src_alu->op].output_type &<br>
+ NIR_ALU_TYPE_BASE_TYPE_MASK) != var->type &&<br>
+ !((var->type & NIR_ALU_TYPE_BASE_TYPE_MASK) == nir_type_bool &&<br>
+ alu_instr_is_bool(src_alu)))<br>
return false;<br>
}<br>
<br>
@@ -158,21 +161,65 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
nir_load_const_instr *load =<br>
nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);<br>
<br>
- switch (nir_op_infos[instr->op].input_types[src]) {<br>
+ switch (const_val->type) {<br>
case nir_type_float:<br>
for (unsigned i = 0; i < num_components; ++i) {<br>
- if (load->value.f[new_swizzle[i]] != const_val->data.f)<br>
+ double val;<br>
+ switch (load->def.bit_size) {<br>
+ case 32:<br>
+ val = load->value.f[new_swizzle[i]];<br>
+ break;<br>
+ case 64:<br>
+ val = load->value.d[new_swizzle[i]];<br>
+ break;<br>
+ default:<br>
+ unreachable("unknown bit size");<br>
+ }<br>
+<br>
+ if (val != const_val->data.d)<br>
return false;<br>
}<br>
return true;<br>
+<br>
case nir_type_int:<br>
+ for (unsigned i = 0; i < num_components; ++i) {<br>
+ int64_t val;<br>
+ switch (load->def.bit_size) {<br>
+ case 32:<br>
+ val = load->value.i[new_swizzle[i]];<br>
+ break;<br>
+ case 64:<br>
+ val = load->value.l[new_swizzle[i]];<br>
+ break;<br>
+ default:<br>
+ unreachable("unknown bit size");<br>
+ }<br>
+<br>
+ if (val != const_val->data.i)<br>
+ return false; <br></blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ }<br>
+ return true;<br>
+<br>
case nir_type_uint:<br>
- case nir_type_bool:<br>
+ case nir_type_bool32:<br>
for (unsigned i = 0; i < num_components; ++i) {<br>
- if (load->value.i[new_swizzle[i]] != const_val->data.i)<br>
+ uint64_t val;<br>
+ switch (load->def.bit_size) {<br>
+ case 32:<br>
+ val = load->value.u[new_swizzle[i]];<br>
+ break;<br>
+ case 64:<br>
+ val = load->value.ul[new_swizzle[i]];<br>
+ break;<br>
+ default:<br>
+ unreachable("unknown bit size");<br>
+ }<br>
+<br>
+ if (val != const_val->data.u)<br>
return false;<br></blockquote><div><br></div><div>64-bit. Also, we may want bool to be its own case. I think keeping it with uint is probably fine.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
}<br>
return true;<br>
+<br>
default:<br>
unreachable("Invalid alu source type");<br>
}<br>
@@ -244,9 +291,123 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,<br>
}<br>
}<br>
<br>
+typedef struct bitsize_tree {<br>
+ unsigned num_srcs;<br>
+ struct bitsize_tree *srcs[4];<br>
+<br>
+ unsigned common_size;<br>
+ bool is_src_sized[4];<br>
+ bool is_dest_sized;<br>
+<br>
+ unsigned dest_size;<br>
+ unsigned src_size[4];<br>
+} bitsize_tree;<br>
+<br>
+static bitsize_tree *<br>
+build_bitsize_tree(void *mem_ctx, struct match_state *state,<br>
+ const nir_search_value *value)<br>
+{<br>
+ bitsize_tree *tree = ralloc(mem_ctx, bitsize_tree);<br>
+<br>
+ switch (value->type) {<br>
+ case nir_search_value_expression: {<br>
+ nir_search_expression *expr = nir_search_value_as_expression(value);<br>
+ nir_op_info info = nir_op_infos[expr->opcode];<br>
+ tree->num_srcs = info.num_inputs;<br>
+ tree->common_size = 0;<br>
+ for (unsigned i = 0; i < info.num_inputs; i++) {<br>
+ tree->is_src_sized[i] = !!(info.input_types[i] & NIR_ALU_TYPE_SIZE_MASK);<br>
+ if (tree->is_src_sized[i])<br>
+ tree->src_size[i] = info.input_types[i] & NIR_ALU_TYPE_SIZE_MASK;<br>
+ tree->srcs[i] = build_bitsize_tree(mem_ctx, state, expr->srcs[i]);<br>
+ }<br>
+ tree->is_dest_sized = !!(info.output_type & NIR_ALU_TYPE_SIZE_MASK);<br>
+ if (tree->is_dest_sized)<br>
+ tree->dest_size = info.output_type & NIR_ALU_TYPE_SIZE_MASK;<br>
+ break;<br>
+ }<br>
+<br>
+ case nir_search_value_variable: {<br>
+ nir_search_variable *var = nir_search_value_as_variable(value);<br>
+ tree->num_srcs = 0;<br>
+ tree->is_dest_sized = true;<br>
+ tree->dest_size = nir_src_bit_size(state->variables[var->variable].src);<br>
+ break;<br>
+ }<br>
+<br>
+ case nir_search_value_constant: {<br>
+ tree->num_srcs = 0;<br>
+ tree->is_dest_sized = false;<br>
+ tree->common_size = 0;<br>
+ break;<br>
+ }<br>
+ }<br>
+<br>
+ return tree;<br>
+}<br>
+<br>
+static unsigned<br>
+bitsize_tree_filter_up(bitsize_tree *tree)<br>
+{<br>
+ for (unsigned i = 0; i < tree->num_srcs; i++) {<br>
+ unsigned src_size = bitsize_tree_filter_up(tree->srcs[i]);<br>
+ if (src_size == 0)<br>
+ continue;<br>
+<br>
+ if (tree->is_src_sized[i]) {<br>
+ assert(src_size == tree->src_size[i]);<br>
+ } else if (tree->common_size != 0) {<br>
+ assert(src_size == tree->common_size);<br>
+ tree->src_size[i] = src_size;<br>
+ } else {<br>
+ tree->common_size = src_size;<br>
+ tree->src_size[i] = src_size;<br>
+ }<br>
+ }<br>
+<br>
+ if (tree->num_srcs && tree->common_size) {<br>
+ if (tree->dest_size == 0)<br>
+ tree->dest_size = tree->common_size;<br>
+ else if (!tree->is_dest_sized)<br>
+ assert(tree->dest_size == tree->common_size);<br>
+<br>
+ for (unsigned i = 0; i < tree->num_srcs; i++) {<br>
+ if (!tree->src_size[i])<br>
+ tree->src_size[i] = tree->common_size;<br>
+ }<br>
+ }<br>
+<br>
+ return tree->dest_size;<br>
+}<br>
+<br>
+static void<br>
+bitsize_tree_filter_down(bitsize_tree *tree, unsigned size)<br>
+{<br>
+ if (tree->dest_size)<br>
+ assert(tree->dest_size == size);<br>
+ else<br>
+ tree->dest_size = size;<br>
+<br>
+ if (!tree->is_dest_sized) {<br>
+ if (tree->common_size)<br>
+ assert(tree->common_size == size);<br>
+ else<br>
+ tree->common_size = size;<br>
+ }<br>
+<br>
+ for (unsigned i = 0; i < tree->num_srcs; i++) {<br>
+ if (!tree->src_size[i]) {<br>
+ assert(tree->common_size);<br>
+ tree->src_size[i] = tree->common_size;<br>
+ }<br>
+ bitsize_tree_filter_down(tree->srcs[i], tree->src_size[i]);<br>
+ }<br>
+}<br></blockquote><div><br></div><div>Oh, man... This is so ugly... Unfortunately, I know of no better way to do it so we'll go with it.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
static nir_alu_src<br>
-construct_value(const nir_search_value *value, nir_alu_type type,<br>
- unsigned num_components, struct match_state *state,<br>
+construct_value(const nir_search_value *value,<br>
+ unsigned num_components, bitsize_tree *bitsize,<br>
+ struct match_state *state,<br>
nir_instr *instr, void *mem_ctx)<br>
{<br>
switch (value->type) {<br>
@@ -257,7 +418,8 @@ construct_value(const nir_search_value *value, nir_alu_type type,<br>
num_components = nir_op_infos[expr->opcode].output_size;<br>
<br>
nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);<br>
- nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, 32, NULL);<br>
+ nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components,<br>
+ bitsize->dest_size, NULL);<br>
alu->dest.write_mask = (1 << num_components) - 1;<br>
alu->dest.saturate = false;<br>
<br>
@@ -269,8 +431,7 @@ construct_value(const nir_search_value *value, nir_alu_type type,<br>
num_components = nir_op_infos[alu->op].input_sizes[i];<br>
<br>
alu->src[i] = construct_value(expr->srcs[i],<br>
- nir_op_infos[alu->op].input_types[i],<br>
- num_components,<br>
+ num_components, bitsize->srcs[i],<br>
state, instr, mem_ctx);<br>
}<br>
<br>
@@ -301,23 +462,57 @@ construct_value(const nir_search_value *value, nir_alu_type type,<br>
const nir_search_constant *c = nir_search_value_as_constant(value);<br>
nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);<br>
<br>
- switch (type) {<br>
+ switch (c->type) { <br></blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
case nir_type_float:<br>
- load-><a href="http://def.name" rel="noreferrer" target="_blank">def.name</a> = ralloc_asprintf(mem_ctx, "%f", c->data.f);<br>
- load->value.f[0] = c->data.f;<br>
+ load-><a href="http://def.name" rel="noreferrer" target="_blank">def.name</a> = ralloc_asprintf(mem_ctx, "%f", c->data.d);<br>
+ switch (bitsize->dest_size) {<br>
+ case 32:<br>
+ load->value.f[0] = c->data.d;<br>
+ break;<br>
+ case 64:<br>
+ load->value.d[0] = c->data.d;<br>
+ break;<br>
+ default:<br>
+ unreachable("unknown bit size");<br>
+ }<br>
break;<br>
+<br>
case nir_type_int:<br>
- load-><a href="http://def.name" rel="noreferrer" target="_blank">def.name</a> = ralloc_asprintf(mem_ctx, "%d", c->data.i);<br>
- load->value.i[0] = c->data.i;<br>
+ load-><a href="http://def.name" rel="noreferrer" target="_blank">def.name</a> = ralloc_asprintf(mem_ctx, "%ld", c->data.i);<br>
+ switch (bitsize->dest_size) {<br>
+ case 32:<br>
+ load->value.i[0] = c->data.i;<br>
+ break;<br>
+ case 64:<br>
+ load->value.l[0] = c->data.i;<br>
+ break;<br>
+ default:<br>
+ unreachable("unknown bit size");<br>
+ }<br>
break;<br>
+<br>
case nir_type_uint:<br>
- case nir_type_bool:<br>
+ load-><a href="http://def.name" rel="noreferrer" target="_blank">def.name</a> = ralloc_asprintf(mem_ctx, "%lu", c->data.u);<br>
+ switch (bitsize->dest_size) {<br>
+ case 32:<br>
+ load->value.u[0] = c->data.u;<br>
+ break;<br>
+ case 64:<br>
+ load->value.ul[0] = c->data.u;<br>
+ break;<br>
+ default:<br>
+ unreachable("unknown bit size");<br>
+ }<br>
+<br>
+ case nir_type_bool32:<br>
load->value.u[0] = c->data.u;<br>
break;<br>
default:<br>
unreachable("Invalid alu source type");<br>
}<br>
<br>
+ load->def.bit_size = bitsize->dest_size;<br>
+<br>
nir_instr_insert_before(instr, &load->instr);<br>
<br>
nir_alu_src val;<br>
@@ -352,6 +547,11 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,<br>
swizzle, &state))<br>
return NULL;<br>
<br>
+ void *bitsize_ctx = ralloc_context(NULL);<br>
+ bitsize_tree *tree = build_bitsize_tree(bitsize_ctx, &state, replace);<br>
+ bitsize_tree_filter_up(tree);<br>
+ bitsize_tree_filter_down(tree, instr->dest.dest.ssa.bit_size);<br>
+<br>
/* Inserting a mov may be unnecessary. However, it's much easier to<br>
* simply let copy propagation clean this up than to try to go through<br>
* and rewrite swizzles ourselves.<br>
@@ -362,9 +562,9 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,<br>
instr->dest.dest.ssa.num_components,<br>
instr->dest.dest.ssa.bit_size, NULL);<br>
<br>
- mov->src[0] = construct_value(replace, nir_op_infos[instr->op].output_type,<br>
- instr->dest.dest.ssa.num_components, &state,<br>
- &instr->instr, mem_ctx);<br>
+ mov->src[0] = construct_value(replace,<br>
+ instr->dest.dest.ssa.num_components,<br>
+ tree, &state, &instr->instr, mem_ctx);<br>
nir_instr_insert_before(&instr->instr, &mov->instr);<br>
<br>
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,<br>
@@ -376,5 +576,7 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,<br>
*/<br>
nir_instr_remove(&instr->instr);<br>
<br>
+ ralloc_free(bitsize_ctx);<br>
+<br>
return mov;<br>
}<br>
diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h<br>
index 7d47792..321d6d0 100644<br>
--- a/src/compiler/nir/nir_search.h<br>
+++ b/src/compiler/nir/nir_search.h<br>
@@ -71,10 +71,12 @@ typedef struct {<br>
typedef struct {<br>
nir_search_value value;<br>
<br>
+ nir_alu_type type;<br>
+<br>
union {<br>
- uint32_t u;<br>
- int32_t i;<br>
- float f;<br>
+ uint64_t u;<br>
+ int64_t i;<br>
+ double d; <br></blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
} data;<br>
} nir_search_constant;<br>
<span><font color="#888888"><br>
--<br>
2.7.0<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>