<div dir="ltr">This patch appears to cause regressions on Haswell. I'm not sure why. The fact that it's only haswell strikes me as a bit odd so it's possible it's not your patch's fault. I'm looking into it.<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Jan 10, 2017 at 1:41 AM, Timothy Arceri <span dir="ltr"><<a href="mailto:timothy.arceri@collabora.com" target="_blank">timothy.arceri@collabora.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">If we just check that we are not dealing with an identity swizzle<br>
in match_value() before calling match_expression() we can avoid<br>
a bunch of temp swizzle arrays and the passing it around and<br>
resetting craziness.<br>
---<br>
src/compiler/nir/nir_search.c | 89 ++++++++++++++++++------------<wbr>-------------<br>
1 file changed, 38 insertions(+), 51 deletions(-)<br>
<br>
diff --git a/src/compiler/nir/nir_search.<wbr>c b/src/compiler/nir/nir_search.<wbr>c<br>
index b34b13f..7a84b18 100644<br>
--- a/src/compiler/nir/nir_search.<wbr>c<br>
+++ b/src/compiler/nir/nir_search.<wbr>c<br>
@@ -37,8 +37,7 @@ struct match_state {<br>
<br>
static bool<br>
match_expression(const nir_search_expression *expr, nir_alu_instr *instr,<br>
- unsigned num_components, const uint8_t *swizzle,<br>
- struct match_state *state);<br>
+ unsigned num_components, struct match_state *state);<br>
<br>
static const uint8_t identity_swizzle[] = { 0, 1, 2, 3 };<br>
<br>
@@ -93,22 +92,15 @@ src_is_type(nir_src src, nir_alu_type type)<br>
<br>
static bool<br>
match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
- unsigned num_components, const uint8_t *swizzle,<br>
- struct match_state *state)<br>
+ unsigned num_components, struct match_state *state)<br>
{<br>
- uint8_t new_swizzle[4];<br>
-<br>
/* If the source is an explicitly sized source, then we need to reset<br>
- * both the number of components and the swizzle.<br>
+ * the number of components.<br>
*/<br>
if (nir_op_infos[instr->op].<wbr>input_sizes[src] != 0) {<br>
num_components = nir_op_infos[instr->op].input_<wbr>sizes[src];<br>
- swizzle = identity_swizzle;<br>
}<br>
<br>
- for (unsigned i = 0; i < num_components; ++i)<br>
- new_swizzle[i] = instr->src[src].swizzle[<wbr>swizzle[i]];<br>
-<br>
/* If the value has a specific bit size and it doesn't match, bail */<br>
if (value->bit_size &&<br>
nir_src_bit_size(instr->src[<wbr>src].src) != value->bit_size)<br>
@@ -122,9 +114,23 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
if (instr->src[src].src.ssa-><wbr>parent_instr->type != nir_instr_type_alu)<br>
return false;<br>
<br>
+ /* If we have an explicitly sized destination, we can only handle the<br>
+ * identity swizzle. While dot(vec3(a, b, c).zxy) is a valid<br>
+ * expression, we don't have the information right now to propagate that<br>
+ * swizzle through. We can only properly propagate swizzles if the<br>
+ * instruction is vectorized.<br>
+ */<br>
+ nir_alu_instr *alu_instr =<br>
+ nir_instr_as_alu(instr->src[<wbr>src].src.ssa->parent_instr);<br>
+ if (nir_op_infos[alu_instr->op].<wbr>output_size != 0) {<br>
+ for (unsigned i = 0; i < num_components; i++) {<br>
+ if (instr->src[src].swizzle[i] != i)<br>
+ return false;<br>
+ }<br>
+ }<br>
+<br>
return match_expression(nir_search_<wbr>value_as_expression(value),<br>
- nir_instr_as_alu(instr->src[<wbr>src].src.ssa->parent_instr),<br>
- num_components, new_swizzle, state);<br>
+ alu_instr, num_components, state);<br>
<br>
case nir_search_value_variable: {<br>
nir_search_variable *var = nir_search_value_as_variable(<wbr>value);<br>
@@ -138,7 +144,8 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
assert(!instr->src[src].abs && !instr->src[src].negate);<br>
<br>
for (unsigned i = 0; i < num_components; ++i) {<br>
- if (state->variables[var-><wbr>variable].swizzle[i] != new_swizzle[i])<br>
+ if (state->variables[var-><wbr>variable].swizzle[i] !=<br>
+ instr->src[src].swizzle[i])<br>
return false;<br>
}<br>
<br>
@@ -148,7 +155,8 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
instr->src[src].src.ssa-><wbr>parent_instr->type != nir_instr_type_load_const)<br>
return false;<br>
<br>
- if (var->cond && !var->cond(instr, src, num_components, new_swizzle))<br>
+ if (var->cond && !var->cond(instr, src, num_components,<br>
+ instr->src[src].swizzle))<br>
return false;<br>
<br>
if (var->type != nir_type_invalid &&<br>
@@ -161,9 +169,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
state->variables[var-><wbr>variable].negate = false;<br>
<br>
for (unsigned i = 0; i < 4; ++i) {<br>
- if (i < num_components)<br>
- state->variables[var-><wbr>variable].swizzle[i] = new_swizzle[i];<br>
- else<br>
+ if (i < num_components) {<br>
+ state->variables[var-><wbr>variable].swizzle[i] =<br>
+ instr->src[src].swizzle[i];<br>
+ } else<br>
state->variables[var-><wbr>variable].swizzle[i] = 0;<br>
}<br>
<br>
@@ -189,10 +198,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
double val;<br>
switch (load->def.bit_size) {<br>
case 32:<br>
- val = load->value.f32[new_swizzle[i]<wbr>];<br>
+ val = load->value.f32[instr->src[<wbr>src].swizzle[i]];<br>
break;<br>
case 64:<br>
- val = load->value.f64[new_swizzle[i]<wbr>];<br>
+ val = load->value.f64[instr->src[<wbr>src].swizzle[i]];<br>
break;<br>
default:<br>
unreachable("unknown bit size");<br>
@@ -208,10 +217,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
int64_t val;<br>
switch (load->def.bit_size) {<br>
case 32:<br>
- val = load->value.i32[new_swizzle[i]<wbr>];<br>
+ val = load->value.i32[instr->src[<wbr>src].swizzle[i]];<br>
break;<br>
case 64:<br>
- val = load->value.i64[new_swizzle[i]<wbr>];<br>
+ val = load->value.i64[instr->src[<wbr>src].swizzle[i]];<br>
break;<br>
default:<br>
unreachable("unknown bit size");<br>
@@ -228,10 +237,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
uint64_t val;<br>
switch (load->def.bit_size) {<br>
case 32:<br>
- val = load->value.u32[new_swizzle[i]<wbr>];<br>
+ val = load->value.u32[instr->src[<wbr>src].swizzle[i]];<br>
break;<br>
case 64:<br>
- val = load->value.u64[new_swizzle[i]<wbr>];<br>
+ val = load->value.u64[instr->src[<wbr>src].swizzle[i]];<br>
break;<br>
default:<br>
unreachable("unknown bit size");<br>
@@ -254,8 +263,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
<br>
static bool<br>
match_expression(const nir_search_expression *expr, nir_alu_instr *instr,<br>
- unsigned num_components, const uint8_t *swizzle,<br>
- struct match_state *state)<br>
+ unsigned num_components, struct match_state *state)<br>
{<br>
if (instr->op != expr->opcode)<br>
return false;<br>
@@ -274,19 +282,6 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,<br>
assert(!instr->dest.saturate);<br>
assert(nir_op_infos[instr->op]<wbr>.num_inputs > 0);<br>
<br>
- /* If we have an explicitly sized destination, we can only handle the<br>
- * identity swizzle. While dot(vec3(a, b, c).zxy) is a valid<br>
- * expression, we don't have the information right now to propagate that<br>
- * swizzle through. We can only properly propagate swizzles if the<br>
- * instruction is vectorized.<br>
- */<br>
- if (nir_op_infos[instr->op].<wbr>output_size != 0) {<br>
- for (unsigned i = 0; i < num_components; i++) {<br>
- if (swizzle[i] != i)<br>
- return false;<br>
- }<br>
- }<br>
-<br>
/* Stash off the current variables_seen bitmask. This way we can<br>
* restore it prior to matching in the commutative case below.<br>
*/<br>
@@ -294,8 +289,7 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,<br>
<br>
bool matched = true;<br>
for (unsigned i = 0; i < nir_op_infos[instr->op].num_<wbr>inputs; i++) {<br>
- if (!match_value(expr->srcs[i], instr, i, num_components,<br>
- swizzle, state)) {<br>
+ if (!match_value(expr->srcs[i], instr, i, num_components, state)) {<br>
matched = false;<br>
break;<br>
}<br>
@@ -313,12 +307,10 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,<br>
*/<br>
state->variables_seen = variables_seen_stash;<br>
<br>
- if (!match_value(expr->srcs[0], instr, 1, num_components,<br>
- swizzle, state))<br>
+ if (!match_value(expr->srcs[0], instr, 1, num_components, state))<br>
return false;<br>
<br>
- return match_value(expr->srcs[1], instr, 0, num_components,<br>
- swizzle, state);<br>
+ return match_value(expr->srcs[1], instr, 0, num_components, state);<br>
} else {<br>
return false;<br>
}<br>
@@ -578,11 +570,6 @@ nir_alu_instr *<br>
nir_replace_instr(nir_alu_<wbr>instr *instr, const nir_search_expression *search,<br>
const nir_search_value *replace, void *mem_ctx)<br>
{<br>
- uint8_t swizzle[4] = { 0, 0, 0, 0 };<br>
-<br>
- for (unsigned i = 0; i < instr->dest.dest.ssa.num_<wbr>components; ++i)<br>
- swizzle[i] = i;<br>
-<br>
assert(instr->dest.dest.is_<wbr>ssa);<br>
<br>
struct match_state state;<br>
@@ -591,7 +578,7 @@ nir_replace_instr(nir_alu_<wbr>instr *instr, const nir_search_expression *search,<br>
state.variables_seen = 0;<br>
<br>
if (!match_expression(search, instr, instr->dest.dest.ssa.num_<wbr>components,<br>
- swizzle, &state))<br>
+ &state))<br>
return NULL;<br>
<br>
void *bitsize_ctx = ralloc_context(NULL);<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.9.3<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div>