<div dir="ltr">Reviewed-by: Connor Abbott <<a href="mailto:cwabbott0@gmail.com">cwabbott0@gmail.com</a>><div><br></div><div>Nice to see that this idea worked out well!<br><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Dec 16, 2014 at 1:11 AM, Jason Ekstrand <span dir="ltr"><<a href="mailto:jason@jlekstrand.net" target="_blank">jason@jlekstrand.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">We used to have the number of components built into the intrinsic. This<br>
meant that all of our load/store intrinsics had vec1, vec2, vec3, and vec4<br>
variants. This led to piles of switch statements to generate the correct<br>
texture names, and introspection to figure out the number of components.<br></blockquote><div><br></div><div>This doesn't touch textures, I think you can just delete "texture" and it'll make more sense.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
We can make things much nicer by allowing "vectorized" intrinsics.<br>
---<br>
src/glsl/nir/glsl_to_nir.cpp | 60 ++++------------<br>
src/glsl/nir/nir.h | 15 +++-<br>
src/glsl/nir/nir_intrinsics.h | 79 +++++++--------------<br>
src/glsl/nir/nir_lower_io.c | 115 +++++++------------------------<br>
src/glsl/nir/nir_lower_locals_to_regs.c | 18 ++---<br>
src/glsl/nir/nir_lower_system_values.c | 3 +-<br>
src/glsl/nir/nir_lower_variables.c | 74 +++++++-------------<br>
src/glsl/nir/nir_validate.c | 10 +--<br>
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 64 +++++------------<br>
9 files changed, 123 insertions(+), 315 deletions(-)<br>
<br>
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp<br>
index f85b50e..088a8e9 100644<br>
--- a/src/glsl/nir/glsl_to_nir.cpp<br>
+++ b/src/glsl/nir/glsl_to_nir.cpp<br>
@@ -629,7 +629,8 @@ nir_visitor::visit(ir_call *ir)<br>
nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);<br>
<br>
nir_intrinsic_instr *store_instr =<br>
- nir_intrinsic_instr_create(shader, nir_intrinsic_store_var_vec1);<br>
+ nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);<br>
+ store_instr->num_components = 1;<br>
<br>
ir->return_deref->accept(this);<br>
store_instr->variables[0] = this->deref_head;<br>
@@ -704,17 +705,9 @@ nir_visitor::visit(ir_assignment *ir)<br>
* back into the LHS. Copy propagation should get rid of the mess.<br>
*/<br>
<br>
- nir_intrinsic_op load_op;<br>
- switch (ir->lhs->type->vector_elements) {<br>
- case 1: load_op = nir_intrinsic_load_var_vec1; break;<br>
- case 2: load_op = nir_intrinsic_load_var_vec2; break;<br>
- case 3: load_op = nir_intrinsic_load_var_vec3; break;<br>
- case 4: load_op = nir_intrinsic_load_var_vec4; break;<br>
- default: unreachable("Invalid number of components"); break;<br>
- }<br>
-<br>
- nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader,<br>
- load_op);<br>
+ nir_intrinsic_instr *load =<br>
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);<br>
+ load->num_components = ir->lhs->type->vector_elements;<br>
load->dest.is_ssa = true;<br>
nir_ssa_def_init(&load->instr, &load->dest.ssa,<br>
num_components, NULL);<br>
@@ -759,17 +752,9 @@ nir_visitor::visit(ir_assignment *ir)<br>
src.ssa = &vec->dest.dest.ssa;<br>
}<br>
<br>
- nir_intrinsic_op store_op;<br>
- switch (ir->lhs->type->vector_elements) {<br>
- case 1: store_op = nir_intrinsic_store_var_vec1; break;<br>
- case 2: store_op = nir_intrinsic_store_var_vec2; break;<br>
- case 3: store_op = nir_intrinsic_store_var_vec3; break;<br>
- case 4: store_op = nir_intrinsic_store_var_vec4; break;<br>
- default: unreachable("Invalid number of components"); break;<br>
- }<br>
-<br>
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(this->shader,<br>
- store_op);<br>
+ nir_intrinsic_instr *store =<br>
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);<br>
+ store->num_components = ir->lhs->type->vector_elements;<br>
nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref);<br>
store->variables[0] = nir_deref_as_var(store_deref);<br>
store->src[0] = src;<br>
@@ -848,17 +833,9 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)<br>
* must emit a variable load.<br>
*/<br>
<br>
- nir_intrinsic_op load_op;<br>
- switch (ir->type->vector_elements) {<br>
- case 1: load_op = nir_intrinsic_load_var_vec1; break;<br>
- case 2: load_op = nir_intrinsic_load_var_vec2; break;<br>
- case 3: load_op = nir_intrinsic_load_var_vec3; break;<br>
- case 4: load_op = nir_intrinsic_load_var_vec4; break;<br>
- default: unreachable("Invalid number of components");<br>
- }<br>
-<br>
nir_intrinsic_instr *load_instr =<br>
- nir_intrinsic_instr_create(this->shader, load_op);<br>
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);<br>
+ load_instr->num_components = ir->type->vector_elements;<br>
load_instr->variables[0] = this->deref_head;<br>
add_instr(&load_instr->instr, ir->type->vector_elements);<br>
}<br>
@@ -917,23 +894,12 @@ nir_visitor::visit(ir_expression *ir)<br>
<br>
nir_intrinsic_op op;<br>
if (const_index) {<br>
- switch (ir->type->vector_elements) {<br>
- case 1: op = nir_intrinsic_load_ubo_vec1; break;<br>
- case 2: op = nir_intrinsic_load_ubo_vec2; break;<br>
- case 3: op = nir_intrinsic_load_ubo_vec3; break;<br>
- case 4: op = nir_intrinsic_load_ubo_vec4; break;<br>
- default: assert(0); break;<br>
- }<br>
+ op = nir_intrinsic_load_ubo;<br>
} else {<br>
- switch (ir->type->vector_elements) {<br>
- case 1: op = nir_intrinsic_load_ubo_vec1_indirect; break;<br>
- case 2: op = nir_intrinsic_load_ubo_vec2_indirect; break;<br>
- case 3: op = nir_intrinsic_load_ubo_vec3_indirect; break;<br>
- case 4: op = nir_intrinsic_load_ubo_vec4_indirect; break;<br>
- default: assert(0); break;<br>
- }<br>
+ op = nir_intrinsic_load_ubo_indirect;<br>
}<br>
nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);<br>
+ load->num_components = ir->type->vector_elements;<br>
load->const_index[0] = ir->operands[0]->as_constant()->value.u[0];<br>
load->const_index[1] = const_index ? const_index->value.u[0] : 0; /* base offset */<br>
load->const_index[2] = 1; /* number of vec4's */<br>
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h<br>
index 30146d6..412ceea 100644<br>
--- a/src/glsl/nir/nir.h<br>
+++ b/src/glsl/nir/nir.h<br>
@@ -693,6 +693,9 @@ typedef struct {<br>
<br>
nir_dest dest;<br>
<br>
+ /** number of components if this is a vectorized intrinsic */<br>
+ uint8_t num_components;<br>
+<br>
int const_index[3];<br>
<br>
nir_deref_var *variables[2];<br>
@@ -732,12 +735,20 @@ typedef struct {<br>
<br>
unsigned num_srcs; /** < number of register/SSA inputs */<br>
<br>
- /** number of components of each input register */<br>
+ /** number of components of each input register<br>
+ *<br>
+ * If this value is 0, the number of components is given by the<br>
+ * num_components field of nir_intrinsic_instr.<br>
+ */<br>
unsigned src_components[NIR_INTRINSIC_MAX_INPUTS];<br>
<br>
bool has_dest;<br>
<br>
- /** number of components of each output register */<br>
+ /** number of components of the output register<br>
+ *<br>
+ * If this value is 0, the number of components is given by the<br>
+ * num_components field of nir_intrinsic_instr.<br>
+ */<br>
unsigned dest_components;<br>
<br>
/** the number of inputs/outputs that are variables */<br>
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h<br>
index e4ad8cd..75bd12f 100644<br>
--- a/src/glsl/nir/nir_intrinsics.h<br>
+++ b/src/glsl/nir/nir_intrinsics.h<br>
@@ -42,19 +42,9 @@<br>
#define ARR(...) { __VA_ARGS__ }<br>
<br>
<br>
-INTRINSIC(load_var_vec1, 0, ARR(), true, 1, 1, 0,<br>
- NIR_INTRINSIC_CAN_ELIMINATE)<br>
-INTRINSIC(load_var_vec2, 0, ARR(), true, 2, 1, 0,<br>
- NIR_INTRINSIC_CAN_ELIMINATE)<br>
-INTRINSIC(load_var_vec3, 0, ARR(), true, 3, 1, 0,<br>
- NIR_INTRINSIC_CAN_ELIMINATE)<br>
-INTRINSIC(load_var_vec4, 0, ARR(), true, 4, 1, 0,<br>
- NIR_INTRINSIC_CAN_ELIMINATE)<br>
-INTRINSIC(store_var_vec1, 1, ARR(1), false, 0, 1, 0, 0)<br>
-INTRINSIC(store_var_vec2, 1, ARR(2), false, 0, 1, 0, 0)<br>
-INTRINSIC(store_var_vec3, 1, ARR(3), false, 0, 1, 0, 0)<br>
-INTRINSIC(store_var_vec4, 1, ARR(4), false, 0, 1, 0, 0)<br>
-INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)<br>
+INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)<br>
+INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)<br>
+INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)<br>
<br>
/*<br>
* a barrier is an intrinsic with no inputs/outputs but which can't be moved<br>
@@ -94,27 +84,6 @@ SYSTEM_VALUE(sample_pos, 2)<br>
SYSTEM_VALUE(sample_mask_in, 1)<br>
SYSTEM_VALUE(invocation_id, 1)<br>
<br>
-#define LOAD_OR_INTERP(name, num_srcs, src_comps, num_indices, flags) \<br>
- INTRINSIC(name##_vec1, num_srcs, ARR(src_comps), true, 1, \<br>
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \<br>
- INTRINSIC(name##_vec2, num_srcs, ARR(src_comps), true, 2, \<br>
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \<br>
- INTRINSIC(name##_vec3, num_srcs, ARR(src_comps), true, 3, \<br>
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \<br>
- INTRINSIC(name##_vec4, num_srcs, ARR(src_comps), true, 4, \<br>
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \<br>
- INTRINSIC(name##_vec1_indirect, 1 + num_srcs, ARR(1, src_comps), true, 1, \<br>
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \<br>
- INTRINSIC(name##_vec2_indirect, 1 + num_srcs, ARR(1, src_comps), true, 2, \<br>
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \<br>
- INTRINSIC(name##_vec3_indirect, 1 + num_srcs, ARR(1, src_comps), true, 3, \<br>
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \<br>
- INTRINSIC(name##_vec4_indirect, 1 + num_srcs, ARR(1, src_comps), true, 4, \<br>
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags)<br>
-<br>
-#define LOAD(name, num_indices, flags) \<br>
- LOAD_OR_INTERP(load_##name, 0, 0, num_indices, flags)<br>
-<br>
/*<br>
* The first index is the address to load from, and the second index is the<br>
* number of array elements to load. For UBO's (and SSBO's), the first index<br>
@@ -129,6 +98,12 @@ SYSTEM_VALUE(invocation_id, 1)<br>
* elements begin immediately after the previous array element.<br>
*/<br>
<br>
+#define LOAD(name, num_indices, flags) \<br>
+ INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \<br>
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \<br>
+ INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, \<br>
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \<br>
+<br>
LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER)<br>
LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER)<br>
LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)<br>
@@ -140,29 +115,16 @@ LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)<br>
* interp_at_offset* intrinsics take a second source that is either a<br>
* sample id or a vec2 position offset.<br>
*/<br>
-#define INTERP(name, flags) \<br>
- LOAD_OR_INTERP(interp_##name, 0, 0, 2, flags)<br>
-<br>
-#define INTERP_WITH_ARG(name, src_comps, flags) \<br>
- LOAD_OR_INTERP(interp_##name, 1, src_comps, 2, flags)<br>
<br>
-INTERP(at_centroid, NIR_INTRINSIC_CAN_REORDER)<br>
-INTERP_WITH_ARG(at_sample, 1, NIR_INTRINSIC_CAN_REORDER)<br>
-INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)<br>
+#define INTERP(name, num_srcs, src_comps) \<br>
+ INTRINSIC(interp_##name, num_srcs, ARR(src_comps), true, \<br>
+ 0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \<br>
+ INTRINSIC(interp_##name##_indirect, 1 + num_srcs, ARR(1, src_comps), true, \<br>
+ 0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)<br>
<br>
-#define STORE(name, num_indices, flags) \<br>
- INTRINSIC(store_##name##_vec1, 1, ARR(1), false, 0, 0, num_indices, flags) \<br>
- INTRINSIC(store_##name##_vec2, 1, ARR(2), false, 0, 0, num_indices, flags) \<br>
- INTRINSIC(store_##name##_vec3, 1, ARR(3), false, 0, 0, num_indices, flags) \<br>
- INTRINSIC(store_##name##_vec4, 1, ARR(4), false, 0, 0, num_indices, flags) \<br>
- INTRINSIC(store_##name##_vec1_indirect, 2, ARR(1, 1), false, 0, 0, \<br>
- num_indices, flags) \<br>
- INTRINSIC(store_##name##_vec2_indirect, 2, ARR(2, 1), false, 0, 0, \<br>
- num_indices, flags) \<br>
- INTRINSIC(store_##name##_vec3_indirect, 2, ARR(3, 1), false, 0, 0, \<br>
- num_indices, flags) \<br>
- INTRINSIC(store_##name##_vec4_indirect, 2, ARR(4, 1), false, 0, 0, \<br>
- num_indices, flags) \<br>
+INTERP(at_centroid, 0, 0)<br>
+INTERP(at_sample, 1, 1)<br>
+INTERP(at_offset, 1, 1)<br>
<br>
/*<br>
* Stores work the same way as loads, except now the first register input is<br>
@@ -170,7 +132,12 @@ INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)<br>
* offset.<br>
*/<br>
<br>
+#define STORE(name, num_indices, flags) \<br>
+ INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \<br>
+ INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \<br>
+ num_indices, flags) \<br>
+<br>
STORE(output, 2, 0)<br>
/* STORE(ssbo, 3, 0) */<br>
<br>
-LAST_INTRINSIC(store_output_vec4_indirect)<br>
+LAST_INTRINSIC(store_output_indirect)<br>
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c<br>
index a3b8186..ed3ce81 100644<br>
--- a/src/glsl/nir/nir_lower_io.c<br>
+++ b/src/glsl/nir/nir_lower_io.c<br>
@@ -186,66 +186,6 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,<br>
return base_offset;<br>
}<br>
<br>
-static nir_intrinsic_op<br>
-get_load_op(nir_variable_mode mode, bool indirect, unsigned num_components)<br>
-{<br>
- if (indirect) {<br>
- switch (mode) {<br>
- case nir_var_shader_in:<br>
- switch (num_components) {<br>
- case 1: return nir_intrinsic_load_input_vec1_indirect;<br>
- case 2: return nir_intrinsic_load_input_vec2_indirect;<br>
- case 3: return nir_intrinsic_load_input_vec3_indirect;<br>
- case 4: return nir_intrinsic_load_input_vec4_indirect;<br>
- default: unreachable("Invalid number of components"); break;<br>
- }<br>
- break;<br>
-<br>
- case nir_var_uniform:<br>
- switch (num_components) {<br>
- case 1: return nir_intrinsic_load_uniform_vec1_indirect;<br>
- case 2: return nir_intrinsic_load_uniform_vec2_indirect;<br>
- case 3: return nir_intrinsic_load_uniform_vec3_indirect;<br>
- case 4: return nir_intrinsic_load_uniform_vec4_indirect;<br>
- default: unreachable("Invalid number of components"); break;<br>
- }<br>
- break;<br>
-<br>
- default:<br>
- unreachable("Invalid input type");<br>
- break;<br>
- }<br>
- } else {<br>
- switch (mode) {<br>
- case nir_var_shader_in:<br>
- switch (num_components) {<br>
- case 1: return nir_intrinsic_load_input_vec1;<br>
- case 2: return nir_intrinsic_load_input_vec2;<br>
- case 3: return nir_intrinsic_load_input_vec3;<br>
- case 4: return nir_intrinsic_load_input_vec4;<br>
- default: unreachable("Invalid number of components"); break;<br>
- }<br>
- break;<br>
-<br>
- case nir_var_uniform:<br>
- switch (num_components) {<br>
- case 1: return nir_intrinsic_load_uniform_vec1;<br>
- case 2: return nir_intrinsic_load_uniform_vec2;<br>
- case 3: return nir_intrinsic_load_uniform_vec3;<br>
- case 4: return nir_intrinsic_load_uniform_vec4;<br>
- default: unreachable("Invalid number of components"); break;<br>
- }<br>
- break;<br>
-<br>
- default:<br>
- unreachable("Invalid input type");<br>
- break;<br>
- }<br>
- }<br>
-<br>
- return nir_intrinsic_load_input_vec1;<br>
-}<br>
-<br>
static bool<br>
nir_lower_io_block(nir_block *block, void *void_state)<br>
{<br>
@@ -258,22 +198,35 @@ nir_lower_io_block(nir_block *block, void *void_state)<br>
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);<br>
<br>
switch (intrin->intrinsic) {<br>
- case nir_intrinsic_load_var_vec1:<br>
- case nir_intrinsic_load_var_vec2:<br>
- case nir_intrinsic_load_var_vec3:<br>
- case nir_intrinsic_load_var_vec4: {<br>
+ case nir_intrinsic_load_var: {<br>
nir_variable_mode mode = intrin->variables[0]->var->data.mode;<br>
if (mode != nir_var_shader_in && mode != nir_var_uniform)<br>
continue;<br>
<br>
bool has_indirect = deref_has_indirect(intrin->variables[0]);<br>
- unsigned num_components =<br>
- nir_intrinsic_infos[intrin->intrinsic].dest_components;<br>
<br>
- nir_intrinsic_op load_op = get_load_op(mode, has_indirect,<br>
- num_components);<br>
+ nir_intrinsic_op load_op;<br>
+ switch (mode) {<br>
+ case nir_var_shader_in:<br>
+ if (has_indirect) {<br>
+ load_op = nir_intrinsic_load_input_indirect;<br>
+ } else {<br>
+ load_op = nir_intrinsic_load_input;<br>
+ }<br>
+ break;<br>
+ case nir_var_uniform:<br>
+ if (has_indirect) {<br>
+ load_op = nir_intrinsic_load_uniform_indirect;<br>
+ } else {<br>
+ load_op = nir_intrinsic_load_uniform;<br>
+ }<br>
+ break;<br>
+ default:<br>
+ unreachable("Unknown variable mode");<br>
+ }<br>
nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx,<br>
load_op);<br>
+ load->num_components = intrin->num_components;<br>
<br>
nir_src indirect;<br>
unsigned offset = get_io_offset(intrin->variables[0],<br>
@@ -289,7 +242,7 @@ nir_lower_io_block(nir_block *block, void *void_state)<br>
if (intrin->dest.is_ssa) {<br>
load->dest.is_ssa = true;<br>
nir_ssa_def_init(&load->instr, &load->dest.ssa,<br>
- num_components, NULL);<br>
+ intrin->num_components, NULL);<br>
<br>
nir_src new_src = {<br>
.is_ssa = true,<br>
@@ -307,38 +260,22 @@ nir_lower_io_block(nir_block *block, void *void_state)<br>
break;<br>
}<br>
<br>
- case nir_intrinsic_store_var_vec1:<br>
- case nir_intrinsic_store_var_vec2:<br>
- case nir_intrinsic_store_var_vec3:<br>
- case nir_intrinsic_store_var_vec4: {<br>
+ case nir_intrinsic_store_var: {<br>
if (intrin->variables[0]->var->data.mode != nir_var_shader_out)<br>
continue;<br>
<br>
bool has_indirect = deref_has_indirect(intrin->variables[0]);<br>
- unsigned num_components =<br>
- nir_intrinsic_infos[intrin->intrinsic].src_components[0];<br>
<br>
nir_intrinsic_op store_op;<br>
if (has_indirect) {<br>
- switch (num_components) {<br>
- case 1: store_op = nir_intrinsic_store_output_vec1_indirect; break;<br>
- case 2: store_op = nir_intrinsic_store_output_vec2_indirect; break;<br>
- case 3: store_op = nir_intrinsic_store_output_vec3_indirect; break;<br>
- case 4: store_op = nir_intrinsic_store_output_vec4_indirect; break;<br>
- default: unreachable("Invalid number of components"); break;<br>
- }<br>
+ store_op = nir_intrinsic_store_output_indirect;<br>
} else {<br>
- switch (num_components) {<br>
- case 1: store_op = nir_intrinsic_store_output_vec1; break;<br>
- case 2: store_op = nir_intrinsic_store_output_vec2; break;<br>
- case 3: store_op = nir_intrinsic_store_output_vec3; break;<br>
- case 4: store_op = nir_intrinsic_store_output_vec4; break;<br>
- default: unreachable("Invalid number of components"); break;<br>
- }<br>
+ store_op = nir_intrinsic_store_output;<br>
}<br>
<br>
nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,<br>
store_op);<br>
+ store->num_components = intrin->num_components;<br>
<br>
nir_src indirect;<br>
unsigned offset = get_io_offset(intrin->variables[0],<br>
diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c<br>
index caf1c29..081ed6b 100644<br>
--- a/src/glsl/nir/nir_lower_locals_to_regs.c<br>
+++ b/src/glsl/nir/nir_lower_locals_to_regs.c<br>
@@ -219,22 +219,18 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)<br>
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);<br>
<br>
switch (intrin->intrinsic) {<br>
- case nir_intrinsic_load_var_vec1:<br>
- case nir_intrinsic_load_var_vec2:<br>
- case nir_intrinsic_load_var_vec3:<br>
- case nir_intrinsic_load_var_vec4: {<br>
+ case nir_intrinsic_load_var: {<br>
if (intrin->variables[0]->var->data.mode != nir_var_local)<br>
continue;<br>
<br>
nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);<br>
mov->src[0].src = get_deref_reg_src(intrin->variables[0],<br>
&intrin->instr, state);<br>
- unsigned num_components = mov->src[0].src.reg.reg->num_components;<br>
- mov->dest.write_mask = (1 << num_components) - 1;<br>
+ mov->dest.write_mask = (1 << intrin->num_components) - 1;<br>
if (intrin->dest.is_ssa) {<br>
mov->dest.dest.is_ssa = true;<br>
nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,<br>
- num_components, NULL);<br>
+ intrin->num_components, NULL);<br>
<br>
nir_src new_src = {<br>
.is_ssa = true,<br>
@@ -252,20 +248,16 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)<br>
break;<br>
}<br>
<br>
- case nir_intrinsic_store_var_vec1:<br>
- case nir_intrinsic_store_var_vec2:<br>
- case nir_intrinsic_store_var_vec3:<br>
- case nir_intrinsic_store_var_vec4: {<br>
+ case nir_intrinsic_store_var: {<br>
if (intrin->variables[0]->var->data.mode != nir_var_local)<br>
continue;<br>
<br>
nir_src reg_src = get_deref_reg_src(intrin->variables[0],<br>
&intrin->instr, state);<br>
- unsigned num_components = reg_src.reg.reg->num_components;<br>
<br>
nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);<br>
mov->src[0].src = nir_src_copy(intrin->src[0], state->mem_ctx);<br>
- mov->dest.write_mask = (1 << num_components) - 1;<br>
+ mov->dest.write_mask = (1 << intrin->num_components) - 1;<br>
mov->dest.dest.is_ssa = false;<br>
mov->dest.dest.reg.reg = reg_src.reg.reg;<br>
mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;<br>
diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c<br>
index cbd1dac..e700df4 100644<br>
--- a/src/glsl/nir/nir_lower_system_values.c<br>
+++ b/src/glsl/nir/nir_lower_system_values.c<br>
@@ -30,8 +30,7 @@<br>
static void<br>
convert_instr(nir_intrinsic_instr *instr)<br>
{<br>
- if (instr->intrinsic != nir_intrinsic_load_var_vec1 &&<br>
- instr->intrinsic != nir_intrinsic_load_var_vec2)<br>
+ if (instr->intrinsic != nir_intrinsic_load_var)<br>
return;<br>
<br>
nir_variable *var = instr->variables[0]->var;<br>
diff --git a/src/glsl/nir/nir_lower_variables.c b/src/glsl/nir/nir_lower_variables.c<br>
index 052b021..dab3639 100644<br>
--- a/src/glsl/nir/nir_lower_variables.c<br>
+++ b/src/glsl/nir/nir_lower_variables.c<br>
@@ -449,17 +449,11 @@ fill_deref_tables_block(nir_block *block, void *void_state)<br>
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);<br>
<br>
switch (intrin->intrinsic) {<br>
- case nir_intrinsic_load_var_vec1:<br>
- case nir_intrinsic_load_var_vec2:<br>
- case nir_intrinsic_load_var_vec3:<br>
- case nir_intrinsic_load_var_vec4:<br>
+ case nir_intrinsic_load_var:<br>
register_load_instr(intrin, true, state);<br>
break;<br>
<br>
- case nir_intrinsic_store_var_vec1:<br>
- case nir_intrinsic_store_var_vec2:<br>
- case nir_intrinsic_store_var_vec3:<br>
- case nir_intrinsic_store_var_vec4:<br>
+ case nir_intrinsic_store_var:<br>
register_store_instr(intrin, true, state);<br>
break;<br>
<br>
@@ -541,17 +535,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,<br>
nir_deref *src_deref = nir_copy_deref(state->mem_ctx, &src_head->deref);<br>
nir_deref *dest_deref = nir_copy_deref(state->mem_ctx, &dest_head->deref);<br>
<br>
- nir_intrinsic_op load_op;<br>
- switch (num_components) {<br>
- case 1: load_op = nir_intrinsic_load_var_vec1; break;<br>
- case 2: load_op = nir_intrinsic_load_var_vec2; break;<br>
- case 3: load_op = nir_intrinsic_load_var_vec3; break;<br>
- case 4: load_op = nir_intrinsic_load_var_vec4; break;<br>
- default: unreachable("Invalid number of components"); break;<br>
- }<br>
-<br>
- nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx,<br>
- load_op);<br>
+ nir_intrinsic_instr *load =<br>
+ nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_load_var);<br>
+ load->num_components = num_components;<br>
load->variables[0] = nir_deref_as_var(src_deref);<br>
load->dest.is_ssa = true;<br>
nir_ssa_def_init(&load->instr, &load->dest.ssa, num_components, NULL);<br>
@@ -559,17 +545,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,<br>
nir_instr_insert_before(&copy_instr->instr, &load->instr);<br>
register_load_instr(load, false, state);<br>
<br>
- nir_intrinsic_op store_op;<br>
- switch (num_components) {<br>
- case 1: store_op = nir_intrinsic_store_var_vec1; break;<br>
- case 2: store_op = nir_intrinsic_store_var_vec2; break;<br>
- case 3: store_op = nir_intrinsic_store_var_vec3; break;<br>
- case 4: store_op = nir_intrinsic_store_var_vec4; break;<br>
- default: unreachable("Invalid number of components"); break;<br>
- }<br>
-<br>
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,<br>
- store_op);<br>
+ nir_intrinsic_instr *store =<br>
+ nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_store_var);<br>
+ store->num_components = num_components;<br>
store->variables[0] = nir_deref_as_var(dest_deref);<br>
store->src[0].is_ssa = true;<br>
store->src[0].ssa = &load->dest.ssa;<br>
@@ -782,14 +760,9 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)<br>
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);<br>
<br>
switch (intrin->intrinsic) {<br>
- case nir_intrinsic_load_var_vec1:<br>
- case nir_intrinsic_load_var_vec2:<br>
- case nir_intrinsic_load_var_vec3:<br>
- case nir_intrinsic_load_var_vec4: {<br>
+ case nir_intrinsic_load_var: {<br>
struct deref_node *node = get_deref_node(intrin->variables[0],<br>
false, state);<br>
- unsigned num_chans =<br>
- nir_intrinsic_infos[intrin->intrinsic].dest_components;<br>
<br>
if (node == NULL) {<br>
/* If we hit this path then we are referencing an invalid<br>
@@ -799,7 +772,8 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)<br>
*/<br>
nir_ssa_undef_instr *undef =<br>
nir_ssa_undef_instr_create(state->mem_ctx);<br>
- nir_ssa_def_init(&undef->instr, &undef->def, num_chans, NULL);<br>
+ nir_ssa_def_init(&undef->instr, &undef->def,<br>
+ intrin->num_components, NULL);<br>
<br>
nir_instr_insert_before(&intrin->instr, &undef->instr);<br>
nir_instr_remove(&intrin->instr);<br>
@@ -821,14 +795,15 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)<br>
nir_op_imov);<br>
mov->src[0].src.is_ssa = true;<br>
mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state);<br>
- for (unsigned i = num_chans; i < 4; i++)<br>
+ for (unsigned i = intrin->num_components; i < 4; i++)<br>
mov->src[0].swizzle[i] = 0;<br>
<br>
assert(intrin->dest.is_ssa);<br>
<br>
- mov->dest.write_mask = (1 << num_chans) - 1;<br>
+ mov->dest.write_mask = (1 << intrin->num_components) - 1;<br>
mov->dest.dest.is_ssa = true;<br>
- nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, num_chans, NULL);<br>
+ nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,<br>
+ intrin->num_components, NULL);<br>
<br>
nir_instr_insert_before(&intrin->instr, &mov->instr);<br>
nir_instr_remove(&intrin->instr);<br>
@@ -843,10 +818,7 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)<br>
break;<br>
}<br>
<br>
- case nir_intrinsic_store_var_vec1:<br>
- case nir_intrinsic_store_var_vec2:<br>
- case nir_intrinsic_store_var_vec3:<br>
- case nir_intrinsic_store_var_vec4: {<br>
+ case nir_intrinsic_store_var: {<br>
struct deref_node *node = get_deref_node(intrin->variables[0],<br>
false, state);<br>
<br>
@@ -860,7 +832,8 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)<br>
if (!node->lower_to_ssa)<br>
continue;<br>
<br>
- unsigned num_chans = glsl_get_vector_elements(node->type);<br>
+ assert(intrin->num_components ==<br>
+ glsl_get_vector_elements(node->type));<br>
<br>
assert(intrin->src[0].is_ssa);<br>
<br>
@@ -873,12 +846,12 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)<br>
<br>
mov->src[1].src.is_ssa = true;<br>
mov->src[1].src.ssa = intrin->src[0].ssa;<br>
- for (unsigned i = num_chans; i < 4; i++)<br>
+ for (unsigned i = intrin->num_components; i < 4; i++)<br>
mov->src[1].swizzle[i] = 0;<br>
<br>
mov->src[2].src.is_ssa = true;<br>
mov->src[2].src.ssa = get_ssa_def_for_block(node, block, state);<br>
- for (unsigned i = num_chans; i < 4; i++)<br>
+ for (unsigned i = intrin->num_components; i < 4; i++)<br>
mov->src[2].swizzle[i] = 0;<br>
<br>
} else {<br>
@@ -886,13 +859,14 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)<br>
<br>
mov->src[0].src.is_ssa = true;<br>
mov->src[0].src.ssa = intrin->src[0].ssa;<br>
- for (unsigned i = num_chans; i < 4; i++)<br>
+ for (unsigned i = intrin->num_components; i < 4; i++)<br>
mov->src[0].swizzle[i] = 0;<br>
}<br>
<br>
- mov->dest.write_mask = (1 << num_chans) - 1;<br>
+ mov->dest.write_mask = (1 << intrin->num_components) - 1;<br>
mov->dest.dest.is_ssa = true;<br>
- nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, num_chans, NULL);<br>
+ nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,<br>
+ intrin->num_components, NULL);<br>
<br>
nir_instr_insert_before(&intrin->instr, &mov->instr);<br>
nir_instr_remove(&intrin->instr);<br>
diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c<br>
index b8ef802..ee29fc3 100644<br>
--- a/src/glsl/nir/nir_validate.c<br>
+++ b/src/glsl/nir/nir_validate.c<br>
@@ -338,16 +338,10 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)<br>
}<br>
<br>
switch (instr->intrinsic) {<br>
- case nir_intrinsic_load_var_vec1:<br>
- case nir_intrinsic_load_var_vec2:<br>
- case nir_intrinsic_load_var_vec3:<br>
- case nir_intrinsic_load_var_vec4:<br>
+ case nir_intrinsic_load_var:<br>
assert(instr->variables[0]->var->data.mode != nir_var_shader_out);<br>
break;<br>
- case nir_intrinsic_store_var_vec1:<br>
- case nir_intrinsic_store_var_vec2:<br>
- case nir_intrinsic_store_var_vec3:<br>
- case nir_intrinsic_store_var_vec4:<br>
+ case nir_intrinsic_store_var:<br>
assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&<br>
instr->variables[0]->var->data.mode != nir_var_uniform);<br>
break;<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp<br>
index dbb2470..4c1805d 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp<br>
@@ -1312,14 +1312,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)<br>
break;<br>
}<br>
<br>
- case nir_intrinsic_load_uniform_vec1:<br>
- case nir_intrinsic_load_uniform_vec2:<br>
- case nir_intrinsic_load_uniform_vec3:<br>
- case nir_intrinsic_load_uniform_vec4: {<br>
+ case nir_intrinsic_load_uniform: {<br>
unsigned index = 0;<br>
for (int i = 0; i < instr->const_index[1]; i++) {<br>
- for (unsigned j = 0;<br>
- j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {<br>
+ for (unsigned j = 0; j < instr->num_components; j++) {<br>
fs_reg src = nir_uniforms;<br>
src.reg_offset = instr->const_index[0] + index;<br>
src.type = dest.type;<br>
@@ -1335,14 +1331,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)<br>
break;<br>
}<br>
<br>
- case nir_intrinsic_load_uniform_vec1_indirect:<br>
- case nir_intrinsic_load_uniform_vec2_indirect:<br>
- case nir_intrinsic_load_uniform_vec3_indirect:<br>
- case nir_intrinsic_load_uniform_vec4_indirect: {<br>
+ case nir_intrinsic_load_uniform_indirect: {<br>
unsigned index = 0;<br>
for (int i = 0; i < instr->const_index[1]; i++) {<br>
- for (unsigned j = 0;<br>
- j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {<br>
+ for (unsigned j = 0; j < instr->num_components; j++) {<br>
fs_reg src = nir_uniforms;<br>
src.reg_offset = instr->const_index[0] + index;<br>
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));<br>
@@ -1360,10 +1352,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)<br>
break;<br>
}<br>
<br>
- case nir_intrinsic_load_ubo_vec1:<br>
- case nir_intrinsic_load_ubo_vec2:<br>
- case nir_intrinsic_load_ubo_vec3:<br>
- case nir_intrinsic_load_ubo_vec4: {<br>
+ case nir_intrinsic_load_ubo: {<br>
fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +<br>
(unsigned) instr->const_index[0]);<br>
fs_reg packed_consts = fs_reg(this, glsl_type::float_type);<br>
@@ -1373,8 +1362,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)<br>
emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,<br>
packed_consts, surf_index, const_offset_reg));<br>
<br>
- for (unsigned i = 0;<br>
- i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {<br>
+ for (unsigned i = 0; i < instr->num_components; i++) {<br>
packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i);<br>
<br>
/* The std140 packing rules don't allow vectors to cross 16-byte<br>
@@ -1392,10 +1380,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)<br>
break;<br>
}<br>
<br>
- case nir_intrinsic_load_ubo_vec1_indirect:<br>
- case nir_intrinsic_load_ubo_vec2_indirect:<br>
- case nir_intrinsic_load_ubo_vec3_indirect:<br>
- case nir_intrinsic_load_ubo_vec4_indirect: {<br>
+ case nir_intrinsic_load_ubo_indirect: {<br>
fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +<br>
instr->const_index[0]);<br>
/* Turn the byte offset into a dword offset. */<br>
@@ -1404,8 +1389,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)<br>
emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D),<br>
fs_reg(2)));<br>
<br>
- for (unsigned i = 0;<br>
- i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {<br>
+ for (unsigned i = 0; i < instr->num_components; i++) {<br>
exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index,<br>
offset, base_offset + i);<br>
fs_inst *last_inst = (fs_inst *) list.get_tail();<br>
@@ -1418,14 +1402,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)<br>
break;<br>
}<br>
<br>
- case nir_intrinsic_load_input_vec1:<br>
- case nir_intrinsic_load_input_vec2:<br>
- case nir_intrinsic_load_input_vec3:<br>
- case nir_intrinsic_load_input_vec4: {<br>
+ case nir_intrinsic_load_input: {<br>
unsigned index = 0;<br>
for (int i = 0; i < instr->const_index[1]; i++) {<br>
- for (unsigned j = 0;<br>
- j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {<br>
+ for (unsigned j = 0; j < instr->num_components; j++) {<br>
fs_reg src = nir_inputs;<br>
src.reg_offset = instr->const_index[0] + index;<br>
src.type = dest.type;<br>
@@ -1441,14 +1421,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)<br>
break;<br>
}<br>
<br>
- case nir_intrinsic_load_input_vec1_indirect:<br>
- case nir_intrinsic_load_input_vec2_indirect:<br>
- case nir_intrinsic_load_input_vec3_indirect:<br>
- case nir_intrinsic_load_input_vec4_indirect: {<br>
+ case nir_intrinsic_load_input_indirect: {<br>
unsigned index = 0;<br>
for (int i = 0; i < instr->const_index[1]; i++) {<br>
- for (unsigned j = 0;<br>
- j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {<br>
+ for (unsigned j = 0; j < instr->num_components; j++) {<br>
fs_reg src = nir_inputs;<br>
src.reg_offset = instr->const_index[0] + index;<br>
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));<br>
@@ -1466,15 +1442,11 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)<br>
break;<br>
}<br>
<br>
- case nir_intrinsic_store_output_vec1:<br>
- case nir_intrinsic_store_output_vec2:<br>
- case nir_intrinsic_store_output_vec3:<br>
- case nir_intrinsic_store_output_vec4: {<br>
+ case nir_intrinsic_store_output: {<br>
fs_reg src = get_nir_src(instr->src[0]);<br>
unsigned index = 0;<br>
for (int i = 0; i < instr->const_index[1]; i++) {<br>
- for (unsigned j = 0;<br>
- j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {<br>
+ for (unsigned j = 0; j < instr->num_components; j++) {<br>
fs_reg new_dest = nir_outputs;<br>
new_dest.reg_offset = instr->const_index[0] + index;<br>
new_dest.type = src.type;<br>
@@ -1489,16 +1461,12 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)<br>
break;<br>
}<br>
<br>
- case nir_intrinsic_store_output_vec1_indirect:<br>
- case nir_intrinsic_store_output_vec2_indirect:<br>
- case nir_intrinsic_store_output_vec3_indirect:<br>
- case nir_intrinsic_store_output_vec4_indirect: {<br>
+ case nir_intrinsic_store_output_indirect: {<br>
fs_reg src = get_nir_src(instr->src[0]);<br>
fs_reg indirect = get_nir_src(instr->src[1]);<br>
unsigned index = 0;<br>
for (int i = 0; i < instr->const_index[1]; i++) {<br>
- for (unsigned j = 0;<br>
- j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {<br>
+ for (unsigned j = 0; j < instr->num_components; j++) {<br>
fs_reg new_dest = nir_outputs;<br>
new_dest.reg_offset = instr->const_index[0] + index;<br>
new_dest.reladdr = new(mem_ctx) fs_reg(indirect);<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.2.0<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div></div>