[Mesa-dev] [PATCH 099/133] nir: Vectorize intrinsics
Jason Ekstrand
jason at jlekstrand.net
Mon Dec 15 22:11:44 PST 2014
We used to have the number of components built into the intrinsic. This
meant that all of our load/store intrinsics had vec1, vec2, vec3, and vec4
variants. This led to piles of switch statements to generate the correct
intrinsic names, and introspection to figure out the number of components.
We can make things much nicer by allowing "vectorized" intrinsics.
---
src/glsl/nir/glsl_to_nir.cpp | 60 ++++------------
src/glsl/nir/nir.h | 15 +++-
src/glsl/nir/nir_intrinsics.h | 79 +++++++--------------
src/glsl/nir/nir_lower_io.c | 115 +++++++------------------------
src/glsl/nir/nir_lower_locals_to_regs.c | 18 ++---
src/glsl/nir/nir_lower_system_values.c | 3 +-
src/glsl/nir/nir_lower_variables.c | 74 +++++++-------------
src/glsl/nir/nir_validate.c | 10 +--
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 64 +++++------------
9 files changed, 123 insertions(+), 315 deletions(-)
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index f85b50e..088a8e9 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -629,7 +629,8 @@ nir_visitor::visit(ir_call *ir)
nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
nir_intrinsic_instr *store_instr =
- nir_intrinsic_instr_create(shader, nir_intrinsic_store_var_vec1);
+ nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
+ store_instr->num_components = 1;
ir->return_deref->accept(this);
store_instr->variables[0] = this->deref_head;
@@ -704,17 +705,9 @@ nir_visitor::visit(ir_assignment *ir)
* back into the LHS. Copy propagation should get rid of the mess.
*/
- nir_intrinsic_op load_op;
- switch (ir->lhs->type->vector_elements) {
- case 1: load_op = nir_intrinsic_load_var_vec1; break;
- case 2: load_op = nir_intrinsic_load_var_vec2; break;
- case 3: load_op = nir_intrinsic_load_var_vec3; break;
- case 4: load_op = nir_intrinsic_load_var_vec4; break;
- default: unreachable("Invalid number of components"); break;
- }
-
- nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader,
- load_op);
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
+ load->num_components = ir->lhs->type->vector_elements;
load->dest.is_ssa = true;
nir_ssa_def_init(&load->instr, &load->dest.ssa,
num_components, NULL);
@@ -759,17 +752,9 @@ nir_visitor::visit(ir_assignment *ir)
src.ssa = &vec->dest.dest.ssa;
}
- nir_intrinsic_op store_op;
- switch (ir->lhs->type->vector_elements) {
- case 1: store_op = nir_intrinsic_store_var_vec1; break;
- case 2: store_op = nir_intrinsic_store_var_vec2; break;
- case 3: store_op = nir_intrinsic_store_var_vec3; break;
- case 4: store_op = nir_intrinsic_store_var_vec4; break;
- default: unreachable("Invalid number of components"); break;
- }
-
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(this->shader,
- store_op);
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
+ store->num_components = ir->lhs->type->vector_elements;
nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref);
store->variables[0] = nir_deref_as_var(store_deref);
store->src[0] = src;
@@ -848,17 +833,9 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
* must emit a variable load.
*/
- nir_intrinsic_op load_op;
- switch (ir->type->vector_elements) {
- case 1: load_op = nir_intrinsic_load_var_vec1; break;
- case 2: load_op = nir_intrinsic_load_var_vec2; break;
- case 3: load_op = nir_intrinsic_load_var_vec3; break;
- case 4: load_op = nir_intrinsic_load_var_vec4; break;
- default: unreachable("Invalid number of components");
- }
-
nir_intrinsic_instr *load_instr =
- nir_intrinsic_instr_create(this->shader, load_op);
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
+ load_instr->num_components = ir->type->vector_elements;
load_instr->variables[0] = this->deref_head;
add_instr(&load_instr->instr, ir->type->vector_elements);
}
@@ -917,23 +894,12 @@ nir_visitor::visit(ir_expression *ir)
nir_intrinsic_op op;
if (const_index) {
- switch (ir->type->vector_elements) {
- case 1: op = nir_intrinsic_load_ubo_vec1; break;
- case 2: op = nir_intrinsic_load_ubo_vec2; break;
- case 3: op = nir_intrinsic_load_ubo_vec3; break;
- case 4: op = nir_intrinsic_load_ubo_vec4; break;
- default: assert(0); break;
- }
+ op = nir_intrinsic_load_ubo;
} else {
- switch (ir->type->vector_elements) {
- case 1: op = nir_intrinsic_load_ubo_vec1_indirect; break;
- case 2: op = nir_intrinsic_load_ubo_vec2_indirect; break;
- case 3: op = nir_intrinsic_load_ubo_vec3_indirect; break;
- case 4: op = nir_intrinsic_load_ubo_vec4_indirect; break;
- default: assert(0); break;
- }
+ op = nir_intrinsic_load_ubo_indirect;
}
nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
+ load->num_components = ir->type->vector_elements;
load->const_index[0] = ir->operands[0]->as_constant()->value.u[0];
load->const_index[1] = const_index ? const_index->value.u[0] : 0; /* base offset */
load->const_index[2] = 1; /* number of vec4's */
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 30146d6..412ceea 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -693,6 +693,9 @@ typedef struct {
nir_dest dest;
+ /** number of components if this is a vectorized intrinsic */
+ uint8_t num_components;
+
int const_index[3];
nir_deref_var *variables[2];
@@ -732,12 +735,20 @@ typedef struct {
unsigned num_srcs; /** < number of register/SSA inputs */
- /** number of components of each input register */
+ /** number of components of each input register
+ *
+ * If this value is 0, the number of components is given by the
+ * num_components field of nir_intrinsic_instr.
+ */
unsigned src_components[NIR_INTRINSIC_MAX_INPUTS];
bool has_dest;
- /** number of components of each output register */
+ /** number of components of the output register
+ *
+ * If this value is 0, the number of components is given by the
+ * num_components field of nir_intrinsic_instr.
+ */
unsigned dest_components;
/** the number of inputs/outputs that are variables */
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index e4ad8cd..75bd12f 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -42,19 +42,9 @@
#define ARR(...) { __VA_ARGS__ }
-INTRINSIC(load_var_vec1, 0, ARR(), true, 1, 1, 0,
- NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(load_var_vec2, 0, ARR(), true, 2, 1, 0,
- NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(load_var_vec3, 0, ARR(), true, 3, 1, 0,
- NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(load_var_vec4, 0, ARR(), true, 4, 1, 0,
- NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(store_var_vec1, 1, ARR(1), false, 0, 1, 0, 0)
-INTRINSIC(store_var_vec2, 1, ARR(2), false, 0, 1, 0, 0)
-INTRINSIC(store_var_vec3, 1, ARR(3), false, 0, 1, 0, 0)
-INTRINSIC(store_var_vec4, 1, ARR(4), false, 0, 1, 0, 0)
-INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
+INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
+INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
/*
* a barrier is an intrinsic with no inputs/outputs but which can't be moved
@@ -94,27 +84,6 @@ SYSTEM_VALUE(sample_pos, 2)
SYSTEM_VALUE(sample_mask_in, 1)
SYSTEM_VALUE(invocation_id, 1)
-#define LOAD_OR_INTERP(name, num_srcs, src_comps, num_indices, flags) \
- INTRINSIC(name##_vec1, num_srcs, ARR(src_comps), true, 1, \
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
- INTRINSIC(name##_vec2, num_srcs, ARR(src_comps), true, 2, \
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
- INTRINSIC(name##_vec3, num_srcs, ARR(src_comps), true, 3, \
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
- INTRINSIC(name##_vec4, num_srcs, ARR(src_comps), true, 4, \
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
- INTRINSIC(name##_vec1_indirect, 1 + num_srcs, ARR(1, src_comps), true, 1, \
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
- INTRINSIC(name##_vec2_indirect, 1 + num_srcs, ARR(1, src_comps), true, 2, \
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
- INTRINSIC(name##_vec3_indirect, 1 + num_srcs, ARR(1, src_comps), true, 3, \
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags) \
- INTRINSIC(name##_vec4_indirect, 1 + num_srcs, ARR(1, src_comps), true, 4, \
- 0, num_indices, NIR_INTRINSIC_CAN_ELIMINATE | flags)
-
-#define LOAD(name, num_indices, flags) \
- LOAD_OR_INTERP(load_##name, 0, 0, num_indices, flags)
-
/*
* The first index is the address to load from, and the second index is the
* number of array elements to load. For UBO's (and SSBO's), the first index
@@ -129,6 +98,12 @@ SYSTEM_VALUE(invocation_id, 1)
* elements begin immediately after the previous array element.
*/
+#define LOAD(name, num_indices, flags) \
+ INTRINSIC(load_##name, 0, ARR(), true, 0, 0, num_indices, \
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
+ INTRINSIC(load_##name##_indirect, 1, ARR(1), true, 0, 0, num_indices, \
+ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
+
LOAD(uniform, 2, NIR_INTRINSIC_CAN_REORDER)
LOAD(ubo, 3, NIR_INTRINSIC_CAN_REORDER)
LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
@@ -140,29 +115,16 @@ LOAD(input, 2, NIR_INTRINSIC_CAN_REORDER)
* interp_at_offset* intrinsics take a second source that is either a
* sample id or a vec2 position offset.
*/
-#define INTERP(name, flags) \
- LOAD_OR_INTERP(interp_##name, 0, 0, 2, flags)
-
-#define INTERP_WITH_ARG(name, src_comps, flags) \
- LOAD_OR_INTERP(interp_##name, 1, src_comps, 2, flags)
-INTERP(at_centroid, NIR_INTRINSIC_CAN_REORDER)
-INTERP_WITH_ARG(at_sample, 1, NIR_INTRINSIC_CAN_REORDER)
-INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)
+#define INTERP(name, num_srcs, src_comps) \
+ INTRINSIC(interp_##name, num_srcs, ARR(src_comps), true, \
+ 0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) \
+ INTRINSIC(interp_##name##_indirect, 1 + num_srcs, ARR(1, src_comps), true, \
+ 0, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-#define STORE(name, num_indices, flags) \
- INTRINSIC(store_##name##_vec1, 1, ARR(1), false, 0, 0, num_indices, flags) \
- INTRINSIC(store_##name##_vec2, 1, ARR(2), false, 0, 0, num_indices, flags) \
- INTRINSIC(store_##name##_vec3, 1, ARR(3), false, 0, 0, num_indices, flags) \
- INTRINSIC(store_##name##_vec4, 1, ARR(4), false, 0, 0, num_indices, flags) \
- INTRINSIC(store_##name##_vec1_indirect, 2, ARR(1, 1), false, 0, 0, \
- num_indices, flags) \
- INTRINSIC(store_##name##_vec2_indirect, 2, ARR(2, 1), false, 0, 0, \
- num_indices, flags) \
- INTRINSIC(store_##name##_vec3_indirect, 2, ARR(3, 1), false, 0, 0, \
- num_indices, flags) \
- INTRINSIC(store_##name##_vec4_indirect, 2, ARR(4, 1), false, 0, 0, \
- num_indices, flags) \
+INTERP(at_centroid, 0, 0)
+INTERP(at_sample, 1, 1)
+INTERP(at_offset, 1, 1)
/*
* Stores work the same way as loads, except now the first register input is
@@ -170,7 +132,12 @@ INTERP_WITH_ARG(at_offset, 1, NIR_INTRINSIC_CAN_REORDER)
* offset.
*/
+#define STORE(name, num_indices, flags) \
+ INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
+ INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
+ num_indices, flags) \
+
STORE(output, 2, 0)
/* STORE(ssbo, 3, 0) */
-LAST_INTRINSIC(store_output_vec4_indirect)
+LAST_INTRINSIC(store_output_indirect)
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index a3b8186..ed3ce81 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -186,66 +186,6 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
return base_offset;
}
-static nir_intrinsic_op
-get_load_op(nir_variable_mode mode, bool indirect, unsigned num_components)
-{
- if (indirect) {
- switch (mode) {
- case nir_var_shader_in:
- switch (num_components) {
- case 1: return nir_intrinsic_load_input_vec1_indirect;
- case 2: return nir_intrinsic_load_input_vec2_indirect;
- case 3: return nir_intrinsic_load_input_vec3_indirect;
- case 4: return nir_intrinsic_load_input_vec4_indirect;
- default: unreachable("Invalid number of components"); break;
- }
- break;
-
- case nir_var_uniform:
- switch (num_components) {
- case 1: return nir_intrinsic_load_uniform_vec1_indirect;
- case 2: return nir_intrinsic_load_uniform_vec2_indirect;
- case 3: return nir_intrinsic_load_uniform_vec3_indirect;
- case 4: return nir_intrinsic_load_uniform_vec4_indirect;
- default: unreachable("Invalid number of components"); break;
- }
- break;
-
- default:
- unreachable("Invalid input type");
- break;
- }
- } else {
- switch (mode) {
- case nir_var_shader_in:
- switch (num_components) {
- case 1: return nir_intrinsic_load_input_vec1;
- case 2: return nir_intrinsic_load_input_vec2;
- case 3: return nir_intrinsic_load_input_vec3;
- case 4: return nir_intrinsic_load_input_vec4;
- default: unreachable("Invalid number of components"); break;
- }
- break;
-
- case nir_var_uniform:
- switch (num_components) {
- case 1: return nir_intrinsic_load_uniform_vec1;
- case 2: return nir_intrinsic_load_uniform_vec2;
- case 3: return nir_intrinsic_load_uniform_vec3;
- case 4: return nir_intrinsic_load_uniform_vec4;
- default: unreachable("Invalid number of components"); break;
- }
- break;
-
- default:
- unreachable("Invalid input type");
- break;
- }
- }
-
- return nir_intrinsic_load_input_vec1;
-}
-
static bool
nir_lower_io_block(nir_block *block, void *void_state)
{
@@ -258,22 +198,35 @@ nir_lower_io_block(nir_block *block, void *void_state)
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
- case nir_intrinsic_load_var_vec1:
- case nir_intrinsic_load_var_vec2:
- case nir_intrinsic_load_var_vec3:
- case nir_intrinsic_load_var_vec4: {
+ case nir_intrinsic_load_var: {
nir_variable_mode mode = intrin->variables[0]->var->data.mode;
if (mode != nir_var_shader_in && mode != nir_var_uniform)
continue;
bool has_indirect = deref_has_indirect(intrin->variables[0]);
- unsigned num_components =
- nir_intrinsic_infos[intrin->intrinsic].dest_components;
- nir_intrinsic_op load_op = get_load_op(mode, has_indirect,
- num_components);
+ nir_intrinsic_op load_op;
+ switch (mode) {
+ case nir_var_shader_in:
+ if (has_indirect) {
+ load_op = nir_intrinsic_load_input_indirect;
+ } else {
+ load_op = nir_intrinsic_load_input;
+ }
+ break;
+ case nir_var_uniform:
+ if (has_indirect) {
+ load_op = nir_intrinsic_load_uniform_indirect;
+ } else {
+ load_op = nir_intrinsic_load_uniform;
+ }
+ break;
+ default:
+ unreachable("Unknown variable mode");
+ }
nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx,
load_op);
+ load->num_components = intrin->num_components;
nir_src indirect;
unsigned offset = get_io_offset(intrin->variables[0],
@@ -289,7 +242,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
if (intrin->dest.is_ssa) {
load->dest.is_ssa = true;
nir_ssa_def_init(&load->instr, &load->dest.ssa,
- num_components, NULL);
+ intrin->num_components, NULL);
nir_src new_src = {
.is_ssa = true,
@@ -307,38 +260,22 @@ nir_lower_io_block(nir_block *block, void *void_state)
break;
}
- case nir_intrinsic_store_var_vec1:
- case nir_intrinsic_store_var_vec2:
- case nir_intrinsic_store_var_vec3:
- case nir_intrinsic_store_var_vec4: {
+ case nir_intrinsic_store_var: {
if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
continue;
bool has_indirect = deref_has_indirect(intrin->variables[0]);
- unsigned num_components =
- nir_intrinsic_infos[intrin->intrinsic].src_components[0];
nir_intrinsic_op store_op;
if (has_indirect) {
- switch (num_components) {
- case 1: store_op = nir_intrinsic_store_output_vec1_indirect; break;
- case 2: store_op = nir_intrinsic_store_output_vec2_indirect; break;
- case 3: store_op = nir_intrinsic_store_output_vec3_indirect; break;
- case 4: store_op = nir_intrinsic_store_output_vec4_indirect; break;
- default: unreachable("Invalid number of components"); break;
- }
+ store_op = nir_intrinsic_store_output_indirect;
} else {
- switch (num_components) {
- case 1: store_op = nir_intrinsic_store_output_vec1; break;
- case 2: store_op = nir_intrinsic_store_output_vec2; break;
- case 3: store_op = nir_intrinsic_store_output_vec3; break;
- case 4: store_op = nir_intrinsic_store_output_vec4; break;
- default: unreachable("Invalid number of components"); break;
- }
+ store_op = nir_intrinsic_store_output;
}
nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
store_op);
+ store->num_components = intrin->num_components;
nir_src indirect;
unsigned offset = get_io_offset(intrin->variables[0],
diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c
index caf1c29..081ed6b 100644
--- a/src/glsl/nir/nir_lower_locals_to_regs.c
+++ b/src/glsl/nir/nir_lower_locals_to_regs.c
@@ -219,22 +219,18 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
- case nir_intrinsic_load_var_vec1:
- case nir_intrinsic_load_var_vec2:
- case nir_intrinsic_load_var_vec3:
- case nir_intrinsic_load_var_vec4: {
+ case nir_intrinsic_load_var: {
if (intrin->variables[0]->var->data.mode != nir_var_local)
continue;
nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);
mov->src[0].src = get_deref_reg_src(intrin->variables[0],
&intrin->instr, state);
- unsigned num_components = mov->src[0].src.reg.reg->num_components;
- mov->dest.write_mask = (1 << num_components) - 1;
+ mov->dest.write_mask = (1 << intrin->num_components) - 1;
if (intrin->dest.is_ssa) {
mov->dest.dest.is_ssa = true;
nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
- num_components, NULL);
+ intrin->num_components, NULL);
nir_src new_src = {
.is_ssa = true,
@@ -252,20 +248,16 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
break;
}
- case nir_intrinsic_store_var_vec1:
- case nir_intrinsic_store_var_vec2:
- case nir_intrinsic_store_var_vec3:
- case nir_intrinsic_store_var_vec4: {
+ case nir_intrinsic_store_var: {
if (intrin->variables[0]->var->data.mode != nir_var_local)
continue;
nir_src reg_src = get_deref_reg_src(intrin->variables[0],
&intrin->instr, state);
- unsigned num_components = reg_src.reg.reg->num_components;
nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);
mov->src[0].src = nir_src_copy(intrin->src[0], state->mem_ctx);
- mov->dest.write_mask = (1 << num_components) - 1;
+ mov->dest.write_mask = (1 << intrin->num_components) - 1;
mov->dest.dest.is_ssa = false;
mov->dest.dest.reg.reg = reg_src.reg.reg;
mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c
index cbd1dac..e700df4 100644
--- a/src/glsl/nir/nir_lower_system_values.c
+++ b/src/glsl/nir/nir_lower_system_values.c
@@ -30,8 +30,7 @@
static void
convert_instr(nir_intrinsic_instr *instr)
{
- if (instr->intrinsic != nir_intrinsic_load_var_vec1 &&
- instr->intrinsic != nir_intrinsic_load_var_vec2)
+ if (instr->intrinsic != nir_intrinsic_load_var)
return;
nir_variable *var = instr->variables[0]->var;
diff --git a/src/glsl/nir/nir_lower_variables.c b/src/glsl/nir/nir_lower_variables.c
index 052b021..dab3639 100644
--- a/src/glsl/nir/nir_lower_variables.c
+++ b/src/glsl/nir/nir_lower_variables.c
@@ -449,17 +449,11 @@ fill_deref_tables_block(nir_block *block, void *void_state)
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
- case nir_intrinsic_load_var_vec1:
- case nir_intrinsic_load_var_vec2:
- case nir_intrinsic_load_var_vec3:
- case nir_intrinsic_load_var_vec4:
+ case nir_intrinsic_load_var:
register_load_instr(intrin, true, state);
break;
- case nir_intrinsic_store_var_vec1:
- case nir_intrinsic_store_var_vec2:
- case nir_intrinsic_store_var_vec3:
- case nir_intrinsic_store_var_vec4:
+ case nir_intrinsic_store_var:
register_store_instr(intrin, true, state);
break;
@@ -541,17 +535,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
nir_deref *src_deref = nir_copy_deref(state->mem_ctx, &src_head->deref);
nir_deref *dest_deref = nir_copy_deref(state->mem_ctx, &dest_head->deref);
- nir_intrinsic_op load_op;
- switch (num_components) {
- case 1: load_op = nir_intrinsic_load_var_vec1; break;
- case 2: load_op = nir_intrinsic_load_var_vec2; break;
- case 3: load_op = nir_intrinsic_load_var_vec3; break;
- case 4: load_op = nir_intrinsic_load_var_vec4; break;
- default: unreachable("Invalid number of components"); break;
- }
-
- nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx,
- load_op);
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_load_var);
+ load->num_components = num_components;
load->variables[0] = nir_deref_as_var(src_deref);
load->dest.is_ssa = true;
nir_ssa_def_init(&load->instr, &load->dest.ssa, num_components, NULL);
@@ -559,17 +545,9 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
nir_instr_insert_before(&copy_instr->instr, &load->instr);
register_load_instr(load, false, state);
- nir_intrinsic_op store_op;
- switch (num_components) {
- case 1: store_op = nir_intrinsic_store_var_vec1; break;
- case 2: store_op = nir_intrinsic_store_var_vec2; break;
- case 3: store_op = nir_intrinsic_store_var_vec3; break;
- case 4: store_op = nir_intrinsic_store_var_vec4; break;
- default: unreachable("Invalid number of components"); break;
- }
-
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
- store_op);
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_store_var);
+ store->num_components = num_components;
store->variables[0] = nir_deref_as_var(dest_deref);
store->src[0].is_ssa = true;
store->src[0].ssa = &load->dest.ssa;
@@ -782,14 +760,9 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
- case nir_intrinsic_load_var_vec1:
- case nir_intrinsic_load_var_vec2:
- case nir_intrinsic_load_var_vec3:
- case nir_intrinsic_load_var_vec4: {
+ case nir_intrinsic_load_var: {
struct deref_node *node = get_deref_node(intrin->variables[0],
false, state);
- unsigned num_chans =
- nir_intrinsic_infos[intrin->intrinsic].dest_components;
if (node == NULL) {
/* If we hit this path then we are referencing an invalid
@@ -799,7 +772,8 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
*/
nir_ssa_undef_instr *undef =
nir_ssa_undef_instr_create(state->mem_ctx);
- nir_ssa_def_init(&undef->instr, &undef->def, num_chans, NULL);
+ nir_ssa_def_init(&undef->instr, &undef->def,
+ intrin->num_components, NULL);
nir_instr_insert_before(&intrin->instr, &undef->instr);
nir_instr_remove(&intrin->instr);
@@ -821,14 +795,15 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
nir_op_imov);
mov->src[0].src.is_ssa = true;
mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state);
- for (unsigned i = num_chans; i < 4; i++)
+ for (unsigned i = intrin->num_components; i < 4; i++)
mov->src[0].swizzle[i] = 0;
assert(intrin->dest.is_ssa);
- mov->dest.write_mask = (1 << num_chans) - 1;
+ mov->dest.write_mask = (1 << intrin->num_components) - 1;
mov->dest.dest.is_ssa = true;
- nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, num_chans, NULL);
+ nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
+ intrin->num_components, NULL);
nir_instr_insert_before(&intrin->instr, &mov->instr);
nir_instr_remove(&intrin->instr);
@@ -843,10 +818,7 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
break;
}
- case nir_intrinsic_store_var_vec1:
- case nir_intrinsic_store_var_vec2:
- case nir_intrinsic_store_var_vec3:
- case nir_intrinsic_store_var_vec4: {
+ case nir_intrinsic_store_var: {
struct deref_node *node = get_deref_node(intrin->variables[0],
false, state);
@@ -860,7 +832,8 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
if (!node->lower_to_ssa)
continue;
- unsigned num_chans = glsl_get_vector_elements(node->type);
+ assert(intrin->num_components ==
+ glsl_get_vector_elements(node->type));
assert(intrin->src[0].is_ssa);
@@ -873,12 +846,12 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
mov->src[1].src.is_ssa = true;
mov->src[1].src.ssa = intrin->src[0].ssa;
- for (unsigned i = num_chans; i < 4; i++)
+ for (unsigned i = intrin->num_components; i < 4; i++)
mov->src[1].swizzle[i] = 0;
mov->src[2].src.is_ssa = true;
mov->src[2].src.ssa = get_ssa_def_for_block(node, block, state);
- for (unsigned i = num_chans; i < 4; i++)
+ for (unsigned i = intrin->num_components; i < 4; i++)
mov->src[2].swizzle[i] = 0;
} else {
@@ -886,13 +859,14 @@ lower_deref_to_ssa_block(nir_block *block, void *void_state)
mov->src[0].src.is_ssa = true;
mov->src[0].src.ssa = intrin->src[0].ssa;
- for (unsigned i = num_chans; i < 4; i++)
+ for (unsigned i = intrin->num_components; i < 4; i++)
mov->src[0].swizzle[i] = 0;
}
- mov->dest.write_mask = (1 << num_chans) - 1;
+ mov->dest.write_mask = (1 << intrin->num_components) - 1;
mov->dest.dest.is_ssa = true;
- nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa, num_chans, NULL);
+ nir_ssa_def_init(&mov->instr, &mov->dest.dest.ssa,
+ intrin->num_components, NULL);
nir_instr_insert_before(&intrin->instr, &mov->instr);
nir_instr_remove(&intrin->instr);
diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index b8ef802..ee29fc3 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -338,16 +338,10 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
}
switch (instr->intrinsic) {
- case nir_intrinsic_load_var_vec1:
- case nir_intrinsic_load_var_vec2:
- case nir_intrinsic_load_var_vec3:
- case nir_intrinsic_load_var_vec4:
+ case nir_intrinsic_load_var:
assert(instr->variables[0]->var->data.mode != nir_var_shader_out);
break;
- case nir_intrinsic_store_var_vec1:
- case nir_intrinsic_store_var_vec2:
- case nir_intrinsic_store_var_vec3:
- case nir_intrinsic_store_var_vec4:
+ case nir_intrinsic_store_var:
assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
instr->variables[0]->var->data.mode != nir_var_uniform);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index dbb2470..4c1805d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1312,14 +1312,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_load_uniform_vec1:
- case nir_intrinsic_load_uniform_vec2:
- case nir_intrinsic_load_uniform_vec3:
- case nir_intrinsic_load_uniform_vec4: {
+ case nir_intrinsic_load_uniform: {
unsigned index = 0;
for (int i = 0; i < instr->const_index[1]; i++) {
- for (unsigned j = 0;
- j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
+ for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = nir_uniforms;
src.reg_offset = instr->const_index[0] + index;
src.type = dest.type;
@@ -1335,14 +1331,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_load_uniform_vec1_indirect:
- case nir_intrinsic_load_uniform_vec2_indirect:
- case nir_intrinsic_load_uniform_vec3_indirect:
- case nir_intrinsic_load_uniform_vec4_indirect: {
+ case nir_intrinsic_load_uniform_indirect: {
unsigned index = 0;
for (int i = 0; i < instr->const_index[1]; i++) {
- for (unsigned j = 0;
- j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
+ for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = nir_uniforms;
src.reg_offset = instr->const_index[0] + index;
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
@@ -1360,10 +1352,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_load_ubo_vec1:
- case nir_intrinsic_load_ubo_vec2:
- case nir_intrinsic_load_ubo_vec3:
- case nir_intrinsic_load_ubo_vec4: {
+ case nir_intrinsic_load_ubo: {
fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
(unsigned) instr->const_index[0]);
fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
@@ -1373,8 +1362,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
packed_consts, surf_index, const_offset_reg));
- for (unsigned i = 0;
- i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
+ for (unsigned i = 0; i < instr->num_components; i++) {
packed_consts.set_smear(instr->const_index[1] % 16 / 4 + i);
/* The std140 packing rules don't allow vectors to cross 16-byte
@@ -1392,10 +1380,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_load_ubo_vec1_indirect:
- case nir_intrinsic_load_ubo_vec2_indirect:
- case nir_intrinsic_load_ubo_vec3_indirect:
- case nir_intrinsic_load_ubo_vec4_indirect: {
+ case nir_intrinsic_load_ubo_indirect: {
fs_reg surf_index = fs_reg(prog_data->binding_table.ubo_start +
instr->const_index[0]);
/* Turn the byte offset into a dword offset. */
@@ -1404,8 +1389,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
emit(SHR(offset, retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D),
fs_reg(2)));
- for (unsigned i = 0;
- i < nir_intrinsic_infos[instr->intrinsic].dest_components; i++) {
+ for (unsigned i = 0; i < instr->num_components; i++) {
exec_list list = VARYING_PULL_CONSTANT_LOAD(dest, surf_index,
offset, base_offset + i);
fs_inst *last_inst = (fs_inst *) list.get_tail();
@@ -1418,14 +1402,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_load_input_vec1:
- case nir_intrinsic_load_input_vec2:
- case nir_intrinsic_load_input_vec3:
- case nir_intrinsic_load_input_vec4: {
+ case nir_intrinsic_load_input: {
unsigned index = 0;
for (int i = 0; i < instr->const_index[1]; i++) {
- for (unsigned j = 0;
- j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
+ for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = nir_inputs;
src.reg_offset = instr->const_index[0] + index;
src.type = dest.type;
@@ -1441,14 +1421,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_load_input_vec1_indirect:
- case nir_intrinsic_load_input_vec2_indirect:
- case nir_intrinsic_load_input_vec3_indirect:
- case nir_intrinsic_load_input_vec4_indirect: {
+ case nir_intrinsic_load_input_indirect: {
unsigned index = 0;
for (int i = 0; i < instr->const_index[1]; i++) {
- for (unsigned j = 0;
- j < nir_intrinsic_infos[instr->intrinsic].dest_components; j++) {
+ for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = nir_inputs;
src.reg_offset = instr->const_index[0] + index;
src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
@@ -1466,15 +1442,11 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_store_output_vec1:
- case nir_intrinsic_store_output_vec2:
- case nir_intrinsic_store_output_vec3:
- case nir_intrinsic_store_output_vec4: {
+ case nir_intrinsic_store_output: {
fs_reg src = get_nir_src(instr->src[0]);
unsigned index = 0;
for (int i = 0; i < instr->const_index[1]; i++) {
- for (unsigned j = 0;
- j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
+ for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg new_dest = nir_outputs;
new_dest.reg_offset = instr->const_index[0] + index;
new_dest.type = src.type;
@@ -1489,16 +1461,12 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
break;
}
- case nir_intrinsic_store_output_vec1_indirect:
- case nir_intrinsic_store_output_vec2_indirect:
- case nir_intrinsic_store_output_vec3_indirect:
- case nir_intrinsic_store_output_vec4_indirect: {
+ case nir_intrinsic_store_output_indirect: {
fs_reg src = get_nir_src(instr->src[0]);
fs_reg indirect = get_nir_src(instr->src[1]);
unsigned index = 0;
for (int i = 0; i < instr->const_index[1]; i++) {
- for (unsigned j = 0;
- j < nir_intrinsic_infos[instr->intrinsic].src_components[0]; j++) {
+ for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg new_dest = nir_outputs;
new_dest.reg_offset = instr->const_index[0] + index;
new_dest.reladdr = new(mem_ctx) fs_reg(indirect);
--
2.2.0
More information about the mesa-dev
mailing list