[Mesa-dev] [PATCH 3/3] glsl: Use array deref for access to vector components
Kristian Høgsberg Kristensen
krh at bitplanet.net
Wed Nov 4 15:33:54 PST 2015
We've assumed that we could lower per-component vector access from
vec[i] = scalar
to
vec = ir_triop_vector_insert(vec, scalar, i)
but with SSBOs (and compute shader SLM and tesselation outputs) this is
no longer valid. If a vector is "externally visible", multiple threads
can write independent components simultaneously. With lowering to
ir_triop_vector_insert, each thread read the entire vector, changes one
component, then writes out the entire vector. This is racy.
Instead of generating a ir_binop_vector_extract when we see v[i], we
generate ir_dereference_array. We then add a lowering pass to lower the
ir_dereference_array to ir_binop_vector_extract for rvalues and for to
vector_insert for lvalues in a separate lowering pass.
The resulting IR is the same as before, but we now have a window between
ast->ir conversion and the lowering pass where v[i] appears in the IR as
an array deref. This lets us run lowering passes that lower the vector
access to I/O (eg for SSBO load/store) before we lower the per-component
access to full vector writes.
Signed-off-by: Kristian Høgsberg Kristensen <krh at bitplanet.net>
---
src/glsl/Makefile.sources | 1 +
src/glsl/ast_array_index.cpp | 5 +-
src/glsl/ast_function.cpp | 24 +++------
src/glsl/ast_to_hir.cpp | 43 ----------------
src/glsl/ir_optimization.h | 1 +
src/glsl/ir_validate.cpp | 7 +--
src/glsl/linker.cpp | 2 +
src/glsl/lower_ubo_reference.cpp | 14 +++++-
src/glsl/lower_vector_derefs.cpp | 104 +++++++++++++++++++++++++++++++++++++++
src/glsl/opt_dead_code_local.cpp | 2 +
10 files changed, 135 insertions(+), 68 deletions(-)
create mode 100644 src/glsl/lower_vector_derefs.cpp
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index ca87036..30d8263 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -176,6 +176,7 @@ LIBGLSL_FILES = \
lower_vec_index_to_cond_assign.cpp \
lower_vec_index_to_swizzle.cpp \
lower_vector.cpp \
+ lower_vector_derefs.cpp \
lower_vector_insert.cpp \
lower_vertex_id.cpp \
lower_output_reads.cpp \
diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp
index 74d403f..ca7a9a1 100644
--- a/src/glsl/ast_array_index.cpp
+++ b/src/glsl/ast_array_index.cpp
@@ -319,10 +319,9 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
* expression.
*/
if (array->type->is_array()
- || array->type->is_matrix()) {
+ || array->type->is_matrix()
+ || array->type->is_vector()) {
return new(mem_ctx) ir_dereference_array(array, idx);
- } else if (array->type->is_vector()) {
- return new(mem_ctx) ir_expression(ir_binop_vector_extract, array, idx);
} else if (array->type->is_error()) {
return array;
} else {
diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index e4e4a3f..5584470 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -256,18 +256,10 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
actual->variable_referenced()->name);
return false;
} else if (!actual->is_lvalue()) {
- /* Even though ir_binop_vector_extract is not an l-value, let it
- * slop through. generate_call will handle it correctly.
- */
- ir_expression *const expr = ((ir_rvalue *) actual)->as_expression();
- if (expr == NULL
- || expr->operation != ir_binop_vector_extract
- || !expr->operands[0]->is_lvalue()) {
- _mesa_glsl_error(&loc, state,
- "function parameter '%s %s' is not an lvalue",
- mode, formal->name);
- return false;
- }
+ _mesa_glsl_error(&loc, state,
+ "function parameter '%s %s' is not an lvalue",
+ mode, formal->name);
+ return false;
}
}
@@ -376,12 +368,8 @@ fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type,
ir_rvalue *lhs = actual;
if (expr != NULL && expr->operation == ir_binop_vector_extract) {
- rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
- expr->operands[0]->type,
- expr->operands[0]->clone(mem_ctx, NULL),
- rhs,
- expr->operands[1]->clone(mem_ctx, NULL));
- lhs = expr->operands[0]->clone(mem_ctx, NULL);
+ lhs == new(mem_ctx) ir_dereference_array(expr->operands[0]->clone(mem_ctx, NULL),
+ expr->operands[1]->clone(mem_ctx, NULL));
}
ir_assignment *const assignment_2 = new(mem_ctx) ir_assignment(lhs, rhs);
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 0306530..660ae99 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -850,43 +850,6 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
{
void *ctx = state;
bool error_emitted = (lhs->type->is_error() || rhs->type->is_error());
- ir_rvalue *extract_channel = NULL;
-
- /* If the assignment LHS comes back as an ir_binop_vector_extract
- * expression, move it to the RHS as an ir_triop_vector_insert.
- */
- if (lhs->ir_type == ir_type_expression) {
- ir_expression *const lhs_expr = lhs->as_expression();
-
- if (unlikely(lhs_expr->operation == ir_binop_vector_extract)) {
- ir_rvalue *new_rhs =
- validate_assignment(state, lhs_loc, lhs,
- rhs, is_initializer);
-
- if (new_rhs == NULL) {
- return lhs;
- } else {
- /* This converts:
- * - LHS: (expression float vector_extract <vec> <channel>)
- * - RHS: <scalar>
- * into:
- * - LHS: <vec>
- * - RHS: (expression vec2 vector_insert <vec> <channel> <scalar>)
- *
- * The LHS type is now a vector instead of a scalar. Since GLSL
- * allows assignments to be used as rvalues, we need to re-extract
- * the channel from assignment_temp when returning the rvalue.
- */
- extract_channel = lhs_expr->operands[1];
- rhs = new(ctx) ir_expression(ir_triop_vector_insert,
- lhs_expr->operands[0]->type,
- lhs_expr->operands[0],
- new_rhs,
- extract_channel);
- lhs = lhs_expr->operands[0]->clone(ctx, NULL);
- }
- }
- }
ir_variable *lhs_var = lhs->variable_referenced();
if (lhs_var)
@@ -984,12 +947,6 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
}
ir_rvalue *rvalue = new(ctx) ir_dereference_variable(var);
- if (extract_channel) {
- rvalue = new(ctx) ir_expression(ir_binop_vector_extract,
- rvalue,
- extract_channel->clone(ctx, NULL));
- }
-
*out_rvalue = rvalue;
} else {
if (!error_emitted)
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 6d19a6c..2fee81c 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -129,6 +129,7 @@ void lower_packed_varyings(void *mem_ctx,
unsigned locations_used, ir_variable_mode mode,
unsigned gs_input_vertices, gl_shader *shader);
bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
+bool lower_vector_derefs(gl_shader *shader);
void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader);
bool optimize_redundant_jumps(exec_list *instructions);
bool optimize_split_arrays(exec_list *instructions, bool linked);
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 935571a..e63b5c3 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -110,9 +110,10 @@ ir_validate::visit(ir_dereference_variable *ir)
ir_visitor_status
ir_validate::visit_enter(class ir_dereference_array *ir)
{
- if (!ir->array->type->is_array() && !ir->array->type->is_matrix()) {
- printf("ir_dereference_array @ %p does not specify an array or a "
- "matrix\n",
+ if (!ir->array->type->is_array() && !ir->array->type->is_matrix() &&
+ !ir->array->type->is_vector()) {
+ printf("ir_dereference_array @ %p does not specify an array, a vector "
+ "or a matrix\n",
(void *) ir);
ir->print();
printf("\n");
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index ea6a3f3..aab1403 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -4455,6 +4455,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks)
lower_ubo_reference(prog->_LinkedShaders[i]);
+
+ lower_vector_derefs(prog->_LinkedShaders[i]);
}
done:
diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 24806ac..b74aa3d 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -390,7 +390,19 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
case ir_type_dereference_array: {
ir_dereference_array *deref_array = (ir_dereference_array *) deref;
unsigned array_stride;
- if (deref_array->array->type->is_matrix() && *row_major) {
+ if (deref_array->array->type->is_vector()) {
+ /* We get this when storing or loading a component out of a vector
+ * with a non-constant index. This happens for v[i] = f where v is
+ * a vector (or m[i][j] = f where m is a matrix). If we don't
+ * lower that here, it gets turned into v = vector_insert(v, i,
+ * f), which loads the entire vector, modifies one component and
+ * then write the entire thing back. That breaks if another
+ * thread or SIMD channel is modifying the same vector.
+ */
+ array_stride = 4;
+ if (deref_array->array->type->is_double())
+ array_stride *= 2;
+ } else if (deref_array->array->type->is_matrix() && *row_major) {
/* When loading a vector out of a row major matrix, the
* step between the columns (vectors) is the size of a
* float, while the step between the rows (elements of a
diff --git a/src/glsl/lower_vector_derefs.cpp b/src/glsl/lower_vector_derefs.cpp
new file mode 100644
index 0000000..4a5d6f0
--- /dev/null
+++ b/src/glsl/lower_vector_derefs.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "ir.h"
+#include "ir_builder.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+
+using namespace ir_builder;
+
+namespace {
+
+class vector_deref_visitor : public ir_rvalue_enter_visitor {
+public:
+ vector_deref_visitor()
+ : progress(false)
+ {
+ }
+
+ virtual ~vector_deref_visitor()
+ {
+ }
+
+ virtual void handle_rvalue(ir_rvalue **rv);
+ virtual ir_visitor_status visit_enter(ir_assignment *ir);
+
+ bool progress;
+};
+
+} /* anonymous namespace */
+
+ir_visitor_status
+vector_deref_visitor::visit_enter(ir_assignment *ir)
+{
+ if (!ir->lhs || ir->lhs->ir_type != ir_type_dereference_array)
+ return ir_rvalue_enter_visitor::visit_enter(ir);
+
+ ir_dereference_array *const deref = (ir_dereference_array *) ir->lhs;
+ if (!deref->array->type->is_vector())
+ return ir_rvalue_enter_visitor::visit_enter(ir);
+
+ ir_dereference *const new_lhs = (ir_dereference *) deref->array;
+ ir->set_lhs(new_lhs);
+
+ ir_constant *old_index_constant = deref->array_index->constant_expression_value();
+ void *mem_ctx = ralloc_parent(ir);
+ if (!old_index_constant) {
+ ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
+ new_lhs->type,
+ new_lhs->clone(mem_ctx, NULL),
+ ir->rhs,
+ deref->array_index);
+ ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
+ } else {
+ ir->write_mask = 1 << old_index_constant->get_int_component(0);
+ }
+
+ return ir_rvalue_enter_visitor::visit_enter(ir);
+}
+
+void
+vector_deref_visitor::handle_rvalue(ir_rvalue **rv)
+{
+ if (*rv == NULL || (*rv)->ir_type != ir_type_dereference_array)
+ return;
+
+ ir_dereference_array *const deref = (ir_dereference_array *) *rv;
+ if (!deref->array->type->is_vector())
+ return;
+
+ void *mem_ctx = ralloc_parent(deref);
+ *rv = new(mem_ctx) ir_expression(ir_binop_vector_extract,
+ deref->array,
+ deref->array_index);
+}
+
+bool
+lower_vector_derefs(gl_shader *shader)
+{
+ vector_deref_visitor v;
+
+ visit_list_elements(&v, shader->ir);
+
+ return v.progress;
+}
diff --git a/src/glsl/opt_dead_code_local.cpp b/src/glsl/opt_dead_code_local.cpp
index 4770fcf..32acf9b 100644
--- a/src/glsl/opt_dead_code_local.cpp
+++ b/src/glsl/opt_dead_code_local.cpp
@@ -196,6 +196,8 @@ process_assignment(void *ctx, ir_assignment *ir, exec_list *assignments)
foreach_in_list_safe(assignment_entry, entry, assignments) {
if (entry->lhs != var)
continue;
+ if (entry->ir->lhs->ir_type != ir_type_variable)
+ continue;
int remove = entry->unused & ir->write_mask;
if (debug) {
--
2.6.2
More information about the mesa-dev
mailing list