[Mesa-dev] [PATCH V4 24/26] glsl: lower tessellation varyings packed with component layout qualifier
Timothy Arceri
timothy.arceri at collabora.com
Sun Mar 6 04:17:44 UTC 2016
For tessellation shaders we cannot just copy everything to the packed
varyings like we do in other stages, as tessellation uses shared memory for
varyings; therefore it is only safe to copy array elements that the shader
actually uses.
This class searches the IR for uses of varyings and then creates
instructions that copy those vars to a packed varying. This means it is
easy to end up with duplicate copies if the varying is used more than once;
arrays of arrays also create a duplicate copy for each dimension that
exists. These issues are not easily resolved without breaking various
corner cases, so we leave it to a later IR stage to clean up the mess.
Note that neither GLSL IR nor NIR can currently clean up the
duplicates when an indirect is used as an array index. This patch
assumes that NIR will eventually be able to clean this up.
V4: Fix IR validation for vector components accessed by an array style
subscript. Also add spec quote and link to spec bug to support not
writing vector components individually.
V3: Use correct base location for per-patch outputs, fix some comments,
don't segfault when using subscript to access vector components, and
for now don't support packing arrays of different size or varyings
that start at different locations in tcs out/tes in varyings.
V2: clone array index when creating array dereference for the packed
varying
---
src/compiler/glsl/ir_optimization.h | 2 +-
src/compiler/glsl/link_varyings.cpp | 12 +-
src/compiler/glsl/lower_packed_varyings.cpp | 520 +++++++++++++++++++++++++++-
3 files changed, 523 insertions(+), 11 deletions(-)
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index a115c46..0de5228 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -122,7 +122,7 @@ void lower_output_reads(unsigned stage, exec_list *instructions);
bool lower_packing_builtins(exec_list *instructions, int op_mask);
void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size);
void lower_ubo_reference(struct gl_shader *shader);
-void lower_packed_varyings(void *mem_ctx,
+void lower_packed_varyings(void *mem_ctx, struct gl_shader_program *prog,
unsigned locations_used, ir_variable_mode mode,
gl_shader *shader, unsigned base_location,
bool disable_varying_packing,
diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp
index d433f37..4a20f96 100644
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -1863,14 +1863,14 @@ assign_varying_locations(struct gl_context *ctx,
/* Pack vertex inputs with the component layout qualifier */
unsigned vertex_attributes = _mesa_bitcount_64(reserved_slots);
if (vertex_attributes > 0)
- lower_packed_varyings(mem_ctx, vertex_attributes,
+ lower_packed_varyings(mem_ctx, prog, vertex_attributes,
ir_var_shader_in, producer,
VERT_ATTRIB_GENERIC0, true,
ctx->Extensions.ARB_enhanced_layouts);
}
- lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out, producer,
- VARYING_SLOT_VAR0,
+ lower_packed_varyings(mem_ctx, prog, slots_used, ir_var_shader_out,
+ producer, VARYING_SLOT_VAR0,
disable_varying_packing,
ctx->Extensions.ARB_enhanced_layouts);
}
@@ -1887,13 +1887,13 @@ assign_varying_locations(struct gl_context *ctx,
/* Pack frag outputs with the component layout qualifier */
unsigned frag_outs = _mesa_bitcount_64(reserved_slots);
if (frag_outs > 0)
- lower_packed_varyings(mem_ctx, frag_outs, ir_var_shader_out,
+ lower_packed_varyings(mem_ctx, prog, frag_outs, ir_var_shader_out,
consumer, FRAG_RESULT_DATA0, true,
ctx->Extensions.ARB_enhanced_layouts);
}
- lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, consumer,
- VARYING_SLOT_VAR0,
+ lower_packed_varyings(mem_ctx, prog, slots_used, ir_var_shader_in,
+ consumer, VARYING_SLOT_VAR0,
disable_varying_packing,
ctx->Extensions.ARB_enhanced_layouts);
}
diff --git a/src/compiler/glsl/lower_packed_varyings.cpp b/src/compiler/glsl/lower_packed_varyings.cpp
index 5b2338d..2466246 100644
--- a/src/compiler/glsl/lower_packed_varyings.cpp
+++ b/src/compiler/glsl/lower_packed_varyings.cpp
@@ -148,7 +148,10 @@
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
+#include "ir_rvalue_visitor.h"
+#include "linker.h"
#include "program/prog_instruction.h"
+#include "util/hash_table.h"
using namespace ir_builder;
@@ -177,6 +180,22 @@ check_for_matching_arrays(ir_variable *packed_var, ir_variable *var)
return array_match;
}
+/**
+ * Creates a new type for an array when the base type changes.
+ */
+static const glsl_type *
+update_packed_array_type(const glsl_type *type, const glsl_type *packed_type)
+{
+ const glsl_type *element_type = type->fields.array;
+ if (element_type->is_array()) {
+ const glsl_type *new_array_type =
+ update_packed_array_type(element_type, packed_type);
+ return glsl_type::get_array_instance(new_array_type, type->length);
+ } else {
+ return glsl_type::get_array_instance(packed_type, type->length);
+ }
+}
+
static bool
needs_lowering(ir_variable *var, bool has_enhanced_layouts,
bool disable_varying_packing)
@@ -228,6 +247,51 @@ create_packed_var(void * const mem_ctx, const char *packed_name,
return packed_var;
}
+/**
+ * Creates a packed varying for the tessellation packing.
+ */
+static ir_variable *
+create_tess_packed_var(void *mem_ctx, ir_variable *unpacked_var)
+{
+ /* create packed varying name using location */
+ char location_str[11];
+ snprintf(location_str, 11, "%d", unpacked_var->data.location);
+ char *packed_name;
+ if ((ir_variable_mode) unpacked_var->data.mode == ir_var_shader_out)
+ packed_name = ralloc_asprintf(mem_ctx, "packed_out:%s", location_str);
+ else
+ packed_name = ralloc_asprintf(mem_ctx, "packed_in:%s", location_str);
+
+ const glsl_type *packed_type;
+ switch (unpacked_var->type->without_array()->base_type) {
+ case GLSL_TYPE_UINT:
+ packed_type = glsl_type::uvec4_type;
+ break;
+ case GLSL_TYPE_INT:
+ packed_type = glsl_type::ivec4_type;
+ break;
+ case GLSL_TYPE_FLOAT:
+ packed_type = glsl_type::vec4_type;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ packed_type = glsl_type::dvec4_type;
+ break;
+ default:
+ assert(!"Unexpected type in tess varying packing");
+ return NULL;
+ }
+
+ /* Create new array type */
+ if (unpacked_var->type->is_array()) {
+ packed_type = update_packed_array_type(unpacked_var->type, packed_type);
+ }
+
+ return create_packed_var(mem_ctx, packed_name, packed_type, unpacked_var,
+ (ir_variable_mode) unpacked_var->data.mode,
+ unpacked_var->data.location,
+ unpacked_var->type->is_array());
+}
+
namespace {
/**
@@ -785,11 +849,348 @@ lower_packed_varyings_gs_splicer::visit_leave(ir_emit_vertex *ev)
}
+/**
+ * For tessellation control shaders, we cannot just copy everything to the
+ * packed varyings like we do in other stages. TCS outputs can be used as
+ * shared memory, where multiple threads concurrently perform partial reads
+ * and writes that must not conflict. It is only safe to access the exact
+ * components that the shader uses.
+ *
+ * This class searches the IR for uses of varyings and then emits a copy for
+ * everything it finds hoping later optimizations are able to clean up any
+ * duplicates.
+ */
+class lower_packed_varyings_tess_visitor : public ir_rvalue_visitor
+{
+public:
+ lower_packed_varyings_tess_visitor(void *mem_ctx, hash_table *varyings,
+ ir_variable_mode mode)
+ : mem_ctx(mem_ctx), varyings(varyings), mode(mode)
+ {
+ }
+
+ virtual ~lower_packed_varyings_tess_visitor()
+ {
+ }
+
+ virtual ir_visitor_status visit_leave(ir_assignment *);
+ virtual ir_visitor_status visit_leave(ir_dereference_array *);
+
+ ir_dereference *create_dereference(ir_dereference *deref,
+ unsigned *dimensions,
+ bool *has_vec_subscript);
+ unsigned create_extra_array_dereference(unsigned inner_dimension,
+ const glsl_type **types_list,
+ ir_dereference **packed_deref_list,
+ ir_dereference **deref_list);
+ ir_variable *get_packed_var(ir_variable *var);
+ void handle_rvalue(ir_rvalue **rvalue);
+
+ /**
+ * Exec list into which the visitor should insert the packing instructions.
+ * Caller provides this list; it should insert the instructions into the
+ * appropriate place in the shader once the visitor has finished running.
+ */
+ exec_list new_instructions;
+
+private:
+ /**
+ * Memory context used to allocate new instructions for the shader.
+ */
+ void * const mem_ctx;
+
+ hash_table *varyings;
+
+ ir_variable_mode mode;
+};
+
+/**
+ * Search the hash table for a packed varying for this variable.
+ */
+ir_variable *
+lower_packed_varyings_tess_visitor::get_packed_var(ir_variable *var)
+{
+ assert(var);
+
+ const struct hash_entry *entry =
+ _mesa_hash_table_search(varyings, var);
+
+ return entry ? (ir_variable *) entry->data : NULL;
+}
+
+ir_dereference *
+lower_packed_varyings_tess_visitor::create_dereference(ir_dereference *deref,
+ unsigned *dimension,
+ bool *has_vec_subscript)
+{
+ ir_dereference_array *deref_array = deref->as_dereference_array();
+ if (deref_array) {
+ ir_dereference *array =
+ create_dereference(deref_array->array->as_dereference(), dimension,
+ has_vec_subscript);
+
+ /* The array dereference may actually be to access vector components
+ * so don't touch the dimension count unless we are actually dealing
+ * with an array.
+ */
+ if (deref_array->array->type->is_array()) {
+ (*dimension)--;
+ } else {
+ /* If we have found a vector not an array don't create an array
+ * dereference and set the has_vec_subscript flag so we can remove
+ * the array dereference from the unpacked var too.
+ */
+ *has_vec_subscript = true;
+ return array;
+ }
+
+ return new(this->mem_ctx)
+ ir_dereference_array(array,
+ deref_array->array_index->clone(mem_ctx, NULL));
+ } else {
+ ir_variable *unpacked_var = deref->variable_referenced();
+ ir_variable *packed_var = get_packed_var(unpacked_var);
+ return new(this->mem_ctx) ir_dereference_variable(packed_var);
+ }
+}
+/**
+ * This creates the extra derefs needed to copy the full array. For example if
+ * we have:
+ *
+ * layout(location = 0, component = 3) in float b[][6];
+ * layout(location = 0, component = 3) out float b_tcs[][6];
+ * ...
+ * b_tcs[gl_InvocationID] = b[gl_InvocationID];
+ *
+ * We need to copy all the inner array elements to the new packed varying:
+ *
+ * packed_out:26[gl_InvocationID][0].w = b_tcs[gl_InvocationID][0];
+ * ...
+ * packed_out:26[gl_InvocationID][5].w = b_tcs[gl_InvocationID][5];
+ */
+unsigned
+lower_packed_varyings_tess_visitor::create_extra_array_dereference(unsigned inner_dimension,
+ const glsl_type **types_list,
+ ir_dereference **packed_deref_list,
+ ir_dereference **deref_list)
+{
+ unsigned outer_deref_array_size;
+ if (inner_dimension != 0)
+ outer_deref_array_size =
+ create_extra_array_dereference(inner_dimension - 1, types_list,
+ packed_deref_list, deref_list);
+ else {
+ assert(types_list[inner_dimension]->length > 0);
+ outer_deref_array_size = 1;
+ }
+
+ unsigned deref_array_size =
+ types_list[inner_dimension]->length * outer_deref_array_size;
+
+ /* Create new lists to store the new instructions in */
+ ir_dereference **new_packed_deref_list = (ir_dereference **)
+ rzalloc_array_size(mem_ctx, sizeof(ir_dereference *), deref_array_size);
+ ir_dereference **new_deref_list = (ir_dereference **)
+ rzalloc_array_size(mem_ctx, sizeof(ir_dereference *), deref_array_size);
+
+ unsigned list_count = 0;
+ for (unsigned i = 0; i < types_list[inner_dimension]->length; i++) {
+ for (unsigned j = 0; j < outer_deref_array_size; j++) {
+ /* Clone the outer dimension derefs */
+ ir_dereference *deref_clone = deref_list[j]->clone(this->mem_ctx, NULL);
+ ir_dereference *packed_deref_clone = packed_deref_list[j]->clone(this->mem_ctx, NULL);
+
+ /* Create new derefs for the inner dimension */
+ ir_constant *constant = new(this->mem_ctx) ir_constant(i);
+ new_packed_deref_list[list_count] = new(this->mem_ctx)
+ ir_dereference_array(packed_deref_clone, constant);
+
+ ir_constant *constant2 = new(this->mem_ctx) ir_constant(i);
+ new_deref_list[list_count] = new(this->mem_ctx)
+ ir_dereference_array(deref_clone, constant2);
+ list_count++;
+ }
+ }
+
+ /* Copy the new derefs so the caller can access them */
+ for (unsigned j = 0; j < list_count; j++) {
+ packed_deref_list[j] = new_packed_deref_list[j];
+ deref_list[j] = new_deref_list[j];
+ }
+ return deref_array_size;
+}
+
+void
+lower_packed_varyings_tess_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+ if (!*rvalue)
+ return;
+
+ ir_dereference *deref = (*rvalue)->as_dereference();
+
+ if (!deref)
+ return;
+
+ ir_variable *unpacked_var = deref->variable_referenced();
+ ir_variable *packed_var = get_packed_var(unpacked_var);
+
+ /* If the variable is packed then create a new dereference and wrap it in
+ * a swizzle to get the correct values as specified by the component
+ * qualifier.
+ */
+ if (packed_var) {
+ /* Count array dimensions */
+ const glsl_type *type = packed_var->type;
+ unsigned dimensions = 0;
+ while (type->is_array()) {
+ type = type->fields.array;
+ dimensions++;
+ }
+
+ /* Create a type list in reverse order (inner -> outer arrays) as this
+ * is the order the IR works in.
+ */
+ const glsl_type **types_list = (const glsl_type **)
+ rzalloc_array_size(mem_ctx, sizeof(glsl_type *), dimensions);
+ unsigned order = dimensions;
+ type = unpacked_var->type;
+ while (type->is_array()) {
+ types_list[--order] = type;
+ type = type->fields.array;
+ }
+
+ /* Create a dereference for the packed var and clone the unpacked deref */
+ unsigned inner_dimension = dimensions;
+ bool has_vec_subscript = false;
+ ir_dereference *packed = create_dereference(deref, &inner_dimension,
+ &has_vec_subscript);
+
+ /* If the innermost array dereference is used to access vec components
+ * rather than an array element remove it. This means we will end up
+ * writing to all components with a shader like:
+ *
+ * layout(location = 0, component = 1) patch out vec3 color;
+ * ...
+ * color[gl_InvocationID] = gl_InvocationID;
+ *
+ * The spec seems to support this. From the ARB_tessellation_shader
+ * spec:
+ *
+ * "Tessellation control shaders will get undefined results if one
+ * invocation reads a per-vertex or per-patch attribute written by
+ * another invocation at any point during the same phase, or if two
+ * invocations attempt to write different values to the same
+ * per-patch output in a single phase."
+ *
+ * FIXME: The text is a little unclear about what "attempt to write
+ * different values to the same per-patch output" actually means. A spec
+ * bug has been reported; update once the bug is resolved.
+ * https://www.khronos.org/bugzilla/show_bug.cgi?id=1472
+ */
+ ir_dereference *cloned_deref;
+ if (has_vec_subscript)
+ cloned_deref = deref->as_dereference_array()->array->
+ as_dereference()->clone(this->mem_ctx, NULL);
+ else
+ cloned_deref = deref->clone(this->mem_ctx, NULL);
+
+ /* If needed create extra derefs so we can copy all inner array elements
+ * of a multi-dimensional array.
+ */
+ unsigned instruction_count;
+ ir_dereference **packed_deref;
+ ir_dereference **unpacked_deref;
+ if (inner_dimension != 0) {
+ instruction_count =
+ types_list[inner_dimension - 1]->arrays_of_arrays_size();
+
+ /* Create new lists to store the new instructions in */
+ packed_deref = (ir_dereference **)
+ rzalloc_array_size(mem_ctx, sizeof(ir_dereference *),
+ instruction_count);
+ unpacked_deref = (ir_dereference **)
+ rzalloc_array_size(mem_ctx, sizeof(ir_dereference *),
+ instruction_count);
+
+ /* Pass in the outer array derefs that already exist */
+ packed_deref[0] = packed;
+ unpacked_deref[0] = cloned_deref;
+
+ instruction_count =
+ create_extra_array_dereference(inner_dimension - 1, types_list,
+ packed_deref, unpacked_deref);
+ } else {
+ instruction_count = 1;
+ packed_deref = &packed;
+ unpacked_deref = &cloned_deref;
+ }
+
+ /* Wrap packed derefs in a swizzle and the create assignment */
+ unsigned swizzle_values[4] = { 0, 0, 0, 0 };
+ unsigned components =
+ unpacked_var->type->without_array()->vector_elements;
+ for (unsigned i = 0; i < components; ++i) {
+ swizzle_values[i] = i + unpacked_var->data.location_frac;
+ }
+
+ for (unsigned i = 0; i < instruction_count; i++) {
+ ir_swizzle *swiz = new(this->mem_ctx) ir_swizzle(packed_deref[i], swizzle_values,
+ components);
+ ir_assignment *assign;
+ if (mode == ir_var_shader_out) {
+ assign = new (this->mem_ctx) ir_assignment(swiz, unpacked_deref[i]);
+ } else {
+ assign = new (this->mem_ctx) ir_assignment(unpacked_deref[i], swiz);
+ }
+ new_instructions.push_tail(assign);
+ }
+ }
+}
+
+ir_visitor_status
+lower_packed_varyings_tess_visitor::visit_leave(ir_dereference_array *ir)
+{
+ /* The array index is not the target of the assignment, so clear the
+ * 'in_assignee' flag. Restore it after returning from the array index.
+ */
+ const bool was_in_assignee = this->in_assignee;
+ this->in_assignee = false;
+ handle_rvalue(&ir->array_index);
+ this->in_assignee = was_in_assignee;
+
+ ir_rvalue *rvalue = ir;
+ handle_rvalue(&rvalue);
+
+ return visit_continue;
+}
+
+ir_visitor_status
+lower_packed_varyings_tess_visitor::visit_leave(ir_assignment *ir)
+{
+ handle_rvalue(&ir->rhs);
+ ir->rhs->accept(this);
+
+ /* The normal rvalue visitor skips the LHS of assignments, but we
+ * need to process those just the same.
+ */
+ ir_rvalue *lhs = ir->lhs;
+ handle_rvalue(&lhs);
+ ir->lhs->accept(this);
+
+ if (ir->condition) {
+ handle_rvalue(&ir->condition);
+ ir->condition->accept(this);
+ }
+
+ return visit_continue;
+}
+
+
void
-lower_packed_varyings(void *mem_ctx, unsigned locations_used,
- ir_variable_mode mode, gl_shader *shader,
- unsigned base_location, bool disable_varying_packing,
- bool has_enhanced_layouts)
+lower_packed_varyings(void *mem_ctx, struct gl_shader_program *prog,
+ unsigned locations_used, ir_variable_mode mode,
+ gl_shader *shader, unsigned base_location,
+ bool disable_varying_packing, bool has_enhanced_layouts)
{
ir_function *main_func = shader->symbols->get_function("main");
exec_list void_parameters;
@@ -841,5 +1242,116 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
main_func_sig->body.head->insert_before(&new_instructions);
main_func_sig->body.head->insert_before(&new_variables);
}
+ } else {
+ /* Build a hash table with all the varyings we can pack. For the
+ * tessellation stages we only pack varyings that have location
+ * and component layout qualifiers as packing varying without these
+ * makes things much more difficult.
+ */
+ hash_table *varyings = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ ir_variable **packed_varyings = (ir_variable **)
+ rzalloc_array_size(mem_ctx, sizeof(*packed_varyings),
+ locations_used);
+
+ foreach_in_list(ir_instruction, node, shader->ir) {
+ ir_variable *var = node->as_variable();
+ if (var == NULL)
+ continue;
+
+ if (var->data.mode != mode ||
+ var->data.location < (int) base_location ||
+ !needs_lowering(var, has_enhanced_layouts, true))
+ continue;
+
+ const glsl_type *t;
+ int location = var->data.location - base_location;
+ if (var->data.patch) {
+ location = var->data.location - VARYING_SLOT_PATCH0;
+ t = var->type;
+ } else {
+ t = var->type->fields.array;
+ }
+
+ /* Clone the variable for program resource list before
+ * it gets modified and lost.
+ */
+ if (!shader->packed_varyings)
+ shader->packed_varyings = new (shader) exec_list;
+
+ shader->packed_varyings->push_tail(var->clone(shader, NULL));
+
+ /* Get the packed varying for this location or create a new one. */
+ ir_variable *packed_var;
+ if (packed_varyings[location]) {
+ packed_var = packed_varyings[location];
+
+ /* FIXME: It's possible to pack two differently sized arrays together
+ * and also packed varyings are not required to start at the same
+ * location. However this would be difficult to do with the
+ * current method of packing.
+ */
+ if (packed_var->data.location != var->data.location ||
+ !check_for_matching_arrays(packed_var, var)) {
+ unsigned packed_location = var->data.patch ?
+ packed_var->data.location - VARYING_SLOT_PATCH0 :
+ packed_var->data.location - base_location;
+ const char *varying_mode =
+ var->data.mode == ir_var_shader_out ? "outputs" : "inputs";
+
+ linker_error(prog, "Although allowed by the GLSL spec packing "
+ "varyings with different array types or starting "
+ "at different locations is not currently "
+ "supported in Mesa drivers for %s shader %s "
+ "(%s@%d vs %s@%d)\n.",
+ _mesa_shader_stage_to_string(shader->Stage),
+ varying_mode, packed_var->type->name,
+ packed_location, var->type->name, location);
+
+ _mesa_hash_table_destroy(varyings, NULL);
+ return;
+ }
+ } else {
+ /* Create the new packed varying */
+ packed_var = create_tess_packed_var(mem_ctx, var);
+ var->insert_before(packed_var);
+ packed_varyings[location] = packed_var;
+
+ /* Add the var to the lookup table at all the locations it
+ * consumes.
+ */
+ unsigned num_locs = t->count_attribute_slots(false);
+ for (unsigned i = 0; i < num_locs; i++) {
+ packed_varyings[location + i] = packed_var;
+ }
+ }
+
+ /* Add to varyings the hash table with the old varying as a key, and
+ * the packed varying as the data. This will be used later in the
+ * visitor to look-up variables that need to be replaced.
+ */
+ _mesa_hash_table_insert(varyings, var, packed_var);
+
+ /* Change the old varying into an ordinary global, dead code
+ * elimination will clean this up for us later on.
+ */
+ assert(var->data.mode != ir_var_temporary);
+ var->data.mode = ir_var_auto;
+ }
+
+ /* Find varying dereferences */
+ /* Create instructions that copy varyings to/from temporaries */
+ lower_packed_varyings_tess_visitor visitor(mem_ctx, varyings, mode);
+ visitor.run(shader->ir);
+
+ /* Insert instructions that copy varyings to/from temporaries */
+ if (mode == ir_var_shader_out) {
+ main_func_sig->body.append_list(&visitor.new_instructions);
+ } else {
+ main_func_sig->body.head->insert_before(&visitor.new_instructions);
+ }
+
+ _mesa_hash_table_destroy(varyings, NULL);
}
}
--
2.5.0
More information about the mesa-dev
mailing list