[Mesa-dev] [PATCH 26/28] glsl: lower tessellation varyings packed with component layout qualifier
eocallaghan at alterapraxis.com
eocallaghan at alterapraxis.com
Mon Dec 28 22:36:56 PST 2015
On 2015-12-29 16:00, Timothy Arceri wrote:
> For tessellation shaders we cannot just copy everything to the packed
> varyings like we do in other stages as tessellation uses shared memory
> for
> varyings, therefore it is only safe to copy array elements that the
> shader
> actually uses.
>
> This class searches the IR for uses of varyings and then creates
> instructions that copy those vars to a packed varying. This means it is
> easy to end up with duplicate copies if the varying is used more than
> once,
> also arrays of arrays create a duplicate copy for each dimension that
> exists. These issues are not easily resolved without breaking various
> corner cases so we leave it to a later IR stage to clean up the mess.
>
> Note that neither GLSL IR nor NIR can currently can't clean up the
s/can\'t//
> duplicates when and indirect is used as an array index. This patch
> assumes that NIR will eventually be able to clean this up.
> ---
> src/glsl/lower_packed_varyings.cpp | 421
> +++++++++++++++++++++++++++++++++++++
> 1 file changed, 421 insertions(+)
>
> diff --git a/src/glsl/lower_packed_varyings.cpp
> b/src/glsl/lower_packed_varyings.cpp
> index b606cc8..9522969 100644
> --- a/src/glsl/lower_packed_varyings.cpp
> +++ b/src/glsl/lower_packed_varyings.cpp
> @@ -148,10 +148,28 @@
> #include "ir.h"
> #include "ir_builder.h"
> #include "ir_optimization.h"
> +#include "ir_rvalue_visitor.h"
> #include "program/prog_instruction.h"
> +#include "util/hash_table.h"
>
> using namespace ir_builder;
>
> +/**
> + * Creates new type for and array when the base type changes.
> + */
> +static const glsl_type *
> +update_packed_array_type(const glsl_type *type, const glsl_type
> *packed_type)
> +{
> + const glsl_type *element_type = type->fields.array;
> + if (element_type->is_array()) {
> + const glsl_type *new_array_type =
> + update_packed_array_type(element_type, packed_type);
> + return glsl_type::get_array_instance(new_array_type,
> type->length);
> + } else {
> + return glsl_type::get_array_instance(packed_type, type->length);
> + }
> +}
> +
> static bool
> needs_lowering(ir_variable *var, bool has_enhanced_layouts,
> bool disable_varying_packing)
> @@ -205,6 +223,51 @@ create_packed_var(void * const mem_ctx, const
> char *packed_name,
> return packed_var;
> }
>
> +/**
> + * Creates a packed varying for the tessellation packing.
> + */
> +static ir_variable *
> +create_tess_packed_var(void *mem_ctx, ir_variable *unpacked_var)
> +{
> + /* create packed varying name using location */
> + char location_str[11];
> + snprintf(location_str, 11, "%d", unpacked_var->data.location);
> + char *packed_name;
> + if ((ir_variable_mode) unpacked_var->data.mode ==
> ir_var_shader_out)
> + packed_name = ralloc_asprintf(mem_ctx, "packed_out:%s",
> location_str);
> + else
> + packed_name = ralloc_asprintf(mem_ctx, "packed_in:%s",
> location_str);
> +
> + const glsl_type *packed_type;
> + switch (unpacked_var->type->without_array()->base_type) {
> + case GLSL_TYPE_UINT:
> + packed_type = glsl_type::uvec4_type;
> + break;
> + case GLSL_TYPE_INT:
> + packed_type = glsl_type::ivec4_type;
> + break;
> + case GLSL_TYPE_FLOAT:
> + packed_type = glsl_type::vec4_type;
> + break;
> + case GLSL_TYPE_DOUBLE:
> + packed_type = glsl_type::dvec4_type;
> + break;
> + default:
> + assert(!"Unexpected type in tess varying packing");
> + return NULL;
> + }
> +
> + /* Create array new array type */
> + if (unpacked_var->type->is_array()) {
> + packed_type = update_packed_array_type(unpacked_var->type,
> packed_type);
> + }
> +
> + return create_packed_var(mem_ctx, packed_name, packed_type,
> unpacked_var,
> + (ir_variable_mode)
> unpacked_var->data.mode,
> + unpacked_var->data.location,
> + unpacked_var->type->is_array());
> +}
> +
> namespace {
>
> /**
> @@ -763,6 +826,296 @@
> lower_packed_varyings_gs_splicer::visit_leave(ir_emit_vertex *ev)
> }
>
>
> +/**
> + * For tessellation shaders we cannot just copy everything to the
> packed
> + * varyings like we do in other stages as tessellation uses shared
> memory for
> + * varyings, therefore it is only safe to copy array elements that the
> shader
> + * actually uses.
> + *
> + * This class searches the IR for uses of varyings and then creates
> + * instructions that copy those vars to a packed varying. This means
> it is
> + * easy to end up with duplicate copies if the varying is used more
> than once,
> + * also arrays of arrays create a duplicate copy for each dimension
> that
> + * exists. These issues are not easily resolved without breaking
> various
> + * corner cases so we leave it to a later IR stage to clean up the
> mess.
> + */
> +class lower_packed_varyings_tess_visitor : public ir_rvalue_visitor
> +{
> +public:
> + lower_packed_varyings_tess_visitor(void *mem_ctx, hash_table
> *varyings,
> + ir_variable_mode mode)
> + : mem_ctx(mem_ctx), varyings(varyings), mode(mode)
> + {
> + }
> +
> + virtual ~lower_packed_varyings_tess_visitor()
> + {
> + }
> +
> + virtual ir_visitor_status visit_leave(ir_assignment *);
> + virtual ir_visitor_status visit_leave(ir_dereference_array *);
> +
> + ir_dereference *create_dereference(ir_dereference *deref,
> + unsigned *dimensions);
> + unsigned create_extra_array_dereference(unsigned inner_dimension,
> + const glsl_type
> **types_list,
> + ir_dereference
> **packed_deref_list,
> + ir_dereference
> **deref_list);
> + ir_variable *get_packed_var(ir_variable *var);
> + void handle_rvalue(ir_rvalue **rvalue);
> +
> + /**
> + * Exec list into which the visitor should insert the packing
> instructions.
> + * Caller provides this list; it should insert the instructions
> into the
> + * appropriate place in the shader once the visitor has finished
> running.
> + */
> + exec_list new_instructions;
> +
> +private:
> + /**
> + * Memory context used to allocate new instructions for the shader.
> + */
> + void * const mem_ctx;
> +
> + hash_table *varyings;
> +
> + ir_variable_mode mode;
> +};
> +
> +/**
> + * Search the hash table for a packed varying for this variable.
> + */
> +ir_variable *
> +lower_packed_varyings_tess_visitor::get_packed_var(ir_variable *var)
> +{
> + assert(var);
> +
> + const struct hash_entry *entry =
> + _mesa_hash_table_search(varyings, var);
> +
> + return entry ? (ir_variable *) entry->data : NULL;
> +}
> +
> +ir_dereference *
> +lower_packed_varyings_tess_visitor::create_dereference(ir_dereference
> *deref,
> + unsigned
> *dimension)
> +{
> + ir_dereference_array *deref_array = deref->as_dereference_array();
> + if (deref_array) {
> + (*dimension)--;
> + ir_dereference *array =
> + create_dereference(deref_array->array->as_dereference(),
> dimension);
> + return new(this->mem_ctx)
> + ir_dereference_array(array, deref_array->array_index);
> + } else {
> + ir_variable *unpacked_var = deref->variable_referenced();
> + ir_variable *packed_var = get_packed_var(unpacked_var);
> + return new(this->mem_ctx) ir_dereference_variable(packed_var);
> + }
> +}
> +/**
> + * This creates the extra derefs needed to copy the full array. For
> example if
> + * we have:
> + *
> + * layout(location = 0, component = 3) in float b[][6];
> + * layout(location = 0, component = 3) out float b_tcs[][6];
> + * ...
> + * b_tcs[gl_InvocationID] = b[gl_InvocationID];
> + *
> + * We need to copy all the inner array elements to the new packed
> varying:
> + *
> + * packed_out:26[gl_InvocationID][0].yzw =
> b_tcs[gl_InvocationID][0];
> + * ...
> + * packed_out:26[gl_InvocationID][5].yzw =
> b_tcs[gl_InvocationID][5];
> + */
> +unsigned
> +lower_packed_varyings_tess_visitor::create_extra_array_dereference(unsigned
> inner_dimension,
> +
> const glsl_type **types_list,
> +
> ir_dereference **packed_deref_list,
> +
> ir_dereference **deref_list)
> +{
> + unsigned outer_deref_array_size;
> + if (inner_dimension != 0)
> + outer_deref_array_size =
> + create_extra_array_dereference(inner_dimension - 1,
> types_list,
> + packed_deref_list,
> deref_list);
> + else {
> + assert(types_list[inner_dimension]->length > 0);
> + outer_deref_array_size = 1;
> + }
> +
> + unsigned deref_array_size =
> + types_list[inner_dimension]->length * outer_deref_array_size;
> +
> + /* Create new lists to store the new instructions in */
> + ir_dereference **new_packed_deref_list = (ir_dereference **)
> + rzalloc_array_size(mem_ctx, sizeof(ir_dereference *),
> deref_array_size);
> + ir_dereference **new_deref_list = (ir_dereference **)
> + rzalloc_array_size(mem_ctx, sizeof(ir_dereference *),
> deref_array_size);
> +
> + unsigned list_count = 0;
> + for (unsigned i = 0; i < types_list[inner_dimension]->length; i++)
> {
> + for (unsigned j = 0; j < outer_deref_array_size; j++) {
> + /* Clone the outer dimension derefs */
> + ir_dereference *deref_clone =
> deref_list[j]->clone(this->mem_ctx, NULL);
> + ir_dereference *packed_deref_clone =
> packed_deref_list[j]->clone(this->mem_ctx, NULL);
> +
> + /* Create new derefs for the inner dimiension */
> + ir_constant *constant = new(this->mem_ctx) ir_constant(i);
> + new_packed_deref_list[list_count] = new(this->mem_ctx)
> + ir_dereference_array(packed_deref_clone, constant);
> +
> + ir_constant *constant2 = new(this->mem_ctx) ir_constant(i);
> + new_deref_list[list_count] = new(this->mem_ctx)
> + ir_dereference_array(deref_clone, constant2);
> + list_count++;
> + }
> + }
> +
> + /* Copy the new derefs so the caller can access them */
> + for (unsigned j = 0; j < list_count; j++) {
> + packed_deref_list[j] = new_packed_deref_list[j];
> + deref_list[j] = new_deref_list[j];
> + }
> + return deref_array_size;
> +}
> +
> +void
> +lower_packed_varyings_tess_visitor::handle_rvalue(ir_rvalue **rvalue)
> +{
> + if (!*rvalue)
> + return;
> +
> + ir_dereference *deref = (*rvalue)->as_dereference();
> +
> + if (!deref)
> + return;
> +
> + ir_variable *unpacked_var = deref->variable_referenced();
> + ir_variable *packed_var = get_packed_var(unpacked_var);
> +
> + /* If the variable is packed then create a new dereference and wrap
> it in
> + * a swizzle to get the correct values as specified by the
> component
> + * qualifier.
> + */
> + if (packed_var) {
> + /* Count array dimensions */
> + const glsl_type *type = packed_var->type;
> + unsigned dimensions = 0;
> + while (type->is_array()) {
> + type = type->fields.array;
> + dimensions++;
> + }
> +
> + /* Create a type list in reverse order (inner -> outer arrays)
> as this
> + * is the order the IR works in.
> + */
> + const glsl_type **types_list = (const glsl_type **)
> + rzalloc_array_size(mem_ctx, sizeof(glsl_type *), dimensions);
> + unsigned order = dimensions;
> + type = unpacked_var->type;
> + while (type->is_array()) {
> + types_list[--order] = type;
> + type = type->fields.array;
> + }
> +
> + /* Create a derefence for the packed var and clone the unpacked
> deref */
> + unsigned inner_dimension = dimensions;
> + ir_dereference *packed = create_dereference(deref,
> &inner_dimension);
> + ir_dereference *cloned_deref = deref->clone(this->mem_ctx,
> NULL);
> +
> + /* If needed create extra derefs so we can copy all inner array
> elements
> + * of a multi-dimensional array.
> + */
> + unsigned instruction_count;
> + ir_dereference **packed_deref;
> + ir_dereference **unpacked_deref;
> + if (inner_dimension != 0) {
> + instruction_count =
> + types_list[inner_dimension - 1]->arrays_of_arrays_size();
> +
> + /* Create new lists to store the new instructions in */
> + packed_deref = (ir_dereference **)
> + rzalloc_array_size(mem_ctx, sizeof(ir_dereference *),
> + instruction_count);
> + unpacked_deref = (ir_dereference **)
> + rzalloc_array_size(mem_ctx, sizeof(ir_dereference *),
> + instruction_count);
> +
> + /* Pass in the outer array derefs that already exist */
> + packed_deref[0] = packed;
> + unpacked_deref[0] = cloned_deref;
> +
> + instruction_count =
> + create_extra_array_dereference(inner_dimension - 1,
> types_list,
> + packed_deref,
> unpacked_deref);
> + } else {
> + instruction_count = 1;
> + packed_deref = &packed;
> + unpacked_deref = &cloned_deref;
> + }
> +
> + /* Wrap packed derefs in a swizzle and the create assignment */
> + unsigned swizzle_values[4] = { 0, 0, 0, 0 };
> + unsigned components =
> + unpacked_var->type->without_array()->vector_elements;
> + for (unsigned i = 0; i < components; ++i) {
> + swizzle_values[i] = i + unpacked_var->data.location_frac;
> + }
> +
> + for (unsigned i = 0; i < instruction_count; i++) {
> + ir_swizzle *swiz = new(this->mem_ctx)
> ir_swizzle(packed_deref[i], swizzle_values,
> + components);
> + ir_assignment *assign;
> + if (mode == ir_var_shader_out) {
> + assign = new (this->mem_ctx) ir_assignment(swiz,
> unpacked_deref[i]);
> + } else {
> + assign = new (this->mem_ctx)
> ir_assignment(unpacked_deref[i], swiz);
> + }
> + new_instructions.push_tail(assign);
> + }
> + }
> +}
> +
> +ir_visitor_status
> +lower_packed_varyings_tess_visitor::visit_leave(ir_dereference_array
> *ir)
> +{
> + /* The array index is not the target of the assignment, so clear
> the
> + * 'in_assignee' flag. Restore it after returning from the array
> index.
> + */
> + const bool was_in_assignee = this->in_assignee;
> + this->in_assignee = false;
> + handle_rvalue(&ir->array_index);
> + this->in_assignee = was_in_assignee;
> +
> + ir_rvalue *rvalue = ir;
> + handle_rvalue(&rvalue);
> +
> + return visit_continue;
> +}
> +
> +ir_visitor_status
> +lower_packed_varyings_tess_visitor::visit_leave(ir_assignment *ir)
> +{
> + handle_rvalue(&ir->rhs);
> + ir->rhs->accept(this);
> +
> + /* The normal rvalue visitor skips the LHS of assignments, but we
> + * need to process those just the same.
> + */
> + ir_rvalue *lhs = ir->lhs;
> + handle_rvalue(&lhs);
> + ir->lhs->accept(this);
> +
> + if (ir->condition) {
> + handle_rvalue(&ir->condition);
> + ir->condition->accept(this);
> + }
> +
> + return visit_continue;
> +}
> +
> +
> void
> lower_packed_varyings(void *mem_ctx, unsigned locations_used,
> ir_variable_mode mode, gl_shader *shader,
> @@ -817,5 +1170,73 @@ lower_packed_varyings(void *mem_ctx, unsigned
> locations_used,
> main_func_sig->body.head->insert_before(&new_instructions);
> main_func_sig->body.head->insert_before(&new_variables);
> }
> + } else {
> + /* Build a hash table with all the varyings we can pack. For the
> + * tessellation stages we only pack varyings that have location
> + * and component layout qualifiers as packing varying without
> these
> + * makes things much more difficult.
> + */
> + hash_table *varyings = _mesa_hash_table_create(NULL,
> _mesa_hash_pointer,
> +
> _mesa_key_pointer_equal);
> +
> + ir_variable **packed_varyings = (ir_variable **)
> + rzalloc_array_size(mem_ctx, sizeof(*packed_varyings),
> + locations_used);
> +
> + foreach_in_list(ir_instruction, node, shader->ir) {
> + ir_variable *var = node->as_variable();
> + if (var == NULL)
> + continue;
> +
> + if (var->data.mode != mode ||
> + var->data.location < (int) base_location ||
> + !needs_lowering(var, has_enhanced_layouts, true))
> + continue;
> +
> + /* Clone the variable for program resource list before
> + * it gets modified and lost.
> + */
> + if (!shader->packed_varyings)
> + shader->packed_varyings = new (shader) exec_list;
> +
> + shader->packed_varyings->push_tail(var->clone(shader, NULL));
> +
> + /* Get the packed varying for this location or create a new
> one. */
> + ir_variable *packed_var;
> + if (packed_varyings[var->data.location - base_location]) {
> + packed_var = packed_varyings[var->data.location -
> base_location];
> + } else {
> + /* Create the new packed varying */
> + packed_var = create_tess_packed_var(mem_ctx, var);
> + var->insert_before(packed_var);
> + packed_varyings[var->data.location - base_location] =
> packed_var;
> + }
> +
> + /* Add to varyings the hash table with the old varying as a
> key, and
> + * the packed varying as the data. This will be used later in
> the
> + * visitor to look-up variables that need to be replaced.
> + */
> + _mesa_hash_table_insert(varyings, var, packed_var);
> +
> + /* Change the old varying into an ordinary global, dead code
> + * elimination will clean this up for us later on.
> + */
> + assert(var->data.mode != ir_var_temporary);
> + var->data.mode = ir_var_auto;
> + }
> +
> + /* Find varying dereferences */
> + /* Create instructions that copy varyings to/from temporaries */
> + lower_packed_varyings_tess_visitor visitor(mem_ctx, varyings,
> mode);
> + visitor.run(shader->ir);
> +
> + /* Insert instructions that copy varyings to/from temporaries */
> + if (mode == ir_var_shader_out) {
> + main_func_sig->body.append_list(&visitor.new_instructions);
> + } else {
> +
> main_func_sig->body.head->insert_before(&visitor.new_instructions);
> + }
> +
> + _mesa_hash_table_destroy(varyings, NULL);
> }
> }
More information about the mesa-dev
mailing list