[Mesa-dev] [PATCH v2 08/11] nir: Add a array-of-vector variable shrinking pass
Caio Marcelo de Oliveira Filho
caio.oliveira at intel.com
Fri Aug 24 01:16:05 UTC 2018
On Sat, Jul 28, 2018 at 10:44:39PM -0700, Jason Ekstrand wrote:
> This pass looks for variables with vector or array-of-vector types and
> narrows the type to only the components used.
> ---
> src/compiler/nir/nir.h | 1 +
> src/compiler/nir/nir_split_vars.c | 694 ++++++++++++++++++++++++++++++
> 2 files changed, 695 insertions(+)
This patch and the one that enables the pass are
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira at intel.com>
I have some suggestions below, pick the ones you like.
(...)
> +static void
> +mark_deref_used(nir_deref_instr *deref,
> + nir_component_mask_t comps_read,
> + nir_component_mask_t comps_written,
> + nir_deref_instr *copy_deref,
> + struct hash_table *var_usage_map,
> + nir_variable_mode modes,
> + void *mem_ctx)
> +{
> + if (!(deref->mode & modes))
> + return;
> +
> + nir_variable *var = nir_deref_instr_get_variable(deref);
> +
> + struct vec_var_usage *usage =
> + get_vec_var_usage(var, var_usage_map, true, mem_ctx);
> + if (!usage)
> + return;
> +
> + const unsigned num_var_components =
> + glsl_get_components(glsl_without_array_or_matrix(var->type));
Consider at vec_var_usage creation time, setting a usage->comps (or
similar) to "(1u << num_var_components) - 1". Some cheap savings here
and in other passes, also would result in less noise lines here and
there.
> +
> + usage->comps_read |= comps_read & ((1u << num_var_components) - 1);
> + usage->comps_written |= comps_written & ((1u << num_var_components) - 1);
> +
> + struct vec_var_usage *copy_usage = NULL;
> + if (copy_deref) {
> + copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
> + true, mem_ctx);
> + if (copy_usage) {
> + if (usage->vars_copied == NULL) {
> + usage->vars_copied = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
> + _mesa_key_pointer_equal);
> + }
> + _mesa_set_add(usage->vars_copied, copy_usage);
> + } else {
> + usage->has_external_copy = true;
> + }
> + }
> +
> + nir_deref_path path;
> + nir_deref_path_init(&path, deref, mem_ctx);
> +
> + nir_deref_path copy_path;
> + if (copy_usage)
> + nir_deref_path_init(©_path, copy_deref, mem_ctx);
> +
> + unsigned copy_i = 0;
> + for (unsigned i = 0; i < usage->num_levels; i++) {
> + struct array_level_usage *level = &usage->levels[i];
> + nir_deref_instr *deref = path.path[i + 1];
> + assert(deref->deref_type == nir_deref_type_array ||
> + deref->deref_type == nir_deref_type_array_wildcard);
> +
> + unsigned max_used;
> + if (deref->deref_type == nir_deref_type_array) {
> + nir_const_value *const_index =
> + nir_src_as_const_value(deref->arr.index);
> + max_used = const_index ? const_index->u32[0] : UINT_MAX;
> + } else {
> + /* For wildcards, we read or wrote the whole thing. */
> + assert(deref->deref_type == nir_deref_type_array_wildcard);
> + max_used = level->array_len - 1;
> +
> + if (copy_usage) {
> + /* Match each wildcard level with the level on copy_usage */
> + for (; copy_path.path[copy_i + 1]; copy_i++) {
> + if (copy_path.path[copy_i + 1]->deref_type ==
> + nir_deref_type_array_wildcard)
> + break;
> + }
> + struct array_level_usage *copy_level =
> + ©_usage->levels[copy_i++];
> +
> + if (level->levels_copied == NULL) {
> + level->levels_copied =
> + _mesa_set_create(mem_ctx, _mesa_hash_pointer,
> + _mesa_key_pointer_equal);
> + }
> + _mesa_set_add(level->levels_copied, copy_level);
Since once level->has_external_copy is set we don't really use the
levels_copied, maybe wrap the bootstrap/set block with if
(!level->has_external_copy)?
> + } else {
> + /* We have a wildcard and we don't know where this copy came from,
> + * flag it and we'll know to not shorten this array.
> + */
Maybe instead of "we don't know" say that it comes from a variable
from other mode.
> + level->has_external_copy = true;
> + }
> + }
> +
> + if (comps_written)
> + level->max_written = MAX2(level->max_written, max_used);
> + if (comps_read)
> + level->max_read = MAX2(level->max_read, max_used);
> + }
> +}
(...)
> +static void
> +find_used_components_impl(nir_function_impl *impl,
> + struct hash_table *var_usage_map,
> + nir_variable_mode modes,
> + void *mem_ctx)
> +{
> + nir_foreach_block(block, impl) {
> + nir_foreach_instr_safe(instr, block) {
Unless I'm missing something, we don't need "safe" variant here.
(...)
> +static bool
> +shrink_vec_var_list(struct exec_list *vars,
> + struct hash_table *var_usage_map)
> +{
> + /* Initialize the components kept field of each variable. This is the
> + * AND of the components written and components read. If a component is
> + * written but never read, it's dead. If it is read but never written,
> + * then all values read are undefined garbage and we may as well not read
> + * them.
> + *
> + * The same logic applies to the array length. We make the array length
> + * the minimum needed required length between read and write and plan to
> + * discard any OOB access. The one exception here is indirect writes
> + * because we don't know where they will land and we can't shrink an array
> + * with indirect writes because previously in-bounds writes may become
> + * out-of-bounds and have undefined behavior.
> + *
> + * Also, if we have a copy that to/from something we can't shrink, we need
> + * to leave components and array_len of any wildcards alone.
> + */
> + nir_foreach_variable(var, vars) {
> + struct vec_var_usage *usage =
> + get_vec_var_usage(var, var_usage_map, false, NULL);
> + if (!usage)
> + continue;
After reading I thought here and in the fix-point loop would be a
better call to iterate directly var_usage_map. Due to the way we
reuse var_usage_map, we would have to skip based on modes, which kind
of spoil things a bit, but maybe still a win. Or use different sets
and join them for the final step... Probably won't make much
difference.
> +
> + const unsigned var_num_components =
> + glsl_get_components(glsl_without_array_or_matrix(var->type));
> +
> + assert(usage->comps_kept == 0);
> + if (usage->has_external_copy)
> + usage->comps_kept = (1u << var_num_components) - 1;
> + else
> + usage->comps_kept = usage->comps_read & usage->comps_written;
> +
> + for (unsigned i = 0; i < usage->num_levels; i++) {
> + struct array_level_usage *level = &usage->levels[i];
> + assert(level->array_len > 0);
> +
> + if (level->max_written == UINT_MAX || level->has_external_copy)
> + continue; /* Can't shrink */
> +
> + unsigned max_used = MIN2(level->max_read, level->max_written);
> + level->array_len = MIN2(max_used, level->array_len - 1) + 1;
> + }
> + }
> +
> + /* In order for variable copies to work, we have to have the same data type
> + * on the source and the destination. In order to satisfy this, we run a
> + * little fixed-point algorithm to transitively ensure that we get enough
> + * components and array elements for this to hold for all copies.
> + */
> + bool fp_progress;
> + do {
> + fp_progress = false;
> + nir_foreach_variable(var, vars) {
> + struct vec_var_usage *var_usage =
> + get_vec_var_usage(var, var_usage_map, false, NULL);
> + if (!var_usage || !var_usage->vars_copied)
> + continue;
> +
> + struct set_entry *copy_entry;
> + set_foreach(var_usage->vars_copied, copy_entry) {
> + const struct vec_var_usage *copy_usage = copy_entry->key;
> + if (copy_usage->comps_kept & ~var_usage->comps_kept) {
> + var_usage->comps_kept |= copy_usage->comps_kept;
Could we set both sides here, i.e. do
"copy_usage->comps_kept = var_usage->comps_kept"? The reasoning
is that it might save an iteration, e.g. A <-> C and B <-> C,
but iteration happens in order: A, B then C. Information from A
will take two iterations to propagate to B.
> + fp_progress = true;
> + }
> + }
> +
> + for (unsigned i = 0; i < var_usage->num_levels; i++) {
> + struct array_level_usage *var_level = &var_usage->levels[i];
> + if (!var_level->levels_copied)
> + continue;
> +
> + set_foreach(var_level->levels_copied, copy_entry) {
> + const struct array_level_usage *copy_level = copy_entry->key;
> + if (var_level->array_len < copy_level->array_len) {
> + var_level->array_len = copy_level->array_len;
> + fp_progress = true;
> + }
> + }
> + }
> + }
> + } while (fp_progress);
> +
> + bool vars_shrunk = false;
> + nir_foreach_variable_safe(var, vars) {
> + struct vec_var_usage *usage =
> + get_vec_var_usage(var, var_usage_map, false, NULL);
> + if (!usage)
> + continue;
> +
> + bool shrunk = false;
> + const struct glsl_type *vec_type = var->type;
> + for (unsigned i = 0; i < usage->num_levels; i++) {
> + /* If we've reduced the array to zero elements at some level, just
> + * set comps_kept to 0 and delete the variable.
> + */
> + if (usage->levels[i].array_len == 0)
> + usage->comps_kept = 0;
Add a break here. The rest of the loop execution would try to find a
shrink situation, but regardless the result it will be ignored since
we bail when comps_kept == 0.
> +
> + assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
> + if (usage->levels[i].array_len < glsl_get_length(vec_type))
> + shrunk = true;
> + vec_type = glsl_get_array_element(vec_type);
> + }
> + assert(glsl_type_is_vector_or_scalar(vec_type));
> +
> + assert(usage->comps_kept < (1u << glsl_get_components(vec_type)));
> + if (usage->comps_kept != (1u << glsl_get_components(vec_type)) - 1)
> + shrunk = true;
> +
> + if (usage->comps_kept == 0) {
> + /* This variable is dead, remove it */
> + vars_shrunk = true;
> + exec_node_remove(&var->node);
> + continue;
> + }
> +
> + if (!shrunk) {
> + /* This variable doesn't need to be shrunk. Remove it from the
> + * hash table so later passes will ignore it.
s/later passes/later steps/ assuming you are referring to the access
fixing step.
> + */
> + _mesa_hash_table_remove_key(var_usage_map, var);
> + continue;
> + }
(...)
> +static void
> +shrink_vec_var_access_impl(nir_function_impl *impl,
> + struct hash_table *var_usage_map,
> + nir_variable_mode modes)
> +{
> + nir_builder b;
> + nir_builder_init(&b, impl);
> +
> + nir_foreach_block(block, impl) {
> + nir_foreach_instr_safe(instr, block) {
> + switch (instr->type) {
> + case nir_instr_type_deref: {
> + nir_deref_instr *deref = nir_instr_as_deref(instr);
> + if (!(deref->mode & modes))
> + break;
> +
> + /* Clean up any dead derefs we find lying around. They may refer
> + * to variables we've deleted.
> + */
> + if (nir_deref_instr_remove_if_unused(deref))
> + break;
> +
> + /* We don't need to check if this is one of the derefs we're
> + * shrinking because this is a no-op if it isn't. The worst that
> + * could happen is that we accidentally fix an invalid deref.
> + */
Consider prefixing this comment with something like: "Update the new
variable type in the deref."
(...)
Thanks,
Caio
More information about the mesa-dev
mailing list