Mesa (main): aco: remove resource flags

Wed Jun 30 18:32:28 UTC 2021

Module: Mesa
Branch: main
Commit: c094765a01638fd4b47bdc113b2d7ecf13c82222
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c094765a01638fd4b47bdc113b2d7ecf13c82222

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Fri Sep 18 16:24:14 2020 +0100

aco: remove resource flags

Now that SMEM stores are disabled, nir_opt_access() performs the same
analysis, so ACO's resource flags are no longer needed. Doing this analysis
in isel is also too late if we want to lower descriptor loads in NIR.

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11652>

---

 src/amd/compiler/aco_instruction_selection.cpp     |   7 +-
 src/amd/compiler/aco_instruction_selection.h       |  66 ------------
 .../compiler/aco_instruction_selection_setup.cpp   | 114 +--------------------
 3 files changed, 2 insertions(+), 185 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 06ca0910b99..297896fd97d 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6296,12 +6296,7 @@ void visit_load_ssbo(isel_context *ctx, nir_intrinsic_instr *instr)
    bool glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT);
    unsigned size = instr->dest.ssa.bit_size / 8;
 
-   uint32_t flags = get_all_buffer_resource_flags(ctx, instr->src[0].ssa, access);
-   /* GLC bypasses VMEM/SMEM caches, so GLC SMEM loads/stores are coherent with GLC VMEM loads/stores
-    * TODO: this optimization is disabled for now because we still need to ensure correct ordering
-    */
-   bool allow_smem = !(flags & (0 && glc ? has_nonglc_vmem_store : has_vmem_store));
-   allow_smem |= ((access & ACCESS_RESTRICT) && (access & ACCESS_NON_WRITEABLE)) || (access & ACCESS_CAN_REORDER);
+   bool allow_smem = access & ACCESS_CAN_REORDER;
 
    load_buffer(ctx, num_components, size, dst, rsrc, get_ssa_temp(ctx, instr->src[1].ssa),
                nir_intrinsic_align_mul(instr), nir_intrinsic_align_offset(instr), glc, allow_smem,
diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h
index 379da7772e0..a12d886ae2d 100644
--- a/src/amd/compiler/aco_instruction_selection.h
+++ b/src/amd/compiler/aco_instruction_selection.h
@@ -39,17 +39,6 @@ struct shader_io_state {
    }
 };
 
-enum resource_flags {
-   has_glc_vmem_load = 0x1,
-   has_nonglc_vmem_load = 0x2,
-   has_glc_vmem_store = 0x4,
-   has_nonglc_vmem_store = 0x8,
-
-   has_vmem_store = has_glc_vmem_store | has_nonglc_vmem_store,
-
-   buffer_is_restrict = 0x10,
-};
-
 struct isel_context {
    const struct radv_nir_compiler_options *options;
    struct radv_shader_args *args;
@@ -84,9 +73,6 @@ struct isel_context {
    struct hash_table *range_ht;
    nir_unsigned_upper_bound_config ub_config;
 
-   uint32_t resource_flag_offsets[MAX_SETS];
-   std::vector<uint8_t> buffer_resource_flags;
-
    Temp arg_temps[AC_MAX_ARGS];
 
    /* FS inputs */
@@ -116,58 +102,6 @@ inline Temp get_arg(isel_context *ctx, struct ac_arg arg)
    return ctx->arg_temps[arg.arg_index];
 }
 
-inline void get_buffer_resource_flags(isel_context *ctx, nir_ssa_def *def, unsigned access,
-                                      uint8_t **flags, uint32_t *count)
-{
-   nir_binding binding = {0};
-   /* global resources (def=NULL) are considered aliasing with all other buffers and
-    * buffer images */
-   // TODO: only merge flags of resources which can really alias.
-   if (def)
-      binding = nir_chase_binding(nir_src_for_ssa(def));
-
-   if (binding.var) {
-      const glsl_type *type = binding.var->type->without_array();
-      assert(type->is_image());
-      if (type->sampler_dimensionality != GLSL_SAMPLER_DIM_BUF) {
-         *flags = NULL;
-         *count = 0;
-         return;
-      }
-   }
-
-   if (!binding.success) {
-      *flags = ctx->buffer_resource_flags.data();
-      *count = ctx->buffer_resource_flags.size();
-      return;
-   }
-
-   unsigned set_offset = ctx->resource_flag_offsets[binding.desc_set];
-
-   if (!(ctx->buffer_resource_flags[set_offset + binding.binding] & buffer_is_restrict)) {
-      /* Non-restrict buffers alias only with other non-restrict buffers.
-       * We reserve flags[0] for these. */
-      *flags = ctx->buffer_resource_flags.data();
-      *count = 1;
-      return;
-   }
-
-   *flags = ctx->buffer_resource_flags.data() + set_offset + binding.binding;
-   *count = 1;
-}
-
-inline uint8_t get_all_buffer_resource_flags(isel_context *ctx, nir_ssa_def *def, unsigned access)
-{
-   uint8_t *flags;
-   uint32_t count;
-   get_buffer_resource_flags(ctx, def, access, &flags, &count);
-
-   uint8_t res = 0;
-   for (unsigned i = 0; i < count; i++)
-      res |= flags[i];
-   return res;
-}
-
 void init_context(isel_context *ctx, nir_shader *shader);
 void cleanup_context(isel_context *ctx);
 
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index ef4d873446b..5d6fbc253d5 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -204,116 +204,6 @@ sanitize_cf_list(nir_function_impl *impl, struct exec_list *cf_list)
    return progress;
 }
 
-void fill_desc_set_info(isel_context *ctx, nir_function_impl *impl)
-{
-   radv_pipeline_layout *pipeline_layout = ctx->options->layout;
-
-   unsigned resource_flag_count = 1; /* +1 to reserve flags[0] for aliased resources */
-   for (unsigned i = 0; i < pipeline_layout->num_sets; i++) {
-      radv_descriptor_set_layout *layout = pipeline_layout->set[i].layout;
-      ctx->resource_flag_offsets[i] = resource_flag_count;
-      resource_flag_count += layout->binding_count;
-   }
-   ctx->buffer_resource_flags = std::vector<uint8_t>(resource_flag_count);
-
-   nir_foreach_variable_with_modes(var, impl->function->shader, nir_var_mem_ssbo) {
-      if (var->data.access & ACCESS_RESTRICT) {
-         uint32_t offset = ctx->resource_flag_offsets[var->data.descriptor_set];
-         ctx->buffer_resource_flags[offset + var->data.binding] |= buffer_is_restrict;
-      }
-   }
-
-   nir_foreach_block(block, impl) {
-      nir_foreach_instr(instr, block) {
-         if (instr->type != nir_instr_type_intrinsic)
-            continue;
-         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-         if (!nir_intrinsic_has_access(intrin))
-            continue;
-
-         nir_ssa_def *res = NULL;
-         unsigned access = nir_intrinsic_access(intrin);
-         unsigned flags = 0;
-         bool glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT | ACCESS_NON_READABLE);
-         switch (intrin->intrinsic) {
-         case nir_intrinsic_load_ssbo: {
-            if (nir_dest_is_divergent(intrin->dest) && (!glc || ctx->program->chip_class >= GFX8))
-               flags |= glc ? has_glc_vmem_load : has_nonglc_vmem_load;
-            res = intrin->src[0].ssa;
-            break;
-         }
-         case nir_intrinsic_ssbo_atomic_add:
-         case nir_intrinsic_ssbo_atomic_imin:
-         case nir_intrinsic_ssbo_atomic_umin:
-         case nir_intrinsic_ssbo_atomic_imax:
-         case nir_intrinsic_ssbo_atomic_umax:
-         case nir_intrinsic_ssbo_atomic_and:
-         case nir_intrinsic_ssbo_atomic_or:
-         case nir_intrinsic_ssbo_atomic_xor:
-         case nir_intrinsic_ssbo_atomic_exchange:
-         case nir_intrinsic_ssbo_atomic_comp_swap:
-            flags |= has_glc_vmem_load | has_glc_vmem_store;
-            res = intrin->src[0].ssa;
-            break;
-         case nir_intrinsic_store_ssbo:
-            flags |= glc ? has_glc_vmem_store : has_nonglc_vmem_store;
-            res = intrin->src[1].ssa;
-            break;
-         case nir_intrinsic_load_global:
-            if (!(access & ACCESS_NON_WRITEABLE))
-               flags |= glc ? has_glc_vmem_load : has_nonglc_vmem_load;
-            break;
-         case nir_intrinsic_store_global:
-            flags |= glc ? has_glc_vmem_store : has_nonglc_vmem_store;
-            break;
-         case nir_intrinsic_global_atomic_add:
-         case nir_intrinsic_global_atomic_imin:
-         case nir_intrinsic_global_atomic_umin:
-         case nir_intrinsic_global_atomic_imax:
-         case nir_intrinsic_global_atomic_umax:
-         case nir_intrinsic_global_atomic_and:
-         case nir_intrinsic_global_atomic_or:
-         case nir_intrinsic_global_atomic_xor:
-         case nir_intrinsic_global_atomic_exchange:
-         case nir_intrinsic_global_atomic_comp_swap:
-            flags |= has_glc_vmem_load | has_glc_vmem_store;
-            break;
-         case nir_intrinsic_image_deref_load:
-         case nir_intrinsic_image_deref_sparse_load:
-            res = intrin->src[0].ssa;
-            flags |= glc ? has_glc_vmem_load : has_nonglc_vmem_load;
-            break;
-         case nir_intrinsic_image_deref_store:
-            res = intrin->src[0].ssa;
-            flags |= (glc || ctx->program->chip_class == GFX6) ? has_glc_vmem_store : has_nonglc_vmem_store;
-            break;
-         case nir_intrinsic_image_deref_atomic_add:
-         case nir_intrinsic_image_deref_atomic_umin:
-         case nir_intrinsic_image_deref_atomic_imin:
-         case nir_intrinsic_image_deref_atomic_umax:
-         case nir_intrinsic_image_deref_atomic_imax:
-         case nir_intrinsic_image_deref_atomic_and:
-         case nir_intrinsic_image_deref_atomic_or:
-         case nir_intrinsic_image_deref_atomic_xor:
-         case nir_intrinsic_image_deref_atomic_exchange:
-         case nir_intrinsic_image_deref_atomic_comp_swap:
-            res = intrin->src[0].ssa;
-            flags |= has_glc_vmem_load | has_glc_vmem_store;
-            break;
-         default:
-            continue;
-         }
-
-         uint8_t *flags_ptr;
-         uint32_t count;
-         get_buffer_resource_flags(ctx, res, access, &flags_ptr, &count);
-
-         for (unsigned i = 0; i < count; i++)
-            flags_ptr[i] |= flags;
-      }
-   }
-}
-
 void apply_nuw_to_ssa(isel_context *ctx, nir_ssa_def *ssa)
 {
    nir_ssa_scalar scalar;
@@ -624,8 +514,6 @@ void init_context(isel_context *ctx, nir_shader *shader)
    nir_divergence_analysis(shader);
    nir_opt_uniform_atomics(shader);
 
-   fill_desc_set_info(ctx, impl);
-
    apply_nuw_to_offsets(ctx, impl);
 
    /* sanitize control flow */
@@ -648,7 +536,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
 
    std::unique_ptr<unsigned[]> nir_to_aco{new unsigned[impl->num_blocks]()};
 
-   /* TODO: make this recursive to improve compile times and merge with fill_desc_set_info() */
+   /* TODO: make this recursive to improve compile times */
    bool done = false;
    while (!done) {
       done = true;