[Mesa-dev] [PATCH 4/6] nir/opt_load_combine: Extend the pass to include shared variables

Eduardo Lima Mitev elima at igalia.com
Thu Apr 14 16:52:34 UTC 2016


---
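[Editor's note, not part of the commit: below is a hedged, standalone C
sketch of the conflict rule this patch extends to shared variables. A
shared-variable access is addressed by a compile-time 'base'
(const_index[0]) plus an 'offset' source that may be direct or indirect,
and two accesses are only combinable when both parts provably match. The
struct and function names here are illustrative stand-ins, not NIR API,
and the disjointness test is simplified relative to the pass itself.]

   #include <stdbool.h>
   #include <stdio.h>

   struct shared_access {
      unsigned base;      /* models const_index[0] */
      bool offset_direct; /* offset is defined by a load_const */
      unsigned offset_id; /* constant value if direct; otherwise an
                             SSA-def id standing in for nir_srcs_equal() */
   };

   static bool
   conflicts(const struct shared_access *a, const struct shared_access *b,
             bool *full_match)
   {
      *full_match = false;

      /* Mixed direct/indirect addressing: assume the worst. */
      if (a->offset_direct != b->offset_direct)
         return true;

      /* Identical base and offset: a full match, so a later load can
       * reuse the earlier value. */
      if (a->base == b->base && a->offset_id == b->offset_id) {
         *full_match = true;
         return true;
      }

      /* Distinct constant addresses are provably disjoint. */
      if (a->offset_direct &&
          a->base + a->offset_id != b->base + b->offset_id)
         return false;

      /* Otherwise we cannot prove disjointness: conservative conflict. */
      return true;
   }

   int main(void)
   {
      struct shared_access s1 = { 0, true, 16 };
      struct shared_access s2 = { 0, true, 16 };
      bool full;
      bool conflict = conflicts(&s1, &s2, &full);
      printf("conflict=%d full_match=%d\n", conflict, full);
      return 0;
   }
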
 src/compiler/nir/nir_opt_load_combine.c | 155 ++++++++++++++++++++++++++------
 1 file changed, 129 insertions(+), 26 deletions(-)

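[Likewise illustrative: get_store_writemask() in the patch returns the
channels a store writes, namely const_index[1] for store_shared and
WRITEMASK_X for the shared atomics, which read-modify-write a single
32-bit component. A cache keyed on addresses only needs a store to
invalidate the channels it actually touches; a minimal sketch of that
overlap test follows. The helper name is made up, not from the pass.]

   #include <stdbool.h>
   #include <stdio.h>

   #define WRITEMASK_X 0x1 /* one bit per component: x=1, y=2, z=4, w=8 */

   /* A store can serve a later load at the same address only if it
    * writes a superset of the channels the load reads; otherwise it
    * merely invalidates the overlapping channels. */
   static bool
   store_covers_load(unsigned store_mask, unsigned load_mask)
   {
      return (store_mask & load_mask) == load_mask;
   }

   int main(void)
   {
      unsigned atomic_mask = WRITEMASK_X; /* as for the shared atomics */
      printf("covers .x load: %d\n", store_covers_load(atomic_mask, 0x1));
      printf("covers .xy load: %d\n", store_covers_load(atomic_mask, 0x3));
      return 0;
   }
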
diff --git a/src/compiler/nir/nir_opt_load_combine.c b/src/compiler/nir/nir_opt_load_combine.c
index 09a34c6..3e3d066 100644
--- a/src/compiler/nir/nir_opt_load_combine.c
+++ b/src/compiler/nir/nir_opt_load_combine.c
@@ -38,7 +38,8 @@
 enum intrinsic_groups {
    INTRINSIC_GROUP_NONE = 0,
    INTRINSIC_GROUP_ALL,
-   INTRINSIC_GROUP_SSBO
+   INTRINSIC_GROUP_SSBO,
+   INTRINSIC_GROUP_SHARED
 };
 
 struct cache_node {
@@ -86,33 +87,71 @@ is_memory_barrier_buffer(nir_intrinsic_instr *intrinsic)
    return intrinsic->intrinsic == nir_intrinsic_memory_barrier_buffer;
 }
 
-/*
- * General load/store functions: we'll add more groups to this as needed.
- * For now we only support SSBOs.
- */
+/* Shared variable load/store */
+static bool
+is_atomic_shared(nir_intrinsic_instr *intrinsic)
+{
+   switch (intrinsic->intrinsic) {
+   case nir_intrinsic_shared_atomic_add:
+   case nir_intrinsic_shared_atomic_imin:
+   case nir_intrinsic_shared_atomic_umin:
+   case nir_intrinsic_shared_atomic_imax:
+   case nir_intrinsic_shared_atomic_umax:
+   case nir_intrinsic_shared_atomic_and:
+   case nir_intrinsic_shared_atomic_or:
+   case nir_intrinsic_shared_atomic_xor:
+   case nir_intrinsic_shared_atomic_exchange:
+   case nir_intrinsic_shared_atomic_comp_swap:
+      return true;
+   default:
+      return false;
+   }
+}
+
+static inline bool
+is_store_shared(nir_intrinsic_instr *intrinsic)
+{
+   return intrinsic->intrinsic == nir_intrinsic_store_shared ||
+      is_atomic_shared(intrinsic);
+}
+
+static inline bool
+is_load_shared(nir_intrinsic_instr *intrinsic)
+{
+   return intrinsic->intrinsic == nir_intrinsic_load_shared;
+}
+
+static inline bool
+is_memory_barrier_shared(nir_intrinsic_instr *intrinsic)
+{
+   return intrinsic->intrinsic == nir_intrinsic_memory_barrier_shared;
+}
+
+/* General intrinsic classification functions */
 static inline bool
 is_store(nir_intrinsic_instr *intrinsic)
 {
-   return is_store_ssbo(intrinsic);
+   return is_store_ssbo(intrinsic) || is_store_shared(intrinsic);
 }
 
 static inline bool
 is_load(nir_intrinsic_instr *intrinsic)
 {
-   return is_load_ssbo(intrinsic);
+   return is_load_ssbo(intrinsic) || is_load_shared(intrinsic);
 }
 
 static inline bool
 is_atomic(nir_intrinsic_instr *intrinsic)
 {
-   return is_atomic_ssbo(intrinsic);
+   return is_atomic_ssbo(intrinsic) || is_atomic_shared(intrinsic);
 }
 
 static inline bool
 is_memory_barrier(nir_intrinsic_instr *intrinsic)
 {
    return intrinsic->intrinsic == nir_intrinsic_memory_barrier ||
-      is_memory_barrier_buffer(intrinsic);
+      is_memory_barrier_buffer(intrinsic) ||
+      is_memory_barrier_shared(intrinsic);
 }
 
 static unsigned
@@ -123,6 +162,9 @@ intrinsic_group(nir_intrinsic_instr *intrinsic)
    else if (is_load_ssbo(intrinsic) || is_store_ssbo(intrinsic) ||
             is_memory_barrier_buffer(intrinsic))
       return INTRINSIC_GROUP_SSBO;
+   else if (is_load_shared(intrinsic) || is_store_shared(intrinsic) ||
+            is_memory_barrier_shared(intrinsic))
+      return INTRINSIC_GROUP_SHARED;
    else
       return INTRINSIC_GROUP_NONE;
 }
@@ -178,18 +220,21 @@ nir_src_is_direct(nir_src *src)
  * Gets the block and offset of a load/store instruction.
  *
  * @instr: the intrinsic load/store operation
- * @block: the output block
+ * @block: the output block; may be NULL if @base is non-NULL (shared vars)
  * @offset: the output offset
+ * @base: the output base; may be NULL if @block is non-NULL (SSBOs)
  */
 static void
 get_load_store_address(nir_intrinsic_instr *instr,
                        nir_src **block,
-                       nir_src **offset)
+                       nir_src **offset,
+                       unsigned *base)
 {
    int block_index = -1;
    int offset_index = -1;
+   int base_index = -1;
 
-   assert(block && offset);
+   assert((block || base) && offset);
 
    switch (instr->intrinsic) {
       /* SSBO */
@@ -213,14 +258,46 @@ get_load_store_address(nir_intrinsic_instr *instr,
       offset_index = 1;
       break;
 
+      /* Shared-variable memory accesses are defined by a direct
+       * value 'base' (const_index[0]) and an indirect SSA value 'offset'.
+       */
+   case nir_intrinsic_load_shared:
+      base_index = 0;
+      offset_index = 0;
+      break;
+
+   case nir_intrinsic_store_shared:
+      base_index = 0;
+      offset_index = 1;
+      break;
+
+   case nir_intrinsic_shared_atomic_add:
+   case nir_intrinsic_shared_atomic_imin:
+   case nir_intrinsic_shared_atomic_umin:
+   case nir_intrinsic_shared_atomic_imax:
+   case nir_intrinsic_shared_atomic_umax:
+   case nir_intrinsic_shared_atomic_and:
+   case nir_intrinsic_shared_atomic_or:
+   case nir_intrinsic_shared_atomic_xor:
+   case nir_intrinsic_shared_atomic_exchange:
+   case nir_intrinsic_shared_atomic_comp_swap:
+      base_index = 0;
+      offset_index = 0;
+      break;
+
    default:
       assert(!"not implemented");
    }
 
-   assert(block_index >= 0 && offset_index >= 0);
+   assert((block_index >= 0 || base_index >= 0) && offset_index >= 0);
+
+   if (block && block_index >= 0)
+      *block = &instr->src[block_index];
 
-   *block = &instr->src[block_index];
    *offset = &instr->src[offset_index];
+
+   if (base && base_index >= 0)
+      *base = instr->const_index[base_index];
 }
 
 /**
@@ -242,10 +319,13 @@ detect_memory_access_conflict(nir_intrinsic_instr *instr1,
 {
    nir_src *instr1_block = NULL;
    nir_src *instr1_offset = NULL;
+   unsigned instr1_base = 0;
    nir_src *instr2_block = NULL;
    nir_src *instr2_offset = NULL;
+   unsigned instr2_base = 0;
    bool blocks_match = false;
    bool offsets_match = false;
+   bool bases_match = false;
 
    if (full_match)
       *full_match = false;
@@ -254,21 +334,30 @@ detect_memory_access_conflict(nir_intrinsic_instr *instr1,
    if (!intrinsic_group_match(instr1, instr2))
       return false;
 
-   get_load_store_address(instr1, &instr1_block, &instr1_offset);
-   get_load_store_address(instr2, &instr2_block, &instr2_offset);
+   get_load_store_address(instr1, &instr1_block, &instr1_offset, &instr1_base);
+   get_load_store_address(instr2, &instr2_block, &instr2_offset, &instr2_base);
 
-   /* There is conflict if the blocks and the offsets of each instruction
-    * are not both direct or both indirect. If that's not the case, then
-    * there is conflict if the blocks and offsets match.
+   /* There is a conflict if the blocks (or bases) and the offsets of the
+    * two instructions are not both direct or both indirect. Otherwise,
+    * there is a conflict if the blocks (or bases) and offsets all
+    * match.
     */
 
-   /* For SSBOs the block is an SSA value, but it can still be direct,
-    * if defined by a load_const instruction.
-    */
-   if (nir_src_is_direct(instr1_block) != nir_src_is_direct(instr2_block))
-      return true;
+   /* Only SSBOs have a block source; it is NULL for shared variables. */
+   if (instr1_block) {
+      if (!instr2_block)
+         return true;
 
-   blocks_match = nir_srcs_equal(*instr1_block, *instr2_block);
+      /* For SSBOs the block is an SSA value, but it can still be direct,
+       * if defined by a load_const instruction.
+       */
+      if (nir_src_is_direct(instr1_block) != nir_src_is_direct(instr2_block))
+         return true;
+
+      blocks_match = nir_srcs_equal(*instr1_block, *instr2_block);
+   } else {
+      bases_match = instr1_base == instr2_base;
+   }
 
    /* For SSBOs, the offset is an SSA value, but it can still be direct,
     * if defined by a load_const instruction.
@@ -279,7 +368,7 @@ detect_memory_access_conflict(nir_intrinsic_instr *instr1,
    offsets_match = nir_srcs_equal(*instr1_offset, *instr2_offset);
 
-   /* finally, if both blocks and offsets match, it means conflict */
+   /* Finally, matching blocks (or bases) and offsets mean a conflict. */
-   if (offsets_match && blocks_match) {
+   if (offsets_match && (blocks_match || bases_match)) {
       if (full_match)
          *full_match = true;
 
@@ -296,6 +385,9 @@ get_store_writemask(nir_intrinsic_instr *instr)
    case nir_intrinsic_store_ssbo:
       return instr->const_index[0];
 
+   case nir_intrinsic_store_shared:
+      return instr->const_index[1];
+
    case nir_intrinsic_ssbo_atomic_add:
    case nir_intrinsic_ssbo_atomic_imin:
    case nir_intrinsic_ssbo_atomic_umin:
@@ -306,6 +398,17 @@ get_store_writemask(nir_intrinsic_instr *instr)
    case nir_intrinsic_ssbo_atomic_xor:
    case nir_intrinsic_ssbo_atomic_exchange:
    case nir_intrinsic_ssbo_atomic_comp_swap:
+      /* and likewise for the shared-variable atomics: */
+   case nir_intrinsic_shared_atomic_add:
+   case nir_intrinsic_shared_atomic_imin:
+   case nir_intrinsic_shared_atomic_umin:
+   case nir_intrinsic_shared_atomic_imax:
+   case nir_intrinsic_shared_atomic_umax:
+   case nir_intrinsic_shared_atomic_and:
+   case nir_intrinsic_shared_atomic_or:
+   case nir_intrinsic_shared_atomic_xor:
+   case nir_intrinsic_shared_atomic_exchange:
+   case nir_intrinsic_shared_atomic_comp_swap:
       return WRITEMASK_X;
 
    default:
-- 
2.7.0


