[Mesa-dev] [PATCH 5/6] nir/opt_load_combine: Extend the pass to include image load/store
Eduardo Lima Mitev
elima at igalia.com
Thu Apr 14 16:52:35 UTC 2016
---
src/compiler/nir/nir_opt_load_combine.c | 170 +++++++++++++++++++++++++++++---
1 file changed, 155 insertions(+), 15 deletions(-)
diff --git a/src/compiler/nir/nir_opt_load_combine.c b/src/compiler/nir/nir_opt_load_combine.c
index 3e3d066..8f3d4d6 100644
--- a/src/compiler/nir/nir_opt_load_combine.c
+++ b/src/compiler/nir/nir_opt_load_combine.c
@@ -39,7 +39,8 @@ enum intrinsic_groups {
INTRINSIC_GROUP_NONE = 0,
INTRINSIC_GROUP_ALL,
INTRINSIC_GROUP_SSBO,
- INTRINSIC_GROUP_SHARED
+ INTRINSIC_GROUP_SHARED,
+ INTRINSIC_GROUP_IMAGE
};
struct cache_node {
@@ -127,23 +128,64 @@ is_memory_barrier_shared(nir_intrinsic_instr *intrinsic)
return intrinsic->intrinsic == nir_intrinsic_memory_barrier_shared;
}
+/* Image load/store */
+static bool
+is_atomic_image(nir_intrinsic_instr *intrinsic)
+{
+ switch (intrinsic->intrinsic) {
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ return true;
+ default:
+ return false;
+ }
+}
+
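+/* Image atomics both read and modify the image, so for the purposes of
+ * this pass they count as stores as well.
+ */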
+static bool
+is_store_image(nir_intrinsic_instr *intrinsic)
+{
+ return intrinsic->intrinsic == nir_intrinsic_image_store ||
+ is_atomic_image(intrinsic);
+}
+
+static bool
+is_load_image(nir_intrinsic_instr *intrinsic)
+{
+ return intrinsic->intrinsic == nir_intrinsic_image_load;
+}
+
+static inline bool
+is_memory_barrier_image(nir_intrinsic_instr *intrinsic)
+{
+ return intrinsic->intrinsic == nir_intrinsic_memory_barrier_image;
+}
+
/* General intrinsic classification functions */
static inline bool
is_store(nir_intrinsic_instr *intrinsic)
{
- return is_store_ssbo(intrinsic) || is_store_shared(intrinsic);
+ return is_store_ssbo(intrinsic) || is_store_shared(intrinsic) ||
+ is_store_image(intrinsic);
}
static inline bool
is_load(nir_intrinsic_instr *intrinsic)
{
- return is_load_ssbo(intrinsic) || is_load_shared(intrinsic);
+ return is_load_ssbo(intrinsic) || is_load_shared(intrinsic) ||
+ is_load_image(intrinsic);
}
static inline bool
is_atomic(nir_intrinsic_instr *intrinsic)
{
- return is_atomic_ssbo(intrinsic) || is_atomic_shared(intrinsic);
+   return is_atomic_ssbo(intrinsic) || is_atomic_shared(intrinsic) ||
+          is_atomic_image(intrinsic);
}
static inline bool
@@ -151,7 +193,8 @@ is_memory_barrier(nir_intrinsic_instr *intrinsic)
{
return intrinsic->intrinsic == nir_intrinsic_memory_barrier ||
is_memory_barrier_buffer(intrinsic) ||
- is_memory_barrier_shared(intrinsic);
+ is_memory_barrier_shared(intrinsic) ||
+ is_memory_barrier_image(intrinsic);
}
static unsigned
@@ -165,6 +208,9 @@ intrinsic_group(nir_intrinsic_instr *intrinsic)
else if (is_load_shared(intrinsic) || is_store_shared(intrinsic) ||
is_memory_barrier_shared(intrinsic))
return INTRINSIC_GROUP_SHARED;
+ else if (is_load_image(intrinsic) || is_store_image(intrinsic) ||
+ is_memory_barrier_image(intrinsic))
+ return INTRINSIC_GROUP_IMAGE;
else
return INTRINSIC_GROUP_NONE;
}
@@ -217,6 +263,17 @@ nir_src_is_direct(nir_src *src)
}
/**
+ * Returns true if a nir_src represents an undefined SSA register, false
+ * otherwise.
+ */
+static inline bool
+nir_src_is_undefined(nir_src src)
+{
+ return src.is_ssa &&
+ src.ssa->parent_instr->type == nir_instr_type_ssa_undef;
+}
+
+/**
* Gets the block and offset of a load/store instruction.
*
* @instr: the intrinsic load/store operation
@@ -301,6 +358,61 @@ get_load_store_address(nir_intrinsic_instr *instr,
}
/**
+ * Returns true if the coordinate sources of two image intrinsics match.
+ * This means that both sources are defined by ALU vec4 ops whose four
+ * sources are pairwise equivalent (two undefined sources count as
+ * equivalent). For example, consider the following snippet:
+ *
+ * vec1 ssa_1 = undefined
+ * vec4 ssa_2 = vec4 ssa_0, ssa_0, ssa_1, ssa_1
+ * vec4 ssa_3 = intrinsic image_load (ssa_2, ssa_1) (itex) ()
+ * ...
+ * vec1 ssa_6 = undefined
+ * vec4 ssa_7 = vec4 ssa_0, ssa_0, ssa_1, ssa_1
+ * vec4 ssa_8 = intrinsic image_load (ssa_7, ssa_6) (itex) ()
+ *
+ * Here, ssa_2 and ssa_7 are the coordinates into the image. They are two
+ * different SSA definitions, so comparing them directly won't work. This
+ * function detects that case and checks that ssa_2 and ssa_7 are indeed
+ * "equivalent", so the pass can tell that the two image_load instructions
+ * are a match.
+ */
+static bool
+coordinates_match(nir_src *coord1, nir_src *coord2)
+{
+ assert(coord1->is_ssa);
+ assert(coord2->is_ssa);
+
+ nir_ssa_def *ssa1 = coord1->ssa;
+ nir_ssa_def *ssa2 = coord2->ssa;
+
+ nir_instr *parent1 = ssa1->parent_instr;
+ nir_instr *parent2 = ssa2->parent_instr;
+
+ /* @FIXME: currently, all coordinates into an image load/store
+ * instruction are given by an ALU vec4 instruction. This may change in
+ * the future, in which case we should detect it here.
+ */
+ assert(parent1->type == nir_instr_type_alu);
+ assert(parent2->type == nir_instr_type_alu);
+
+ nir_alu_instr *alu1 = nir_instr_as_alu(parent1);
+ nir_alu_instr *alu2 = nir_instr_as_alu(parent2);
+
+ assert(alu1->op == nir_op_vec4);
+ assert(alu2->op == nir_op_vec4);
+
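+   /* Each pair of sources matches if both are undefined (even when they
+    * come from different ssa_undef instructions) or if they compare equal.
+    */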
+ for (unsigned i = 0; i < 4; i++) {
+ if (! ((nir_src_is_undefined(alu1->src[i].src) &&
+ nir_src_is_undefined(alu2->src[i].src)) ||
+ nir_srcs_equal(alu1->src[i].src, alu2->src[i].src))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/**
* Determines whether two instrinsic instructions conflict with each other,
* meaning that a) they access the same memory area, or b) a non-conflict
* cannot be determined (because at least one access is indirect).
@@ -334,6 +446,25 @@ detect_memory_access_conflict(nir_intrinsic_instr *instr1,
if (!intrinsic_group_match(instr1, instr2))
return false;
+   /* Since image load/store accesses are always indirect, we must always
+    * report a conflict, because a non-conflict cannot be determined.
+    * However, we still want to know whether the image variables and
+    * coordinates match, so we can report back a full match.
+ */
+ if (intrinsic_group(instr1) == INTRINSIC_GROUP_IMAGE) {
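+      /* full_match is an optional out-parameter; only compute it when the
+       * caller asked for it.
+       */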
+ if (full_match) {
+ assert(instr1->variables[0]);
+ assert(instr2->variables[0]);
+
+ if (instr1->variables[0]->var == instr2->variables[0]->var &&
+ coordinates_match(&instr1->src[0], &instr2->src[0])) {
+ *full_match = true;
+ }
+ }
+
+ return true;
+ }
+
get_load_store_address(instr1, &instr1_block, &instr1_offset, &instr1_base);
get_load_store_address(instr2, &instr2_block, &instr2_offset, &instr2_base);
@@ -527,21 +658,30 @@ rewrite_load_with_store(struct cache_node *cache,
if (!blocks_and_offsets_match)
continue;
- /* The store must write to all the channels we are loading */
- unsigned store_writemask = get_store_writemask(store);
- bool writes_all_channels = true;
- for (int i = 0; i < load->num_components; i++) {
- if (!((1 << i) & store_writemask)) {
- writes_all_channels = false;
- break;
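+      /* Image stores do not have a write mask, so the per-channel check
+       * below only applies to SSBO and shared-variable stores.
+       */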
+ if (intrinsic_group(store) != INTRINSIC_GROUP_IMAGE) {
+ /* The store must write to all the channels we are loading */
+ unsigned store_writemask = get_store_writemask(store);
+ bool writes_all_channels = true;
+ for (int i = 0; i < load->num_components; i++) {
+ if (!((1 << i) & store_writemask)) {
+ writes_all_channels = false;
+ break;
+ }
}
+ if (!writes_all_channels)
+ continue;
}
- if (!writes_all_channels)
- continue;
/* rewrite the new load with the cached store instruction */
nir_ssa_def *def = &load->dest.ssa;
- nir_ssa_def *new_def = store->src[0].ssa;
+ nir_ssa_def *new_def;
+   /* While the value source for SSBO and shared-variable stores is src[0],
+    * image stores take the value to write in src[2].
+ */
+ if (intrinsic_group(store) != INTRINSIC_GROUP_IMAGE)
+ new_def = store->src[0].ssa;
+ else
+ new_def = store->src[2].ssa;
nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));
return true;
--
2.7.0