[Mesa-dev] [PATCH 5/6] nir/opt_load_combine: Extend the pass to include image load/store
Eduardo Lima Mitev
elima at igalia.com
Thu Apr 14 16:52:35 UTC 2016
---
src/compiler/nir/nir_opt_load_combine.c | 170 +++++++++++++++++++++++++++++---
1 file changed, 155 insertions(+), 15 deletions(-)
diff --git a/src/compiler/nir/nir_opt_load_combine.c b/src/compiler/nir/nir_opt_load_combine.c
index 3e3d066..8f3d4d6 100644
--- a/src/compiler/nir/nir_opt_load_combine.c
+++ b/src/compiler/nir/nir_opt_load_combine.c
@@ -39,7 +39,8 @@ enum intrinsic_groups {
INTRINSIC_GROUP_NONE = 0,
INTRINSIC_GROUP_ALL,
INTRINSIC_GROUP_SSBO,
- INTRINSIC_GROUP_SHARED
+ INTRINSIC_GROUP_SHARED,
+ INTRINSIC_GROUP_IMAGE
};
struct cache_node {
@@ -127,23 +128,64 @@ is_memory_barrier_shared(nir_intrinsic_instr *intrinsic)
return intrinsic->intrinsic == nir_intrinsic_memory_barrier_shared;
}
+/* Image load/store */
+static bool
+is_atomic_image(nir_intrinsic_instr *intrinsic)
+{
+ switch (intrinsic->intrinsic) {
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ return true;
+ default:
+ return false;
+ }
+}
+
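+/* Image atomics both read and modify the image, so for the purposes of
+ * this pass they count as stores as well.
+ */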
+static bool
+is_store_image(nir_intrinsic_instr *intrinsic)
+{
+ return intrinsic->intrinsic == nir_intrinsic_image_store ||
+ is_atomic_image(intrinsic);
+}
+
+static bool
+is_load_image(nir_intrinsic_instr *intrinsic)
+{
+ return intrinsic->intrinsic == nir_intrinsic_image_load;
+}
+
+static inline bool
+is_memory_barrier_image(nir_intrinsic_instr *intrinsic)
+{
+ return intrinsic->intrinsic == nir_intrinsic_memory_barrier_image;
+}
+
/* General intrinsic classification functions */
static inline bool
is_store(nir_intrinsic_instr *intrinsic)
{
- return is_store_ssbo(intrinsic) || is_store_shared(intrinsic);
+ return is_store_ssbo(intrinsic) || is_store_shared(intrinsic) ||
+ is_store_image(intrinsic);
}
static inline bool
is_load(nir_intrinsic_instr *intrinsic)
{
- return is_load_ssbo(intrinsic) || is_load_shared(intrinsic);
+ return is_load_ssbo(intrinsic) || is_load_shared(intrinsic) ||
+ is_load_image(intrinsic);
}
static inline bool
is_atomic(nir_intrinsic_instr *intrinsic)
{
- return is_atomic_ssbo(intrinsic) || is_atomic_shared(intrinsic);
+   return is_atomic_ssbo(intrinsic) || is_atomic_shared(intrinsic) ||
+          is_atomic_image(intrinsic);
}
static inline bool
@@ -151,7 +193,8 @@ is_memory_barrier(nir_intrinsic_instr *intrinsic)
{
return intrinsic->intrinsic == nir_intrinsic_memory_barrier ||
is_memory_barrier_buffer(intrinsic) ||
- is_memory_barrier_shared(intrinsic);
+ is_memory_barrier_shared(intrinsic) ||
+ is_memory_barrier_image(intrinsic);
}
static unsigned
@@ -165,6 +208,9 @@ intrinsic_group(nir_intrinsic_instr *intrinsic)
else if (is_load_shared(intrinsic) || is_store_shared(intrinsic) ||
is_memory_barrier_shared(intrinsic))
return INTRINSIC_GROUP_SHARED;
+ else if (is_load_image(intrinsic) || is_store_image(intrinsic) ||
+ is_memory_barrier_image(intrinsic))
+ return INTRINSIC_GROUP_IMAGE;
else
return INTRINSIC_GROUP_NONE;
}
@@ -217,6 +263,17 @@ nir_src_is_direct(nir_src *src)
}
/**
+ * Returns true if a nir_src represents an undefined SSA register, false
+ * otherwise.
+ */
+static inline bool
+nir_src_is_undefined(nir_src src)
+{
+ return src.is_ssa &&
+ src.ssa->parent_instr->type == nir_instr_type_ssa_undef;
+}
+
+/**
* Gets the block and offset of a load/store instruction.
*
* @instr: the intrinsic load/store operation
@@ -301,6 +358,61 @@ get_load_store_address(nir_intrinsic_instr *instr,
}
/**
+ * Returns true if the coordinate sources of two image intrinsics match.
+ * This means that both sources are defined by ALU vec4 ops whose four
+ * sources are pairwise equivalent (two undefined sources count as
+ * equivalent). For example, consider the following snippet:
+ *
+ * vec1 ssa_1 = undefined
+ * vec4 ssa_2 = vec4 ssa_0, ssa_0, ssa_1, ssa_1
+ * vec4 ssa_3 = intrinsic image_load (ssa_2, ssa_1) (itex) ()
+ * ...
+ * vec1 ssa_6 = undefined
+ * vec4 ssa_7 = vec4 ssa_0, ssa_0, ssa_1, ssa_1
+ * vec4 ssa_8 = intrinsic image_load (ssa_7, ssa_6) (itex) ()
+ *
+ * Here, ssa_2 and ssa_7 are the coordinates into the image. They are two
+ * different SSA definitions, so comparing them directly won't work. This
+ * function detects that case and checks that ssa_2 and ssa_7 are indeed
+ * "equivalent", so the pass can tell that the two image_load instructions
+ * are a match.
+ */
+static bool
+coordinates_match(nir_src *coord1, nir_src *coord2)
+{
+ assert(coord1->is_ssa);
+ assert(coord2->is_ssa);
+
+ nir_ssa_def *ssa1 = coord1->ssa;
+ nir_ssa_def *ssa2 = coord2->ssa;
+
+ nir_instr *parent1 = ssa1->parent_instr;
+ nir_instr *parent2 = ssa2->parent_instr;
+
+ /* @FIXME: currently, all coordinates into an image load/store
+ * instruction are given by an ALU vec4 instruction. This may change in
+ * the future, in which case we should detect it here.
+ */
+ assert(parent1->type == nir_instr_type_alu);
+ assert(parent2->type == nir_instr_type_alu);
+
+ nir_alu_instr *alu1 = nir_instr_as_alu(parent1);
+ nir_alu_instr *alu2 = nir_instr_as_alu(parent2);
+
+ assert(alu1->op == nir_op_vec4);
+ assert(alu2->op == nir_op_vec4);
+
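+   /* Each pair of sources matches if both are undefined (even when they
+    * come from different ssa_undef instructions) or if they compare equal.
+    */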
+ for (unsigned i = 0; i < 4; i++) {
+ if (! ((nir_src_is_undefined(alu1->src[i].src) &&
+ nir_src_is_undefined(alu2->src[i].src)) ||
+ nir_srcs_equal(alu1->src[i].src, alu2->src[i].src))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/**
* Determines whether two instrinsic instructions conflict with each other,
* meaning that a) they access the same memory area, or b) a non-conflict
* cannot be determined (because at least one access is indirect).
@@ -334,6 +446,25 @@ detect_memory_access_conflict(nir_intrinsic_instr *instr1,
if (!intrinsic_group_match(instr1, instr2))
return false;
+   /* Since image load/store accesses are always indirect, we must always
+    * report a conflict, because a non-conflict cannot be determined.
+    * However, we still want to know whether the image variables and
+    * coordinates match, so we can report back a full match.
+ */
+ if (intrinsic_group(instr1) == INTRINSIC_GROUP_IMAGE) {
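+      /* full_match is an optional out-parameter; only compute it when the
+       * caller asked for it.
+       */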
+ if (full_match) {
+ assert(instr1->variables[0]);
+ assert(instr2->variables[0]);
+
+ if (instr1->variables[0]->var == instr2->variables[0]->var &&
+ coordinates_match(&instr1->src[0], &instr2->src[0])) {
+ *full_match = true;
+ }
+ }
+
+ return true;
+ }
+
get_load_store_address(instr1, &instr1_block, &instr1_offset, &instr1_base);
get_load_store_address(instr2, &instr2_block, &instr2_offset, &instr2_base);
@@ -527,21 +658,30 @@ rewrite_load_with_store(struct cache_node *cache,
if (!blocks_and_offsets_match)
continue;
- /* The store must write to all the channels we are loading */
- unsigned store_writemask = get_store_writemask(store);
- bool writes_all_channels = true;
- for (int i = 0; i < load->num_components; i++) {
- if (!((1 << i) & store_writemask)) {
- writes_all_channels = false;
- break;
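+      /* Image stores do not have a write mask, so the per-channel check
+       * below only applies to SSBO and shared-variable stores.
+       */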
+ if (intrinsic_group(store) != INTRINSIC_GROUP_IMAGE) {
+ /* The store must write to all the channels we are loading */
+ unsigned store_writemask = get_store_writemask(store);
+ bool writes_all_channels = true;
+ for (int i = 0; i < load->num_components; i++) {
+ if (!((1 << i) & store_writemask)) {
+ writes_all_channels = false;
+ break;
+ }
}
+ if (!writes_all_channels)
+ continue;
}
- if (!writes_all_channels)
- continue;
/* rewrite the new load with the cached store instruction */
nir_ssa_def *def = &load->dest.ssa;
- nir_ssa_def *new_def = store->src[0].ssa;
+ nir_ssa_def *new_def;
+   /* While the value source for SSBO and shared-variable stores is src[0],
+    * image stores take the value to write in src[2].
+ */
+ if (intrinsic_group(store) != INTRINSIC_GROUP_IMAGE)
+ new_def = store->src[0].ssa;
+ else
+ new_def = store->src[2].ssa;
nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));
return true;
--
2.7.0