[Mesa-dev] [PATCH 4/6] nir/opt_load_combine: Extend the pass to include shared variables
Eduardo Lima Mitev
elima at igalia.com
Thu Apr 14 16:52:34 UTC 2016
---
src/compiler/nir/nir_opt_load_combine.c | 155 ++++++++++++++++++++++++++------
1 file changed, 129 insertions(+), 26 deletions(-)
diff --git a/src/compiler/nir/nir_opt_load_combine.c b/src/compiler/nir/nir_opt_load_combine.c
index 09a34c6..3e3d066 100644
--- a/src/compiler/nir/nir_opt_load_combine.c
+++ b/src/compiler/nir/nir_opt_load_combine.c
@@ -38,7 +38,8 @@
enum intrinsic_groups {
INTRINSIC_GROUP_NONE = 0,
INTRINSIC_GROUP_ALL,
- INTRINSIC_GROUP_SSBO
+ INTRINSIC_GROUP_SSBO,
+ INTRINSIC_GROUP_SHARED
};
struct cache_node {
@@ -86,33 +87,71 @@ is_memory_barrier_buffer(nir_intrinsic_instr *intrinsic)
return intrinsic->intrinsic == nir_intrinsic_memory_barrier_buffer;
}
-/*
- * General load/store functions: we'll add more groups to this as needed.
- * For now we only support SSBOs.
- */
+/* Shared variable load/store */
+static bool
+is_atomic_shared(nir_intrinsic_instr *intrinsic)
+{
+ switch (intrinsic->intrinsic) {
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool
+is_store_shared(nir_intrinsic_instr *intrinsic)
+{
+ return intrinsic->intrinsic == nir_intrinsic_store_shared ||
+ is_atomic_shared(intrinsic);
+}
+
+static inline bool
+is_load_shared(nir_intrinsic_instr *intrinsic)
+{
+ return intrinsic->intrinsic == nir_intrinsic_load_shared;
+}
+
+static inline bool
+is_memory_barrier_shared(nir_intrinsic_instr *intrinsic)
+{
+ return intrinsic->intrinsic == nir_intrinsic_memory_barrier_shared;
+}
+
+/* General intrinsic classification functions */
static inline bool
is_store(nir_intrinsic_instr *intrinsic)
{
- return is_store_ssbo(intrinsic);
+ return is_store_ssbo(intrinsic) || is_store_shared(intrinsic);
}
static inline bool
is_load(nir_intrinsic_instr *intrinsic)
{
- return is_load_ssbo(intrinsic);
+ return is_load_ssbo(intrinsic) || is_load_shared(intrinsic);
}
static inline bool
is_atomic(nir_intrinsic_instr *intrinsic)
{
- return is_atomic_ssbo(intrinsic);
+ return is_atomic_ssbo(intrinsic) || is_atomic_shared(intrinsic);
}
static inline bool
is_memory_barrier(nir_intrinsic_instr *intrinsic)
{
return intrinsic->intrinsic == nir_intrinsic_memory_barrier ||
- is_memory_barrier_buffer(intrinsic);
+ is_memory_barrier_buffer(intrinsic) ||
+ is_memory_barrier_shared(intrinsic);
}
static unsigned
@@ -123,6 +162,9 @@ intrinsic_group(nir_intrinsic_instr *intrinsic)
else if (is_load_ssbo(intrinsic) || is_store_ssbo(intrinsic) ||
is_memory_barrier_buffer(intrinsic))
return INTRINSIC_GROUP_SSBO;
+ else if (is_load_shared(intrinsic) || is_store_shared(intrinsic) ||
+ is_memory_barrier_shared(intrinsic))
+ return INTRINSIC_GROUP_SHARED;
else
return INTRINSIC_GROUP_NONE;
}
@@ -178,18 +220,21 @@ nir_src_is_direct(nir_src *src)
* Gets the block and offset of a load/store instruction.
*
* @instr: the intrinsic load/store operation
- * @block: the output block
+ * @block: the output block, can be NULL if @base is non-NULL (shared-vars)
* @offset: the output offset
+ * @base: the output base, can be NULL if @block is non-NULL (SSBO)
*/
static void
get_load_store_address(nir_intrinsic_instr *instr,
nir_src **block,
- nir_src **offset)
+ nir_src **offset,
+ unsigned *base)
{
int block_index = -1;
int offset_index = -1;
+ int base_index = -1;
- assert(block && offset);
+ assert((block || base) && offset);
switch (instr->intrinsic) {
/* SSBO */
@@ -213,14 +258,46 @@ get_load_store_address(nir_intrinsic_instr *instr,
offset_index = 1;
break;
+ /* Shared-variable memory access is defined by a direct
+ * value 'base' (const_index[0]) and an indirect SSA value 'offset'.
+ */
+ case nir_intrinsic_load_shared:
+ base_index = 0;
+ offset_index = 0;
+ break;
+
+ case nir_intrinsic_store_shared:
+ base_index = 0;
+ offset_index = 1;
+ break;
+
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap:
+ base_index = 0;
+ offset_index = 0;
+ break;
+
default:
assert(!"not implemented");
}
- assert(block_index >= 0 && offset_index >= 0);
+ assert((block_index >= 0 || base_index >= 0) && offset_index >= 0);
+
+ if (block && block_index >= 0)
+ *block = &instr->src[block_index];
- *block = &instr->src[block_index];
*offset = &instr->src[offset_index];
+
+ if (base && base_index >= 0)
+ *base = instr->const_index[base_index];
}
/**
@@ -242,10 +319,13 @@ detect_memory_access_conflict(nir_intrinsic_instr *instr1,
{
nir_src *instr1_block = NULL;
nir_src *instr1_offset = NULL;
+ unsigned instr1_base = 0;
nir_src *instr2_block = NULL;
nir_src *instr2_offset = NULL;
+ unsigned instr2_base = 0;
bool blocks_match = false;
bool offsets_match = false;
+ bool bases_match = false;
if (full_match)
*full_match = false;
@@ -254,21 +334,30 @@ detect_memory_access_conflict(nir_intrinsic_instr *instr1,
if (!intrinsic_group_match(instr1, instr2))
return false;
- get_load_store_address(instr1, &instr1_block, &instr1_offset);
- get_load_store_address(instr2, &instr2_block, &instr2_offset);
+ get_load_store_address(instr1, &instr1_block, &instr1_offset, &instr1_base);
+ get_load_store_address(instr2, &instr2_block, &instr2_offset, &instr2_base);
- /* There is conflict if the blocks and the offsets of each instruction
- * are not both direct or both indirect. If that's not the case, then
- * there is conflict if the blocks and offsets match.
+ /* There is a conflict if the blocks (or bases) and the offsets of the
+ * two instructions are not both direct or both indirect. When they
+ * are comparable, there is a conflict only if the blocks (or bases)
+ * and offsets all match.
*/
- /* For SSBOs the block is an SSA value, but it can still be direct,
- * if defined by a load_const instruction.
- */
- if (nir_src_is_direct(instr1_block) != nir_src_is_direct(instr2_block))
- return true;
+ /* only SSBOs have a block source, so it can be NULL for shared-var */
+ if (instr1_block) {
+ if (! instr2_block)
+ return true;
- blocks_match = nir_srcs_equal(*instr1_block, *instr2_block);
+ /* For SSBOs the block is an SSA value, but it can still be direct,
+ * if defined by a load_const instruction.
+ */
+ if (nir_src_is_direct(instr1_block) != nir_src_is_direct(instr2_block))
+ return true;
+
+ blocks_match = nir_srcs_equal(*instr1_block, *instr2_block);
+ } else {
+ bases_match = instr1_base == instr2_base;
+ }
/* For SSBOs, the offset is an SSA value, but it can still be direct,
* if defined by a load_const instruction.
@@ -279,7 +368,7 @@ detect_memory_access_conflict(nir_intrinsic_instr *instr1,
offsets_match = nir_srcs_equal(*instr1_offset, *instr2_offset);
/* finally, if both blocks and offsets match, it means conflict */
- if (offsets_match && blocks_match) {
+ if (offsets_match && (blocks_match || bases_match)) {
if (full_match)
*full_match = true;
@@ -296,6 +385,9 @@ get_store_writemask(nir_intrinsic_instr *instr)
case nir_intrinsic_store_ssbo:
return instr->const_index[0];
+ case nir_intrinsic_store_shared:
+ return instr->const_index[1];
+
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_ssbo_atomic_umin:
@@ -306,6 +398,17 @@ get_store_writemask(nir_intrinsic_instr *instr)
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_ssbo_atomic_comp_swap:
+ /* fall-through to shared variable atomics */
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap:
return WRITEMASK_X;
default:
--
2.7.0
More information about the mesa-dev
mailing list