[Mesa-dev] [PATCH 8/9] ir3: Extend lower_io_offsets pass to lower SSBO dword offset computation
Eduardo Lima Mitev
elima at igalia.com
Wed Feb 13 21:29:55 UTC 2019
The lowering takes an SSBO intrinsic and replaces it with the new
ir3-specific version, which adds an extra source. That source holds the
SSA value that results from dividing the original byte-offset source of
the intrinsic by 4 (implemented as a single SHR op).
---
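(Rough before/after sketch of the lowering, with hypothetical SSA names
and only approximate NIR syntax, assuming the ir3 variant takes the
dword offset as its last source. Something like

  vec1 32 ssa_2 = intrinsic load_ssbo (ssa_0, ssa_1) (...)

becomes roughly

  vec1 32 ssa_3 = ushr ssa_1, 2
  vec1 32 ssa_2 = intrinsic load_ssbo_ir3 (ssa_0, ssa_1, ssa_3) (...)

where ssa_0 is the SSBO index and ssa_1 the original byte offset.)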
src/freedreno/ir3/ir3_nir_lower_io_offsets.c | 170 ++++++++++++++++++-
1 file changed, 164 insertions(+), 6 deletions(-)
diff --git a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
index a43b3895fd8..d03dc6048cb 100644
--- a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
+++ b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
@@ -33,6 +33,12 @@
* compute (x*bpp) + y*y_stride + z*z_stride), and place the resulting
* SSA value in the 4th-component of the vec4 instruction that defines
* the offset.
+ *
+ * - Dword-offset for SSBO load, store and atomics: A new, similar intrinsic
+ *   is emitted that replaces the original one, adding a new source that
+ *   holds the result of the original byte-offset source divided by 4.
+ *   'ssbo_atomic_[f]comp_swap' are excluded because those already use
+ *   all 4 sources (the maximum), leaving no room for the extra one.
*/
@@ -65,6 +71,32 @@ intrinsic_is_image_store_or_atomic(unsigned intrinsic)
return intrinsic_is_image_atomic(intrinsic);
}
+static bool
+intrinsic_is_ssbo(unsigned intrinsic)
+{
+ switch (intrinsic) {
+ case nir_intrinsic_store_ssbo:
+ case nir_intrinsic_load_ssbo:
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_fadd:
+ case nir_intrinsic_ssbo_atomic_fmin:
+ case nir_intrinsic_ssbo_atomic_fmax:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
/*
* FIXME: shamelessly copied from ir3_compiler_nir until it gets factorized
* out at some point.
@@ -279,6 +311,131 @@ lower_offset_for_image_store_or_atomic(nir_intrinsic_instr *intrinsic,
return true;
}
+/* Returns the ir3 version of a given SSBO intrinsic. It also conveniently
+ * returns the index of the offset source in 'offset_src_idx'.
+ */
+unsigned
+get_ir3_intrinsic_for_ssbo_intrinsic(unsigned intrinsic,
+ uint8_t *offset_src_idx)
+{
+ debug_assert(offset_src_idx);
+
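+ /* The byte offset is src[1] for every intrinsic handled here except
+ * store_ssbo, where src[0] holds the value to store and the offset
+ * is src[2].
+ */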
+ *offset_src_idx = 1;
+
+ switch (intrinsic) {
+ case nir_intrinsic_store_ssbo:
+ *offset_src_idx = 2;
+ return nir_intrinsic_store_ssbo_ir3;
+ case nir_intrinsic_load_ssbo:
+ return nir_intrinsic_load_ssbo_ir3;
+ case nir_intrinsic_ssbo_atomic_add:
+ return nir_intrinsic_ssbo_atomic_add_ir3;
+ case nir_intrinsic_ssbo_atomic_imin:
+ return nir_intrinsic_ssbo_atomic_imin_ir3;
+ case nir_intrinsic_ssbo_atomic_umin:
+ return nir_intrinsic_ssbo_atomic_umin_ir3;
+ case nir_intrinsic_ssbo_atomic_imax:
+ return nir_intrinsic_ssbo_atomic_imax_ir3;
+ case nir_intrinsic_ssbo_atomic_umax:
+ return nir_intrinsic_ssbo_atomic_umax_ir3;
+ case nir_intrinsic_ssbo_atomic_and:
+ return nir_intrinsic_ssbo_atomic_and_ir3;
+ case nir_intrinsic_ssbo_atomic_or:
+ return nir_intrinsic_ssbo_atomic_or_ir3;
+ case nir_intrinsic_ssbo_atomic_xor:
+ return nir_intrinsic_ssbo_atomic_xor_ir3;
+ case nir_intrinsic_ssbo_atomic_exchange:
+ return nir_intrinsic_ssbo_atomic_exchange_ir3;
+ case nir_intrinsic_ssbo_atomic_fadd:
+ return nir_intrinsic_ssbo_atomic_fadd_ir3;
+ case nir_intrinsic_ssbo_atomic_fmin:
+ return nir_intrinsic_ssbo_atomic_fmin_ir3;
+ case nir_intrinsic_ssbo_atomic_fmax:
+ return nir_intrinsic_ssbo_atomic_fmax_ir3;
+ default:
+ debug_assert(!"Unhandled SSBO intrinsic");
+ break;
+ }
+
+ return 0;
+}
+
+static bool
+lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
+ void *mem_ctx)
+{
+ unsigned num_srcs = nir_intrinsic_infos[intrinsic->intrinsic].num_srcs;
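+ /* The ir3 variant appends one extra source for the dword offset, so
+ * the original intrinsic must have room for it; the comp_swap
+ * intrinsics, which already use all 4 sources, are not matched by
+ * intrinsic_is_ssbo().
+ */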
+ debug_assert(num_srcs < 4);
+
+ bool has_dest = nir_intrinsic_infos[intrinsic->intrinsic].has_dest;
+ nir_ssa_def *new_dest = NULL;
+
+ /* Here we create a new intrinsic and copy over all contents from the old one. */
+
+ nir_intrinsic_instr *new_intrinsic;
+ nir_src *target_src;
+ uint8_t offset_src_idx;
+
+ unsigned ir3_intrinsic_opcode =
+ get_ir3_intrinsic_for_ssbo_intrinsic(intrinsic->intrinsic,
+ &offset_src_idx);
+
+ /* 'offset_src_idx' holds the index of the source that represents the offset. */
+ new_intrinsic =
+ nir_intrinsic_instr_create(b->shader, ir3_intrinsic_opcode);
+
+ nir_ssa_def *offset = intrinsic->src[offset_src_idx].ssa;
+
+ /* The new source that will hold the dword-offset is always the last
+ * one for every intrinsic.
+ */
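+ /* Store the original byte offset there for now: all sources must be
+ * valid when the instruction is inserted. The source is rewritten
+ * with the dword offset right after insertion, below.
+ */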
+ target_src = &new_intrinsic->src[num_srcs];
+ *target_src = nir_src_for_ssa(offset);
+
+ if (has_dest) {
+ nir_ssa_def *dest = &intrinsic->dest.ssa;
+ nir_ssa_dest_init(&new_intrinsic->instr, &new_intrinsic->dest,
+ dest->num_components, dest->bit_size, NULL);
+ new_dest = &new_intrinsic->dest.ssa;
+ }
+
+ for (unsigned i = 0; i < num_srcs; i++)
+ new_intrinsic->src[i] = nir_src_for_ssa(intrinsic->src[i].ssa);
+
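+ /* Carry over all const indices (e.g. writemask and access qualifiers,
+ * depending on the intrinsic).
+ */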
+ for (unsigned i = 0; i < NIR_INTRINSIC_MAX_CONST_INDEX; i++)
+ new_intrinsic->const_index[i] = intrinsic->const_index[i];
+
+ new_intrinsic->num_components = intrinsic->num_components;
+
+ b->cursor = nir_before_instr(&intrinsic->instr);
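+ /* The dword offset is the byte offset divided by 4, i.e. shifted
+ * right by 2.
+ */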
+ nir_ssa_def *offset_div_4 = nir_ushr(b, offset, nir_imm_int(b, 2));
+ debug_assert(offset_div_4);
+
+ /* Insert the new intrinsic right before the old one. */
+ b->cursor = nir_before_instr(&intrinsic->instr);
+ nir_builder_instr_insert(b, &new_intrinsic->instr);
+
+ /* Replace the last source of the new intrinsic with the result of
+ * dividing the offset by 4.
+ */
+ nir_instr_rewrite_src(&new_intrinsic->instr,
+ target_src,
+ nir_src_for_ssa(offset_div_4));
+
+ if (has_dest) {
+ /* Replace the uses of the original destination with that
+ * of the new intrinsic.
+ */
+ nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa,
+ nir_src_for_ssa(new_dest));
+ }
+
+ /* Finally remove the original intrinsic. */
+ nir_instr_remove(&intrinsic->instr);
+
+ return true;
+}
+
static bool
lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
{
@@ -289,12 +446,13 @@ lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
- if (!intrinsic_is_image_store_or_atomic(intr->intrinsic))
- continue;
-
- const nir_variable *var = nir_intrinsic_get_var(intr, 0);
- progress |= lower_offset_for_image_store_or_atomic(intr, var, b,
- mem_ctx);
+ if (intrinsic_is_image_store_or_atomic(intr->intrinsic)) {
+ const nir_variable *var = nir_intrinsic_get_var(intr, 0);
+ progress |= lower_offset_for_image_store_or_atomic(intr, var, b,
+ mem_ctx);
+ } else if (intrinsic_is_ssbo(intr->intrinsic)) {
+ progress |= lower_offset_for_ssbo(intr, b, mem_ctx);
+ }
}
return progress;
--
2.20.1