[Mesa-dev] [PATCH 8/9] ir3: Extend lower_io_offsets pass to lower SSBO dword offset computation

Eduardo Lima Mitev elima@igalia.com
Wed Feb 13 21:29:55 UTC 2019


The lowering takes an SSBO intrinsic and replaces it with the new
ir3-specific version, which carries one extra source. That source holds
the SSA value produced by dividing the original byte-offset source of
the intrinsic by 4 (emitted as an SHR by 2).
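
For illustration, here is a rough sketch of the transform on a load
(schematic NIR; SSA names are made up and constant indices omitted):

    vec4 32 ssa_4 = intrinsic load_ssbo (ssa_2, ssa_3)

becomes

    vec1 32 ssa_5 = load_const (0x00000002)
    vec1 32 ssa_6 = ushr ssa_3, ssa_5
    vec4 32 ssa_7 = intrinsic load_ssbo_ir3 (ssa_2, ssa_3, ssa_6)

i.e. the original byte-offset source (ssa_3) is kept and the dword
offset (ssa_6) is appended as the new last source.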
---
 src/freedreno/ir3/ir3_nir_lower_io_offsets.c | 170 ++++++++++++++++++-
 1 file changed, 164 insertions(+), 6 deletions(-)

diff --git a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
index a43b3895fd8..d03dc6048cb 100644
--- a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
+++ b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
@@ -33,6 +33,12 @@
  *   compute (x*bpp) + y*y_stride + z*z_stride), and place the resulting
  *   SSA value in the 4th-component of the vec4 instruction that defines
  *   the offset.
+ *
+ * - Dword-offset for SSBO load, store and atomics: A new, similar intrinsic
+ *   is emitted that replaces the original one, adding a new source that
+ *   holds the result of the original byte-offset source divided by 4.
+ *   'ssbo_atomic_[f]comp_swap' are excluded because they already use
+ *   all four sources.
  */
 
 
@@ -65,6 +71,32 @@ intrinsic_is_image_store_or_atomic(unsigned intrinsic)
 		return intrinsic_is_image_atomic(intrinsic);
 }
 
+static bool
+intrinsic_is_ssbo(unsigned intrinsic)
+{
+	switch (intrinsic) {
+	case nir_intrinsic_store_ssbo:
+	case nir_intrinsic_load_ssbo:
+	case nir_intrinsic_ssbo_atomic_add:
+	case nir_intrinsic_ssbo_atomic_imin:
+	case nir_intrinsic_ssbo_atomic_umin:
+	case nir_intrinsic_ssbo_atomic_imax:
+	case nir_intrinsic_ssbo_atomic_umax:
+	case nir_intrinsic_ssbo_atomic_and:
+	case nir_intrinsic_ssbo_atomic_or:
+	case nir_intrinsic_ssbo_atomic_xor:
+	case nir_intrinsic_ssbo_atomic_exchange:
+	case nir_intrinsic_ssbo_atomic_fadd:
+	case nir_intrinsic_ssbo_atomic_fmin:
+	case nir_intrinsic_ssbo_atomic_fmax:
+		return true;
+	default:
+		break;
+	}
+
+	return false;
+}
+
 /*
  * FIXME: shamelessly copied from ir3_compiler_nir until it gets factorized
  * out at some point.
@@ -279,6 +311,131 @@ lower_offset_for_image_store_or_atomic(nir_intrinsic_instr *intrinsic,
 	return true;
 }
 
+/* Returns the ir3-specific version of a given SSBO intrinsic, and also
+ * returns the index of its offset source in 'offset_src_idx'.
+ */
+static unsigned
+get_ir3_intrinsic_for_ssbo_intrinsic(unsigned intrinsic,
+									 uint8_t *offset_src_idx)
+{
+	debug_assert(offset_src_idx);
+
+	*offset_src_idx = 1;
+
+	switch (intrinsic) {
+	case nir_intrinsic_store_ssbo:
+		*offset_src_idx = 2;
+		return nir_intrinsic_store_ssbo_ir3;
+	case nir_intrinsic_load_ssbo:
+		return nir_intrinsic_load_ssbo_ir3;
+	case nir_intrinsic_ssbo_atomic_add:
+		return nir_intrinsic_ssbo_atomic_add_ir3;
+	case nir_intrinsic_ssbo_atomic_imin:
+		return nir_intrinsic_ssbo_atomic_imin_ir3;
+	case nir_intrinsic_ssbo_atomic_umin:
+		return nir_intrinsic_ssbo_atomic_umin_ir3;
+	case nir_intrinsic_ssbo_atomic_imax:
+		return nir_intrinsic_ssbo_atomic_imax_ir3;
+	case nir_intrinsic_ssbo_atomic_umax:
+		return nir_intrinsic_ssbo_atomic_umax_ir3;
+	case nir_intrinsic_ssbo_atomic_and:
+		return nir_intrinsic_ssbo_atomic_and_ir3;
+	case nir_intrinsic_ssbo_atomic_or:
+		return nir_intrinsic_ssbo_atomic_or_ir3;
+	case nir_intrinsic_ssbo_atomic_xor:
+		return nir_intrinsic_ssbo_atomic_xor_ir3;
+	case nir_intrinsic_ssbo_atomic_exchange:
+		return nir_intrinsic_ssbo_atomic_exchange_ir3;
+	case nir_intrinsic_ssbo_atomic_fadd:
+		return nir_intrinsic_ssbo_atomic_fadd_ir3;
+	case nir_intrinsic_ssbo_atomic_fmin:
+		return nir_intrinsic_ssbo_atomic_fmin_ir3;
+	case nir_intrinsic_ssbo_atomic_fmax:
+		return nir_intrinsic_ssbo_atomic_fmax_ir3;
+	default:
+		debug_assert(!"Unhandled SSBO intrinsic");
+		break;
+	}
+
+	return 0;
+}
+
+static bool
+lower_offset_for_ssbo(nir_intrinsic_instr *intrinsic, nir_builder *b,
+					  void *mem_ctx)
+{
+	unsigned num_srcs = nir_intrinsic_infos[intrinsic->intrinsic].num_srcs;
+	debug_assert(num_srcs < 4);
+
+	bool has_dest = nir_intrinsic_infos[intrinsic->intrinsic].has_dest;
+	nir_ssa_def *new_dest = NULL;
+
+	/* Here we create a new intrinsic and copy over all contents from the old one. */
+
+	nir_intrinsic_instr *new_intrinsic;
+	nir_src *target_src;
+	uint8_t offset_src_idx;
+
+	unsigned ir3_intrinsic_opcode =
+		get_ir3_intrinsic_for_ssbo_intrinsic(intrinsic->intrinsic,
+											 &offset_src_idx);
+
+	/* 'offset_src_idx' holds the index of the source that represents the offset. */
+	new_intrinsic =
+		nir_intrinsic_instr_create(b->shader, ir3_intrinsic_opcode);
+
+	nir_ssa_def *offset = intrinsic->src[offset_src_idx].ssa;
+
+	/* The new source that will hold the dword-offset is always the last
+	 * one for every intrinsic.
+	 */
+	target_src = &new_intrinsic->src[num_srcs];
+	*target_src = nir_src_for_ssa(offset);
+
+	if (has_dest) {
+		nir_ssa_def *dest = &intrinsic->dest.ssa;
+		nir_ssa_dest_init(&new_intrinsic->instr, &new_intrinsic->dest,
+						  dest->num_components, dest->bit_size, NULL);
+		new_dest = &new_intrinsic->dest.ssa;
+	}
+
+	for (unsigned i = 0; i < num_srcs; i++)
+		new_intrinsic->src[i] = nir_src_for_ssa(intrinsic->src[i].ssa);
+
+	for (unsigned i = 0; i < NIR_INTRINSIC_MAX_CONST_INDEX; i++)
+		new_intrinsic->const_index[i] = intrinsic->const_index[i];
+
+	new_intrinsic->num_components = intrinsic->num_components;
+
+	b->cursor = nir_before_instr(&intrinsic->instr);
+	nir_ssa_def *offset_div_4 = nir_ushr(b, offset, nir_imm_int(b, 2));
+	debug_assert(offset_div_4);
+
+	/* Insert the new intrinsic right before the old one. */
+	b->cursor = nir_before_instr(&intrinsic->instr);
+	nir_builder_instr_insert(b, &new_intrinsic->instr);
+
+	/* Replace the last source of the new intrinsic with the dword
+	 * offset, i.e. the byte offset divided by 4.
+	 */
+	nir_instr_rewrite_src(&new_intrinsic->instr,
+						  target_src,
+						  nir_src_for_ssa(offset_div_4));
+
+	if (has_dest) {
+		/* Replace the uses of the original destination by that
+		 * of the new intrinsic.
+		 */
+		nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa,
+								 nir_src_for_ssa(new_dest));
+	}
+
+	/* Finally remove the original intrinsic. */
+	nir_instr_remove(&intrinsic->instr);
+
+	return true;
+}
+
 static bool
 lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
 {
@@ -289,12 +446,13 @@ lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
 			continue;
 
 		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-		if (!intrinsic_is_image_store_or_atomic(intr->intrinsic))
-			continue;
-
-		const nir_variable *var = nir_intrinsic_get_var(intr, 0);
-		progress |= lower_offset_for_image_store_or_atomic(intr, var, b,
-														   mem_ctx);
+		if (intrinsic_is_image_store_or_atomic(intr->intrinsic)) {
+			const nir_variable *var = nir_intrinsic_get_var(intr, 0);
+			progress |= lower_offset_for_image_store_or_atomic(intr, var, b,
+															   mem_ctx);
+		} else if (intrinsic_is_ssbo(intr->intrinsic)) {
+			progress |= lower_offset_for_ssbo(intr, b, mem_ctx);
+		}
 	}
 
 	return progress;
-- 
2.20.1


