[Mesa-dev] [PATCH 6/9] ir3/compiler: Use the new lower_io_offsets pass

Wed Feb 13 21:29:53 UTC 2019

This effectively removes all offset calculations from get_image_offset()
in ir3_compiler_nir.c; the offset is now expected to be provided by the
new lower_io_offsets NIR pass.

No regressions observed on affected tests from Khronos CTS and piglit
suites, compared to master.

Unfortunately shader-db is not helpful for stats in this case. Few
shaders there exercise image store or image atomic, and of those that
do, most require higher versions of GLSL than 3.10, so they get skipped.

Since the emitted instructions are the same as before in the worst case,
no shader should be hurt by this pass.
---
 src/freedreno/ir3/ir3_compiler_nir.c | 41 ++++++----------------------
 src/freedreno/ir3/ir3_nir.c          |  1 +
 2 files changed, 9 insertions(+), 33 deletions(-)

diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 05dc5ef7cf6..0e141f03181 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1175,44 +1175,19 @@ get_image_type(const nir_variable *var)
 
 static struct ir3_instruction *
 get_image_offset(struct ir3_context *ctx, const nir_variable *var,
-		struct ir3_instruction * const *coords, bool byteoff)
+		struct ir3_instruction * const *coords)
 {
 	struct ir3_block *b = ctx->block;
-	struct ir3_instruction *offset;
-	unsigned ncoords = get_image_coords(var, NULL);
-
-	/* to calculate the byte offset (yes, uggg) we need (up to) three
-	 * const values to know the bytes per pixel, and y and z stride:
-	 */
-	unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
-		ctx->so->const_layout.image_dims.off[var->data.driver_location];
 
 	debug_assert(ctx->so->const_layout.image_dims.mask &
 			(1 << var->data.driver_location));
 
-	/* offset = coords.x * bytes_per_pixel: */
-	offset = ir3_MUL_S(b, coords[0], 0, create_uniform(b, cb + 0), 0);
-	if (ncoords > 1) {
-		/* offset += coords.y * y_pitch: */
-		offset = ir3_MAD_S24(b, create_uniform(b, cb + 1), 0,
-				coords[1], 0, offset, 0);
-	}
-	if (ncoords > 2) {
-		/* offset += coords.z * z_pitch: */
-		offset = ir3_MAD_S24(b, create_uniform(b, cb + 2), 0,
-				coords[2], 0, offset, 0);
-	}
-
-	if (!byteoff) {
-		/* Some cases, like atomics, seem to use dword offset instead
-		 * of byte offsets.. blob just puts an extra shr.b in there
-		 * in those cases:
-		 */
-		offset = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
-	}
-
+	/* The ir3_nir_lower_io_offsets pass should have placed the final
+	 * byte offset (or dword offset for atomics) in the 4th component
+	 * of the coordinate vector.
+	 */
 	return ir3_create_collect(ctx, (struct ir3_instruction*[]){
-		offset,
+		coords[3],
 		create_immed(b, 0),
 	}, 2);
 }
@@ -1344,7 +1319,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	 * src2 is 64b byte offset
 	 */
 
-	offset = get_image_offset(ctx, var, coords, true);
+	offset = get_image_offset(ctx, var, coords);
 
 	/* NOTE: stib seems to take byte offset, but stgb.typed can be used
 	 * too and takes a dword offset.. not quite sure yet why blob uses
@@ -1446,7 +1421,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 	 */
 	src0 = ir3_get_src(ctx, &intr->src[3])[0];
 	src1 = ir3_create_collect(ctx, coords, ncoords);
-	src2 = get_image_offset(ctx, var, coords, false);
+	src2 = get_image_offset(ctx, var, coords);
 
 	switch (intr->intrinsic) {
 	case nir_intrinsic_image_deref_atomic_add:
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index d9fcf798b3d..5cd3116b994 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -160,6 +160,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 
 	OPT_V(s, nir_opt_global_to_local);
 	OPT_V(s, nir_lower_regs_to_ssa);
+	OPT_V(s, ir3_nir_lower_io_offsets);
 
 	if (key) {
 		if (s->info.stage == MESA_SHADER_VERTEX) {
-- 
2.20.1