Mesa (staging/20.1): freedreno/ir3: Fix register allocation assertion failures.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jun 23 20:35:05 UTC 2020


Module: Mesa
Branch: staging/20.1
Commit: 958af2713d2b94a6959d12d8d5e0d1605c0a6755
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=958af2713d2b94a6959d12d8d5e0d1605c0a6755

Author: Eric Anholt <eric at anholt.net>
Date:   Tue Apr 21 13:26:14 2020 -0700

freedreno/ir3: Fix register allocation assertion failures.

We were failing to tell the allocator about the restriction that scalar
texture instructions (allocated as scalar regs) must not be allocated such
that the full un-writemasked vector would start before r0.  There was a
workaround in select_reg_callback on a6xx that tried to handle that, but
you could still end up backed into a corner you shouldn't be in, because
we didn't tell the RA what it needed to know.

Fixes compiler assertion failures on a300-a400's blit_z shader, used for
Z32F gmem blits.

As a result, it looks like we get tighter register allocation but more nops:

instructions in affected programs: 757945 -> 760356 (0.32%)
nops in affected programs: 317983 -> 320468 (0.78%)
non-nops in affected programs: 27525 -> 27451 (-0.27%)
mov in affected programs: 3098 -> 3023 (-2.42%)
dwords in affected programs: 109664 -> 110656 (0.90%)
last-baryf in affected programs: 112701 -> 112847 (0.13%)
full in affected programs: 4326 -> 4011 (-7.28%)
sstall in affected programs: 120550 -> 120836 (0.24%)
(ss) in affected programs: 13939 -> 13918 (-0.15%)
(sy) in affected programs: 3006 -> 2786 (-7.32%)

(cherry picked from commit b420d04e1f744d15622f89180d1e3e511d92a8ba)

Signed-off-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5612>

---

 .gitlab-ci/deqp-freedreno-a307-fails.txt |  8 -------
 src/freedreno/ir3/ir3_ra.c               | 41 ++++++++++++++++++++++----------
 src/freedreno/ir3/ir3_ra.h               |  2 ++
 3 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/.gitlab-ci/deqp-freedreno-a307-fails.txt b/.gitlab-ci/deqp-freedreno-a307-fails.txt
index 6c835a85b60..7cf581dc204 100644
--- a/.gitlab-ci/deqp-freedreno-a307-fails.txt
+++ b/.gitlab-ci/deqp-freedreno-a307-fails.txt
@@ -608,14 +608,6 @@ dEQP-GLES3.functional.texture.format.sized.3d.rgb10_a2ui_pot
 dEQP-GLES3.functional.texture.format.sized.cube.rgb10_a2ui_npot
 dEQP-GLES3.functional.texture.format.sized.cube.rgb10_a2ui_pot
 dEQP-GLES3.functional.texture.mipmap.cube.max_level.linear_nearest
-dEQP-GLES3.functional.texture.shadow.2d.linear.less_or_equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.2d.linear_mipmap_nearest.equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.2d_array.linear.equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.2d_array.nearest.less_depth_component32f
-dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_linear.less_or_equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.cube.linear_mipmap_linear.less_or_equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.cube.nearest.less_or_equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.cube.nearest_mipmap_nearest.equal_depth_component32f
 dEQP-GLES3.functional.texture.specification.basic_teximage2d.r8i_2d
 dEQP-GLES3.functional.texture.specification.basic_teximage2d.r8i_cube
 dEQP-GLES3.functional.texture.specification.basic_teximage2d.r8ui_2d
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index 927f91e98ec..231bfc6ffc1 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -497,19 +497,6 @@ ra_select_reg_merged(unsigned int n, BITSET_WORD *regs, void *data)
 				return reg;
 			}
 		}
-	} else if (is_tex_or_prefetch(instr)) {
-		/* we could have a tex fetch w/ wrmask .z, for example.. these
-		 * cannot land in r0.x since that would underflow when we
-		 * subtract the offset.  Ie. if we pick r0.z, and subtract
-		 * the offset, the register encoded for dst will be r0.x
-		 */
-		unsigned n = ffs(instr->regs[0]->wrmask);
-		debug_assert(n > 0);
-		unsigned offset = n - 1;
-		if (!half)
-			offset *= 2;
-		base += offset;
-		max_target -= offset;
 	}
 
 	int r = pick_in_range(regs, base + start, base + max_target);
@@ -571,6 +558,12 @@ ra_init(struct ir3_ra_ctx *ctx)
 	}
 	ctx->alloc_count += ctx->class_alloc_count[total_class_count];
 
+	/* Add vreg names for r0.xyz */
+	ctx->r0_xyz_nodes = ctx->alloc_count;
+	ctx->alloc_count += 3;
+	ctx->hr0_xyz_nodes = ctx->alloc_count;
+	ctx->alloc_count += 3;
+
 	ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
 	ralloc_steal(ctx->g, ctx->instrd);
 	ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
@@ -710,6 +703,20 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
 			if ((instr->opc == OPC_META_INPUT) && first_non_input)
 				use(name, first_non_input);
+
+			/* Texture instructions with writemasks can be treated as smaller
+			 * vectors (or just scalars!) to allocate knowing that the
+			 * masked-out regs won't be written, but we need to make sure that
+			 * the start of the vector doesn't come before the first register
+			 * or we'll wrap.
+			 */
+			if (is_tex_or_prefetch(instr)) {
+				int writemask_skipped_regs = ffs(instr->regs[0]->wrmask) - 1;
+				int r0_xyz = (instr->regs[0]->flags & IR3_REG_HALF) ?
+					ctx->hr0_xyz_nodes : ctx->r0_xyz_nodes;
+				for (int i = 0; i < writemask_skipped_regs; i++)
+					ra_add_node_interference(ctx->g, name, r0_xyz + i);
+			}
 		}
 
 		foreach_use (name, ctx, instr) {
@@ -1005,6 +1012,14 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
 		arr->end_ip = 0;
 	}
 
+
+	/* set up the r0.xyz precolor regs. */
+	for (int i = 0; i < 3; i++) {
+		ra_set_node_reg(ctx->g, ctx->r0_xyz_nodes + i, i);
+		ra_set_node_reg(ctx->g, ctx->hr0_xyz_nodes + i,
+				ctx->set->first_half_reg + i);
+	}
+
 	/* compute live ranges (use/def) on a block level, also updating
 	 * block's def/use bitmasks (used below to calculate per-block
 	 * livein/liveout):
diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h
index 45df2397ddc..35fb618c49a 100644
--- a/src/freedreno/ir3/ir3_ra.h
+++ b/src/freedreno/ir3/ir3_ra.h
@@ -144,6 +144,8 @@ struct ir3_ra_ctx {
 	bool scalar_pass;
 
 	unsigned alloc_count;
+	unsigned r0_xyz_nodes; /* ra node numbers for r0.[xyz] precolors */
+	unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors pre-a6xx */
 	/* one per class, plus one slot for arrays: */
 	unsigned class_alloc_count[total_class_count + 1];
 	unsigned class_base[total_class_count + 1];



More information about the mesa-commit mailing list