Mesa (master): freedreno/ir3: limit pre-fetched tex dest

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Jun 11 22:24:44 UTC 2020


Module: Mesa
Branch: master
Commit: ee29c682fe6a4cfd2b4221c209bec0a4e4747c4b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ee29c682fe6a4cfd2b4221c209bec0a4e4747c4b

Author: Rob Clark <robdclark at chromium.org>
Date:   Thu Jun 11 12:03:03 2020 -0700

freedreno/ir3: limit pre-fetched tex dest

Teach RA to set up additional interference so that textures fetched
before the FS starts do not end up in a register that is too high to
encode.

Fixes mis-rendering in multiple playcanv.as webgl apps.

Note that the regression was not actually 733bee57eb8's fault, but
that was the commit that exposed the problem.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3108
Fixes: 733bee57eb8 ("glsl: lower samplers with highp coordinates correctly")
Signed-off-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5431>

---

 src/freedreno/ir3/ir3_ra.c        | 21 +++++++++++++++++++--
 src/freedreno/ir3/ir3_ra.h        | 11 ++++++++++-
 src/freedreno/ir3/ir3_ra_regset.c | 32 +++++++++++++++++++++++++++++++-
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c
index 44dc6f1b8e4..c9a1b679116 100644
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -563,6 +563,9 @@ ra_init(struct ir3_ra_ctx *ctx)
 	ctx->hr0_xyz_nodes = ctx->alloc_count;
 	ctx->alloc_count += 3;
 
+	/* Add vreg name for prefetch-exclusion range: */
+	ctx->prefetch_exclude_node = ctx->alloc_count++;
+
 	ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
 	ralloc_steal(ctx->g, ctx->instrd);
 	ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
@@ -711,11 +714,20 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 			 */
 			if (is_tex_or_prefetch(instr)) {
 				int writemask_skipped_regs = ffs(instr->regs[0]->wrmask) - 1;
-				int r0_xyz = (instr->regs[0]->flags & IR3_REG_HALF) ?
+				int r0_xyz = is_half(instr) ?
 					ctx->hr0_xyz_nodes : ctx->r0_xyz_nodes;
 				for (int i = 0; i < writemask_skipped_regs; i++)
 					ra_add_node_interference(ctx->g, name, r0_xyz + i);
 			}
+
+			/* Pre-fetched textures have a lower limit for bits to encode dst
+			 * register, so add additional interference with registers above
+			 * that limit.
+			 */
+			if (instr->opc == OPC_META_TEX_PREFETCH) {
+				ra_add_node_interference(ctx->g, name,
+						ctx->prefetch_exclude_node);
+			}
 		}
 
 		foreach_use (name, ctx, instr) {
@@ -1011,7 +1023,6 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
 		arr->end_ip = 0;
 	}
 
-
 	/* set up the r0.xyz precolor regs. */
 	for (int i = 0; i < 3; i++) {
 		ra_set_node_reg(ctx->g, ctx->r0_xyz_nodes + i, i);
@@ -1019,6 +1030,12 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
 				ctx->set->first_half_reg + i);
 	}
 
+	/* pre-color the node that conflicts with half/full regs higher than
+	 * what can be encoded for tex-prefetch:
+	 */
+	ra_set_node_reg(ctx->g, ctx->prefetch_exclude_node,
+			ctx->set->prefetch_exclude_reg);
+
 	/* compute live ranges (use/def) on a block level, also updating
 	 * block's def/use bitmasks (used below to calculate per-block
 	 * livein/liveout):
diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h
index 437223bd1de..aa703ae645f 100644
--- a/src/freedreno/ir3/ir3_ra.h
+++ b/src/freedreno/ir3/ir3_ra.h
@@ -89,6 +89,14 @@ struct ir3_ra_reg_set {
 	unsigned int half_classes[half_class_count];
 	unsigned int high_classes[high_class_count];
 
+	/* pre-fetched tex dst is limited, on current gens, to regs
+	 * 0x3f and below.  So there is an additional register class,
+	 * with one vreg, that is set up to conflict with any regs
+	 * above that limit.
+	 */
+	unsigned prefetch_exclude_class;
+	unsigned prefetch_exclude_reg;
+
 	/* The virtual register space flattens out all the classes,
 	 * starting with full, followed by half and then high, ie:
 	 *
@@ -145,7 +153,8 @@ struct ir3_ra_ctx {
 
 	unsigned alloc_count;
 	unsigned r0_xyz_nodes; /* ra node numbers for r0.[xyz] precolors */
-	unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors pre-a6xx */
+	unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors */
+	unsigned prefetch_exclude_node;
 	/* one per class, plus one slot for arrays: */
 	unsigned class_alloc_count[total_class_count + 1];
 	unsigned class_base[total_class_count + 1];
diff --git a/src/freedreno/ir3/ir3_ra_regset.c b/src/freedreno/ir3/ir3_ra_regset.c
index f5acc8f87ac..48fd9f106e8 100644
--- a/src/freedreno/ir3/ir3_ra_regset.c
+++ b/src/freedreno/ir3/ir3_ra_regset.c
@@ -70,6 +70,21 @@ setup_conflicts(struct ir3_ra_reg_set *set)
 			reg++;
 		}
 	}
+
+	/*
+	 * Setup conflicts with registers over 0x3f for the special vreg
+	 * that exists to use as interference for tex-prefetch:
+	 */
+
+	for (unsigned i = 0x40; i < CLASS_REGS(0); i++) {
+		ra_add_transitive_reg_conflict(set->regs, i,
+				set->prefetch_exclude_reg);
+	}
+
+	for (unsigned i = 0x40; i < HALF_CLASS_REGS(0); i++) {
+		ra_add_transitive_reg_conflict(set->regs, i + set->first_half_reg,
+				set->prefetch_exclude_reg);
+	}
 }
 
 /* One-time setup of RA register-set, which describes all the possible
@@ -104,6 +119,8 @@ ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
 	for (unsigned i = 0; i < high_class_count; i++)
 		ra_reg_count += HIGH_CLASS_REGS(i);
 
+	ra_reg_count += 1;   /* for tex-prefetch excludes */
+
 	/* allocate the reg-set.. */
 	set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
 	set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
@@ -164,7 +181,20 @@ ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
 		}
 	}
 
-	/* starting a6xx, half precision regs conflict w/ full precision regs: */
+	/*
+	 * Set up an additional class, with one vreg, to simply conflict
+	 * with registers that are too high to encode tex-prefetch.  This
+	 * vreg is only used to set up additional conflicts so that RA
+	 * knows to allocate prefetch dst regs below the limit:
+	 */
+	set->prefetch_exclude_class = ra_alloc_reg_class(set->regs);
+	ra_class_add_reg(set->regs, set->prefetch_exclude_class, reg);
+	set->prefetch_exclude_reg = reg++;
+
+	/*
+	 * And finally setup conflicts.  Starting a6xx, half precision regs
+	 * conflict w/ full precision regs (when using MERGEDREGS):
+	 */
 	if (compiler->gpu_id >= 600) {
 		for (unsigned i = 0; i < CLASS_REGS(0) / 2; i++) {
 			unsigned freg  = set->gpr_to_ra_reg[0][i];



More information about the mesa-commit mailing list