Mesa (master): freedreno/ir3: start on indirect gpr reads

Rob Clark robclark at kemper.freedesktop.org
Thu Jan 8 00:38:21 UTC 2015


Module: Mesa
Branch: master
Commit: 1e5c207dba4dbd07919bff2efe57ad361a44ac84
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1e5c207dba4dbd07919bff2efe57ad361a44ac84

Author: Rob Clark <robclark at freedesktop.org>
Date:   Tue Dec 30 20:02:36 2014 -0500

freedreno/ir3: start on indirect gpr reads

Handle TEMP[ADDR[]] src registers by generating a fanin to group array
elements, similarly to how texture fetch instructions work.

NOTE:
For all the scalar instructions generated for a single tgsi vector
operation which uses an array src (or possibly even uses the same array
as multiple srcs), re-use the same fanin node.  Since a vector operation
operates on all components at the same time, it should never see more
than one version of the same array.

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/ir3/ir3.h          |    5 +-
 src/gallium/drivers/freedreno/ir3/ir3_compiler.c |  131 +++++++++++++++++++++-
 src/gallium/drivers/freedreno/ir3/ir3_ra.c       |   18 ++-
 3 files changed, 146 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index b1fb08f..a3bbba9 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -209,6 +209,9 @@ struct ir3_instruction {
 		struct {
 			struct ir3_block *block;
 		} inout;
+		struct {
+			int off;              /* offset relative to addr reg */
+		} deref;
 
 		/* XXX keep this as big as all other union members! */
 		uint32_t info[3];
@@ -465,7 +468,7 @@ static inline struct ir3_instruction *ssa(struct ir3_register *reg)
 
 static inline bool reg_gpr(struct ir3_register *r)
 {
-	if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_ADDR))
+	if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
 		return false;
 	if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
 		return false;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
index 99bad37..8c88bf7 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -88,6 +88,17 @@ struct ir3_compile_context {
 
 	struct tgsi_shader_info info;
 
+	/* hmm, would be nice if tgsi_scan_shader figured this out
+	 * for us:
+	 */
+	struct {
+		unsigned first, last;
+		struct ir3_instruction *fanin;
+	} array[16];
+	uint32_t array_dirty;
+	/* offset into array[], per file, of first array info */
+	uint8_t array_offsets[TGSI_FILE_COUNT];
+
 	/* for calculating input/output positions/linkages: */
 	unsigned next_inloc;
 
@@ -130,11 +141,21 @@ static void create_mov(struct ir3_compile_context *ctx,
 		struct tgsi_dst_register *dst, struct tgsi_src_register *src);
 static type_t get_ftype(struct ir3_compile_context *ctx);
 
+static unsigned setup_arrays(struct ir3_compile_context *ctx, unsigned file, unsigned i)
+{
+	/* ArrayID 0 for a given file is the legacy array spanning the entire file: */
+	ctx->array[i].first = 0;
+	ctx->array[i].last = ctx->info.file_max[file];
+	ctx->array_offsets[file] = i;
+	/* return the slot count used, not the next index: caller does 'i += ..' */
+	return ctx->info.array_max[file] + 1;
+}
+
 static unsigned
 compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
 		const struct tgsi_token *tokens)
 {
-	unsigned ret;
+	unsigned ret, i;
 	struct tgsi_shader_info *info = &ctx->info;
 	struct tgsi_lowering_config lconfig = {
 			.color_two_side = so->key.color_two_side,
@@ -190,6 +211,7 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
 	}
 	ctx->ir = so->ir;
 	ctx->so = so;
+	ctx->array_dirty = 0;
 	ctx->next_inloc = 8;
 	ctx->num_internal_temps = 0;
 	ctx->branch_count = 0;
@@ -204,10 +226,12 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
 	ctx->using_tmp_dst = false;
 
 	memset(ctx->frag_coord, 0, sizeof(ctx->frag_coord));
+	memset(ctx->array, 0, sizeof(ctx->array));
+	memset(ctx->array_offsets, 0, sizeof(ctx->array_offsets));
 
 #define FM(x) (1 << TGSI_FILE_##x)
 	/* optimize can't deal with relative addressing: */
-	if (info->indirect_files & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT)))
+	if (info->indirect_files_written & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT)))
 		return TGSI_PARSE_ERROR;
 
 	/* NOTE: if relative addressing is used, we set constlen in
@@ -217,6 +241,12 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
 	if (info->indirect_files & FM(CONSTANT))
 		so->constlen = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1);
 
+	i = 0;
+	i += setup_arrays(ctx, TGSI_FILE_INPUT, i);
+	i += setup_arrays(ctx, TGSI_FILE_TEMPORARY, i);
+	i += setup_arrays(ctx, TGSI_FILE_OUTPUT, i);
+	/* any others? we don't track arrays for const..*/
+
 	/* Immediates go after constants: */
 	so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1;
 	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
@@ -275,6 +305,12 @@ instr_finish(struct ir3_compile_context *ctx)
 		*(ctx->output_updates[i].instrp) = ctx->output_updates[i].instr;
 
 	ctx->num_output_updates = 0;
+
+	while (ctx->array_dirty) {
+		unsigned aid = ffs(ctx->array_dirty) - 1;
+		ctx->array[aid].fanin = NULL;
+		ctx->array_dirty &= ~(1 << aid);
+	}
 }
 
 /* For "atomic" groups of instructions, for example the four scalar
@@ -515,6 +551,8 @@ ssa_instr(struct ir3_compile_context *ctx, unsigned file, unsigned n)
 			 * NOTE: *don't* use instr_create() here!
 			 */
 			instr = create_immed(ctx, 0.0);
+			/* no need to recreate the immed for every access: */
+			block->temporaries[n] = instr;
 		}
 		break;
 	}
@@ -522,17 +560,68 @@ ssa_instr(struct ir3_compile_context *ctx, unsigned file, unsigned n)
 	return instr;
 }
 
+static int array_id(struct ir3_compile_context *ctx,
+		const struct tgsi_src_register *src)
+{
+	/* XXX complete hack: cast back to the tgsi_full_src_register to read
+	 * Indirect.ArrayID, then add the per-file offset to index array[];
+	 * assumes every indirect src is wrapped in a tgsi_full_src_register */
+	const struct tgsi_full_src_register *fsrc = (const void *)src;
+	debug_assert(src->File != TGSI_FILE_CONSTANT);
+	return fsrc->Indirect.ArrayID + ctx->array_offsets[src->File];
+}
+
 static void
 ssa_src(struct ir3_compile_context *ctx, struct ir3_register *reg,
 		const struct tgsi_src_register *src, unsigned chan)
 {
 	struct ir3_instruction *instr;
 
-	instr = ssa_instr(ctx, src->File, regid(src->Index, chan));
+	if (src->Indirect && (src->File != TGSI_FILE_CONSTANT)) {
+		/* for relative addressing of gpr's (due to register assignment)
+		 * we must generate a fanin instruction to collect all possible
+		 * array elements that the instruction could address together:
+		 */
+		unsigned i, j, aid = array_id(ctx, src);
+
+		if (ctx->array[aid].fanin) {
+			instr = ctx->array[aid].fanin;
+		} else {
+			unsigned first, last;
+
+			first = ctx->array[aid].first;
+			last  = ctx->array[aid].last;
+
+			instr = ir3_instr_create2(ctx->block, -1, OPC_META_FI,
+					1 + (4 * (last + 1 - first)));
+			ir3_reg_create(instr, 0, 0);
+			for (i = first; i <= last; i++) {
+				for (j = 0; j < 4; j++) {
+					unsigned n = (i * 4) + j;
+					ir3_reg_create(instr, 0, IR3_REG_SSA)->instr =
+							ssa_instr(ctx, src->File, n);
+				}
+			}
+			ctx->array[aid].fanin = instr;
+			ctx->array_dirty |= (1 << aid);
+		}
+	} else {
+		/* normal case (not relative addressed GPR) */
+		instr = ssa_instr(ctx, src->File, regid(src->Index, chan));
+	}
 
 	if (instr) {
 		reg->flags |= IR3_REG_SSA;
 		reg->instr = instr;
+	} else if (reg->flags & IR3_REG_SSA) {
+		/* special hack for trans_samp(), which calls ssa_src() directly
+		 * to build up the collect (fanin) for a const src, so the SSA
+		 * flag is set but there is no src instr.  It basically gets
+		 * lucky because we default to 0.0 for "undefined" src
+		 * instructions, which is what it wants.  We probably need to
+		 * give it a better way to do this, but for now this hack:
+		 */
+		reg->instr = create_immed(ctx, 0.0);
 	}
 }
 
@@ -689,11 +778,23 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
 		instr = ir3_instr_create(ctx->block, -1, OPC_META_DEREF);
 		ir3_reg_create(instr, 0, 0);
 		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->block->address;
+
+		if (src->File != TGSI_FILE_CONSTANT) {
+			unsigned aid = array_id(ctx, src);
+			unsigned off = src->Index - ctx->array[aid].first; /* vec4 offset */
+			instr->deref.off = regid(off, chan);
+		}
 	}
 
 	reg = ir3_reg_create(instr, regid(num, chan), flags);
 
-	reg->wrmask = wrmask;
+	if (src->Indirect && (src->File != TGSI_FILE_CONSTANT)) {
+		unsigned aid = array_id(ctx, src);
+		reg->size = 4 * (1 + ctx->array[aid].last - ctx->array[aid].first);
+	} else {
+		reg->wrmask = wrmask;
+	}
+
 	if (wrmask == 0x1) {
 		/* normal case */
 		ssa_src(ctx, reg, src, chan);
@@ -729,8 +830,11 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx,
 	}
 
 	if (src->Indirect) {
+		unsigned size = reg->size;
+
 		reg = ir3_reg_create(orig, 0, flags | IR3_REG_SSA);
 		reg->instr = instr;
+		reg->size = size;
 	}
 	return reg;
 }
@@ -2990,11 +3094,26 @@ compile_instructions(struct ir3_compile_context *ctx)
 		case TGSI_TOKEN_TYPE_DECLARATION: {
 			struct tgsi_full_declaration *decl =
 					&ctx->parser.FullToken.FullDeclaration;
-			if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+			unsigned file = decl->Declaration.File;
+			if (file == TGSI_FILE_OUTPUT) {
 				decl_out(ctx, decl);
-			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+			} else if (file == TGSI_FILE_INPUT) {
 				decl_in(ctx, decl);
 			}
+
+			if ((file != TGSI_FILE_CONSTANT) && decl->Declaration.Array) {
+				int aid = decl->Array.ArrayID + ctx->array_offsets[file];
+
+				compile_assert(ctx, aid < ARRAY_SIZE(ctx->array));
+
+				/* legacy ArrayID==0 stuff probably isn't going to work
+				 * well (and is at least untested).. let's just scream:
+				 */
+				compile_assert(ctx, aid != 0);
+
+				ctx->array[aid].first = decl->Range.First;
+				ctx->array[aid].last  = decl->Range.Last;
+			}
 			break;
 		}
 		case TGSI_TOKEN_TYPE_IMMEDIATE: {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index 0854046..eaeba0a 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -199,6 +199,14 @@ static void compute_liveregs(struct ir3_ra_ctx *ctx,
 		if (r)
 			regmask_set_if_not(liveregs, r, &written);
 	}
+
+	/* if instruction is output, we need a reg that isn't written
+	 * before the end.. equiv to the instr_used_by() check above
+	 * in the loop body
+	 * TODO maybe should follow fanin/fanout?
+	 */
+	if (instr_is_output(instr))
+		regmask_or(liveregs, liveregs, &written);
 }
 
 static int find_available(regmask_t *liveregs, int size, bool half)
@@ -364,6 +372,14 @@ static void instr_assign_src(struct ir3_ra_ctx *ctx,
 		case OPC_META_FI:
 			instr_assign(ctx, instr, name - (r - 1));
 			return;
+		case OPC_META_DEREF:
+			/* first arg of meta:deref is the addr reg (do not
+			 * propagate), 2nd is actual src (fanin) which does
+			 * get propagated)
+			 */
+			if (r == 2)
+				instr_assign(ctx, instr, name + instr->deref.off);
+			break;
 		default:
 			break;
 		}
@@ -467,7 +483,7 @@ static void instr_alloc_and_assign(struct ir3_ra_ctx *ctx,
 		/* already partially assigned, just finish the job */
 	} else if (is_addr(instr)) {
 		debug_assert(!instr->cp.right);
-		name = instr->regs[2]->num;
+		name = instr->regs[2]->num + instr->deref.off;
 	} else if (reg_gpr(dst)) {
 		int size;
 		/* number of consecutive registers to assign: */




More information about the mesa-commit mailing list