Mesa (master): freedreno/ir3: cache defining instruction

Rob Clark robclark at kemper.freedesktop.org
Tue Jun 30 16:20:41 UTC 2015


Module: Mesa
Branch: master
Commit: 00b6b41482985ba4a81fbb479a47c06ec83f3797
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=00b6b41482985ba4a81fbb479a47c06ec83f3797

Author: Rob Clark <robclark at freedesktop.org>
Date:   Mon Jun 29 14:49:08 2015 -0400

freedreno/ir3: cache defining instruction

It is silly to traverse back to find the first instruction that writes part
of a larger "virtual" register many times per instruction (plus once per use
as a src to later instructions).  Cache this information so we only
figure it out once.

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/ir3/ir3.c    |    7 +-
 src/gallium/drivers/freedreno/ir3/ir3.h    |    2 +-
 src/gallium/drivers/freedreno/ir3/ir3_ra.c |  151 ++++++++++++++++------------
 3 files changed, 91 insertions(+), 69 deletions(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index 6f6dad5..1da6cf0 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -722,15 +722,16 @@ ir3_clear_mark(struct ir3 *ir)
 }
 
 /* note: this will destroy instr->depth, don't do it until after sched! */
-void
+unsigned
 ir3_count_instructions(struct ir3 *ir)
 {
-	unsigned ip = 0;
+	unsigned cnt = 0;
 	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
 		list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
-			instr->ip = ip++;
+			instr->ip = cnt++;
 		}
 		block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip;
 		block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip;
 	}
+	return cnt;
 }
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 9c35a76..bc01445 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -431,7 +431,7 @@ static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
 void ir3_block_clear_mark(struct ir3_block *block);
 void ir3_clear_mark(struct ir3 *shader);
 
-void ir3_count_instructions(struct ir3 *ir);
+unsigned ir3_count_instructions(struct ir3 *ir);
 
 static inline int ir3_instr_regno(struct ir3_instruction *instr,
 		struct ir3_register *reg)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index 9f6ff12..de48ecf 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -241,6 +241,21 @@ ir3_ra_alloc_reg_set(void *memctx)
 	return set;
 }
 
+/* additional block-data (per-block) */
+struct ir3_ra_block_data {
+	BITSET_WORD *def;        /* variables defined before used in block */
+	BITSET_WORD *use;        /* variables used before defined in block */
+	BITSET_WORD *livein;     /* which defs reach entry point of block */
+	BITSET_WORD *liveout;    /* which defs reach exit point of block */
+};
+
+/* additional instruction-data (per-instruction) */
+struct ir3_ra_instr_data {
+	/* cached instruction 'definer' info: */
+	struct ir3_instruction *defn;
+	int off, sz, cls;
+};
+
 /* register-assign context, per-shader */
 struct ir3_ra_ctx {
 	struct ir3 *ir;
@@ -254,14 +269,7 @@ struct ir3_ra_ctx {
 	unsigned class_base[total_class_count];
 	unsigned instr_cnt;
 	unsigned *def, *use;     /* def/use table */
-};
-
-/* additional block-data (per-block) */
-struct ir3_ra_block_data {
-	BITSET_WORD *def;        /* variables defined before used in block */
-	BITSET_WORD *use;        /* variables used before defined in block */
-	BITSET_WORD *livein;     /* which defs reach entry point of block */
-	BITSET_WORD *liveout;    /* which defs reach exit point of block */
+	struct ir3_ra_instr_data *instrd;
 };
 
 static bool
@@ -307,12 +315,20 @@ writes_gpr(struct ir3_instruction *instr)
 }
 
 static struct ir3_instruction *
-get_definer(struct ir3_instruction *instr, int *sz, int *off)
+get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
+		int *sz, int *off)
 {
+	struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 	struct ir3_instruction *d = NULL;
 
 	if (instr->fanin)
-		return get_definer(instr->fanin, sz, off);
+		return get_definer(ctx, instr->fanin, sz, off);
+
+	if (id->defn) {
+		*sz = id->sz;
+		*off = id->off;
+		return id->defn;
+	}
 
 	if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
 		/* What about the case where collect is subset of array, we
@@ -330,7 +346,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
 			if (!src->instr)
 				continue;
 
-			dd = get_definer(src->instr, &dsz, &doff);
+			dd = get_definer(ctx, src->instr, &dsz, &doff);
 
 			if ((!d) || (dd->ip < d->ip)) {
 				d = dd;
@@ -339,7 +355,6 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
 			}
 		}
 
-
 	} else if (instr->cp.right || instr->cp.left) {
 		/* covers also the meta:fo case, which ends up w/ single
 		 * scalar instructions for each component:
@@ -394,7 +409,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
 		struct ir3_instruction *dd;
 		int dsz, doff;
 
-		dd = get_definer(phi, &dsz, &doff);
+		dd = get_definer(ctx, phi, &dsz, &doff);
 
 		*sz = MAX2(*sz, dsz);
 		*off = doff;
@@ -428,7 +443,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
 		struct ir3_instruction *dd;
 		int dsz, doff;
 
-		dd = get_definer(d->regs[1]->instr, &dsz, &doff);
+		dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff);
 
 		/* by definition, should come before: */
 		debug_assert(dd->ip < d->ip);
@@ -440,9 +455,25 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
 		d = dd;
 	}
 
+	id->defn = d;
+	id->sz = *sz;
+	id->off = *off;
+
 	return d;
 }
 
+static void
+ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
+{
+	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+		struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+		if (instr->regs_count == 0)
+			continue;
+		id->defn = get_definer(ctx, instr, &id->sz, &id->off);
+		id->cls = size_to_class(id->sz, is_half(id->defn));
+	}
+}
+
 /* give each instruction a name (and ip), and count up the # of names
  * of each class
  */
@@ -450,8 +481,7 @@ static void
 ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 {
 	list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
-		struct ir3_instruction *defn;
-		int cls, sz, off;
+		struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 
 #ifdef DEBUG
 		instr->name = ~0;
@@ -465,9 +495,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 		if (!writes_gpr(instr))
 			continue;
 
-		defn = get_definer(instr, &sz, &off);
-
-		if (defn != instr)
+		if (id->defn != instr)
 			continue;
 
 		/* arrays which don't fit in one of the pre-defined class
@@ -475,9 +503,8 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 		 *
 		 * TODO but we still need to allocate names for them, don't we??
 		 */
-		cls = size_to_class(sz, is_half(defn));
-		if (cls >= 0) {
-			instr->name = ctx->class_alloc_count[cls]++;
+		if (id->cls >= 0) {
+			instr->name = ctx->class_alloc_count[id->cls]++;
 			ctx->alloc_count++;
 		}
 	}
@@ -486,8 +513,16 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 static void
 ra_init(struct ir3_ra_ctx *ctx)
 {
+	unsigned n;
+
 	ir3_clear_mark(ctx->ir);
-	ir3_count_instructions(ctx->ir);
+	n = ir3_count_instructions(ctx->ir);
+
+	ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n);
+
+	list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+		ra_block_find_definers(ctx, block);
+	}
 
 	list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
 		ra_block_name_instructions(ctx, block);
@@ -503,6 +538,7 @@ ra_init(struct ir3_ra_ctx *ctx)
 	}
 
 	ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
+	ralloc_steal(ctx->g, ctx->instrd);
 	ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
 	ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
 }
@@ -570,39 +606,36 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 		 */
 
 		if (writes_gpr(instr)) {
-			struct ir3_instruction *defn;
-			int cls, sz, off;
+			struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 
-			defn = get_definer(instr, &sz, &off);
-			if (defn == instr) {
+			if (id->defn == instr) {
 				/* arrays which don't fit in one of the pre-defined class
 				 * sizes are pre-colored:
 				 */
-				cls = size_to_class(sz, is_half(defn));
-				if (cls >= 0) {
-					unsigned name = ra_name(ctx, cls, defn);
+				if (id->cls >= 0) {
+					unsigned name = ra_name(ctx, id->cls, id->defn);
 
-					ctx->def[name] = defn->ip;
-					ctx->use[name] = defn->ip;
+					ctx->def[name] = id->defn->ip;
+					ctx->use[name] = id->defn->ip;
 
 					/* since we are in SSA at this point: */
 					debug_assert(!BITSET_TEST(bd->use, name));
 
 					BITSET_SET(bd->def, name);
 
-					if (is_half(defn)) {
+					if (is_half(id->defn)) {
 						ra_set_node_class(ctx->g, name,
-								ctx->set->half_classes[cls - class_count]);
+								ctx->set->half_classes[id->cls - class_count]);
 					} else {
 						ra_set_node_class(ctx->g, name,
-								ctx->set->classes[cls]);
+								ctx->set->classes[id->cls]);
 					}
 
 					/* extend the live range for phi srcs, which may come
 					 * from the bottom of the loop
 					 */
-					if (defn->regs[0]->flags & IR3_REG_PHI_SRC) {
-						struct ir3_instruction *phi = defn->regs[0]->instr;
+					if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) {
+						struct ir3_instruction *phi = id->defn->regs[0]->instr;
 						foreach_ssa_src(src, phi) {
 							/* if src is after phi, then we need to extend
 							 * the liverange to the end of src's block:
@@ -621,13 +654,10 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
 
 		foreach_ssa_src(src, instr) {
 			if (writes_gpr(src)) {
-				struct ir3_instruction *srcdefn;
-				int cls, sz, off;
+				struct ir3_ra_instr_data *id = &ctx->instrd[src->ip];
 
-				srcdefn = get_definer(src, &sz, &off);
-				cls = size_to_class(sz, is_half(srcdefn));
-				if (cls >= 0) {
-					unsigned name = ra_name(ctx, cls, srcdefn);
+				if (id->cls >= 0) {
+					unsigned name = ra_name(ctx, id->cls, id->defn);
 					ctx->use[name] = MAX2(ctx->use[name], instr->ip);
 					if (!BITSET_TEST(bd->def, name))
 						BITSET_SET(bd->use, name);
@@ -719,13 +749,10 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
 	/* need to fix things up to keep outputs live: */
 	for (unsigned i = 0; i < ir->noutputs; i++) {
 		struct ir3_instruction *instr = ir->outputs[i];
-		struct ir3_instruction *defn;
-		int cls, sz, off;
+		struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 
-		defn = get_definer(instr, &sz, &off);
-		cls = size_to_class(sz, is_half(defn));
-		if (cls >= 0) {
-			unsigned name = ra_name(ctx, cls, defn);
+		if (id->cls >= 0) {
+			unsigned name = ra_name(ctx, id->cls, id->defn);
 			ctx->use[name] = ctx->instr_cnt;
 		}
 	}
@@ -795,15 +822,12 @@ static void
 reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
 		struct ir3_instruction *instr)
 {
-	struct ir3_instruction *defn;
-	int cls, sz, off;
+	struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 
-	defn = get_definer(instr, &sz, &off);
-	cls = size_to_class(sz, is_half(defn));
-	if (cls >= 0) {
-		unsigned name = ra_name(ctx, cls, defn);
+	if (id->cls >= 0) {
+		unsigned name = ra_name(ctx, id->cls, id->defn);
 		unsigned r = ra_get_node_reg(ctx->g, name);
-		unsigned num = ctx->set->ra_reg_to_gpr[r] + off;
+		unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
 
 		if (reg->flags & IR3_REG_RELATIV)
 			num += reg->offset;
@@ -811,7 +835,7 @@ reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
 		reg->num = num;
 		reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
 
-		if (is_half(defn))
+		if (is_half(id->defn))
 			reg->flags |= IR3_REG_HALF;
 	}
 }
@@ -866,19 +890,16 @@ ra_alloc(struct ir3_ra_ctx *ctx)
 		for (j = 0; i < ir->ninputs; i++) {
 			struct ir3_instruction *instr = ir->inputs[i];
 			if (instr) {
-				struct ir3_instruction *defn;
-				int cls, sz, off;
+				struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
 
-				defn = get_definer(instr, &sz, &off);
-				if (defn == instr) {
+				if (id->defn == instr) {
 					unsigned name, reg;
 
-					cls = size_to_class(sz, is_half(defn));
-					name = ra_name(ctx, cls, defn);
-					reg = ctx->set->gpr_to_ra_reg[cls][j];
+					name = ra_name(ctx, id->cls, id->defn);
+					reg = ctx->set->gpr_to_ra_reg[id->cls][j];
 
 					ra_set_node_reg(ctx->g, name, reg);
-					j += sz;
+					j += id->sz;
 				}
 			}
 		}




More information about the mesa-commit mailing list