[Mesa-dev] [PATCH 14/16] freedreno: a2xx: add ir2 copy propagation

Jonathan Marek jonathan at marek.ca
Wed Dec 19 16:40:03 UTC 2018


Two cases:
* replacing srcs which refer to MOV instructions
* replacing MOVs used to write to exports

Signed-off-by: Jonathan Marek <jonathan at marek.ca>
---
 .../drivers/freedreno/Makefile.sources        |   1 +
 src/gallium/drivers/freedreno/a2xx/ir2.c      |   6 +
 src/gallium/drivers/freedreno/a2xx/ir2_cp.c   | 225 ++++++++++++++++++
 .../drivers/freedreno/a2xx/ir2_private.h      |   3 +
 src/gallium/drivers/freedreno/meson.build     |   1 +
 5 files changed, 236 insertions(+)
 create mode 100644 src/gallium/drivers/freedreno/a2xx/ir2_cp.c

diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources
index 8421318081..f4979953e8 100644
--- a/src/gallium/drivers/freedreno/Makefile.sources
+++ b/src/gallium/drivers/freedreno/Makefile.sources
@@ -71,6 +71,7 @@ a2xx_SOURCES := \
 	a2xx/ir2.c \
 	a2xx/ir2.h \
 	a2xx/ir2_assemble.c \
+	a2xx/ir2_cp.c \
 	a2xx/ir2_nir.c \
 	a2xx/ir2_private.h \
 	a2xx/ir2_ra.c
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2.c b/src/gallium/drivers/freedreno/a2xx/ir2.c
index 344f62defe..bc1d7c23b8 100644
--- a/src/gallium/drivers/freedreno/a2xx/ir2.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir2.c
@@ -422,9 +422,15 @@ ir2_compile(struct fd2_shader_stateobj *so, unsigned variant,
 	/* convert nir to internal representation */
 	ir2_nir_compile(&ctx, binning);
 
+	/* copy propagate srcs */
+	cp_src(&ctx);
+
 	/* get ref_counts and kill non-needed instructions */
 	ra_count_refs(&ctx);
 
+	/* remove movs used to write outputs */
+	cp_export(&ctx);
+
 	/* instruction order.. and vector->scalar conversions */
 	schedule_instrs(&ctx);
 
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_cp.c b/src/gallium/drivers/freedreno/a2xx/ir2_cp.c
new file mode 100644
index 0000000000..fa155887f8
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_cp.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2018 Jonathan Marek <jonathan at marek.ca>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Jonathan Marek <jonathan at marek.ca>
+ */
+
+#include "ir2_private.h"
+
+static bool is_mov(struct ir2_instr *instr)
+{
+	bool vec_max = instr->type == IR2_ALU && instr->alu.vector_opc == MAXv;
+	return vec_max && instr->src_count == 1; /* single-src MAXv == mov */
+}
+
+/* fold the src "b" of a mov into "src", composing swizzle and modifiers */
+static void src_combine(struct ir2_src *src, struct ir2_src b)
+{
+	src->negate ^= src->abs ? 0 : b.negate; /* abs hides the inner negate */
+	src->abs |= b.abs;
+	src->swizzle = swiz_merge(b.swizzle, src->swizzle);
+	src->type = b.type;
+	src->num = b.num;
+}
+
+/* cp_src: copy propagation for srcs
+ * a src reading the SSA result of a mov from the same block is changed
+ * to read the src of that mov, with the swizzle/negate/abs folded in
+ * example:
+ *	ALU:      MAXv    R7 = C7, C7
+ *	ALU:      MULADDv R7 = R7, R10, R0.xxxx
+ * becomes:
+ *	ALU:      MULADDv R7 = C7, R10, R0.xxxx
+ */
+void cp_src(struct ir2_context *ctx)
+{
+	ir2_foreach_instr(instr, ctx) {
+		ir2_foreach_src(src, instr) {
+			/* loop so that chains of movs are folded entirely */
+			while (src->type == IR2_SRC_SSA) {
+				struct ir2_instr *mov = &ctx->instr[src->num];
+				struct ir2_src *from = &mov->src[0];
+
+				/* only look through a mov from the same block */
+				if (mov->block_idx != instr->block_idx)
+					break;
+				if (!is_mov(mov))
+					break;
+
+				/* abs can't be applied to a const src, and a
+				 * const src can only be used by alu instrs
+				 */
+				if (from->type == IR2_SRC_CONST &&
+					(src->abs || instr->type != IR2_ALU))
+					break;
+
+				src_combine(src, *from);
+			}
+		}
+	}
+}
+
+/* cp_export: replace mov to export when possible
+ * in the cp_src pass we bypass any mov instructions related
+ * to the src registers, but for exports we need something different:
+ * the instructions producing the value write to the export directly
+ * example:
+ *	ALU:      MAXv    R3.x___ = C9.x???, C9.x???
+ *	ALU:      MAXv    R3._y__ = R0.?x??, C8.?x??
+ *	ALU:      MAXv    export0 = R3.yyyx, R3.yyyx
+ * becomes:
+ *	ALU:      MAXv    export0.___w = C9.???x, C9.???x
+ *	ALU:      MAXv    export0.xyz_ = R0.xxx?, C8.xxx?
+ */
+void cp_export(struct ir2_context *ctx)
+{
+	struct ir2_instr *c[4], *ins[4];
+	struct ir2_src *src;
+	struct ir2_reg *reg;
+	unsigned ncomp;
+
+	ir2_foreach_instr(instr, ctx) {
+		if (!is_export(instr)) /* TODO */
+			continue;
+
+		if (!is_mov(instr))
+			continue;
+
+		src = &instr->src[0];
+
+		if (src->negate || src->abs) /* TODO handle these cases */
+			continue;
+
+		if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
+			continue;
+
+		reg = get_reg_src(ctx, src);
+		ncomp = dst_ncomp(instr);
+
+		unsigned reswiz[4] = {};
+		unsigned num_instr = 0;
+
+		/* c[i]: instr writing export component i, ins[]: all the writers */
+		if (src->type == IR2_SRC_SSA) {
+			struct ir2_instr *def = &ctx->instr[src->num];
+
+			if (def->type != IR2_ALU)
+				continue;
+
+			for (int i = 0; i < ncomp; i++)
+				c[i] = def;
+
+			ins[num_instr++] = def;
+			reswiz[0] = src->swizzle;
+		} else {
+			bool ok = true;
+			unsigned write_mask = 0;
+
+			ir2_foreach_instr(w, ctx) {
+				if (w->is_ssa || w->reg != reg)
+					continue;
+
+				/* set by non-ALU */
+				if (w->type != IR2_ALU) {
+					ok = false;
+					break;
+				}
+
+				/* component written more than once */
+				if (write_mask & w->alu.write_mask) {
+					ok = false;
+					break;
+				}
+
+				write_mask |= w->alu.write_mask;
+
+				/* src pointers for components */
+				for (int i = 0, j = 0; i < 4; i++) {
+					unsigned k = swiz_get(src->swizzle, i);
+					if (w->alu.write_mask & 1 << k) {
+						c[i] = w;
+
+						/* reswiz = compressed src->swizzle */
+						unsigned x = 0;
+						for (int b = 0; b < k; b++)
+							x += !!(w->alu.write_mask & 1 << b);
+
+						assert(src->swizzle || x == j);
+						reswiz[num_instr] |= swiz_set(x, j++);
+					}
+				}
+				ins[num_instr++] = w;
+			}
+			if (!ok)
+				continue;
+		}
+
+		bool redirect = true;
+
+		/* must all be in same block */
+		for (int i = 0; i < ncomp; i++)
+			redirect &= (c[i]->block_idx == instr->block_idx);
+
+		/* no other instr using the value */
+		ir2_foreach_instr(p, ctx) {
+			if (p == instr)
+				continue;
+			ir2_foreach_src(use, p)
+				redirect &= reg != get_reg_src(ctx, use);
+		}
+
+		if (!redirect)
+			continue;
+
+		/* redirect the instructions writing to the register */
+		for (int i = 0; i < num_instr; i++) {
+			struct ir2_instr *p = ins[i];
+
+			p->alu.export = instr->alu.export;
+			p->alu.write_mask = 0;
+			p->is_ssa = true;
+			p->ssa.ncomp = 0;
+			memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
+			/* check the writer's opcode: its srcs are left as-is for these */
+			switch (p->alu.vector_opc) {
+			case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
+			case DOT2ADDv:
+			case DOT3v:
+			case DOT4v:
+			case CUBEv:
+				continue;
+			default:
+				break;
+			}
+			ir2_foreach_src(s, p)
+				swiz_merge_p(&s->swizzle, reswiz[i]);
+		}
+
+		for (int i = 0; i < ncomp; i++) {
+			c[i]->alu.write_mask |= (1 << i);
+			c[i]->ssa.ncomp++;
+		}
+		instr->type = IR2_NONE;
+		instr->need_emit = false;
+	}
+}
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_private.h b/src/gallium/drivers/freedreno/a2xx/ir2_private.h
index d1fbacd908..3bb3fe483d 100644
--- a/src/gallium/drivers/freedreno/a2xx/ir2_private.h
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_private.h
@@ -195,6 +195,9 @@ void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
 void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
 void ra_block_free(struct ir2_context *ctx, unsigned block);
 
+void cp_src(struct ir2_context *ctx);
+void cp_export(struct ir2_context *ctx);
+
 /* utils */
 enum {
 	IR2_SWIZZLE_Y = 1 << 0,
diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build
index 6f8d8bdcc4..3565432512 100644
--- a/src/gallium/drivers/freedreno/meson.build
+++ b/src/gallium/drivers/freedreno/meson.build
@@ -89,6 +89,7 @@ files_libfreedreno = files(
   'a2xx/ir2.c',
   'a2xx/ir2.h',
   'a2xx/ir2_assemble.c',
+  'a2xx/ir2_cp.c',
   'a2xx/ir2_nir.c',
   'a2xx/ir2_private.h',
   'a2xx/ir2_ra.c',
-- 
2.17.1



More information about the mesa-dev mailing list