Mesa (master): freedreno/ir3/cp: extract valid_flags

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jun 16 21:14:02 UTC 2020


Module: Mesa
Branch: master
Commit: a60d48a863d8e9d459562b874c463cd798f47a87
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a60d48a863d8e9d459562b874c463cd798f47a87

Author: Rob Clark <robdclark at chromium.org>
Date:   Thu May 14 09:31:32 2020 -0700

freedreno/ir3/cp: extract valid_flags

We'll also need this in the postsched-cp pass.

Signed-off-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5280>

---

 src/freedreno/ir3/ir3.c    | 170 ++++++++++++++++++++++++++++++++++++++++++
 src/freedreno/ir3/ir3.h    |   2 +
 src/freedreno/ir3/ir3_cp.c | 180 ++-------------------------------------------
 3 files changed, 178 insertions(+), 174 deletions(-)

diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 5f4b86d610d..463c7664332 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -1285,3 +1285,173 @@ ir3_fixup_src_type(struct ir3_instruction *instr)
 		break;
 	}
 }
+
+static unsigned
+cp_flags(unsigned flags)
+{
+	/* only considering these flags (at least for now): */
+	flags &= (IR3_REG_CONST | IR3_REG_IMMED |
+			IR3_REG_FNEG | IR3_REG_FABS |
+			IR3_REG_SNEG | IR3_REG_SABS |
+			IR3_REG_BNOT | IR3_REG_RELATIV);
+	return flags;
+}
+
+bool
+ir3_valid_flags(struct ir3_instruction *instr, unsigned n,
+		unsigned flags)
+{
+	struct ir3_compiler *compiler = instr->block->shader->compiler;
+	unsigned valid_flags;
+
+	if ((flags & IR3_REG_HIGH) &&
+			(opc_cat(instr->opc) > 1) &&
+			(compiler->gpu_id >= 600))
+		return false;
+
+	flags = cp_flags(flags);
+
+	/* If destination is indirect, then source cannot be.. at least
+	 * I don't think so..
+	 */
+	if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
+			(flags & IR3_REG_RELATIV))
+		return false;
+
+	if (flags & IR3_REG_RELATIV) {
+		/* TODO need to test on earlier gens.. pretty sure the earlier
+		 * problem was just that we didn't check that the src was from
+		 * same block (since we can't propagate address register values
+		 * across blocks currently)
+		 */
+		if (compiler->gpu_id < 600)
+			return false;
+
+		/* NOTE in the special try_swap_mad_two_srcs() case we can be
+		 * called on a src that has already had an indirect load folded
+		 * in, in which case ssa() returns NULL
+		 */
+		if (instr->regs[n+1]->flags & IR3_REG_SSA) {
+			struct ir3_instruction *src = ssa(instr->regs[n+1]);
+			if (src->address->block != instr->block)
+				return false;
+		}
+	}
+
+	switch (opc_cat(instr->opc)) {
+	case 1:
+		valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
+		if (flags & ~valid_flags)
+			return false;
+		break;
+	case 2:
+		valid_flags = ir3_cat2_absneg(instr->opc) |
+				IR3_REG_CONST | IR3_REG_RELATIV;
+
+		if (ir3_cat2_int(instr->opc))
+			valid_flags |= IR3_REG_IMMED;
+
+		if (flags & ~valid_flags)
+			return false;
+
+		if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
+			unsigned m = (n ^ 1) + 1;
+			/* cannot deal w/ const in both srcs:
+			 * (note that some cat2 actually only have a single src)
+			 */
+			if (m < instr->regs_count) {
+				struct ir3_register *reg = instr->regs[m];
+				if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
+					return false;
+				if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
+					return false;
+			}
+		}
+		break;
+	case 3:
+		valid_flags = ir3_cat3_absneg(instr->opc) |
+				IR3_REG_CONST | IR3_REG_RELATIV;
+
+		if (flags & ~valid_flags)
+			return false;
+
+		if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
+			/* cannot deal w/ const/relativ in 2nd src: */
+			if (n == 1)
+				return false;
+		}
+
+		break;
+	case 4:
+		/* seems like blob compiler avoids const as src.. */
+		/* TODO double check if this is still the case on a4xx */
+		if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
+			return false;
+		if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
+			return false;
+		break;
+	case 5:
+		/* no flags allowed */
+		if (flags)
+			return false;
+		break;
+	case 6:
+		valid_flags = IR3_REG_IMMED;
+		if (flags & ~valid_flags)
+			return false;
+
+		if (flags & IR3_REG_IMMED) {
+			/* doesn't seem like we can have immediate src for store
+			 * instructions:
+			 *
+			 * TODO this restriction could also apply to load instructions,
+			 * but for load instructions this arg is the address (and not
+			 * really sure any good way to test a hard-coded immed addr src)
+			 */
+			if (is_store(instr) && (n == 1))
+				return false;
+
+			if ((instr->opc == OPC_LDL) && (n == 0))
+				return false;
+
+			if ((instr->opc == OPC_STL) && (n != 2))
+				return false;
+
+			if (instr->opc == OPC_STLW && n == 0)
+				return false;
+
+			if (instr->opc == OPC_LDLW && n == 0)
+				return false;
+
+			/* disallow immediates in anything but the SSBO slot argument for
+			 * cat6 instructions:
+			 */
+			if (is_atomic(instr->opc) && (n != 0))
+				return false;
+
+			if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
+				return false;
+
+			if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
+				return false;
+
+			/* as with atomics, these cat6 instrs can only have an immediate
+			 * for SSBO/IBO slot argument
+			 */
+			switch (instr->opc) {
+			case OPC_LDIB:
+			case OPC_LDC:
+			case OPC_RESINFO:
+				if (n != 0)
+					return false;
+				break;
+			default:
+				break;
+			}
+		}
+
+		break;
+	}
+
+	return true;
+}
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 90086294de8..1d37afe0ad6 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -620,6 +620,8 @@ void ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps);
 void ir3_set_dst_type(struct ir3_instruction *instr, bool half);
 void ir3_fixup_src_type(struct ir3_instruction *instr);
 
+bool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags);
+
 #include "util/set.h"
 #define foreach_ssa_use(__use, __instr) \
 	for (struct ir3_instruction *__use = (void *)~0; \
diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c
index b95827e97e9..ed116a9f3b7 100644
--- a/src/freedreno/ir3/ir3_cp.c
+++ b/src/freedreno/ir3/ir3_cp.c
@@ -100,174 +100,6 @@ static bool is_eligible_mov(struct ir3_instruction *instr,
 	return false;
 }
 
-static unsigned cp_flags(unsigned flags)
-{
-	/* only considering these flags (at least for now): */
-	flags &= (IR3_REG_CONST | IR3_REG_IMMED |
-			IR3_REG_FNEG | IR3_REG_FABS |
-			IR3_REG_SNEG | IR3_REG_SABS |
-			IR3_REG_BNOT | IR3_REG_RELATIV);
-	return flags;
-}
-
-static bool valid_flags(struct ir3_instruction *instr, unsigned n,
-		unsigned flags)
-{
-	struct ir3_compiler *compiler = instr->block->shader->compiler;
-	unsigned valid_flags;
-
-	if ((flags & IR3_REG_HIGH) &&
-			(opc_cat(instr->opc) > 1) &&
-			(compiler->gpu_id >= 600))
-		return false;
-
-	flags = cp_flags(flags);
-
-	/* If destination is indirect, then source cannot be.. at least
-	 * I don't think so..
-	 */
-	if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
-			(flags & IR3_REG_RELATIV))
-		return false;
-
-	if (flags & IR3_REG_RELATIV) {
-		/* TODO need to test on earlier gens.. pretty sure the earlier
-		 * problem was just that we didn't check that the src was from
-		 * same block (since we can't propagate address register values
-		 * across blocks currently)
-		 */
-		if (compiler->gpu_id < 600)
-			return false;
-
-		/* NOTE in the special try_swap_mad_two_srcs() case we can be
-		 * called on a src that has already had an indirect load folded
-		 * in, in which case ssa() returns NULL
-		 */
-		if (instr->regs[n+1]->flags & IR3_REG_SSA) {
-			struct ir3_instruction *src = ssa(instr->regs[n+1]);
-			if (src->address->block != instr->block)
-				return false;
-		}
-	}
-
-	switch (opc_cat(instr->opc)) {
-	case 1:
-		valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
-		if (flags & ~valid_flags)
-			return false;
-		break;
-	case 2:
-		valid_flags = ir3_cat2_absneg(instr->opc) |
-				IR3_REG_CONST | IR3_REG_RELATIV;
-
-		if (ir3_cat2_int(instr->opc))
-			valid_flags |= IR3_REG_IMMED;
-
-		if (flags & ~valid_flags)
-			return false;
-
-		if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
-			unsigned m = (n ^ 1) + 1;
-			/* cannot deal w/ const in both srcs:
-			 * (note that some cat2 actually only have a single src)
-			 */
-			if (m < instr->regs_count) {
-				struct ir3_register *reg = instr->regs[m];
-				if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
-					return false;
-				if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
-					return false;
-			}
-		}
-		break;
-	case 3:
-		valid_flags = ir3_cat3_absneg(instr->opc) |
-				IR3_REG_CONST | IR3_REG_RELATIV;
-
-		if (flags & ~valid_flags)
-			return false;
-
-		if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
-			/* cannot deal w/ const/relativ in 2nd src: */
-			if (n == 1)
-				return false;
-		}
-
-		break;
-	case 4:
-		/* seems like blob compiler avoids const as src.. */
-		/* TODO double check if this is still the case on a4xx */
-		if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
-			return false;
-		if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
-			return false;
-		break;
-	case 5:
-		/* no flags allowed */
-		if (flags)
-			return false;
-		break;
-	case 6:
-		valid_flags = IR3_REG_IMMED;
-		if (flags & ~valid_flags)
-			return false;
-
-		if (flags & IR3_REG_IMMED) {
-			/* doesn't seem like we can have immediate src for store
-			 * instructions:
-			 *
-			 * TODO this restriction could also apply to load instructions,
-			 * but for load instructions this arg is the address (and not
-			 * really sure any good way to test a hard-coded immed addr src)
-			 */
-			if (is_store(instr) && (n == 1))
-				return false;
-
-			if ((instr->opc == OPC_LDL) && (n == 0))
-				return false;
-
-			if ((instr->opc == OPC_STL) && (n != 2))
-				return false;
-
-			if (instr->opc == OPC_STLW && n == 0)
-				return false;
-
-			if (instr->opc == OPC_LDLW && n == 0)
-				return false;
-
-			/* disallow immediates in anything but the SSBO slot argument for
-			 * cat6 instructions:
-			 */
-			if (is_atomic(instr->opc) && (n != 0))
-				return false;
-
-			if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
-				return false;
-
-			if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
-				return false;
-
-			/* as with atomics, these cat6 instrs can only have an immediate
-			 * for SSBO/IBO slot argument
-			 */
-			switch (instr->opc) {
-			case OPC_LDIB:
-			case OPC_LDC:
-			case OPC_RESINFO:
-				if (n != 0)
-					return false;
-				break;
-			default:
-				break;
-			}
-		}
-
-		break;
-	}
-
-	return true;
-}
-
 /* propagate register flags from src to dst.. negates need special
  * handling to cancel each other out.
  */
@@ -326,7 +158,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
 	new_flags &= ~IR3_REG_IMMED;
 	new_flags |= IR3_REG_CONST;
 
-	if (!valid_flags(instr, n, new_flags))
+	if (!ir3_valid_flags(instr, n, new_flags))
 		return false;
 
 	unsigned swiz, idx, i;
@@ -455,9 +287,9 @@ try_swap_mad_two_srcs(struct ir3_instruction *instr, unsigned new_flags)
 
 	bool valid_swap =
 		/* can we propagate mov if we move 2nd src to first? */
-		valid_flags(instr, 0, new_flags) &&
+		ir3_valid_flags(instr, 0, new_flags) &&
 		/* and does first src fit in second slot? */
-		valid_flags(instr, 1, instr->regs[1 + 1]->flags);
+		ir3_valid_flags(instr, 1, instr->regs[1 + 1]->flags);
 
 	if (!valid_swap) {
 		/* put things back the way they were: */
@@ -487,7 +319,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
 
 		combine_flags(&new_flags, src);
 
-		if (valid_flags(instr, n, new_flags)) {
+		if (ir3_valid_flags(instr, n, new_flags)) {
 			if (new_flags & IR3_REG_ARRAY) {
 				debug_assert(!(reg->flags & IR3_REG_ARRAY));
 				reg->array = src_reg->array;
@@ -512,7 +344,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
 
 		combine_flags(&new_flags, src);
 
-		if (!valid_flags(instr, n, new_flags)) {
+		if (!ir3_valid_flags(instr, n, new_flags)) {
 			/* See if lowering an immediate to const would help. */
 			if (lower_immed(ctx, instr, n, src_reg, new_flags))
 				return true;
@@ -613,7 +445,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
 				iim_val = ~iim_val;
 
 			/* other than category 1 (mov) we can only encode up to 10 bits: */
-			if (valid_flags(instr, n, new_flags) &&
+			if (ir3_valid_flags(instr, n, new_flags) &&
 					((instr->opc == OPC_MOV) ||
 					 !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff)))) {
 				new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);



More information about the mesa-commit mailing list