Mesa (main): r300: merge simple movs with constant swizzles together

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sun Jun 5 21:47:43 UTC 2022


Module: Mesa
Branch: main
Commit: 6c2959c0256167bc97ed338e12e0543a967f2fc3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6c2959c0256167bc97ed338e12e0543a967f2fc3

Author: Pavel Ondračka <pavel.ondracka at gmail.com>
Date:   Fri May 20 11:11:07 2022 +0200

r300: merge simple movs with constant swizzles together

This pass will merge instructions like these

MOV output[0].x, temp[5].x___;
MOV output[0].yzw, none._001;

into

MOV output[0].xyzw, temp[5].x001;

It is currently very conservative about control flow and dependency
tracking, so there is still room for improvement.

Shader-db stats with RV530:
total instructions in shared programs: 132486 -> 132256 (-0.17%)
instructions in affected programs: 6186 -> 5956 (-3.72%)
helped: 65
HURT: 0
total temps in shared programs: 18035 -> 18014 (-0.12%)
temps in affected programs: 295 -> 274 (-7.12%)
helped: 22
HURT: 1

Signed-off-by: Pavel Ondračka <pavel.ondracka at gmail.com>
Reviewed-by: Filip Gawin <filip at gawin.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16657>

---

 .../drivers/r300/compiler/radeon_optimize.c        | 82 ++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c
index 5a9f4a529df..2609534187d 100644
--- a/src/gallium/drivers/r300/compiler/radeon_optimize.c
+++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c
@@ -887,6 +887,86 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
 	return 0;
 }
 
+/* Build a swizzle channel by channel: take the selector from swz1 where it
+ * is set, and fall back to the selector from swz2 where swz1 is unused.
+ */
+static unsigned int merge_swizzles(unsigned int swz1, unsigned int swz2) {
+	unsigned int merged = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+	for (unsigned int ch = 0; ch < 4; ch++) {
+		unsigned int primary = GET_SWZ(swz1, ch);
+		unsigned int picked = (primary == RC_SWIZZLE_UNUSED) ?
+			GET_SWZ(swz2, ch) : primary;
+		SET_SWZ(merged, ch, picked);
+	}
+	return merged;
+}
+
+/**
+ * Try to fold the MOV \p inst into a later MOV that writes other channels of
+ * the same destination register. The merge is only done when at least one of
+ * the two MOVs reads from RC_FILE_NONE (inline constant swizzles).
+ *
+ * The forward scan gives up at the first flow control instruction, at the
+ * first write overlapping the channels written by \p inst, at the first read
+ * of the destination register, and as soon as the register read by \p inst
+ * (or the address register, if that read is relatively addressed) is
+ * written, because merging moves that read down to the later MOV.
+ *
+ * \param c     compiler whose instruction list contains \p inst
+ * \param inst  the MOV instruction to merge into a later one
+ * \return 1 when \p inst was merged into a later MOV and removed from the
+ *         program, 0 when nothing was changed.
+ */
+static int merge_movs(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+	unsigned int orig_dst_reg = inst->U.I.DstReg.Index;
+	unsigned int orig_dst_file = inst->U.I.DstReg.File;
+	unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask;
+	unsigned int orig_src_reg = inst->U.I.SrcReg[0].Index;
+	unsigned int orig_src_file = inst->U.I.SrcReg[0].File;
+
+	/* Walk the instructions following inst. The list head
+	 * &c->Program.Instructions marks the end of the list, so stop
+	 * before looking at it instead of reading it as an instruction.
+	 */
+	for (struct rc_instruction * cur = inst->Next;
+	     cur != &c->Program.Instructions; cur = cur->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(cur->U.I.Opcode);
+
+		/* Keep it simple for now and stop when encountering any
+		 * control flow.
+		 */
+		if (opcode->IsFlowControl)
+			return 0;
+
+		/* Stop when the original destination is overwritten */
+		if (orig_dst_reg == cur->U.I.DstReg.Index &&
+			orig_dst_file == cur->U.I.DstReg.File &&
+			(orig_dst_wmask & cur->U.I.DstReg.WriteMask) != 0)
+			return 0;
+
+		/* Stop the search when the original instruction destination
+		 * is used as a source for anything.
+		 */
+		for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+			if (cur->U.I.SrcReg[i].File == orig_dst_file &&
+				cur->U.I.SrcReg[i].Index == orig_dst_reg)
+				return 0;
+		}
+
+		if (cur->U.I.Opcode == RC_OPCODE_MOV &&
+			cur->U.I.DstReg.File == orig_dst_file &&
+			cur->U.I.DstReg.Index == orig_dst_reg &&
+			(cur->U.I.DstReg.WriteMask & orig_dst_wmask) == 0) {
+
+			/* We can merge the movs if one of them is from inline constant */
+			if (cur->U.I.SrcReg[0].File == RC_FILE_NONE ||
+				orig_src_file == RC_FILE_NONE) {
+				/* Remember which channels the later MOV wrote
+				 * on its own before the masks are combined.
+				 */
+				unsigned int cur_wmask = cur->U.I.DstReg.WriteMask;
+
+				cur->U.I.DstReg.WriteMask |= orig_dst_wmask;
+
+				if (cur->U.I.SrcReg[0].File == RC_FILE_NONE) {
+					cur->U.I.SrcReg[0].File = orig_src_file;
+					cur->U.I.SrcReg[0].Index = orig_src_reg;
+					cur->U.I.SrcReg[0].Abs = inst->U.I.SrcReg[0].Abs;
+					cur->U.I.SrcReg[0].RelAddr = inst->U.I.SrcReg[0].RelAddr;
+				}
+				cur->U.I.SrcReg[0].Swizzle =
+					merge_swizzles(cur->U.I.SrcReg[0].Swizzle,
+							inst->U.I.SrcReg[0].Swizzle);
+
+				/* Negate is a per-channel mask. Take from each
+				 * MOV only the bits of the channels it writes,
+				 * so that a negate bit set on a channel one MOV
+				 * does not write cannot flip the sign of a
+				 * value supplied by the other MOV.
+				 */
+				cur->U.I.SrcReg[0].Negate =
+					(cur->U.I.SrcReg[0].Negate & cur_wmask) |
+					(inst->U.I.SrcReg[0].Negate & orig_dst_wmask);
+
+				/* finally delete the original mov */
+				rc_remove_instruction(inst);
+
+				return 1;
+			}
+		}
+
+		/* Merging moves the read of the original source down to the
+		 * later MOV, so stop once that source is overwritten. The MOV
+		 * merged above is not affected by this test, because an
+		 * instruction reads its sources before writing its destination.
+		 */
+		if (orig_src_file != RC_FILE_NONE &&
+			cur->U.I.DstReg.File == orig_src_file &&
+			cur->U.I.DstReg.Index == orig_src_reg)
+			return 0;
+
+		/* Likewise, a relatively addressed source must not be moved
+		 * past a write to the address register.
+		 */
+		if (inst->U.I.SrcReg[0].RelAddr &&
+			cur->U.I.DstReg.File == RC_FILE_ADDRESS)
+			return 0;
+	}
+	return 0;
+}
+
 void rc_optimize(struct radeon_compiler * c, void *user)
 {
 	struct rc_instruction * inst = c->Program.Instructions.Next;
@@ -900,6 +980,8 @@ void rc_optimize(struct radeon_compiler * c, void *user)
 			continue;
 
 		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
+			if (merge_movs(c,cur))
+				continue;
 			copy_propagate(c, cur);
 			/* cur may no longer be part of the program */
 		}



More information about the mesa-commit mailing list