Mesa (master): r300/compiler: Add a new function for more efficient dataflow analysis

Tom Stellard tstellar at kemper.freedesktop.org
Tue Oct 19 04:06:23 UTC 2010


Module: Mesa
Branch: master
Commit: 9d2ab6cb00e72fd8b53d0f97578758504b49ee23
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9d2ab6cb00e72fd8b53d0f97578758504b49ee23

Author: Tom Stellard <tstellar at gmail.com>
Date:   Sun Oct 10 12:39:00 2010 -0700

r300/compiler: Add a new function for more efficient dataflow analysis

rc_get_readers_normal() supplies a list of readers for a given
instruction.  This function is now being used by the copy propagate
optimization and will eventually be used by most other optimization
passes as well.

---

 src/mesa/drivers/dri/r300/compiler/Makefile        |    1 +
 src/mesa/drivers/dri/r300/compiler/SConscript      |    1 +
 .../dri/r300/compiler/radeon_compiler_util.c       |   61 +++++
 .../dri/r300/compiler/radeon_compiler_util.h       |   16 ++
 .../drivers/dri/r300/compiler/radeon_dataflow.c    |  252 ++++++++++++++++++++
 .../drivers/dri/r300/compiler/radeon_dataflow.h    |   28 +++
 .../drivers/dri/r300/compiler/radeon_optimize.c    |  170 +++----------
 7 files changed, 397 insertions(+), 132 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index d0eb170..51b896a 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -8,6 +8,7 @@ LIBNAME = r300compiler
 C_SOURCES = \
 		radeon_code.c \
 		radeon_compiler.c \
+		radeon_compiler_util.c \
 		radeon_emulate_branches.c \
 		radeon_emulate_loops.c \
 		radeon_program.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
index 847857b..2b4bce1 100755
--- a/src/mesa/drivers/dri/r300/compiler/SConscript
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -12,6 +12,7 @@ r300compiler = env.ConvenienceLibrary(
     source = [
         'radeon_code.c',
         'radeon_compiler.c',
+        'radeon_compiler_util.c',
         'radeon_program.c',
         'radeon_program_print.c',
         'radeon_opcodes.c',
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
new file mode 100644
index 0000000..97f4c75
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar at gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_compiler_util.h"
+
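+/** Builds a mask with one bit set per channel selector found in the four
+ * components of swz; bits outside RC_MASK_XYZW are discarded. */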
+unsigned int rc_swizzle_to_writemask(unsigned int swz)
+{
+	unsigned int mask = 0;
+	unsigned int i;
+
+	for(i = 0; i < 4; i++) {
+		mask |= 1 << GET_SWZ(swz, i);
+	}
+	mask &= RC_MASK_XYZW;
+
+	return mask;
+}
+
+unsigned int rc_src_reads_dst_mask(
+		rc_register_file src_file,
+		unsigned int src_idx,
+		unsigned int src_swz,
+		rc_register_file dst_file,
+		unsigned int dst_idx,
+		unsigned int dst_mask)
+{
+	if (src_file != dst_file || src_idx != dst_idx) {
+		return RC_MASK_NONE;
+	}
+	return dst_mask & rc_swizzle_to_writemask(src_swz);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
new file mode 100644
index 0000000..1a14e7c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
@@ -0,0 +1,16 @@
+#include "radeon_program_constants.h"
+
+#ifndef RADEON_PROGRAM_UTIL_H
+#define RADEON_PROGRAM_UTIL_H
+
+unsigned int rc_swizzle_to_writemask(unsigned int swz);
+
+unsigned int rc_src_reads_dst_mask(
+		rc_register_file src_file,
+		unsigned int src_idx,
+		unsigned int src_swz,
+		rc_register_file dst_file,
+		unsigned int dst_idx,
+		unsigned int dst_mask);
+
+#endif /* RADEON_PROGRAM_UTIL_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index a27d395..5927498 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar at gmail.com>
  *
  * All Rights Reserved.
  *
@@ -27,6 +28,8 @@
 
 #include "radeon_dataflow.h"
 
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
 #include "radeon_program.h"
 
 struct read_write_mask_data {
@@ -402,3 +405,252 @@ void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, v
 	else
 		remap_pair_instruction(inst, cb, userdata);
 }
+
+/**
+ * @return RC_OPCODE_NOP if inst is not a flow control instruction.
+ * @return The opcode of inst if it is a flow control instruction.
+ */
+static rc_opcode get_flow_control_inst(struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * info;
+	if (inst->Type == RC_INSTRUCTION_NORMAL) {
+		info = rc_get_opcode_info(inst->U.I.Opcode);
+	} else {
+		info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
+		/*A flow control instruction shouldn't have an alpha
+		 * instruction.*/
+		assert(!info->IsFlowControl ||
+				inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
+	}
+
+	if (info->IsFlowControl)
+		return info->Opcode;
+	else
+		return RC_OPCODE_NOP;
+
+}
+
+struct get_readers_callback_data {
+	struct radeon_compiler * C;
+	struct rc_reader_data * ReaderData;
+	rc_read_src_fn ReadCB;
+	rc_read_write_mask_fn WriteCB;
+	unsigned int AliveWriteMask;
+};
+
+static void add_reader(
+	struct memory_pool * pool,
+	struct rc_reader_data * data,
+	struct rc_instruction * inst,
+	unsigned int mask,
+	struct rc_src_register * src)
+{
+	struct rc_reader * new;
+	memory_pool_array_reserve(pool, struct rc_reader, data->Readers,
+				data->ReaderCount, data->ReadersReserved, 1);
+	new = &data->Readers[data->ReaderCount++];
+	new->Inst = inst;
+	new->WriteMask = mask;
+	new->Src = src;
+}
+
+/**
+ * This function is used by rc_get_readers_normal() to determine whether inst
+ * is a reader of userdata->ReaderData->Writer
+ */
+static void get_readers_normal_read_callback(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	struct get_readers_callback_data * d = userdata;
+	unsigned int read_mask;
+
+	if (src->RelAddr)
+		d->ReaderData->Abort = 1;
+
+	unsigned int shared_mask = rc_src_reads_dst_mask(src->File, src->Index,
+				src->Swizzle,
+				d->ReaderData->Writer->U.I.DstReg.File,
+				d->ReaderData->Writer->U.I.DstReg.Index,
+				d->AliveWriteMask);
+
+	if (shared_mask == RC_MASK_NONE)
+		return;
+
+	/* If we make it this far, it means that this source reads from the
+	 * same register written to by d->ReaderData->Writer. */
+
+	if (d->ReaderData->AbortOnRead) {
+		d->ReaderData->Abort = 1;
+		return;
+	}
+
+	read_mask = rc_swizzle_to_writemask(src->Swizzle);
+	/* XXX The behavior in this case should be configurable. */
+	if ((read_mask & d->AliveWriteMask) != read_mask) {
+		d->ReaderData->Abort = 1;
+		return;
+	}
+
+	d->ReadCB(d->ReaderData, inst, src);
+	if (d->ReaderData->Abort)
+		return;
+
+	add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, src);
+}
+
+/**
+ * This function is used by rc_get_readers_normal() to determine when
+ * userdata->ReaderData->Writer is dead (i.e. all components of its
+ * destination register have been overwritten by other instructions).
+ */
+static void get_readers_write_callback(
+	void *userdata,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct get_readers_callback_data * d = userdata;
+
+	if (index == d->ReaderData->Writer->U.I.DstReg.Index
+		&& file == d->ReaderData->Writer->U.I.DstReg.File) {
+			unsigned int shared_mask = mask
+				& d->ReaderData->Writer->U.I.DstReg.WriteMask;
+		if (d->ReaderData->InElse) {
+			if (shared_mask & d->AliveWriteMask) {
+				/* We set AbortOnRead here because the
+				 * destination register of d->ReaderData->Writer
+				 * is written to in both the IF and the
+				 * ELSE block of this IF/ELSE statement.
+				 * This means that readers of this
+				 * destination register that follow this IF/ELSE
+				 * statement use the value of different
+				 * instructions depending on the control flow
+				 * decisions made by the program. */
+				d->ReaderData->AbortOnRead = 1;
+			}
+		} else {
+			d->AliveWriteMask &= ~shared_mask;
+		}
+	}
+
+	d->WriteCB(d->ReaderData, inst, file, index, mask);
+}
+
+/**
+ * This function will create a list of readers via the rc_reader_data struct.
+ * This function will abort (set the flag data->Abort) and return if it
+ * encounters an instruction that reads from @param writer's destination but
+ * may also see a value written by a different instruction.  Some examples:
+ *
+ * writer = instruction 0;
+ * 0 MOV TEMP[0].xy, TEMP[1].xy
+ * 1 MOV TEMP[0].zw, TEMP[2].xy
+ * 2 MOV TEMP[3], TEMP[0]
+ * The Abort flag will be set on instruction 2, because it reads values written
+ * by instructions 0 and 1.
+ *
+ * writer = instruction 1;
+ * 0 IF TEMP[0].x
+ * 1 MOV TEMP[1], TEMP[2]
+ * 2 ELSE
+ * 3 MOV TEMP[1], TEMP[2]
+ * 4 ENDIF
+ * 5 MOV TEMP[3], TEMP[1]
+ * The Abort flag will be set on instruction 5, because it could read from the
+ * value written by either instruction 1 or 3, depending on the jump decision
+ * made at instruction 0.
+ *
+ * writer = instruction 0;
+ * 0 MOV TEMP[0], TEMP[1]
+ * 2 BGNLOOP
+ * 3 ADD TEMP[0], TEMP[0], none.1
+ * 4 ENDLOOP
+ * The Abort flag will be set on instruction 3, because in the first iteration
+ * of the loop it reads the value written by instruction 0 and in all other
+ * iterations it reads the value written by instruction 3.
+ *
+ * @param read_cb This function will be called for every instruction that
+ * has been determined to be a reader of writer.
+ * @param write_cb This function will be called for every instruction after
+ * writer.
+ */
+void  rc_get_readers_normal(
+	struct radeon_compiler * c,
+	struct rc_instruction * writer,
+	struct rc_reader_data * data,
+	rc_read_src_fn read_cb,
+	rc_read_write_mask_fn write_cb)
+{
+	struct rc_instruction * tmp;
+	struct get_readers_callback_data d;
+	unsigned int branch_depth = 0;
+
+	data->Writer = writer;
+	data->Abort = 0;
+	data->AbortOnRead = 0;
+	data->InElse = 0;
+	data->ReaderCount = 0;
+	data->ReadersReserved = 0;
+	data->Readers = NULL;
+
+	d.C = c;
+	d.AliveWriteMask = writer->U.I.DstReg.WriteMask;
+	d.ReaderData = data;
+	d.ReadCB = read_cb;
+	d.WriteCB = write_cb;
+
+	if (!writer->U.I.DstReg.WriteMask)
+		return;
+
+	for(tmp = writer->Next; tmp != &c->Program.Instructions;
+							tmp = tmp->Next){
+		rc_opcode opcode = get_flow_control_inst(tmp);
+		switch(opcode) {
+		case RC_OPCODE_BGNLOOP:
+			/* XXX We can do better when we see a BGNLOOP if we
+			 * add a flag called AbortOnWrite to struct
+			 * rc_reader_data and leave it set until the next
+			 * ENDLOOP. */
+		case RC_OPCODE_ENDLOOP:
+			/* XXX We can do better when we see an ENDLOOP by
+			 * searching backwards from writer and looking for
+			 * readers of writer's destination index.  If we find a
+			 * reader before we get to the BGNLOOP, we must abort
+			 * unless there is another writer between that reader
+			 * and the BGNLOOP. */
+			data->Abort = 1;
+			return;
+		case RC_OPCODE_IF:
+			branch_depth++;
+			break;
+		case RC_OPCODE_ELSE:
+			if (branch_depth == 0)
+				data->InElse = 1;
+			break;
+		case RC_OPCODE_ENDIF:
+			if (branch_depth == 0) {
+				data->AbortOnRead = 1;
+				data->InElse = 0;
+			}
+			else {
+				branch_depth--;
+			}
+			break;
+		default:
+			break;
+		}
+
+		if (!data->InElse)
+			rc_for_all_reads_src(tmp, get_readers_normal_read_callback, &d);
+		rc_for_all_writes_mask(tmp, get_readers_write_callback, &d);
+
+		if (data->Abort)
+			return;
+
+		if (!d.AliveWriteMask)
+			return;
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
index d10ae3c..7de6b98 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar at gmail.com>
  *
  * All Rights Reserved.
  *
@@ -35,6 +36,7 @@ struct rc_instruction;
 struct rc_swizzle_caps;
 struct rc_src_register;
 struct rc_pair_instruction_arg;
+struct rc_compiler;
 
 
 /**
@@ -66,6 +68,32 @@ typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * in
 void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
 /*@}*/
 
+struct rc_reader {
+	struct rc_instruction * Inst;
+	unsigned int WriteMask;
+	struct rc_src_register * Src;
+};
+
+struct rc_reader_data {
+	unsigned int Abort;
+	unsigned int AbortOnRead;
+	unsigned int InElse;
+	struct rc_instruction * Writer;
+
+	unsigned int ReaderCount;
+	unsigned int ReadersReserved;
+	struct rc_reader * Readers;
+
+	void * CbData;
+};
+
+void rc_get_readers_normal(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	struct rc_reader_data * data,
+	/*XXX: These should be their own function types. */
+	rc_read_src_fn read_cb,
+	rc_read_write_mask_fn write_cb);
 
 /**
  * Compiler passes based on dataflow analysis.
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index e895841..4d9120f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar at gmail.com>
  *
  * All Rights Reserved.
  *
@@ -28,6 +29,7 @@
 #include "radeon_dataflow.h"
 
 #include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
 #include "radeon_swizzle.h"
 
 struct peephole_state {
@@ -86,80 +88,60 @@ struct copy_propagate_state {
 	int BranchDepth;
 };
 
-/**
- * This is a callback function that is meant to be passed to
- * rc_for_all_reads_mask.  This function will be called once for each source
- * register in inst.
- * @param inst The instruction that the source register belongs to.
- * @param file The register file of the source register.
- * @param index The index of the source register.
- * @param mask The components of the source register that are being read from.
- */
 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
-		rc_register_file file, unsigned int index, unsigned int mask)
+						struct rc_src_register * src)
 {
-	struct copy_propagate_state * s = data;
+	rc_register_file file = src->File;
+	struct rc_reader_data * reader_data = data;
+	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
 
-	/* XXX This could probably be handled better. */
-	if (file == RC_FILE_ADDRESS) {
-		s->Conflict = 1;
+	/* It is possible to do copy propagation in this situation,
+	 * just not right now, see peephole_add_presub_inv() */
+	if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
+			(info->NumSrcRegs > 2 || info->HasTexture)) {
+		reader_data->Abort = 1;
 		return;
 	}
 
-	if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
+	/* XXX This could probably be handled better. */
+	if (file == RC_FILE_ADDRESS) {
+		reader_data->Abort = 1;
 		return;
+	}
 
 	/* These instructions cannot read from the constants file.
 	 * see radeonTransformTEX()
 	 */
-	if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
-			s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT &&
+	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
+			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
 				(inst->U.I.Opcode == RC_OPCODE_TEX ||
 				inst->U.I.Opcode == RC_OPCODE_TXB ||
 				inst->U.I.Opcode == RC_OPCODE_TXP ||
 				inst->U.I.Opcode == RC_OPCODE_KIL)){
-		s->Conflict = 1;
+		reader_data->Abort = 1;
 		return;
 	}
-	if ((mask & s->MovMask) == mask) {
-		if (s->SourceClobbered) {
-			s->Conflict = 1;
-		}
-	} else if ((mask & s->DefinedMask) == mask) {
-		/* read from something entirely written by other instruction: this is okay */
-	} else {
-		/* read from component combination that is not well-defined without
-		 * the MOV: cannot remove it */
-		s->Conflict = 1;
-	}
 }
 
 static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
 		rc_register_file file, unsigned int index, unsigned int mask)
 {
-	struct copy_propagate_state * s = data;
-
-	if (s->BranchDepth < 0)
-		return;
+	struct rc_reader_data * reader_data = data;
+	struct copy_propagate_state * s = reader_data->CbData;
 
-	if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
-		s->MovMask &= ~mask;
-		if (s->BranchDepth == 0)
-			s->DefinedMask |= mask;
-		else
-			s->DefinedMask &= ~mask;
-	}
-	if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
+	if (file == reader_data->Writer->U.I.SrcReg[0].File && index == reader_data->Writer->U.I.SrcReg[0].Index) {
 		if (mask & s->SourcedMask)
-			s->SourceClobbered = 1;
+			reader_data->AbortOnRead = 1;
 	} else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
-		s->SourceClobbered = 1;
+		reader_data->AbortOnRead = 1;
 	}
 }
 
 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
 {
 	struct copy_propagate_state s;
+	struct rc_reader_data reader_data;
+	unsigned int i;
 
 	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
 	    inst_mov->U.I.DstReg.RelAddr ||
@@ -173,95 +155,27 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
 	s.MovMask = inst_mov->U.I.DstReg.WriteMask;
 	s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
 
+	reader_data.CbData = &s;
+
 	for(unsigned int chan = 0; chan < 4; ++chan) {
 		unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
 		s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
 	}
 
-	/* 1st pass: Check whether all subsequent readers can be changed */
-	for(struct rc_instruction * inst = inst_mov->Next;
-	    inst != &c->Program.Instructions;
-	    inst = inst->Next) {
-		const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
-		/* XXX In the future we might be able to make the optimizer
-		 * smart enough to handle loops. */
-		if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
-				|| inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
-			return;
-		}
-
-		/* It is possible to do copy propigation in this situation,
-		 * just not right now, see peephole_add_presub_inv() */
-		if (inst_mov->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
-				(info->NumSrcRegs > 2 || info->HasTexture)) {
-			return;
-		}
-
-		rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s);
-		rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s);
-		if (s.Conflict)
-			return;
+	/* Get a list of all the readers of this MOV instruction. */
+	rc_get_readers_normal(c, inst_mov, &reader_data,
+			copy_propagate_scan_read, copy_propagate_scan_write);
 
-		if (s.BranchDepth >= 0) {
-			if (inst->U.I.Opcode == RC_OPCODE_IF) {
-				s.BranchDepth++;
-			} else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
-				|| inst->U.I.Opcode == RC_OPCODE_ELSE) {
-				s.BranchDepth--;
-				if (s.BranchDepth < 0) {
-					s.DefinedMask &= ~s.MovMask;
-					s.MovMask = 0;
-				}
-			}
-		}
-	}
-
-	if (s.Conflict)
+	if (reader_data.Abort || reader_data.ReaderCount == 0)
 		return;
 
-	/* 2nd pass: We can satisfy all readers, so switch them over all at once */
-	s.MovMask = inst_mov->U.I.DstReg.WriteMask;
-	s.BranchDepth = 0;
-
-	for(struct rc_instruction * inst = inst_mov->Next;
-	    inst != &c->Program.Instructions;
-	    inst = inst->Next) {
-		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
-		for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
-			if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
-			    inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
-				unsigned int refmask = 0;
-
-				for(unsigned int chan = 0; chan < 4; ++chan) {
-					unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
-					refmask |= (1 << swz) & RC_MASK_XYZW;
-				}
-
-				if ((refmask & s.MovMask) == refmask) {
-					inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
-					if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
-						inst->U.I.PreSub = s.Mov->U.I.PreSub;
-				}
-			}
-		}
-
-		if (opcode->HasDstReg) {
-			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
-			    inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
-				s.MovMask &= ~inst->U.I.DstReg.WriteMask;
-			}
-		}
+	/* Propagate the MOV instruction. */
+	for (i = 0; i < reader_data.ReaderCount; i++) {
+		struct rc_instruction * inst = reader_data.Readers[i].Inst;
+		*reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, s.Mov->U.I.SrcReg[0]);
 
-		if (s.BranchDepth >= 0) {
-			if (inst->U.I.Opcode == RC_OPCODE_IF) {
-				s.BranchDepth++;
-			} else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
-				|| inst->U.I.Opcode == RC_OPCODE_ELSE) {
-				s.BranchDepth--;
-				if (s.BranchDepth < 0)
-					break; /* no more readers after this point */
-			}
-		}
+		if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
+			inst->U.I.PreSub = s.Mov->U.I.PreSub;
 	}
 
 	/* Finally, remove the original MOV instruction */
@@ -497,18 +411,10 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *
 static unsigned int src_reads_dst_mask(struct rc_src_register src,
 						struct rc_dst_register dst)
 {
-	unsigned int mask = 0;
-	unsigned int i;
 	if (dst.File != src.File || dst.Index != src.Index) {
 		return 0;
 	}
-
-	for(i = 0; i < 4; i++) {
-		mask |= 1 << GET_SWZ(src.Swizzle, i);
-	}
-	mask &= RC_MASK_XYZW;
-
-	return mask;
+	return rc_swizzle_to_writemask(src.Swizzle);
 }
 
 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)




More information about the mesa-commit mailing list