Mesa (master): r300/compiler: Implement the texture semaphore

Tom Stellard tstellar at kemper.freedesktop.org
Sat Oct 15 01:40:43 UTC 2011


Module: Mesa
Branch: master
Commit: 163629fd05166b78d70c2c26f4a922b296e8999d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=163629fd05166b78d70c2c26f4a922b296e8999d

Author: Tom Stellard <tstellar at gmail.com>
Date:   Thu May 12 23:13:21 2011 -0700

r300/compiler: Implement the texture semaphore

The texture semaphore allows texture data to be prefetched.  On my
RV515, this increases the FPS of Lightsmark by 33%.  (That figure was
measured with the reg_rename pass enabled; the pass itself is turned on
in the next commit.)

There is a new environment variable, RADEON_TEX_GROUP, which sets the
maximum number of texture lookups to issue as one group.  It is only
consulted by the new R500 scheduling path.  The default is 8, but
different values could produce better results for various
application / card combinations.

---

 .../drivers/r300/compiler/r500_fragprog_emit.c     |    9 +-
 .../drivers/r300/compiler/radeon_pair_schedule.c   |  328 +++++++++++++++++---
 src/gallium/drivers/r300/compiler/radeon_program.h |    4 +
 .../drivers/r300/compiler/radeon_program_pair.h    |    1 +
 .../drivers/r300/compiler/radeon_program_print.c   |    9 +-
 src/gallium/drivers/r300/r300_reg.h                |    6 +-
 6 files changed, 308 insertions(+), 49 deletions(-)

diff --git a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
index 1ceef3b..bfd4aa8 100644
--- a/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
+++ b/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
@@ -262,7 +262,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
 	} else {
 		code->inst[ip].inst0 = R500_INST_TYPE_ALU;
 	}
-	code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
+	code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
 
 	code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
 	code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
@@ -380,9 +380,9 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
 
 	code->inst[ip].inst0 = R500_INST_TYPE_TEX
 		| (inst->DstReg.WriteMask << 11)
-		| R500_INST_TEX_SEM_WAIT;
+		| (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
 	code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
-		| R500_TEX_SEM_ACQUIRE;
+		| (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
 
 	if (inst->TexSrcTarget == RC_TEXTURE_RECT)
 		code->inst[ip].inst1 |= R500_TEX_UNSCALED;
@@ -650,6 +650,9 @@ void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
 		}
 	}
 
+	/* Make sure TEX_SEM_WAIT is set on the last instruction */
+	code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
+
 	if (code->max_temp_idx >= compiler->Base.max_temp_regs)
 		rc_error(&compiler->Base, "Too many hardware temporaries used");
 
diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
index ee868cc..e03afb4 100644
--- a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
@@ -32,7 +32,10 @@
 #include "radeon_compiler.h"
 #include "radeon_compiler_util.h"
 #include "radeon_dataflow.h"
+#include "radeon_list.h"
+#include "radeon_variable.h"
 
+#include "util/u_debug.h"
 
 #define VERBOSE 0
 
@@ -65,6 +68,17 @@ struct schedule_instruction {
 	 * PairedInst references the alpha insturction's dependency information.
 	 */
 	struct schedule_instruction * PairedInst;
+
+	/** This scheduler uses the value of Score to determine which
+	 * instruction to schedule.  Instructions with a higher value of Score
+	 * will be scheduled first. */
+	int Score;
+
+	/** The number of components that read from a TEX instruction. */
+	unsigned TexReadCount;
+
+	/** For TEX instructions a list of readers */
+	struct rc_list * TexReaders;
 };
 
 
@@ -131,6 +145,12 @@ struct schedule_state {
 	struct schedule_instruction *ReadyAlpha;
 	struct schedule_instruction *ReadyTEX;
 	/*@}*/
+	struct rc_list *PendingTEX;
+
+	void (*CalcScore)(struct schedule_instruction *);
+	long max_tex_group;
+	unsigned PrevBlockHasTex:1;
+	unsigned TEXCount;
 };
 
 static struct reg_value ** get_reg_valuep(struct schedule_state * s,
@@ -147,6 +167,29 @@ static struct reg_value ** get_reg_valuep(struct schedule_state * s,
 	return &s->Temporary[index].Values[chan];
 }
 
+/* TexReadCount of sinst plus that of its paired instruction, if any. */
+static unsigned get_tex_read_count(struct schedule_instruction * sinst)
+{
+	unsigned total = sinst->TexReadCount;
+	if (sinst->PairedInst)
+		total += sinst->PairedInst->TexReadCount;
+	return total;
+}
+
+#if VERBOSE
+/* Debug helper: print "IP (score) [tex reads]," for each entry of a list. */
+static void print_list(struct schedule_instruction * sinst)
+{
+	struct schedule_instruction * ptr;
+	for (ptr = sinst; ptr; ptr = ptr->NextReady) {
+		/* Report each entry's own (signed) score, not the head's. */
+		fprintf(stderr, "%u (%d) [%u],", ptr->Instruction->IP,
+				ptr->Score, get_tex_read_count(ptr));
+	}
+	fprintf(stderr, "\n");
+}
+#endif
+
 static void remove_inst_from_list(struct schedule_instruction ** list,
 					struct schedule_instruction * inst)
 {
@@ -172,17 +215,28 @@ static void add_inst_to_list(struct schedule_instruction ** list, struct schedul
 	*list = inst;
 }
 
-static void add_inst_to_list_end(struct schedule_instruction ** list,
+static void add_inst_to_list_score(struct schedule_instruction ** list,
 					struct schedule_instruction * inst)
 {
-	if(!*list){
+	struct schedule_instruction * temp;
+	struct schedule_instruction * prev;
+	if (!*list) {
 		*list = inst;
-	}else{
-		struct schedule_instruction * temp = *list;
-		while(temp->NextReady){
-			temp = temp->NextReady;
-		}
-		temp->NextReady = inst;
+		return;
+	}
+	temp = *list;
+	prev = NULL;
+	while(temp && inst->Score <= temp->Score) {
+		prev = temp;
+		temp = temp->NextReady;
+	}
+
+	if (!prev) {
+		inst->NextReady = temp;
+		*list = inst;
+	} else {
+		prev->NextReady = inst;
+		inst->NextReady = temp;
 	}
 }
 
@@ -193,13 +247,13 @@ static void instruction_ready(struct schedule_state * s, struct schedule_instruc
 	/* Adding Ready TEX instructions to the end of the "Ready List" helps
 	 * us emit TEX instructions in blocks without losing our place. */
 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
-		add_inst_to_list_end(&s->ReadyTEX, sinst);
+		add_inst_to_list_score(&s->ReadyTEX, sinst);
 	else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
-		add_inst_to_list(&s->ReadyRGB, sinst);
+		add_inst_to_list_score(&s->ReadyRGB, sinst);
 	else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
-		add_inst_to_list(&s->ReadyAlpha, sinst);
+		add_inst_to_list_score(&s->ReadyAlpha, sinst);
 	else
-		add_inst_to_list(&s->ReadyFullALU, sinst);
+		add_inst_to_list_score(&s->ReadyFullALU, sinst);
 }
 
 static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
@@ -210,6 +264,68 @@ static void decrease_dependencies(struct schedule_state * s, struct schedule_ins
 		instruction_ready(s, sinst);
 }
 
+/* These functions provide different heuristics for scheduling instructions.
+ * The default is calc_score_readers. */
+
+#if 0
+
+static void calc_score_zero(struct schedule_instruction * sinst)
+{
+	sinst->Score = 0;
+}
+
+static void calc_score_deps(struct schedule_instruction * sinst)
+{
+	int i;
+	sinst->Score = 0;
+	for (i = 0; i < sinst->NumWriteValues; i++) {
+		struct reg_value * v = sinst->WriteValues[i];
+		if (v->NumReaders) {
+			struct reg_value_reader * r;
+			for (r = v->Readers; r; r = r->Next) {
+				if (r->Reader->NumDependencies == 1) {
+					sinst->Score += 100;
+				}
+				sinst->Score += r->Reader->NumDependencies;
+			}
+		}
+	}
+}
+
+#endif
+
+#define NO_READ_TEX_SCORE (1 << 16)
+#define NO_OUTPUT_SCORE (1 << 24)
+
+/* Default scheduling heuristic.  RC_INSTRUCTION_NORMAL entries score 0.
+ * Pair instructions score the number of values read by them and by their
+ * paired instruction, with NO_READ_TEX_SCORE or'd in when their tex read
+ * count is zero and NO_OUTPUT_SCORE or'd in when no OutputWriteMask is set. */
+static void calc_score_readers(struct schedule_instruction * sinst)
+{
+	struct schedule_instruction * paired = sinst->PairedInst;
+	int writes_output;
+
+	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
+		sinst->Score = 0;
+		return;
+	}
+
+	sinst->Score = sinst->NumReadValues;
+	if (paired)
+		sinst->Score += paired->NumReadValues;
+	if (get_tex_read_count(sinst) == 0)
+		sinst->Score |= NO_READ_TEX_SCORE;
+
+	writes_output = sinst->Instruction->U.P.RGB.OutputWriteMask
+			|| sinst->Instruction->U.P.Alpha.OutputWriteMask;
+	if (!writes_output && paired)
+		writes_output = paired->Instruction->U.P.RGB.OutputWriteMask
+			|| paired->Instruction->U.P.Alpha.OutputWriteMask;
+	if (!writes_output)
+		sinst->Score |= NO_OUTPUT_SCORE;
+}
+
 /**
  * This function decreases the dependencies of the next instruction that
  * wants to write to each of sinst's read values.
@@ -222,8 +338,9 @@ static void commit_update_reads(struct schedule_state * s,
 		assert(v->NumReaders > 0);
 		v->NumReaders--;
 		if (!v->NumReaders) {
-			if (v->Next)
+			if (v->Next) {
 				decrease_dependencies(s, v->Next->Writer);
+			}
 		}
 	}
 	if (sinst->PairedInst) {
@@ -256,13 +373,33 @@ static void commit_update_writes(struct schedule_state * s,
 	}
 }
 
+/* Decrement TexReadCount of every reader of every pending TEX, then clear. */
+static void notify_sem_wait(struct schedule_state *s)
+{
+	struct rc_list * tex_node = s->PendingTEX;
+	for (; tex_node; tex_node = tex_node->Next) {
+		struct schedule_instruction * tex = tex_node->Item;
+		struct rc_list * node = tex->TexReaders;
+		for (; node; node = node->Next) {
+			struct schedule_instruction * reader = node->Item;
+			reader->TexReadCount--;
+		}
+	}
+	s->PendingTEX = NULL;
+}
+
 static void commit_alu_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
 {
-	DBG("%i: commit\n", sinst->Instruction->IP);
+	DBG("%i: commit score = %d\n", sinst->Instruction->IP, sinst->Score);
 
 	commit_update_reads(s, sinst);
 
 	commit_update_writes(s, sinst);
+
+	if (get_tex_read_count(sinst) > 0) {
+		sinst->Instruction->U.P.SemWait = 1;
+		notify_sem_wait(s);
+	}
 }
 
 /**
@@ -277,6 +414,7 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo
 	struct rc_instruction * inst_begin;
 
 	assert(s->ReadyTEX);
+	notify_sem_wait(s);
 
 	/* Node marker for R300 */
 	inst_begin = rc_insert_new_instruction(s->C, before->Prev);
@@ -308,6 +446,12 @@ static void emit_all_tex(struct schedule_state * s, struct rc_instruction * befo
 	while(readytex){
 		DBG("%i: commit TEX writes\n", readytex->Instruction->IP);
 		commit_update_writes(s, readytex);
+		/* Set semaphore bits for last TEX instruction in the block */
+		if (!readytex->NextReady) {
+			readytex->Instruction->U.I.TexSemAcquire = 1;
+			readytex->Instruction->U.I.TexSemWait = 1;
+		}
+		rc_list_add(&s->PendingTEX, rc_list(&s->C->Pool, readytex));
 		readytex = readytex->NextReady;
 	}
 }
@@ -491,6 +635,9 @@ static int destructive_merge_instructions(
 		rgb->ALUResultCompare = alpha->ALUResultCompare;
 	}
 
+	/* Copy SemWait */
+	rgb->SemWait |= alpha->SemWait;
+
 	return 1;
 }
 
@@ -824,14 +971,14 @@ static void pair_instructions(struct schedule_state * s)
 					&& convert_rgb_to_alpha(s, rgb_ptr)) {
 
 			struct schedule_instruction * pair_ptr;
+			remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
+			add_inst_to_list_score(&s->ReadyAlpha, rgb_ptr);
+
 			for (pair_ptr = s->ReadyRGB; pair_ptr;
 					pair_ptr = pair_ptr->NextReady) {
-				if (pair_ptr == rgb_ptr) {
-					continue;
-				}
 				if (merge_instructions(&pair_ptr->Instruction->U.P,
 						&rgb_ptr->Instruction->U.P)) {
-					remove_inst_from_list(&s->ReadyRGB, rgb_ptr);
+					remove_inst_from_list(&s->ReadyAlpha, rgb_ptr);
 					remove_inst_from_list(&s->ReadyRGB, pair_ptr);
 					pair_ptr->PairedInst = rgb_ptr;
 
@@ -849,6 +996,68 @@ static void pair_instructions(struct schedule_state * s)
 	}
 }
 
+/* Rescore every entry of *list; track the best entry seen and its list. */
+static void update_max_score(
+	struct schedule_state * s,
+	struct schedule_instruction ** list,
+	int * max_score,
+	struct schedule_instruction ** max_inst_out,
+	struct schedule_instruction *** list_out)
+{
+	struct schedule_instruction * cand = *list;
+	while (cand) {
+		s->CalcScore(cand);
+		if (*max_inst_out == NULL || cand->Score > *max_score) {
+			*max_score = cand->Score;
+			*max_inst_out = cand;
+			*list_out = list;
+		}
+		cand = cand->NextReady;
+	}
+}
+
+/* Emit the ready TEX group or, otherwise, the best-scoring ready ALU. */
+static void emit_instruction(
+	struct schedule_state * s,
+	struct rc_instruction * before)
+{
+	int max_score = -1;
+	struct schedule_instruction * max_inst = NULL;
+	struct schedule_instruction ** max_list = NULL;
+	unsigned tex_count = 0;
+	struct schedule_instruction * tex_ptr;
+
+	pair_instructions(s);
+#if VERBOSE
+	fprintf(stderr, "Full:\n");
+	print_list(s->ReadyFullALU);
+	fprintf(stderr, "RGB:\n");
+	print_list(s->ReadyRGB);
+	fprintf(stderr, "Alpha:\n");
+	print_list(s->ReadyAlpha);
+	fprintf(stderr, "TEX:\n");
+	print_list(s->ReadyTEX);
+#endif
+
+	for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
+		tex_count++;
+	}
+	update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
+	update_max_score(s, &s->ReadyRGB, &max_score, &max_inst, &max_list);
+	update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
+
+	/* emit_all_tex() asserts a non-empty ReadyTEX, so only take the TEX
+	 * path when a TEX is ready (RADEON_TEX_GROUP may be <= 0). */
+	if (tex_count > 0 && ((long)tex_count >= s->max_tex_group
+		|| max_score == -1 || tex_count == s->TEXCount)) {
+		emit_all_tex(s, before);
+	} else if (max_inst) {
+		remove_inst_from_list(max_list, max_inst);
+		rc_insert_instruction(before->Prev, max_inst->Instruction);
+		commit_alu_instruction(s, max_inst);
+		presub_nop(before->Prev);
+	}
+}
 
 /**
  * Find a good ALU instruction or pair of ALU instruction and emit it.
@@ -860,6 +1069,7 @@ static void pair_instructions(struct schedule_state * s)
 static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
 {
 	struct schedule_instruction * sinst;
+	int rgb_score = -1, alpha_score = -1;
 
 	/* Try to merge RGB and Alpha instructions together. */
 	pair_instructions(s);
@@ -871,6 +1081,12 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
 		commit_alu_instruction(s, sinst);
 	} else {
 		if (s->ReadyRGB) {
+			rgb_score = s->ReadyRGB->Score;
+		}
+		if (s->ReadyAlpha) {
+			alpha_score = s->ReadyAlpha->Score;
+		}
+		if (rgb_score > alpha_score) {
 			sinst = s->ReadyRGB;
 			s->ReadyRGB = s->ReadyRGB->NextReady;
 		} else if (s->ReadyAlpha) {
@@ -924,6 +1140,12 @@ static void scan_read(void * data, struct rc_instruction * inst,
 		/* Only update the current instruction's dependencies if the
 		 * register it reads from has been written to in this block. */
 		if ((*v)->Writer) {
+			if ((*v)->Writer->Instruction->Type ==
+						RC_INSTRUCTION_NORMAL) {
+				s->Current->TexReadCount++;
+				rc_list_add(&((*v)->Writer->TexReaders),
+					rc_list(&s->C->Pool, s->Current));
+			}
 			s->Current->NumDependencies++;
 		}
 	}
@@ -977,22 +1199,33 @@ static void is_rgb_to_alpha_possible_normal(
 
 }
 
-static void schedule_block(struct r300_fragment_program_compiler * c,
+static void schedule_block(struct schedule_state * s,
 		struct rc_instruction * begin, struct rc_instruction * end)
 {
-	struct schedule_state s;
 	unsigned int ip;
 
-	memset(&s, 0, sizeof(s));
-	s.C = &c->Base;
-
 	/* Scan instructions for data dependencies */
 	ip = 0;
 	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
-		s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
-		memset(s.Current, 0, sizeof(struct schedule_instruction));
+		s->Current = memory_pool_malloc(&s->C->Pool, sizeof(*s->Current));
+		memset(s->Current, 0, sizeof(struct schedule_instruction));
+
+		if (inst->Type == RC_INSTRUCTION_NORMAL) {
+			const struct rc_opcode_info * info =
+					rc_get_opcode_info(inst->U.I.Opcode);
+			if (info->HasTexture) {
+				s->TEXCount++;
+			}
+		}
 
-		s.Current->Instruction = inst;
+		/* XXX: This causes SemWait to be set for all instructions in
+		 * a block if the previous block contained a TEX instruction.
+		 * We can do better here, but it will take a lot of work. */
+		if (s->PrevBlockHasTex) {
+			s->Current->TexReadCount = 1;
+		}
+
+		s->Current->Instruction = inst;
 		inst->IP = ip++;
 
 		DBG("%i: Scanning\n", inst->IP);
@@ -1001,17 +1234,18 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
 		 * counter-intuitive, to account for the case where an
 		 * instruction writes to the same register as it reads
 		 * from. */
-		rc_for_all_writes_chan(inst, &scan_write, &s);
-		rc_for_all_reads_chan(inst, &scan_read, &s);
+		rc_for_all_writes_chan(inst, &scan_write, s);
+		rc_for_all_reads_chan(inst, &scan_read, s);
 
-		DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
+		DBG("%i: Has %i dependencies\n", inst->IP, s->Current->NumDependencies);
 
-		if (!s.Current->NumDependencies)
-			instruction_ready(&s, s.Current);
+		if (!s->Current->NumDependencies) {
+			instruction_ready(s, s->Current);
+		}
 
 		/* Get global readers for possible RGB->Alpha conversion. */
-		s.Current->GlobalReaders.ExitOnAbort = 1;
-		rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
+		s->Current->GlobalReaders.ExitOnAbort = 1;
+		rc_get_readers(s->C, inst, &s->Current->GlobalReaders,
 				is_rgb_to_alpha_possible_normal,
 				is_rgb_to_alpha_possible, NULL);
 	}
@@ -1021,13 +1255,17 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
 	end->Prev = begin->Prev;
 
 	/* Schedule instructions back */
-	while(!s.C->Error &&
-	      (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
-		if (s.ReadyTEX)
-			emit_all_tex(&s, end);
+	while(!s->C->Error &&
+	      (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
+		if (s->C->is_r500) {
+			emit_instruction(s, end);
+		} else {
+			if (s->ReadyTEX)
+				emit_all_tex(s, end);
 
-		while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
-			emit_one_alu(&s, end);
+			while(!s->C->Error && (s->ReadyFullALU || s->ReadyRGB || s->ReadyAlpha))
+				emit_one_alu(s, end);
+		}
 	}
 }
 
@@ -1042,13 +1280,14 @@ static int is_controlflow(struct rc_instruction * inst)
 
 void rc_pair_schedule(struct radeon_compiler *cc, void *user)
 {
-	struct schedule_state s;
-
 	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
+	struct schedule_state s;
 	struct rc_instruction * inst = c->Base.Program.Instructions.Next;
 
 	memset(&s, 0, sizeof(s));
 	s.C = &c->Base;
+	s.CalcScore = calc_score_readers;
+	s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
 	while(inst != &c->Base.Program.Instructions) {
 		struct rc_instruction * first;
 
@@ -1063,6 +1302,11 @@ void rc_pair_schedule(struct radeon_compiler *cc, void *user)
 			inst = inst->Next;
 
 		DBG("Schedule one block\n");
-		schedule_block(c, first, inst);
+		memset(s.Temporary, 0, sizeof(s.Temporary));
+		s.TEXCount = 0;
+		schedule_block(&s, first, inst);
+		if (s.PendingTEX) {
+			s.PrevBlockHasTex = 1;
+		}
 	}
 }
diff --git a/src/gallium/drivers/r300/compiler/radeon_program.h b/src/gallium/drivers/r300/compiler/radeon_program.h
index 98e2353..e68be93 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program.h
+++ b/src/gallium/drivers/r300/compiler/radeon_program.h
@@ -109,6 +109,10 @@ struct rc_sub_instruction {
 	/** True if tex instruction should do shadow comparison */
 	unsigned int TexShadow:1;
 
+	/** Texture semaphore bits, emitted into the R500 TEX instruction words. */
+	unsigned int TexSemWait:1;
+	unsigned int TexSemAcquire:1;
+
 	/**R500 Only.  How to swizzle the result of a TEX lookup*/
 	unsigned int TexSwizzle:12;
 	/*@}*/
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_pair.h b/src/gallium/drivers/r300/compiler/radeon_program_pair.h
index 0e380e7..b6eb0eb 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_pair.h
+++ b/src/gallium/drivers/r300/compiler/radeon_program_pair.h
@@ -89,6 +89,7 @@ struct rc_pair_instruction {
 	unsigned int WriteALUResult:2;
 	unsigned int ALUResultCompare:3;
 	unsigned int Nop:1;
+	unsigned int SemWait:1;
 };
 
 typedef void (*rc_pair_foreach_src_fn)
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c
index 38713a9..dc40d7f 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_print.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c
@@ -293,10 +293,12 @@ static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst,
 	}
 
 	if (opcode->HasTexture) {
-		fprintf(f, ", %s%s[%u]",
+		fprintf(f, ", %s%s[%u]%s%s",
 			textarget_to_string(inst->U.I.TexSrcTarget),
 			inst->U.I.TexShadow ? "SHADOW" : "",
-			inst->U.I.TexSrcUnit);
+			inst->U.I.TexSrcUnit,
+			inst->U.I.TexSemWait ? " SEM_WAIT" : "",
+			inst->U.I.TexSemAcquire ? " SEM_ACQUIRE" : "");
 	}
 
 	fprintf(f, ";");
@@ -348,6 +350,9 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
 			presubtract_op_to_string(
 					inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
 	}
+	if (inst->SemWait) {
+		fprintf(f, " SEM_WAIT");
+	}
 	fprintf(f, "\n");
 
 	if (inst->RGB.Opcode != RC_OPCODE_NOP) {
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 2787f6f..5e1d810 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -3244,7 +3244,8 @@ enum {
 #   define R500_INST_TYPE_OUT				(1 << 0)
 #   define R500_INST_TYPE_FC				(2 << 0)
 #   define R500_INST_TYPE_TEX				(3 << 0)
-#   define R500_INST_TEX_SEM_WAIT			(1 << 2)
+#   define R500_INST_TEX_SEM_WAIT_SHIFT			2
+#   define R500_INST_TEX_SEM_WAIT			(1 << R500_INST_TEX_SEM_WAIT_SHIFT)
 #   define R500_INST_RGB_PRED_SEL_NONE			(0 << 3)
 #   define R500_INST_RGB_PRED_SEL_RGBA			(1 << 3)
 #   define R500_INST_RGB_PRED_SEL_RRRR			(2 << 3)
@@ -3426,7 +3427,8 @@ enum {
 #   define R500_TEX_INST_LODBIAS			(4 << 22)
 #   define R500_TEX_INST_LOD				(5 << 22)
 #   define R500_TEX_INST_DXDY				(6 << 22)
-#   define R500_TEX_SEM_ACQUIRE				(1 << 25)
+#   define R500_TEX_SEM_ACQUIRE_SHIFT			25
+#   define R500_TEX_SEM_ACQUIRE				(1 << R500_TEX_SEM_ACQUIRE_SHIFT)
 #   define R500_TEX_IGNORE_UNCOVERED			(1 << 26)
 #   define R500_TEX_UNSCALED				(1 << 27)
 #define R300_US_W_FMT					0x46b4




More information about the mesa-commit mailing list