[Mesa-dev] [PATCH] r300g: Increase fragment shader limits for r400 cards

Tom Stellard tstellar at gmail.com
Sat Jan 22 22:39:47 PST 2011


r400 fragment shaders now support up to 64 temporary registers,
512 ALU instructions, and 512 TEX instructions.
---
 src/gallium/drivers/r300/r300_fs.c                 |  111 +++++++++++++----
 src/gallium/drivers/r300/r300_reg.h                |    9 +-
 src/gallium/drivers/r300/r300_screen.c             |    6 -
 src/mesa/drivers/dri/r300/compiler/r300_fragprog.c |   43 +++++--
 .../drivers/dri/r300/compiler/r300_fragprog_emit.c |  133 +++++++++++++++++---
 src/mesa/drivers/dri/r300/compiler/radeon_code.h   |   16 ++-
 .../drivers/dri/r300/compiler/radeon_compiler.h    |    2 +
 src/mesa/drivers/dri/r300/r300_reg.h               |   44 +++++++
 8 files changed, 293 insertions(+), 71 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 6d4091d..dafc5cd 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -298,44 +298,98 @@ static void r300_emit_fs_code_to_buffer(
         }
     } else { /* r300 */
         struct r300_fragment_program_code *code = &generic_code->code.r300;
-
-        shader->cb_code_size = 19 +
-                               (r300->screen->caps.is_r400 ? 2 : 0) +
-                               code->alu.length * 4 +
-                               (code->tex.length ? (1 + code->tex.length) : 0) +
-                               imm_count * 5;
+        unsigned int alu_length = code->alu.length;
+        unsigned int alu_iterations = ((alu_length - 1) / 64) + 1;
+        unsigned int tex_length = code->tex.length;
+        unsigned int tex_iterations =
+            tex_length > 0 ? ((tex_length - 1) / 32) + 1 : 0;
+        unsigned int iterations =
+            alu_iterations > tex_iterations ? alu_iterations : tex_iterations;
+        unsigned int bank = 0;
+
+        shader->cb_code_size = 15 +
+            /* R400_US_CODE_BANK */
+            (r300->screen->caps.is_r400 ? 2 * (iterations + 1): 0) +
+            /* R400_US_CODE_EXT */
+            (r300->screen->caps.is_r400 ? 2 : 0) +
+            /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0, R400_US_ALU_EXT_ADDR_0 */
+            (code->r390_mode ? (5 * alu_iterations) : 4) +
+            /* R400_US_ALU_EXT_ADDR_[0-63] */
+            (code->r390_mode ? (code->alu.length) : 0) +
+            /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0 */
+            code->alu.length * 4 +
+            /* R300_US_TEX_INST_0, R300_US_TEX_INST_[0-31] */
+            (code->tex.length > 0 ? code->tex.length + tex_iterations : 0) +
+            imm_count * 5;
 
         NEW_CB(shader->cb_code, shader->cb_code_size);
 
-        if (r300->screen->caps.is_r400)
-            OUT_CB_REG(R400_US_CODE_BANK, 0);
-
         OUT_CB_REG(R300_US_CONFIG, code->config);
         OUT_CB_REG(R300_US_PIXSIZE, code->pixsize);
         OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset);
 
+        if (code->r390_mode) {
+            OUT_CB_REG(R400_US_CODE_EXT, code->r400_code_offset_ext);
+        } else if (r300->screen->caps.is_r400) {
+            /* This register appears to affect shaders even if r390_mode is
+             * disabled, so it needs to be set to 0 for shaders that
+             * don't use r390_mode. */
+            OUT_CB_REG(R400_US_CODE_EXT, 0);
+        }
+
         OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4);
         OUT_CB_TABLE(code->code_addr, 4);
 
-        OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
-        for (i = 0; i < code->alu.length; i++)
-            OUT_CB(code->alu.inst[i].rgb_inst);
+        do {
+            unsigned int bank_alu_length = (alu_length < 64 ? alu_length : 64);
+            unsigned int bank_alu_offset = bank * 64;
+            unsigned int bank_tex_length = (tex_length < 32 ? tex_length : 32);
+            unsigned int bank_tex_offset = bank * 32;
+
+            if (r300->screen->caps.is_r400) {
+                OUT_CB_REG(R400_US_CODE_BANK, code->r390_mode ?
+                                (bank << R400_BANK_SHIFT) | R400_R390_MODE_ENABLE : 0);//2
+            }
+
+            if (bank_alu_length > 0) {
+                OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, bank_alu_length);
+                for (i = 0; i < bank_alu_length; i++)
+                    OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_inst);
+
+                OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, bank_alu_length);
+                for (i = 0; i < bank_alu_length; i++)
+                    OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_addr);
 
-        OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
-        for (i = 0; i < code->alu.length; i++)
-            OUT_CB(code->alu.inst[i].rgb_addr);
+                OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, bank_alu_length);
+                for (i = 0; i < bank_alu_length; i++)
+                    OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_inst);
 
-        OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
-        for (i = 0; i < code->alu.length; i++)
-            OUT_CB(code->alu.inst[i].alpha_inst);
+                OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, bank_alu_length);
+                for (i = 0; i < bank_alu_length; i++)
+                    OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_addr);
+
+                if (code->r390_mode) {
+                    OUT_CB_REG_SEQ(R400_US_ALU_EXT_ADDR_0, bank_alu_length);
+                    for (i = 0; i < bank_alu_length; i++)
+                        OUT_CB(code->alu.inst[i + bank_alu_offset].r400_ext_addr);
+                }
+            }
+
+            if (bank_tex_length > 0) {
+                OUT_CB_REG_SEQ(R300_US_TEX_INST_0, bank_tex_length);
+                OUT_CB_TABLE(code->tex.inst + bank_tex_offset, bank_tex_length);
+            }
 
-        OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
-        for (i = 0; i < code->alu.length; i++)
-            OUT_CB(code->alu.inst[i].alpha_addr);
+            alu_length -= bank_alu_length;
+            tex_length -= bank_tex_length;
+            bank++;
+        } while(code->r390_mode && (alu_length > 0 || tex_length > 0));
 
-        if (code->tex.length) {
-            OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length);
-            OUT_CB_TABLE(code->tex.inst, code->tex.length);
+        /* R400_US_CODE_BANK needs to be reset to 0, otherwise some shaders
+         * will be rendered incorrectly. */
+        if (r300->screen->caps.is_r400) {
+            OUT_CB_REG(R400_US_CODE_BANK,
+                code->r390_mode ? R400_R390_MODE_ENABLE : 0);
         }
 
         /* Emit immediates. */
@@ -384,12 +438,17 @@ static void r300_translate_fragment_shader(
     compiler.code = &shader->code;
     compiler.state = shader->compare_state;
     compiler.Base.is_r500 = r300->screen->caps.is_r500;
+    compiler.Base.is_r400 = r300->screen->caps.is_r400;
     compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
     compiler.Base.has_half_swizzles = TRUE;
     compiler.Base.has_presub = TRUE;
-    compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
+    compiler.Base.max_temp_regs =
+        compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32);
     compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
-    compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
+    compiler.Base.max_alu_insts =
+        (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 64;
+    compiler.Base.max_tex_insts =
+        (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 32;
     compiler.AllocateHwInputs = &allocate_hardware_inputs;
     compiler.UserData = &shader->inputs;
 
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index d1154de..1d93dab 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -2162,14 +2162,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 /* R4xx extended fragment shader registers. */
 #define R400_US_ALU_EXT_ADDR_0              0x4ac0 /* up to 63 (0x4bbc) */
-#   define R400_ADDR0_EXT_RGB_MSB_BIT       0x01
-#   define R400_ADDR1_EXT_RGB_MSB_BIT       0x02
-#   define R400_ADDR2_EXT_RGB_MSB_BIT       0x04
+#   define R400_ADDR_EXT_RGB_MSB_BIT(x)     (1 << (x))
 #   define R400_ADDRD_EXT_RGB_MSB_BIT       0x08
-#   define R400_ADDR0_EXT_A_MSB_BIT         0x10
-#   define R400_ADDR1_EXT_A_MSB_BIT         0x20
-#   define R400_ADDR2_EXT_A_MSB_BIT         0x40
+#   define R400_ADDR_EXT_A_MSB_BIT(x)       (1 << ((x) + 4))
 #   define R400_ADDRD_EXT_A_MSB_BIT         0x80
+
 #define R400_US_CODE_BANK                   0x46b8
 #   define R400_BANK_SHIFT                  0
 #   define R400_BANK_MASK                   0xf
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index c75aeaa..fbacfae 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -90,9 +90,6 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
     boolean is_r400 = r300screen->caps.is_r400;
     boolean is_r500 = r300screen->caps.is_r500;
 
-    /* XXX extended shader capabilities of r400 unimplemented */
-    is_r400 = FALSE;
-
     switch (param) {
         /* Supported features (boolean caps). */
         case PIPE_CAP_NPOT_TEXTURES:
@@ -175,9 +172,6 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
    boolean is_r400 = r300screen->caps.is_r400;
    boolean is_r500 = r300screen->caps.is_r500;
 
-   /* XXX extended shader capabilities of r400 unimplemented */
-   is_r400 = FALSE;
-
    switch (shader)
     {
     case PIPE_SHADER_FRAGMENT:
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
index 782671b..deba9ca 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -49,6 +49,11 @@ static void presub_string(char out[10], unsigned int inst)
 	}
 }
 
+static int get_msb(unsigned int bit, unsigned int r400_ext_addr)
+{
+	return (r400_ext_addr & bit) != 0 ? 32 : 0; /* 32 == 1 << 5: sixth address bit */
+}
+
 /* just some random things... */
 void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
 {
@@ -61,16 +66,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
 
 	fprintf(stderr, "Hardware program\n");
 	fprintf(stderr, "----------------\n");
+	if (c->is_r400) {
+		fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext);
+	}
 
 	for (n = 0; n <= (code->config & 3); n++) {
 		uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
-		int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT;
-		int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT;
+		unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) +
+				(((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6);
+		unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) +
+				(((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);
 		int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
 		int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
 
-		fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
-			"alu_end: %d, tex_end: %d  (code_addr: %08x)\n", n,
+		fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, "
+			"alu_end: %u, tex_end: %d  (code_addr: %08x)\n", n,
 			alu_offset, tex_offset, alu_end, tex_end, code_addr);
 
 		if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
@@ -125,11 +135,15 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
 			for (j = 0; j < 3; ++j) {
 				int regc = code->alu.inst[i].rgb_addr >> (j * 6);
 				int rega = code->alu.inst[i].alpha_addr >> (j * 6);
+				int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j),
+					code->alu.inst[i].r400_ext_addr);
+				int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j),
+					code->alu.inst[i].r400_ext_addr);
 
 				sprintf(srcc[j], "%c%i",
-					(regc & 32) ? 'c' : 't', regc & 31);
+					(regc & 32) ? 'c' : 't', (regc & 31) | msbc);
 				sprintf(srca[j], "%c%i",
-					(rega & 32) ? 'c' : 't', rega & 31);
+					(rega & 32) ? 'c' : 't', (rega & 31) | msba);
 			}
 
 			dstc[0] = 0;
@@ -141,9 +155,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
 				(code->alu.inst[i].
 				 rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
 			if (flags[0] != 0) {
+				unsigned int msb = get_msb(
+					R400_ADDRD_EXT_RGB_MSB_BIT,
+					code->alu.inst[i].r400_ext_addr);
+
 				sprintf(dstc, "t%i.%s ",
-					(code->alu.inst[i].
-					 rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
+					((code->alu.inst[i].
+					 rgb_addr >> R300_ALU_DSTC_SHIFT)
+					 & 31) | msb,
 					flags);
 			}
 			sprintf(flags, "%s%s%s",
@@ -166,9 +185,13 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
 
 			dsta[0] = 0;
 			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
+				unsigned int msb = get_msb(
+					R400_ADDRD_EXT_A_MSB_BIT,
+					code->alu.inst[i].r400_ext_addr);
 				sprintf(dsta, "t%i.w ",
-					(code->alu.inst[i].
-					 alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
+					((code->alu.inst[i].
+					 alpha_addr >> R300_ALU_DSTA_SHIFT) & 31)
+					 | msb);
 			}
 			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
 				sprintf(tmp, "o%i.w ",
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 1db8678..28d132a 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -64,6 +64,20 @@ struct r300_emit_state {
 			__FILE__, __FUNCTION__, ##args);	\
 	} while(0)
 
+static unsigned int get_msbs_alu(unsigned int bits)
+{
+	return (bits & (0x7 << 6)) >> 6; /* bits 6..8 of the value */
+}
+
+/**
+ * @param lsbs Number of low bits kept in the base field; the next 4 bits are returned
+ */
+static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
+{
+	return (bits >> lsbs) & 0xf; /* MSB fields are 4 bits wide (0xf << 24 / 0xf << 28) */
+}
+
+#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask)) /* args parenthesized */
 
 /**
  * Mark a temporary register as used.
@@ -83,7 +97,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r
 		return src.Index | (1 << 5);
 	} else if (src.File == RC_FILE_TEMPORARY) {
 		use_temporary(code, src.Index);
-		return src.Index;
+		return src.Index & 0x1f;
 	}
 
 	return 0;
@@ -151,11 +165,19 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
 	code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
 
 	for(j = 0; j < 3; ++j) {
+		/* Set the RGB address */
 		unsigned int src = use_source(code, inst->RGB.Src[j]);
 		unsigned int arg;
+		if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
+			code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
+
 		code->alu.inst[ip].rgb_addr |= src << (6*j);
 
+		/* Set the Alpha address */
 		src = use_source(code, inst->Alpha.Src[j]);
+		if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
+			code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
+
 		code->alu.inst[ip].alpha_addr |= src << (6*j);
 
 		arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
@@ -223,8 +245,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
 
 	if (inst->RGB.WriteMask) {
 		use_temporary(code, inst->RGB.DestIndex);
+		if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
+			code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
 		code->alu.inst[ip].rgb_addr |=
-			(inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
+			((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
 			(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
 	}
 	if (inst->RGB.OutputWriteMask) {
@@ -236,8 +260,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
 
 	if (inst->Alpha.WriteMask) {
 		use_temporary(code, inst->Alpha.DestIndex);
+		if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
+			code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
 		code->alu.inst[ip].alpha_addr |=
-			(inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
+			((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
 			R300_ALU_DSTA_REG;
 	}
 	if (inst->Alpha.OutputWriteMask) {
@@ -269,6 +295,8 @@ static int finish_node(struct r300_emit_state * emit)
 	unsigned tex_offset;
 	unsigned tex_end;
 
+	unsigned int alu_offset_msbs, alu_end_msbs;
+
 	if (code->alu.length == emit->node_first_alu) {
 		/* Generate a single NOP for this node */
 		struct rc_pair_instruction inst;
@@ -301,13 +329,48 @@ static int finish_node(struct r300_emit_state * emit)
 	 *
 	 * Also note that the register specification from AMD is slightly
 	 * incorrect in its description of this register. */
-	code->code_addr[emit->current_node] =
-			(alu_offset << R300_ALU_START_SHIFT) |
-			(alu_end << R300_ALU_SIZE_SHIFT) |
-			(tex_offset << R300_TEX_START_SHIFT) |
-			(tex_end << R300_TEX_SIZE_SHIFT) |
-			emit->node_flags;
-
+	code->code_addr[emit->current_node]  =
+			((alu_offset << R300_ALU_START_SHIFT)
+				& R300_ALU_START_MASK)
+			| ((alu_end << R300_ALU_SIZE_SHIFT)
+				& R300_ALU_SIZE_MASK)
+			| ((tex_offset << R300_TEX_START_SHIFT)
+				& R300_TEX_START_MASK)
+			| ((tex_end << R300_TEX_SIZE_SHIFT)
+				& R300_TEX_SIZE_MASK)
+			| emit->node_flags
+			| (get_msbs_tex(tex_offset, 5)
+				<< R400_TEX_START_MSB_SHIFT)
+			| (get_msbs_tex(tex_end, 5)
+				<< R400_TEX_SIZE_MSB_SHIFT)
+			;
+
+	/* Write r400 extended instruction fields.  These will be ignored on
+	 * r300 cards.  */
+	alu_offset_msbs = get_msbs_alu(alu_offset);
+	alu_end_msbs = get_msbs_alu(alu_end);
+	switch(emit->current_node) {
+	case 0:
+		code->r400_code_offset_ext |=
+			alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
+			| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
+		break;
+	case 1:
+		code->r400_code_offset_ext |=
+			alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
+			| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
+		break;
+	case 2:
+		code->r400_code_offset_ext |=
+			alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
+			| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
+		break;
+	case 3:
+		code->r400_code_offset_ext |=
+			alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
+			| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
+		break;
+	}
 	return 1;
 }
 
@@ -348,7 +411,7 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
 	unsigned int opcode;
 	PROG_CODE;
 
-	if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
+	if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
 		error("Too many TEX instructions");
 		return 0;
 	}
@@ -376,10 +439,17 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
 	use_temporary(code, inst->U.I.SrcReg[0].Index);
 
 	code->tex.inst[code->tex.length++] =
-		(inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
-		(dest << R300_DST_ADDR_SHIFT) |
-		(unit << R300_TEX_ID_SHIFT) |
-		(opcode << R300_TEX_INST_SHIFT);
+		((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
+			& R300_SRC_ADDR_MASK)
+		| ((dest << R300_DST_ADDR_SHIFT)
+			& R300_DST_ADDR_MASK)
+		| (unit << R300_TEX_ID_SHIFT)
+		| (opcode << R300_TEX_INST_SHIFT)
+		| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
+			R400_SRC_ADDR_EXT_BIT : 0)
+		| (dest >= R300_PFS_NUM_TEMP_REGS ?
+			R400_DST_ADDR_EXT_BIT : 0)
+		;
 	return 1;
 }
 
@@ -393,6 +463,7 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
 	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
 	struct r300_emit_state emit;
 	struct r300_fragment_program_code *code = &compiler->code->code.r300;
+	unsigned int tex_end;
 
 	memset(&emit, 0, sizeof(emit));
 	emit.compiler = compiler;
@@ -424,11 +495,28 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
 	finish_node(&emit);
 
 	code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+
+	/* Set r400 extended instruction fields.  These values will be ignored
+	 * on r300 cards. */
+	code->r400_code_offset_ext |=
+		(get_msbs_alu(0)
+				<< R400_ALU_OFFSET_MSB_SHIFT)
+		| (get_msbs_alu(code->alu.length - 1)
+				<< R400_ALU_SIZE_MSB_SHIFT);
+
+	tex_end = code->tex.length ? code->tex.length - 1 : 0;
 	code->code_offset =
-		(0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
-		((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
-		(0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
-		((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
+		((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
+			& R300_PFS_CNTL_ALU_OFFSET_MASK)
+		| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
+			& R300_PFS_CNTL_ALU_END_MASK)
+		| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
+			& R300_PFS_CNTL_TEX_OFFSET_MASK)
+		| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
+			& R300_PFS_CNTL_TEX_END_MASK)
+		| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
+		| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
+		;
 
 	if (emit.current_node < 3) {
 		int shift = 3 - emit.current_node;
@@ -438,4 +526,11 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
 		for(i = 0; i < shift; ++i)
 			code->code_addr[i] = 0;
 	}
+
+	if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
+	    || code->alu.length > R300_PFS_MAX_ALU_INST
+	    || code->tex.length > R300_PFS_MAX_TEX_INST) {
+
+		code->r390_mode = 1;
+	}
 }
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index b69e816..d145166 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -31,6 +31,9 @@
 #define R300_PFS_NUM_TEMP_REGS    32
 #define R300_PFS_NUM_CONST_REGS   32
 
+#define R400_PFS_MAX_ALU_INST     512
+#define R400_PFS_MAX_TEX_INST     512
+
 #define R500_PFS_MAX_INST         512
 #define R500_PFS_NUM_TEMP_REGS    128
 #define R500_PFS_NUM_CONST_REGS   256
@@ -187,24 +190,29 @@ struct r300_fragment_program_node {
  */
 struct r300_fragment_program_code {
 	struct {
-		int length; /**< total # of texture instructions used */
-		uint32_t inst[R300_PFS_MAX_TEX_INST];
+		unsigned int length; /**< total # of texture instructions used */
+		uint32_t inst[R400_PFS_MAX_TEX_INST];
 	} tex;
 
 	struct {
-		int length; /**< total # of ALU instructions used */
+		unsigned int length; /**< total # of ALU instructions used */
 		struct {
 			uint32_t rgb_inst;
 			uint32_t rgb_addr;
 			uint32_t alpha_inst;
 			uint32_t alpha_addr;
-		} inst[R300_PFS_MAX_ALU_INST];
+			uint32_t r400_ext_addr;
+		} inst[R400_PFS_MAX_ALU_INST];
 	} alu;
 
 	uint32_t config; /* US_CONFIG */
 	uint32_t pixsize; /* US_PIXSIZE */
 	uint32_t code_offset; /* US_CODE_OFFSET */
+	uint32_t r400_code_offset_ext; /* US_CODE_EXT */
 	uint32_t code_addr[4]; /* US_CODE_ADDR */
+	/*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
+	 * for r400 cards */
+	unsigned int r390_mode:1;
 };
 
 
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index e663339..1e64af0 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -50,6 +50,7 @@ struct radeon_compiler {
 	char * ErrorMsg;
 
 	/* Hardware specification. */
+	unsigned is_r400:1;
 	unsigned is_r500:1;
 	unsigned has_half_swizzles:1;
 	unsigned has_presub:1;
@@ -57,6 +58,7 @@ struct radeon_compiler {
 	unsigned max_temp_regs;
 	unsigned max_constants;
 	int max_alu_insts;
+	unsigned max_tex_insts;
 
 	/* Whether to remove unused constants and empty holes in constant space. */
 	unsigned remove_unused_constants:1;
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index f7705b0..2b9d85f 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -1658,6 +1658,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_PFS_CNTL_TEX_OFFSET_MASK     (31 << 13)
 #       define R300_PFS_CNTL_TEX_END_SHIFT       18
 #       define R300_PFS_CNTL_TEX_END_MASK        (31 << 18)
+#       define R400_PFS_CNTL_TEX_OFFSET_MSB_SHIFT 24
+#       define R400_PFS_CNTL_TEX_OFFSET_MSB_MASK (0xf << 24)
+#       define R400_PFS_CNTL_TEX_END_MSB_SHIFT   28
+#       define R400_PFS_CNTL_TEX_END_MSB_MASK    (0xf << 28)
 
 /* gap */
 
@@ -1682,6 +1686,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_TEX_SIZE_MASK           (31 << 17)
 #	define R300_RGBA_OUT                (1 << 22)
 #	define R300_W_OUT                   (1 << 23)
+#       define R400_TEX_START_MSB_SHIFT     24
+#       define R400_TEX_START_MSG_MASK      (0xf << 24)
+#       define R400_TEX_SIZE_MSB_SHIFT      28
+#       define R400_TEX_SIZE_MSG_MASK       (0xf << 28)
 
 /* TEX
  * As far as I can tell, texture instructions cannot write into output
@@ -1702,6 +1710,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #		define R300_TEX_OP_TXP	        3
 #		define R300_TEX_OP_TXB	        4
 #	define R300_TEX_INST_MASK               (7 << 15)
+#      define R400_SRC_ADDR_EXT_BIT         (1 << 19)
+#      define R400_DST_ADDR_EXT_BIT         (1 << 20)
 
 /* Output format from the unfied shader */
 #define R300_US_OUT_FMT                     0x46A4
@@ -1979,6 +1989,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_ALU_OUTA_CLAMP              (1 << 30)
 /* END: Fragment program instruction set */
 
+/* R4xx extended fragment shader registers. */
+#define R400_US_ALU_EXT_ADDR_0              0x4ac0 /* up to 63 (0x4bbc) */
+#   define R400_ADDR_EXT_RGB_MSB_BIT(x)     (1 << (x))
+#   define R400_ADDRD_EXT_RGB_MSB_BIT       0x08
+#   define R400_ADDR_EXT_A_MSB_BIT(x)       (1 << ((x) + 4))
+#   define R400_ADDRD_EXT_A_MSB_BIT         0x80
+
+#define R400_US_CODE_BANK                   0x46b8
+#   define R400_BANK_SHIFT                  0
+#   define R400_BANK_MASK                   0xf
+#   define R400_R390_MODE_ENABLE            (1 << 4)
+#define R400_US_CODE_EXT                    0x46bc
+#   define R400_ALU_OFFSET_MSB_SHIFT        0
+#   define R400_ALU_OFFSET_MSB_MASK         (0x7 << 0)
+#   define R400_ALU_SIZE_MSB_SHIFT          3
+#   define R400_ALU_SIZE_MSB_MASK           (0x7 << 3)
+#   define R400_ALU_START0_MSB_SHIFT        6
+#   define R400_ALU_START0_MSB_MASK         (0x7 << 6)
+#   define R400_ALU_SIZE0_MSB_SHIFT         9
+#   define R400_ALU_SIZE0_MSB_MASK          (0x7 << 9)
+#   define R400_ALU_START1_MSB_SHIFT        12
+#   define R400_ALU_START1_MSB_MASK         (0x7 << 12)
+#   define R400_ALU_SIZE1_MSB_SHIFT         15
+#   define R400_ALU_SIZE1_MSB_MASK          (0x7 << 15)
+#   define R400_ALU_START2_MSB_SHIFT        18
+#   define R400_ALU_START2_MSB_MASK         (0x7 << 18)
+#   define R400_ALU_SIZE2_MSB_SHIFT         21
+#   define R400_ALU_SIZE2_MSB_MASK          (0x7 << 21)
+#   define R400_ALU_START3_MSB_SHIFT        24
+#   define R400_ALU_START3_MSB_MASK         (0x7 << 24)
+#   define R400_ALU_SIZE3_MSB_SHIFT         27
+#   define R400_ALU_SIZE3_MSB_MASK          (0x7 << 27)
+/* END: R4xx extended fragment shader registers. */
+
 /* Fog: Fog Blending Enable */
 #define R300_FG_FOG_BLEND                             0x4bc0
 #       define R300_FG_FOG_BLEND_DISABLE              (0 << 0)
-- 
1.7.2.5



More information about the mesa-dev mailing list