Mesa (master): r600g: rework literal handling

Christian König deathsimple at kemper.freedesktop.org
Thu Jan 13 22:01:46 UTC 2011


Module: Mesa
Branch: master
Commit: 96f8f8db7bcddec7ef0fce62cf0e23f1c2fb8c8d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=96f8f8db7bcddec7ef0fce62cf0e23f1c2fb8c8d

Author: Christian König <deathsimple at vodafone.de>
Date:   Wed Dec 22 17:45:51 2010 +0100

r600g: rework literal handling

---

 src/gallium/drivers/r600/r600_asm.c    |  189 +++++++++++++++++-----------
 src/gallium/drivers/r600/r600_asm.h    |    6 +-
 src/gallium/drivers/r600/r600_shader.c |  221 +++++---------------------------
 src/gallium/drivers/r600/r600_shader.h |    2 +-
 src/gallium/drivers/r600/r700_asm.c    |   10 --
 5 files changed, 151 insertions(+), 277 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index ca2bf93..e96236e 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -608,10 +608,90 @@ static int replace_gpr_with_pv_ps(struct r600_bc_alu *slots[5], struct r600_bc_a
 	return 0;
 }
 
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg)
+{
+	switch(value) {
+	case 0:
+		*sel = V_SQ_ALU_SRC_0;
+		break;
+	case 1:
+		*sel = V_SQ_ALU_SRC_1_INT;
+		break;
+	case -1:
+		*sel = V_SQ_ALU_SRC_M_1_INT;
+		break;
+	case 0x3F800000: // 1.0f
+		*sel = V_SQ_ALU_SRC_1;
+		break;
+	case 0x3F000000: // 0.5f
+		*sel = V_SQ_ALU_SRC_0_5;
+		break;
+	case 0xBF800000: // -1.0f
+		*sel = V_SQ_ALU_SRC_1;
+		*neg ^= 1;
+		break;
+	case 0xBF000000: // -0.5f
+		*sel = V_SQ_ALU_SRC_0_5;
+		*neg ^= 1;
+		break;
+	default:
+		*sel = V_SQ_ALU_SRC_LITERAL;
+		break;
+	}
+}
+
+/* compute how many literals are needed */
+static int r600_bc_alu_nliterals(struct r600_bc_alu *alu, uint32_t literal[4], unsigned *nliteral)
+{
+	unsigned num_src = r600_bc_get_num_operands(alu);
+	unsigned i, j;
+
+	for (i = 0; i < num_src; ++i) {
+		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+			uint32_t value = alu->src[i].value[alu->src[i].chan];
+			unsigned found = 0;
+			for (j = 0; j < *nliteral; ++j) {
+				if (literal[j] == value) {
+					found = 1;
+					break;
+				}
+			}
+			if (!found) {
+				if (*nliteral >= 4)
+					return -EINVAL;
+				literal[(*nliteral)++] = value;
+			}
+		}
+	}
+	return 0;
+}
+
+static void r600_bc_alu_adjust_literals(struct r600_bc_alu *alu, uint32_t literal[4], unsigned nliteral)
+{
+	unsigned num_src = r600_bc_get_num_operands(alu);
+	unsigned i, j;
+
+	for (i = 0; i < num_src; ++i) {
+		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+			uint32_t value = alu->src[i].value[alu->src[i].chan];
+			for (j = 0; j < nliteral; ++j) {
+				if (literal[j] == value) {
+					alu->src[i].chan = j;
+					break;
+				}
+			}
+		}
+	}
+}
+
 static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
 {
 	struct r600_bc_alu *prev[5];
 	struct r600_bc_alu *result[5] = { NULL };
+	
+	uint32_t literal[4];
+	unsigned nliteral = 0;
+
 	int i, j, r, src, num_src;
 	int num_once_inst = 0;
 
@@ -620,13 +700,12 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], s
 		return r;
 
 	for (i = 0; i < 5; ++i) {
-		// TODO: we have literals? forget it!
-		if (prev[i] && prev[i]->nliteral)
+		/* check number of literals */
+		if (prev[i] && r600_bc_alu_nliterals(prev[i], literal, &nliteral))
 			return 0;
-		if (slots[i] && slots[i]->nliteral)
+		if (slots[i] && r600_bc_alu_nliterals(slots[i], literal, &nliteral))
 			return 0;
 
-
 		// let's check used slots
 		if (prev[i] && !slots[i]) {
 			result[i] = prev[i];
@@ -834,7 +913,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 	if (nalu == NULL)
 		return -ENOMEM;
 	memcpy(nalu, alu, sizeof(struct r600_bc_alu));
-	nalu->nliteral = 0;
 
 	if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
 		/* check if we could add it anyway */
@@ -880,20 +958,10 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 		if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
 			bc->ngpr = nalu->src[i].sel + 1;
 		}
-		/* compute how many literal are needed
-		 * either 2 or 4 literals
-		 */
-		if (nalu->src[i].sel == 253) {
-			if (((nalu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
-				nalu->nliteral = (nalu->src[i].chan + 2) & 0x6;
-			}
-		}
-	}
-	if (!LIST_IS_EMPTY(&bc->cf_last->alu)) {
-		lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
-		if (!lalu->last && lalu->nliteral > nalu->nliteral) {
-			nalu->nliteral = lalu->nliteral;
-		}
+		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
+			r600_bc_special_constants(
+				nalu->src[i].value[nalu->src[i].chan], 
+				&nalu->src[i].sel, &nalu->src[i].neg);
 	}
 	if (nalu->dst.sel >= bc->ngpr) {
 		bc->ngpr = nalu->dst.sel + 1;
@@ -938,46 +1006,6 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
 	return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
 }
 
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
-{
-	struct r600_bc_alu *alu;
-
-	if (bc->cf_last == NULL) {
-		return 0;
-	}
-	if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
-		return 0;
-	}
-	/* all same on EG */
-	if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
-	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
-	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
-	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
-	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
-	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
-	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
-		return 0;
-	}
-	/* same on EG */
-	if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
-	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)) &&
-	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)) &&
-	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
-		LIST_IS_EMPTY(&bc->cf_last->alu)) {
-		R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
-		return -EINVAL;
-	}
-	alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
-	if (!alu->last || !alu->nliteral || alu->literal_added) {
-		return 0;
-	}
-	memcpy(alu->value, value, 4 * 4);
-	bc->cf_last->ndw += alu->nliteral;
-	bc->ndw += alu->nliteral;
-	alu->literal_added = 1;
-	return 0;
-}
-
 int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
 {
 	struct r600_bc_vtx *nvtx = r600_bc_vtx();
@@ -1134,8 +1162,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign
 /* r600 only, r700/eg bits in r700_asm.c */
 static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
 {
-	unsigned i;
-
 	/* don't replace gpr by pv or ps for destination register */
 	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
 				S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
@@ -1172,14 +1198,6 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
 					S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
 					S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
 	}
-	if (alu->last) {
-		if (alu->nliteral && !alu->literal_added) {
-			R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
-		}
-		for (i = 0; i < alu->nliteral; i++) {
-			bc->bytecode[id++] = alu->value[i];
-		}
-	}
 	return 0;
 }
 
@@ -1257,8 +1275,10 @@ int r600_bc_build(struct r600_bc *bc)
 	struct r600_bc_alu *alu;
 	struct r600_bc_vtx *vtx;
 	struct r600_bc_tex *tex;
+	uint32_t literal[4];
+	unsigned nliteral;
 	unsigned addr;
-	int r;
+	int i, r;
 
 	if (bc->callstack[0].max > 0)
 		bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
@@ -1275,6 +1295,16 @@ int r600_bc_build(struct r600_bc *bc)
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+			nliteral = 0;
+			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+				r = r600_bc_alu_nliterals(alu, literal, &nliteral);
+				if (r)
+					return r;
+				if (alu->last) {
+					cf->ndw += align(nliteral, 2);
+					nliteral = 0;
+				}
+			}
 			break;
 		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
 		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
@@ -1323,7 +1353,12 @@ int r600_bc_build(struct r600_bc *bc)
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+			nliteral = 0;
 			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+				r = r600_bc_alu_nliterals(alu, literal, &nliteral);
+				if (r)
+					return r;
+				r600_bc_alu_adjust_literals(alu, literal, nliteral);
 				switch(bc->chiprev) {
 				case CHIPREV_R600:
 					r = r600_bc_alu_build(bc, alu, addr);
@@ -1340,7 +1375,10 @@ int r600_bc_build(struct r600_bc *bc)
 					return r;
 				addr += 2;
 				if (alu->last) {
-					addr += alu->nliteral;
+					for (i = 0; i < align(nliteral, 2); ++i) {
+						bc->bytecode[addr++] = literal[i];
+					}
+					nliteral = 0;
 				}
 			}
 			break;
@@ -1427,6 +1465,8 @@ void r600_bc_dump(struct r600_bc *bc)
 	struct r600_bc_tex *tex = NULL;
 
 	unsigned i, id;
+	uint32_t literal[4];
+	unsigned nliteral;
 	char chip = '6';
 
 	switch (bc->chiprev) {
@@ -1513,7 +1553,10 @@ void r600_bc_dump(struct r600_bc *bc)
 		}
 
 		id = cf->addr;
+		nliteral = 0;
 		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+			r600_bc_alu_nliterals(alu, literal, &nliteral);
+
 			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
 			fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
 			fprintf(stderr, "REL:%d ", alu->src[0].rel);
@@ -1548,10 +1591,12 @@ void r600_bc_dump(struct r600_bc *bc)
 
 			id++;
 			if (alu->last) {
-				for (i = 0; i < alu->nliteral; i++, id++) {
+				for (i = 0; i < nliteral; i++, id++) {
 					float *f = (float*)(bc->bytecode + id);
 					fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f);
 				}
+				id += nliteral & 1;
+				nliteral = 0;
 			}
 		}
 
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 570292e..278b446 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -34,6 +34,7 @@ struct r600_bc_alu_src {
 	unsigned			neg;
 	unsigned			abs;
 	unsigned			rel;
+	u32				*value;
 };
 
 struct r600_bc_alu_dst {
@@ -52,11 +53,8 @@ struct r600_bc_alu {
 	unsigned			last;
 	unsigned			is_op3;
 	unsigned			predicate;
-	unsigned			nliteral;
-	unsigned			literal_added;
 	unsigned			bank_swizzle;
 	unsigned			bank_swizzle_force;
-	u32				value[4];
 	unsigned			omod;
 };
 
@@ -196,13 +194,13 @@ void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
 int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
 void r600_bc_clear(struct r600_bc *bc);
 int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu);
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value);
 int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
 int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
 int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
 int r600_bc_build(struct r600_bc *bc);
 int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
 int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);
 void r600_bc_dump(struct r600_bc *bc);
 void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
 void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 78739bf..e85e829 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -225,11 +225,12 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
 	return 0;
 }
 
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
 {
 	static int dump_shaders = -1;
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+	u32 *literals;
 	int r;
 
         /* Would like some magic "get_bool_option_once" routine.
@@ -242,12 +243,13 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
 		tgsi_dump(tokens, 0);
 	}
 	shader->shader.family = r600_get_family(rctx->radeon);
-	r = r600_shader_from_tgsi(tokens, &shader->shader);
+	r = r600_shader_from_tgsi(tokens, &shader->shader, &literals);
 	if (r) {
 		R600_ERR("translation from TGSI failed !\n");
 		return r;
 	}
 	r = r600_bc_build(&shader->shader.bc);
+	free(literals);
 	if (r) {
 		R600_ERR("building bytecode failed !\n");
 		return r;
@@ -282,7 +284,6 @@ struct r600_shader_ctx {
 	struct r600_shader_tgsi_instruction	*inst_info;
 	struct r600_bc				*bc;
 	struct r600_shader			*shader;
-	u32					value[4];
 	u32					*literals;
 	u32					nliterals;
 	u32					max_driver_temp_used;
@@ -491,7 +492,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
 	return ctx->num_interp_gpr;
 }
 
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals)
 {
 	struct tgsi_full_immediate *immediate;
 	struct r600_shader_ctx ctx;
@@ -599,9 +600,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 			r = ctx.inst_info->process(&ctx);
 			if (r)
 				goto out_err;
-			r = r600_bc_add_literal(ctx.bc, ctx.value);
-			if (r)
-				goto out_err;
 			break;
 		case TGSI_TOKEN_TYPE_PROPERTY:
 			break;
@@ -722,7 +720,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 		if (r)
 			goto out_err;
 	}
-	free(ctx.literals);
+	*literals = ctx.literals;
 	tgsi_parse_free(&ctx.parse);
 	return 0;
 out_err:
@@ -756,38 +754,13 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
 			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
 
 			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
-			switch(ctx->literals[index]) {
-			case 0:
-				r600_src->sel = V_SQ_ALU_SRC_0;
-				return 0;
-			case 1:
-				r600_src->sel = V_SQ_ALU_SRC_1_INT;
-				return 0;
-			case -1:
-				r600_src->sel = V_SQ_ALU_SRC_M_1_INT;
-				return 0;
-			case 0x3F800000: // 1.0f
-				r600_src->sel = V_SQ_ALU_SRC_1;
+			r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
 				return 0;
-			case 0x3F000000: // 0.5f
-				r600_src->sel = V_SQ_ALU_SRC_0_5;
-				return 0;
-			case 0xBF800000: // -1.0f
-				r600_src->sel = V_SQ_ALU_SRC_1;
-				r600_src->neg ^= 1;
-				return 0;
-			case 0xBF000000: // -0.5f
-				r600_src->sel = V_SQ_ALU_SRC_0_5;
-				r600_src->neg ^= 1;
-				return 0;
-			}
 		}
 		index = tgsi_src->Register.Index;
 		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
-		ctx->value[0] = ctx->literals[index * 4 + 0];
-		ctx->value[1] = ctx->literals[index * 4 + 1];
-		ctx->value[2] = ctx->literals[index * 4 + 2];
-		ctx->value[3] = ctx->literals[index * 4 + 3];
+		r600_src->value = ctx->literals + index * 4;
 	} else {
 		if (tgsi_src->Register.Indirect)
 			r600_src->rel = V_SQ_REL_RELATIVE;
@@ -893,6 +866,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 				alu.src[0].sel = r600_src[i].sel;
 				alu.src[0].chan = k;
+				alu.src[0].value = r600_src[i].value;
 				alu.dst.sel = treg;
 				alu.dst.chan = k;
 				alu.dst.write = 1;
@@ -902,9 +876,6 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
 				if (r)
 					return r;
 			}
-			r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
-			if (r)
-				return r;
 			r600_src[i].sel = treg;
 			j--;
 		}
@@ -999,12 +970,14 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
 static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
 			   struct r600_bc_alu_src r600_src[3])
 {
+	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
+	static float double_pi = 3.1415926535 * 2;
+	static float neg_pi = -3.1415926535;
+
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	int r, src0_chan;
-	uint32_t lit_vals[4];
+	int r;
 	struct r600_bc_alu alu;
 
-	memset(lit_vals, 0, 4*4);
 	r = tgsi_split_constant(ctx, r600_src);
 	if (r)
 		return r;
@@ -1012,22 +985,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
 	if (r)
 		return r;
 
-	src0_chan = tgsi_chan(&inst->Src[0], 0);
-
-	/* We are going to feed two literals to the MAD below,
-	 * which means that if the first operand is a literal as well,
-	 * we need to copy its value manually.
-	 */
-	if (r600_src[0].sel == V_SQ_ALU_SRC_LITERAL) {
-		unsigned index = inst->Src[0].Register.Index;
-
-		lit_vals[2] = ctx->literals[index * 4 + src0_chan];
-		src0_chan = 2;
-	}
-
-	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
-	lit_vals[1] = fui(0.5f);
-
 	memset(&alu, 0, sizeof(struct r600_bc_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 	alu.is_op3 = 1;
@@ -1037,19 +994,17 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
 	alu.dst.write = 1;
 
 	alu.src[0] = r600_src[0];
-	alu.src[0].chan = src0_chan;
+	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
 
 	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
 	alu.src[1].chan = 0;
-	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+	alu.src[1].value = (uint32_t *)&half_inv_pi;
+	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
 	alu.src[2].chan = 1;
 	alu.last = 1;
 	r = r600_bc_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
-	r = r600_bc_add_literal(ctx->bc, lit_vals);
-	if (r)
-		return r;
 
 	memset(&alu, 0, sizeof(struct r600_bc_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
@@ -1065,14 +1020,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
 	if (r)
 		return r;
 
-	if (ctx->bc->chiprev == CHIPREV_R600) {
-		lit_vals[0] = fui(3.1415926535897f * 2.0f);
-		lit_vals[1] = fui(-3.1415926535897f);
-	} else {
-		lit_vals[0] = fui(1.0f);
-		lit_vals[1] = fui(-0.5f);
-	}
-
 	memset(&alu, 0, sizeof(struct r600_bc_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 	alu.is_op3 = 1;
@@ -1088,13 +1035,20 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
 	alu.src[1].chan = 0;
 	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
 	alu.src[2].chan = 1;
+
+	if (ctx->bc->chiprev == CHIPREV_R600) {
+		alu.src[1].value = (uint32_t *)&double_pi;
+		alu.src[2].value = (uint32_t *)&neg_pi;
+	} else {
+		alu.src[1].sel = V_SQ_ALU_SRC_1;
+		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
+		alu.src[2].neg = 1;
+	}
+
 	alu.last = 1;
 	r = r600_bc_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
-	r = r600_bc_add_literal(ctx->bc, lit_vals);
-	if (r)
-		return r;
 	return 0;
 }
 
@@ -1210,10 +1164,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* dst.w = 1.0; */
@@ -1234,10 +1184,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	return 0;
@@ -1273,9 +1219,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 	}
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
 
 	/* kill must be last in ALU */
 	ctx->bc->force_add_cf = 1;
@@ -1338,10 +1281,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 	if (r)
 		return r;
 
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
-
 	if (inst->Dst[0].Register.WriteMask & (1 << 2))
 	{
 		int chan;
@@ -1360,10 +1299,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		chan = alu.dst.chan;
 		sel = alu.dst.sel;
 
@@ -1386,9 +1321,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 		/* dst.z = exp(tmp.x) */
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -1432,9 +1364,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
 	r = r600_bc_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
 	/* replicate result */
 	return tgsi_helper_tempx_replicate(ctx);
 }
@@ -1483,9 +1412,6 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
 	r = r600_bc_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
 	/* replicate result */
 	return tgsi_helper_tempx_replicate(ctx);
 }
@@ -1509,9 +1435,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
 	r = r600_bc_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
-	r = r600_bc_add_literal(ctx->bc,ctx->value);
-	if (r)
-		return r;
 	/* b * LOG2(a) */
 	memset(&alu, 0, sizeof(struct r600_bc_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
@@ -1526,9 +1449,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
 	r = r600_bc_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
-	r = r600_bc_add_literal(ctx->bc,ctx->value);
-	if (r)
-		return r;
 	/* POW(a,b) = EXP2(b * LOG2(a))*/
 	memset(&alu, 0, sizeof(struct r600_bc_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -1539,9 +1459,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
 	r = r600_bc_add_alu(ctx->bc, &alu);
 	if (r)
 		return r;
-	r = r600_bc_add_literal(ctx->bc,ctx->value);
-	if (r)
-		return r;
 	return tgsi_helper_tempx_replicate(ctx);
 }
 
@@ -1581,9 +1498,6 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 	}
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
 
 	/* dst = (-tmp > 0 ? -1 : tmp) */
 	for (i = 0; i < 4; i++) {
@@ -1618,9 +1532,6 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
 	struct r600_bc_alu alu;
 	int i, r;
 
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
 	for (i = 0; i < 4; i++) {
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
@@ -1749,6 +1660,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
 
 static int tgsi_tex(struct r600_shader_ctx *ctx)
 {
+	static float one_point_five = 1.5f;
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	struct r600_bc_tex tex;
 	struct r600_bc_alu alu;
@@ -1758,7 +1670,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 	boolean src_not_temp =
 		inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
 		inst->Src[0].Register.File != TGSI_FILE_INPUT;
-	uint32_t lit_vals[4];
 
 	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
 
@@ -1887,6 +1798,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 
 		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
 		alu.src[2].chan = 0;
+		alu.src[2].value = (u32*)&one_point_five;
 
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = 0;
@@ -1907,6 +1819,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 
 		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
 		alu.src[2].chan = 0;
+		alu.src[2].value = (u32*)&one_point_five;
 
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = 1;
@@ -1917,11 +1830,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		lit_vals[0] = fui(1.5f);
-
-		r = r600_bc_add_literal(ctx->bc, lit_vals);
-		if (r)
-			return r;
 		src_not_temp = FALSE;
 		src_gpr = ctx->temp_reg;
 	}
@@ -2055,9 +1963,6 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 	}
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
 
 	/* (1 - src0) * src2 */
 	for (i = 0; i < lasti + 1; i++) {
@@ -2080,9 +1985,6 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 	}
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
 
 	/* src0 * src1 + (1 - src0) * src2 */
 	for (i = 0; i < lasti + 1; i++) {
@@ -2223,10 +2125,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	for (i = 0; i < 4; i++) {
@@ -2284,10 +2182,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 	if (use_temp)
 		return tgsi_helper_copy(ctx, inst);
@@ -2320,10 +2214,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 		alu.src[0].sel = ctx->temp_reg;
 		alu.src[0].chan = 0;
@@ -2335,10 +2225,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* result.y = tmp - floor(tmp); */
@@ -2364,9 +2250,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* result.z = RoughApprox2ToX(tmp);*/
@@ -2387,9 +2270,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* result.w = 1.0;*/
@@ -2407,9 +2287,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 	return tgsi_helper_copy(ctx, inst);
 }
@@ -2439,10 +2316,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
 		alu.src[0].sel = ctx->temp_reg;
 		alu.src[0].chan = 0;
@@ -2455,10 +2328,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
@@ -2481,10 +2350,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
@@ -2500,10 +2365,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -2519,10 +2380,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
@@ -2538,10 +2395,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
@@ -2563,10 +2416,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* result.z = log2(src);*/
@@ -2588,10 +2437,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* result.w = 1.0; */
@@ -2610,10 +2455,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	return tgsi_helper_copy(ctx, inst);
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 35b0331..935dd6f 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -47,6 +47,6 @@ struct r600_shader {
 	boolean			uses_kill;
 };
 
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
 
 #endif
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index 3eb6fb5..a7f2f54 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -29,8 +29,6 @@
 
 int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
 {
-	unsigned i;
-
 	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
 		S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
 		S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
@@ -67,13 +65,5 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
 			                S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
 		 	                S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
 	}
-	if (alu->last) {
-		if (alu->nliteral && !alu->literal_added) {
-			R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
-		}
-		for (i = 0; i < alu->nliteral; i++) {
-			bc->bytecode[id++] = alu->value[i];
-		}
-	}
 	return 0;
 }




More information about the mesa-commit mailing list