[PATCH] nv50: support for SLE, SNE, SEQ, SGT

Christoph Bumiller e0425955 at student.tuwien.ac.at
Sun Jun 21 07:14:29 PDT 2009


---
 src/gallium/drivers/nv50/nv50_program.c |  118 +++++++++++++++++++++----------
 1 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 16bf2f1..75c5cea 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -810,7 +810,11 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 #define CVTOP_TRUNC	0x07
 #define CVTOP_SAT	0x08
 #define CVTOP_ABS	0x10
+#define CVTOP_ABSRN	0x11
 
+/* 0x04 == 32 bit */
+/* 0x40 == dst is float */
+/* 0x80 == src is float */
 #define CVT_F32_F32 0xc4
 #define CVT_F32_S32 0x44
 #define CVT_F32_U32 0x64
@@ -819,8 +823,8 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 #define CVT_F32_F32_ROP 0xcc
 
 static void
-emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
-	 int wp, unsigned cop, unsigned fmt)
+emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, int wp,
+	 struct nv50_reg *src, unsigned cvn, unsigned fmt)
 {
 	struct nv50_program_exec *e;
 
@@ -829,7 +833,7 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
 
 	e->inst[0] |= 0xa0000000;
 	e->inst[1] |= 0x00004000;
-	e->inst[1] |= (cop << 16);
+	e->inst[1] |= (cvn << 16);
 	e->inst[1] |= (fmt << 24);
 	set_src_0(pc, src, e);
 
@@ -846,55 +850,94 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
 	emit(pc, e);
 }
 
+static inline unsigned
+map_opcode_ccode(unsigned op)
+{
+	switch (op) {
+	case TGSI_OPCODE_SLT: return TGSI_CC_LT;
+	case TGSI_OPCODE_SGE: return TGSI_CC_GE;
+	case TGSI_OPCODE_SEQ: return TGSI_CC_EQ;
+	case TGSI_OPCODE_SGT: return TGSI_CC_GT;
+	case TGSI_OPCODE_SLE: return TGSI_CC_LE;
+	case TGSI_OPCODE_SNE: return TGSI_CC_NE;
+	default:
+		assert(0);
+		return 0;
+	}
+}
+
+static inline unsigned
+map_ccode_nv50(unsigned cc)
+{
+	assert(cc < 16);
+	/* cc: TGSI_CC_* code, plus 8 if emit_set() swapped src0 and src1 */
+	switch (cc) {
+	case TGSI_CC_GT: return 0x4;
+	case TGSI_CC_EQ: return 0x2;
+	case TGSI_CC_LT: return 0x1;
+	case TGSI_CC_GE: return 0x6;
+	case TGSI_CC_LE: return 0x3;
+	case TGSI_CC_NE: return 0xd;
+	/* swapped operands: mirror the test (a > b == b < a), don't negate it */
+	case TGSI_CC_GT + 8: return 0x1;
+	case TGSI_CC_EQ + 8: return 0x2;
+	case TGSI_CC_LT + 8: return 0x4;
+	case TGSI_CC_GE + 8: return 0x3;
+	case TGSI_CC_LE + 8: return 0x6;
+	case TGSI_CC_NE + 8: return 0xd;
+
+	default:
+		assert(!"invalid condition code");
+		return 0x0;
+	}
+}
+
 static void
-emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
+emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, int wp,
 	 struct nv50_reg *src0, struct nv50_reg *src1)
 {
 	struct nv50_program_exec *e = exec(pc);
-	unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
 	struct nv50_reg *rdst;
 
-	assert(c_op <= 7);
 	if (check_swap_src_0_1(pc, &src0, &src1))
-		c_op = inv_cop[c_op];
+		c_op += 8;
 
 	rdst = dst;
-	if (dst->type != P_TEMP)
-		dst = alloc_temp(pc, NULL);
+	if (dst && dst->type != P_TEMP)
+		dst = temp_temp(pc);
 
 	/* set.u32 */
 	set_long(pc, e);
 	e->inst[0] |= 0xb0000000;
-	e->inst[1] |= (3 << 29);
-	e->inst[1] |= (c_op << 14);
-	/*XXX: breaks things, .u32 by default?
-	 *     decuda will disasm as .u16 and use .lo/.hi regs, but this
-	 *     doesn't seem to match what the hw actually does.
-	inst[1] |= 0x04000000; << breaks things.. .u32 by default?
-	 */
-	set_dst(pc, dst, e);
+	e->inst[1] |= 0x60000000;
+	/* XXX: decuda will disasm .u16 lo/hi,
+	 *      but 32 bit flag breaks things: */
+	/* e->inst[1] |= 0x04000000; */
+	e->inst[1] |= (map_ccode_nv50(c_op) << 14);
+
+	if (wp >= 0)
+		set_pred_wr(pc, 1, wp, e);
+	if (dst)
+		set_dst(pc, dst, e);
+	else {
+		e->inst[0] |= 0x000001fc;
+		e->inst[1] |= 0x00000008;
+	}
+
 	set_src_0(pc, src0, e);
 	set_src_1(pc, src1, e);
-	emit(pc, e);
 
-	/* cvt.f32.u32 */
-	e = exec(pc);
-	e->inst[0] = 0xa0000001;
-	e->inst[1] = 0x64014780;
-	set_dst(pc, rdst, e);
-	set_src_0(pc, dst, e);
 	emit(pc, e);
-
 	pc->if_cond = e;
 
-	if (dst != rdst)
-		free_temp(pc, dst);
+	if (rdst)
+		emit_cvt(pc, rdst, -1, dst, CVTOP_ABSRN, CVT_F32_S32);
 }
 
 static INLINE void
 emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
-	emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32_ROP);
+	emit_cvt(pc, dst, -1, src, CVTOP_FLOOR, CVT_F32_F32_ROP);
 }
 
 static void
@@ -914,7 +957,7 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
 static INLINE void
 emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
-	emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32);
+	emit_cvt(pc, dst, -1, src, CVTOP_ABS, CVT_F32_F32);
 }
 
 static void
@@ -1611,13 +1654,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 		if (mask & (1 << 3))
 			emit_mov_immdval(pc, dst[3], 1.0);
 		break;
-	case TGSI_OPCODE_SGE:
-		for (c = 0; c < 4; c++) {
-			if (!(mask & (1 << c)))
-				continue;
-			emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
-		}
-		break;
 	case TGSI_OPCODE_SIN:
 		temp = temp_temp(pc);
 		rtmp = *pp_rtmp;
@@ -1630,10 +1666,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 		}
 		break;
 	case TGSI_OPCODE_SLT:
+	case TGSI_OPCODE_SGE:
+	case TGSI_OPCODE_SEQ:
+	case TGSI_OPCODE_SGT:
+	case TGSI_OPCODE_SLE:
+	case TGSI_OPCODE_SNE:
+		i = map_opcode_ccode(inst->Instruction.Opcode);
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
 				continue;
-			emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
+			emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]);
 		}
 		break;
 	case TGSI_OPCODE_SUB:
@@ -1690,7 +1732,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
 				continue;
-			emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT, 0xc4);
+			emit_cvt(pc, rdst[c], -1, dst[c], CVTOP_SAT, 0xc4);
 		}
 	}
 
-- 
1.6.0.6


--------------090503050107050804030002
Content-Type: text/plain;
 name="0014-nv50-don-t-allocate-in-the-param-buffer.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="0014-nv50-don-t-allocate-in-the-param-buffer.patch"



More information about the Nouveau mailing list