[PATCH] nv50: support for SLE, SNE, SEQ, SGT
Christoph Bumiller
e0425955 at student.tuwien.ac.at
Sun Jun 21 07:14:29 PDT 2009
---
src/gallium/drivers/nv50/nv50_program.c | 118 +++++++++++++++++++++----------
1 files changed, 80 insertions(+), 38 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 16bf2f1..75c5cea 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -810,7 +810,11 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
#define CVTOP_TRUNC 0x07
#define CVTOP_SAT 0x08
#define CVTOP_ABS 0x10
+#define CVTOP_ABSRN 0x11
+/* 0x04 == 32 bit */
+/* 0x40 == dst is float */
+/* 0x80 == src is float */
#define CVT_F32_F32 0xc4
#define CVT_F32_S32 0x44
#define CVT_F32_U32 0x64
@@ -819,8 +823,8 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
#define CVT_F32_F32_ROP 0xcc
static void
-emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
- int wp, unsigned cop, unsigned fmt)
+emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, int wp,
+ struct nv50_reg *src, unsigned cvn, unsigned fmt)
{
struct nv50_program_exec *e;
@@ -829,7 +833,7 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
e->inst[0] |= 0xa0000000;
e->inst[1] |= 0x00004000;
- e->inst[1] |= (cop << 16);
+ e->inst[1] |= (cvn << 16);
e->inst[1] |= (fmt << 24);
set_src_0(pc, src, e);
@@ -846,55 +850,94 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
emit(pc, e);
}
+static INLINE unsigned
+map_opcode_ccode(unsigned op) /* TGSI_OPCODE_S* -> matching TGSI_CC_* */
+{
+ switch (op) {
+ case TGSI_OPCODE_SLT: return TGSI_CC_LT;
+ case TGSI_OPCODE_SGE: return TGSI_CC_GE;
+ case TGSI_OPCODE_SEQ: return TGSI_CC_EQ;
+ case TGSI_OPCODE_SGT: return TGSI_CC_GT;
+ case TGSI_OPCODE_SLE: return TGSI_CC_LE;
+ case TGSI_OPCODE_SNE: return TGSI_CC_NE;
+ default:
+ assert(!"not a set-on-compare opcode");
+ return 0; /* only reached when assert() is compiled out */
+ }
+}
+
+static INLINE unsigned
+map_ccode_nv50(unsigned cc) /* TGSI_CC_* (+ 8 if operands swapped) -> nv50 cc */
+{
+ assert(cc < 16);
+ /* nv50 bits as used below: 0x1 = LT, 0x2 = EQ, 0x4 = GT (GE/LE are ORs) */
+ switch (cc) {
+ case TGSI_CC_GT: return 0x4;
+ case TGSI_CC_EQ: return 0x2;
+ case TGSI_CC_LT: return 0x1;
+ case TGSI_CC_GE: return 0x6;
+ case TGSI_CC_LE: return 0x3;
+ case TGSI_CC_NE: return 0xd;
+ /* + 8: emit_set swapped src0/src1, so mirror (do not negate) the test */
+ case TGSI_CC_GT + 8: return 0x1; /* a >  b  <=>  b <  a */
+ case TGSI_CC_EQ + 8: return 0x2; /* symmetric */
+ case TGSI_CC_LT + 8: return 0x4; /* a <  b  <=>  b >  a */
+ case TGSI_CC_GE + 8: return 0x3; /* a >= b  <=>  b <= a */
+ case TGSI_CC_LE + 8: return 0x6; /* a <= b  <=>  b >= a */
+ case TGSI_CC_NE + 8: return 0xd; /* symmetric */
+
+ default:
+ assert(!"invalid condition code");
+ return 0x0;
+ }
+}
+
static void
-emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
+emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, int wp,
struct nv50_reg *src0, struct nv50_reg *src1)
{
struct nv50_program_exec *e = exec(pc);
- unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
struct nv50_reg *rdst;
- assert(c_op <= 7);
if (check_swap_src_0_1(pc, &src0, &src1))
- c_op = inv_cop[c_op];
+ c_op += 8; /* operands were swapped: select the "+ 8" cases of map_ccode_nv50 */
rdst = dst;
- if (dst->type != P_TEMP)
- dst = alloc_temp(pc, NULL);
+ if (dst && dst->type != P_TEMP) /* dst may be NULL: no register result wanted */
+ dst = temp_temp(pc);
/* set.u32 */
set_long(pc, e);
e->inst[0] |= 0xb0000000;
- e->inst[1] |= (3 << 29);
- e->inst[1] |= (c_op << 14);
- /*XXX: breaks things, .u32 by default?
- * decuda will disasm as .u16 and use .lo/.hi regs, but this
- * doesn't seem to match what the hw actually does.
- inst[1] |= 0x04000000; << breaks things.. .u32 by default?
- */
- set_dst(pc, dst, e);
+ e->inst[1] |= 0x60000000; /* same value as the old (3 << 29) */
+ /* XXX: decuda will disasm .u16 lo/hi,
+ * but 32 bit flag breaks things: */
+ /* e->inst[1] |= 0x04000000; */
+ e->inst[1] |= (map_ccode_nv50(c_op) << 14);
+
+ if (wp >= 0) /* negative wp: skip the predicate write */
+ set_pred_wr(pc, 1, wp, e);
+ if (dst)
+ set_dst(pc, dst, e);
+ else { /* NOTE(review): presumably encodes "no destination register" - confirm */
+ e->inst[0] |= 0x000001fc;
+ e->inst[1] |= 0x00000008;
+ }
+
set_src_0(pc, src0, e);
set_src_1(pc, src1, e);
- emit(pc, e);
- /* cvt.f32.u32 */
- e = exec(pc);
- e->inst[0] = 0xa0000001;
- e->inst[1] = 0x64014780;
- set_dst(pc, rdst, e);
- set_src_0(pc, dst, e);
emit(pc, e);
-
pc->if_cond = e;
- if (dst != rdst)
- free_temp(pc, dst);
+ if (rdst) /* cvt the set result (dst) into the caller's register as f32 */
+ emit_cvt(pc, rdst, -1, dst, CVTOP_ABSRN, CVT_F32_S32);
}
static INLINE void
emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
- emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32_ROP);
+ emit_cvt(pc, dst, -1, src, CVTOP_FLOOR, CVT_F32_F32_ROP); /* wp (-1) now precedes src */
}
static void
@@ -914,7 +957,7 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
static INLINE void
emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
{
- emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32);
+ emit_cvt(pc, dst, -1, src, CVTOP_ABS, CVT_F32_F32); /* wp (-1) now precedes src */
}
static void
@@ -1611,13 +1654,6 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
if (mask & (1 << 3))
emit_mov_immdval(pc, dst[3], 1.0);
break;
- case TGSI_OPCODE_SGE:
- for (c = 0; c < 4; c++) {
- if (!(mask & (1 << c)))
- continue;
- emit_set(pc, 6, dst[c], src[0][c], src[1][c]);
- }
- break;
case TGSI_OPCODE_SIN:
temp = temp_temp(pc);
rtmp = *pp_rtmp;
@@ -1630,10 +1666,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
}
break;
case TGSI_OPCODE_SLT:
+ case TGSI_OPCODE_SGE:
+ case TGSI_OPCODE_SEQ:
+ case TGSI_OPCODE_SGT:
+ case TGSI_OPCODE_SLE:
+ case TGSI_OPCODE_SNE:
+ i = map_opcode_ccode(inst->Instruction.Opcode);
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_set(pc, 1, dst[c], src[0][c], src[1][c]);
+ emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]);
}
break;
case TGSI_OPCODE_SUB:
@@ -1690,7 +1732,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
for (c = 0; c < 4; c++) {
if (!(mask & (1 << c)))
continue;
- emit_cvt(pc, rdst[c], dst[c], -1, CVTOP_SAT, 0xc4);
+ emit_cvt(pc, rdst[c], -1, dst[c], CVTOP_SAT, 0xc4);
}
}
--
1.6.0.6
--------------090503050107050804030002
Content-Type: text/plain;
name="0014-nv50-don-t-allocate-in-the-param-buffer.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="0014-nv50-don-t-allocate-in-the-param-buffer.patch"
More information about the Nouveau
mailing list