[PATCH] nv50: initial support for IF, ELSE, ENDIF insns

Christoph Bumiller e0425955 at student.tuwien.ac.at
Sun Jun 21 09:40:39 PDT 2009


---
 src/gallium/drivers/nv50/nv50_program.c |  162 +++++++++++++++++++++++++------
 src/gallium/drivers/nv50/nv50_program.h |    1 +
 2 files changed, 132 insertions(+), 31 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 5594560..16bf2f1 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -90,6 +90,8 @@ struct nv50_reg {
 	int acc; /* instruction where this reg is last read (first insn == 1) */
 };
 
+#define MAX_IF_LEVEL 4 /* arbitrary value */
+
 struct nv50_pc {
 	struct nv50_program *p;
 
@@ -119,11 +121,17 @@ struct nv50_pc {
 
 	struct nv50_reg r_hpos[4];
 
+	struct nv50_program_exec *if_cond;
+	struct nv50_program_exec *if_insn[MAX_IF_LEVEL];
+	struct nv50_program_exec *if_join[MAX_IF_LEVEL];
+	unsigned if_lvl;
+
 	/* current instruction and total number of insns */
 	unsigned insn_cur;
 	unsigned insn_nr;
 
 	boolean allow32;
+	boolean join_on;
 };
 
 static inline void
@@ -208,22 +216,6 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
 	return NULL;
 }
 
-/* Assign the hw of the discarded temporary register src
- * to the tgsi register dst and free src.
- */
-static void
-assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
-	assert(src->index == -1 && src->hw != -1);
-
-	if (dst->hw != -1)
-		pc->r_temp[dst->hw] = NULL;
-	pc->r_temp[src->hw] = dst;
-	dst->hw = src->hw;
-
-	FREE(src);
-}
-
 /* release the hardware resource held by r */
 static void
 release_hw(struct nv50_pc *pc, struct nv50_reg *r)
@@ -351,6 +343,11 @@ emit(struct nv50_pc *pc, struct nv50_program_exec *e)
 		p->exec_head = e;
 	p->exec_tail = e;
 	p->exec_size += (e->inst[0] & 1) ? 2 : 1;
+
+	if (pc->join_on) {
+		e->inst[1] |= 0x00000002;
+		pc->join_on = FALSE;
+	}
 }
 
 static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
@@ -524,6 +521,28 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
 	FREE(imm);
 }
 
+/* Assign the hw of the discarded temporary src to the tgsi register dst and
+ * free src. Inside an IF block (if_lvl > 0), MOV src into dst instead.
+ */
+static void
+assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+	assert(src->index == -1 && src->hw != -1);
+
+	if (pc->if_lvl > 0) { /* dst keeps its own hw; NOTE(review): presumably because the other path still uses it - confirm */
+		emit_mov(pc, dst, src);
+		free_temp(pc, src);
+		return;
+	}
+
+	if (dst->hw != -1)
+		pc->r_temp[dst->hw] = NULL; /* clear the r_temp entry of dst's old hw */
+	pc->r_temp[src->hw] = dst; /* src's r_temp entry now refers to dst */
+	dst->hw = src->hw;
+
+	FREE(src);
+}
+
 static boolean
 check_swap_src_0_1(struct nv50_pc *pc,
 		   struct nv50_reg **s0, struct nv50_reg **s1)
@@ -866,6 +885,8 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
 	set_src_0(pc, dst, e);
 	emit(pc, e);
 
+	pc->if_cond = e;
+
 	if (dst != rdst)
 		free_temp(pc, dst);
 }
@@ -1098,6 +1119,39 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 }
 
 static void
+emit_branch(struct nv50_pc *pc, int pred, unsigned cc, void *join)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	if (join) { /* join, when non-NULL, is used as a struct nv50_program_exec ** */
+		set_long(pc, e);
+		e->inst[0] |= 0xa0000002; /* NOTE(review): presumably the join-address opcode - confirm */
+		emit(pc, e);
+		*(struct nv50_program_exec **)join = e; /* hand the extra insn back through join */
+		e = exec(pc);
+	}
+
+	set_long(pc, e);
+	e->inst[0] |= 0x10000002; /* NOTE(review): presumably the branch opcode - confirm */
+	if (pred >= 0) /* a negative pred leaves the insn without a predicate */
+		set_pred(pc, cc, pred, e);
+	emit(pc, e);
+}
+
+static void
+emit_nop(struct nv50_pc *pc, boolean full)
+{
+	struct nv50_program_exec *e = exec(pc); /* fresh insn that is emitted below */
+
+	e->inst[0] = 0xf0000000; /* NOTE(review): presumably the nop opcode - confirm */
+	if (full) { /* full: mark the insn long and fill its second word too */
+		set_long(pc, e);
+		e->inst[1] = 0xe0000000;
+	}
+	emit(pc, e);
+}
+
+static void
 convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
 {
 	unsigned q = 0, m = ~0;
@@ -1420,6 +1474,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 		FREE(one);
 	}
 		break;
+	case TGSI_OPCODE_ELSE:
+		emit_branch(pc, -1, 0, NULL);
+		pc->if_insn[--pc->if_lvl]->bra = (1 << 31) | pc->p->exec_size;
+		pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+		break;
+	case TGSI_OPCODE_ENDIF:
+		i = pc->p->exec_size | (1 << 31);
+		pc->if_insn[--pc->if_lvl]->bra = i;
+		if (pc->if_join[pc->if_lvl]) {
+			pc->if_join[pc->if_lvl]->bra = i;
+			pc->if_join[pc->if_lvl] = NULL;
+			pc->join_on = TRUE;
+		}
+		if (pc->insn_cur == (pc->insn_nr - 1))
+			emit_nop(pc, TRUE);
+		break;
 	case TGSI_OPCODE_EX2:
 		temp = temp_temp(pc);
 		rtmp = *pp_rtmp;
@@ -1442,6 +1512,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 			emit_sub(pc, dst[c], src[0][c], temp);
 		}
 		break;
+	case TGSI_OPCODE_IF:
+		assert(pc->if_lvl < MAX_IF_LEVEL);
+		set_pred_wr(pc, 1, 0, pc->if_cond);
+		emit_branch(pc, 0, 2, &pc->if_join[pc->if_lvl]);
+		pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+		break;
 	case TGSI_OPCODE_KIL:
 		emit_kil(pc, src[0][0]);
 		emit_kil(pc, src[0][1]);
@@ -2181,8 +2257,8 @@ nv50vp_ucp_append(struct nv50_pc *pc)
 
 static void nv50_program_tx_postprocess(struct nv50_pc *pc)
 {
-	struct nv50_program_exec *e, *e_prev = NULL;
-	unsigned pos;
+	struct nv50_program_exec *e, **e_list, *e_prev = NULL;
+	unsigned i, n, pos;
 
 	if (pc->p->type == PIPE_SHADER_FRAGMENT)
 		nv50fp_move_outputs(pc);
@@ -2190,15 +2266,31 @@ static void nv50_program_tx_postprocess(struct nv50_pc *pc)
 	if (pc->p->type == PIPE_SHADER_VERTEX)
 		nv50vp_ucp_append(pc);
 
+	/* Collect the branch instructions: their target offsets must be
+	 * adjusted whenever a half insn is converted to a long one.
+	 */
+	e_list = MALLOC(pc->p->exec_size * sizeof(struct nv50_program_exec *));
+
+	for (n = 0, e = pc->p->exec_head; e; e = e->next) {
+		if (e->bra) {
+			e_list[n++] = e;
+			e->bra &= ~(1 << 31);
+		}
+	}
+
 	for (e = pc->p->exec_head, pos = 0; e; e = e->next) {
 		pos += is_long(e) ? 2 : 1;
 
 		if ((!e->next || is_long(e->next)) && (pos & 1)) {
+			for (i = 0; i < n; i++)
+				if (e_list[i]->bra > (pos - 1))
+					e_list[i]->bra++;
 			convert_to_long(pc, e);
 			pos++;
 		}
 		e_prev = e->next ? e : e_prev;
 	}
+	FREE(e_list);
 
 	/* last instruction must be long */
 	if (!is_long(pc->p->exec_tail)) {
@@ -2234,7 +2326,8 @@ nv50_program_tx(struct nv50_program *p)
 
 		/* don't allow half insn/immd on first and last instruction */
 		pc->allow32 = TRUE;
-		if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr)
+		if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr ||
+		    pc->join_on)
 			pc->allow32 = FALSE;
 
 		tgsi_parse_token(&parse);
@@ -2383,11 +2476,29 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 	heap = nv50->screen->code_heap[p->type];
 	code = nv50->screen->sprogbuf_code[p->type];
 
+	size = align(p->exec_size * 4, 0x100);
+
+	if (!p->code) {
+		ret = nouveau_resource_alloc(heap, size, p, &p->code);
+		if (ret)
+			assert(!"No more space in program VRAM buffer.");
+	}
+
 	if ((p->data[0] && p->data[0]->start != p->data_start[0]) ||
-		(p->data[1] && p->data[1]->start != p->data_start[1])) {
+	    (p->data[1] && p->data[1]->start != p->data_start[1]))
+		upload = TRUE;
+
+	if (upload) {
 		for (e = p->exec_head; e; e = e->next) {
 			unsigned ei, ci, bs;
 
+			if (e->bra) {
+				assert(!(e->bra & 1));
+				bs = (e->bra >> 1) + (p->code->start >> 3);
+				e->inst[0] &= 0xF0000FFF;
+				e->inst[0] |= (bs << 12);
+			}
+
 			if (e->param.index < 0)
 				continue;
 			bs = (e->inst[1] >> 22) & 0x07;
@@ -2403,8 +2514,6 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 			p->data_start[0] = p->data[0]->start;
 		if (p->data[1])
 			p->data_start[1] = p->data[1]->start;
-
-		upload = TRUE;
 	}
 
 	if (!upload)
@@ -2434,15 +2543,6 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 
 	nouveau_bo_unmap(p->bo);
 
-	size = align(p->exec_size * 4, 0x100);
-	if (!p->code) {
-		ret = nouveau_resource_alloc(heap, size, p, &p->code);
-		if (ret) {
-			NOUVEAU_ERR("Program VRAM buffer is full.\n");
-			abort();
-		}
-	}
-
 	nv50_transfer_gart_vram(&nv50->screen->base.base,
 				code, p->code->start, p->bo, 0, size);
 }
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 1206aab..ac5230d 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -13,6 +13,7 @@ struct nv50_program_exec {
 		unsigned mask;
 		unsigned shift;
 	} param;
+	unsigned bra;
 };
 
 struct nv50_linkage {
-- 
1.6.0.6


--------------090503050107050804030002
Content-Type: text/plain;
 name="0013-nv50-support-for-SLE-SNE-SEQ-SGT.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="0013-nv50-support-for-SLE-SNE-SEQ-SGT.patch"



More information about the Nouveau mailing list