[PATCH] nv50: initial support for IF, ELSE, ENDIF insns
Christoph Bumiller
e0425955 at student.tuwien.ac.at
Sun Jun 21 09:40:39 PDT 2009
---
src/gallium/drivers/nv50/nv50_program.c | 162 +++++++++++++++++++++++++------
src/gallium/drivers/nv50/nv50_program.h | 1 +
2 files changed, 132 insertions(+), 31 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 5594560..16bf2f1 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -90,6 +90,8 @@ struct nv50_reg {
int acc; /* instruction where this reg is last read (first insn == 1) */
};
+#define MAX_IF_LEVEL 4 /* arbitrary value */
+
struct nv50_pc {
struct nv50_program *p;
@@ -119,11 +121,17 @@ struct nv50_pc {
struct nv50_reg r_hpos[4];
+ struct nv50_program_exec *if_cond;
+ struct nv50_program_exec *if_insn[MAX_IF_LEVEL];
+ struct nv50_program_exec *if_join[MAX_IF_LEVEL];
+ unsigned if_lvl;
+
/* current instruction and total number of insns */
unsigned insn_cur;
unsigned insn_nr;
boolean allow32;
+ boolean join_on;
};
static inline void
@@ -208,22 +216,6 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
return NULL;
}
-/* Assign the hw of the discarded temporary register src
- * to the tgsi register dst and free src.
- */
-static void
-assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
- assert(src->index == -1 && src->hw != -1);
-
- if (dst->hw != -1)
- pc->r_temp[dst->hw] = NULL;
- pc->r_temp[src->hw] = dst;
- dst->hw = src->hw;
-
- FREE(src);
-}
-
/* release the hardware resource held by r */
static void
release_hw(struct nv50_pc *pc, struct nv50_reg *r)
@@ -351,6 +343,11 @@ emit(struct nv50_pc *pc, struct nv50_program_exec *e)
p->exec_head = e;
p->exec_tail = e;
p->exec_size += (e->inst[0] & 1) ? 2 : 1;
+
+ if (pc->join_on) {
+ e->inst[1] |= 0x00000002;
+ pc->join_on = FALSE;
+ }
}
static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
@@ -524,6 +521,28 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
FREE(imm);
}
+/* Assign the hw of the discarded temporary register src
+ * to the tgsi register dst and free src.
+ */
+static void
+assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+ assert(src->index == -1 && src->hw != -1); /* src must have index -1 and own a hw register */
+
+ if (pc->if_lvl > 0) { /* inside an open IF: copy src into dst instead of re-binding dst's hw; NOTE(review): presumably because a re-binding would not hold on the untaken path - confirm */
+ emit_mov(pc, dst, src);
+ free_temp(pc, src);
+ return;
+ }
+
+ if (dst->hw != -1) /* dst already has a hw register: clear its r_temp entry first */
+ pc->r_temp[dst->hw] = NULL;
+ pc->r_temp[src->hw] = dst; /* the r_temp entry for src's hw register now refers to dst */
+ dst->hw = src->hw;
+
+ FREE(src); /* only the nv50_reg object is freed; its hw register is now referenced by dst */
+}
+
static boolean
check_swap_src_0_1(struct nv50_pc *pc,
struct nv50_reg **s0, struct nv50_reg **s1)
@@ -866,6 +885,8 @@ emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst,
set_src_0(pc, dst, e);
emit(pc, e);
+ pc->if_cond = e;
+
if (dst != rdst)
free_temp(pc, dst);
}
@@ -1098,6 +1119,39 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
}
static void
+emit_branch(struct nv50_pc *pc, int pred, unsigned cc, void *join)
+{ /* Emit a long branch insn; if join is non-NULL, first emit a separate long insn and store its pointer through join. */
+ struct nv50_program_exec *e = exec(pc);
+
+ if (join) { /* join is treated as a struct nv50_program_exec ** (see the cast below) */
+ set_long(pc, e);
+ e->inst[0] |= 0xa0000002; /* NOTE(review): presumably the join-address setup opcode - confirm */
+ emit(pc, e);
+ *(struct nv50_program_exec **)join = e; /* hand the emitted insn back; the ENDIF case later sets its bra field */
+ e = exec(pc); /* this second exec(pc) result is used for the branch itself */
+ }
+
+ set_long(pc, e);
+ e->inst[0] |= 0x10000002; /* NOTE(review): presumably the branch opcode - confirm */
+ if (pred >= 0) /* a negative pred (the ELSE case passes -1) skips set_pred entirely */
+ set_pred(pc, cc, pred, e);
+ emit(pc, e);
+}
+
+static void
+emit_nop(struct nv50_pc *pc, boolean full)
+{ /* Emit an insn whose inst[0] is 0xf0000000; when full is set it is made long and inst[1] is 0xe0000000. */
+ struct nv50_program_exec *e = exec(pc);
+
+ e->inst[0] = 0xf0000000; /* NOTE(review): presumably the hardware no-op encoding - confirm */
+ if (full) { /* the ENDIF case passes TRUE when insn_cur == insn_nr - 1 */
+ set_long(pc, e);
+ e->inst[1] = 0xe0000000;
+ }
+ emit(pc, e);
+}
+
+static void
convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
{
unsigned q = 0, m = ~0;
@@ -1420,6 +1474,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
FREE(one);
}
break;
+ case TGSI_OPCODE_ELSE:
+ emit_branch(pc, -1, 0, NULL);
+ pc->if_insn[--pc->if_lvl]->bra = (1 << 31) | pc->p->exec_size;
+ pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+ break;
+ case TGSI_OPCODE_ENDIF:
+ i = pc->p->exec_size | (1 << 31);
+ pc->if_insn[--pc->if_lvl]->bra = i;
+ if (pc->if_join[pc->if_lvl]) {
+ pc->if_join[pc->if_lvl]->bra = i;
+ pc->if_join[pc->if_lvl] = NULL;
+ pc->join_on = TRUE;
+ }
+ if (pc->insn_cur == (pc->insn_nr - 1))
+ emit_nop(pc, TRUE);
+ break;
case TGSI_OPCODE_EX2:
temp = temp_temp(pc);
rtmp = *pp_rtmp;
@@ -1442,6 +1512,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
emit_sub(pc, dst[c], src[0][c], temp);
}
break;
+ case TGSI_OPCODE_IF:
+ assert(pc->if_lvl < MAX_IF_LEVEL);
+ set_pred_wr(pc, 1, 0, pc->if_cond);
+ emit_branch(pc, 0, 2, &pc->if_join[pc->if_lvl]);
+ pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+ break;
case TGSI_OPCODE_KIL:
emit_kil(pc, src[0][0]);
emit_kil(pc, src[0][1]);
@@ -2181,8 +2257,8 @@ nv50vp_ucp_append(struct nv50_pc *pc)
static void nv50_program_tx_postprocess(struct nv50_pc *pc)
{
- struct nv50_program_exec *e, *e_prev = NULL;
- unsigned pos;
+ struct nv50_program_exec *e, **e_list, *e_prev = NULL;
+ unsigned i, n, pos;
if (pc->p->type == PIPE_SHADER_FRAGMENT)
nv50fp_move_outputs(pc);
@@ -2190,15 +2266,31 @@ static void nv50_program_tx_postprocess(struct nv50_pc *pc)
if (pc->p->type == PIPE_SHADER_VERTEX)
nv50vp_ucp_append(pc);
+ /* collect branching instructions, we need to adjust their target
+ * offsets when converting half insns
+ */
+ e_list = MALLOC(pc->p->exec_size * sizeof(struct nv50_program_exec *));
+
+ for (n = 0, e = pc->p->exec_head; e; e = e->next) {
+ if (e->bra) {
+ e_list[n++] = e;
+ e->bra &= ~(1 << 31);
+ }
+ }
+
for (e = pc->p->exec_head, pos = 0; e; e = e->next) {
pos += is_long(e) ? 2 : 1;
if ((!e->next || is_long(e->next)) && (pos & 1)) {
+ for (i = 0; i < n; i++)
+ if (e_list[i]->bra > (pos - 1))
+ e_list[i]->bra++;
convert_to_long(pc, e);
pos++;
}
e_prev = e->next ? e : e_prev;
}
+ FREE(e_list);
/* last instruction must be long */
if (!is_long(pc->p->exec_tail)) {
@@ -2234,7 +2326,8 @@ nv50_program_tx(struct nv50_program *p)
/* don't allow half insn/immd on first and last instruction */
pc->allow32 = TRUE;
- if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr)
+ if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr ||
+ pc->join_on)
pc->allow32 = FALSE;
tgsi_parse_token(&parse);
@@ -2383,11 +2476,29 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
heap = nv50->screen->code_heap[p->type];
code = nv50->screen->sprogbuf_code[p->type];
+ size = align(p->exec_size * 4, 0x100);
+
+ if (!p->code) {
+ ret = nouveau_resource_alloc(heap, size, p, &p->code);
+ if (ret)
+ assert(!"No more space in program VRAM buffer.");
+ }
+
if ((p->data[0] && p->data[0]->start != p->data_start[0]) ||
- (p->data[1] && p->data[1]->start != p->data_start[1])) {
+ (p->data[1] && p->data[1]->start != p->data_start[1]))
+ upload = TRUE;
+
+ if (upload) {
for (e = p->exec_head; e; e = e->next) {
unsigned ei, ci, bs;
+ if (e->bra) {
+ assert(!(e->bra & 1));
+ bs = (e->bra >> 1) + (p->code->start >> 3);
+ e->inst[0] &= 0xF0000FFF;
+ e->inst[0] |= (bs << 12);
+ }
+
if (e->param.index < 0)
continue;
bs = (e->inst[1] >> 22) & 0x07;
@@ -2403,8 +2514,6 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
p->data_start[0] = p->data[0]->start;
if (p->data[1])
p->data_start[1] = p->data[1]->start;
-
- upload = TRUE;
}
if (!upload)
@@ -2434,15 +2543,6 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
nouveau_bo_unmap(p->bo);
- size = align(p->exec_size * 4, 0x100);
- if (!p->code) {
- ret = nouveau_resource_alloc(heap, size, p, &p->code);
- if (ret) {
- NOUVEAU_ERR("Program VRAM buffer is full.\n");
- abort();
- }
- }
-
nv50_transfer_gart_vram(&nv50->screen->base.base,
code, p->code->start, p->bo, 0, size);
}
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 1206aab..ac5230d 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -13,6 +13,7 @@ struct nv50_program_exec {
unsigned mask;
unsigned shift;
} param;
+ unsigned bra;
};
struct nv50_linkage {
--
1.6.0.6
--------------090503050107050804030002
Content-Type: text/plain;
name="0013-nv50-support-for-SLE-SNE-SEQ-SGT.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="0013-nv50-support-for-SLE-SNE-SEQ-SGT.patch"
More information about the Nouveau mailing list