Mesa (master): nv50: add support for address regs

Christoph Bumiller chrisbmr at kemper.freedesktop.org
Mon Oct 19 16:25:45 UTC 2009


Module: Mesa
Branch: master
Commit: ec5c23551cdb4c369d8f8f392208f4d4bf29911b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ec5c23551cdb4c369d8f8f392208f4d4bf29911b

Author: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
Date:   Mon Oct 19 18:17:45 2009 +0200

nv50: add support for address regs

Allow indirect uniform access and increase the
limit on parameters from 128 to 512.

---

 src/gallium/drivers/nv50/nv50_program.c |  178 +++++++++++++++++++++++++++++--
 src/gallium/drivers/nv50/nv50_screen.c  |   10 +-
 2 files changed, 175 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index bfd979c..c7145bb 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -32,6 +32,7 @@
 #include "nv50_context.h"
 
 #define NV50_SU_MAX_TEMP 64
+#define NV50_SU_MAX_ADDR 7
 //#define NV50_PROGRAM_DUMP
 
 /* ARL - gallium craps itself on progs/vp/arl.txt
@@ -79,7 +80,8 @@ struct nv50_reg {
 		P_ATTR,
 		P_RESULT,
 		P_CONST,
-		P_IMMD
+		P_IMMD,
+		P_ADDR
 	} type;
 	int index;
 
@@ -99,6 +101,7 @@ struct nv50_pc {
 
 	/* hw resources */
 	struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
+	struct nv50_reg r_addr[NV50_SU_MAX_ADDR];
 
 	/* tgsi resources */
 	struct nv50_reg *temp;
@@ -112,6 +115,8 @@ struct nv50_pc {
 	struct nv50_reg *immd;
 	float *immd_buf;
 	int immd_nr;
+	struct nv50_reg **addr;
+	int addr_nr;
 
 	struct nv50_reg *temp_temp[16];
 	unsigned temp_temp_nr;
@@ -159,6 +164,17 @@ popcnt4(uint32_t val)
 }
 
 static void
+terminate_mbb(struct nv50_pc *pc) /* Forget cached values of address regs that are not reserved (index < 0). */
+{
+	int i;
+
+	/* remove records of temporary address register values: rhw = -1 is what alloc_addr treats as "unused" */
+	for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
+		if (pc->r_addr[i].index < 0) /* index >= 0 entries were handed out by alloc_addr(pc, NULL); leave those alone */
+			pc->r_addr[i].rhw = -1;
+}
+
+static void
 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
 {
 	int i = 0;
@@ -454,9 +470,68 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
 	e->inst[1] |= (val >> 6) << 2;
 }
 
+static void
+emit_set_addr(struct nv50_pc *pc, struct nv50_reg *dst, unsigned val) /* Emit one long instruction encoding val and dst->hw. */
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	assert(val <= 0xffff); /* only the low 16 bits of val are encoded below */
+	e->inst[0] = 0xd0000000 | ((val & 0xffff) << 9); /* val occupies bits 9..24; 0xd0000000 is presumably the opcode - confirm against hw docs */
+	e->inst[1] = 0x20000000; /* NOTE(review): meaning of this constant is not shown in this patch */
+	e->inst[0] |= dst->hw << 2; /* dst->hw is placed starting at bit 2 */
+	set_long(pc, e);
+
+	emit(pc, e);
+}
+
+static struct nv50_reg * /* Returns an entry of pc->r_addr[]; asserts (and, for ref == NULL, returns NULL) if none is available. */
+alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) /* ref == NULL: reserve an entry; ref != NULL: find or load one for ref->hw. */
+{
+	int i;
+	struct nv50_reg *a = NULL; /* candidate to (re)load when no loaded entry can be reused */
+
+	if (!ref) { /* reservation path: the chosen entry gets index >= 0 */
+		for (i = 0; i < NV50_SU_MAX_ADDR; ++i) {
+			if (pc->r_addr[i].index >= 0) /* already reserved */
+				continue;
+			if (pc->r_addr[i].rhw >= 0 && /* holds a loaded value ... */
+			    pc->r_addr[i].acc == pc->insn_cur) /* ... that the current instruction was given: do not steal it */
+				continue;
+
+			pc->r_addr[i].rhw = -1; /* discard any cached value */
+			pc->r_addr[i].index = i; /* mark as reserved */
+			return &pc->r_addr[i];
+		}
+		assert(0); /* every entry is reserved or in use by the current instruction */
+		return NULL;
+	}
+
+	for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) { /* scans from the highest entry down, opposite to the reservation loop */
+		if (pc->r_addr[i].index >= 0) /* occupied for TGSI */
+			continue;
+		if (pc->r_addr[i].rhw < 0) { /* unused */
+			a = &pc->r_addr[i]; /* remember as load target, keep scanning for a reusable entry */
+			continue;
+		}
+		if (!a && pc->r_addr[i].acc != pc->insn_cur) /* fallback target: loaded, but not given to the current instruction */
+			a = &pc->r_addr[i];
+
+		if (ref->hw - pc->r_addr[i].rhw < 128) { /* NOTE(review): rhw is stored below as ref->hw % 128 - confirm this distance test uses the intended base */
+		/* alloc'd & suitable */
+			pc->r_addr[i].acc = pc->insn_cur; /* record that the current instruction uses this entry */
+			return &pc->r_addr[i];
+		}
+	}
+	assert(a); /* no entry could be reused or reloaded */
+	emit_set_addr(pc, a, ref->hw * 4); /* emit an instruction encoding ref->hw * 4 for the chosen register */
+
+	a->rhw = ref->hw % 128; /* NOTE(review): register was loaded with ref->hw * 4 but only ref->hw % 128 is recorded - verify */
+	a->acc = pc->insn_cur;
+	return a;
+}
 
 #define INTERP_LINEAR		0
-#define INTERP_FLAT			1
+#define INTERP_FLAT		1
 #define INTERP_PERSPECTIVE	2
 #define INTERP_CENTROID		4
 
@@ -488,6 +563,16 @@ emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv,
 	emit(pc, e);
 }
 
+static INLINE void
+set_addr(struct nv50_program_exec *e, struct nv50_reg *a) /* OR a->hw into e: low 2 bits at inst[0] bit 26, the rest from inst[1] bit 2. */
+{
+	assert(!(e->inst[0] & 0x0c000000)); /* bits 26..27 of inst[0] must still be clear */
+	assert(!(e->inst[1] & 0x00000004)); /* bit 2 of inst[1] must still be clear */
+
+	e->inst[0] |= (a->hw & 3) << 26; /* low two bits of a->hw */
+	e->inst[1] |= (a->hw >> 2) << 2; /* remaining bits of a->hw; only bit 2 is checked by the assert above */
+}
+
 static void
 set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
 	 struct nv50_program_exec *e)
@@ -498,6 +583,14 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
 	e->param.shift = s;
 	e->param.mask = m << (s % 32);
 
+	if (src->hw > 127)
+		set_addr(e, alloc_addr(pc, src));
+	else
+	if (src->acc < 0) {
+		assert(src->type == P_CONST);
+		set_addr(e, pc->addr[src->index]);
+	}
+
 	e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
 }
 
@@ -632,7 +725,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
 	}
 
 	alloc_reg(pc, src);
-	e->inst[0] |= (src->hw << 16);
+	e->inst[0] |= ((src->hw & 127) << 16);
 }
 
 static void
@@ -660,7 +753,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
 	}
 
 	alloc_reg(pc, src);
-	e->inst[1] |= (src->hw << 14);
+	e->inst[1] |= ((src->hw & 127) << 14);
 }
 
 static void
@@ -723,6 +816,22 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
 }
 
 static void
+emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, /* Emit one long instruction built from dst->hw, src and s. */
+	 uint8_t s) /* s: shift amount; the ARL case in nv50_program_tx_insn passes 4 */
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	set_long(pc, e);
+	e->inst[1] |= 0xc0000000; /* NOTE(review): presumably selects the address-load operation - meaning not shown in this patch */
+
+	e->inst[0] |= dst->hw << 2; /* dst->hw is placed starting at bit 2 */
+	e->inst[0] |= s << 16; /* shift left amount, placed starting at bit 16 */
+	set_src_0_restricted(pc, src, e); /* encodes src as source operand 0; helper is defined outside this hunk */
+
+	emit(pc, e);
+}
+
+static void
 emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
 	    struct nv50_reg *src0, struct nv50_reg *src1)
 {
@@ -1403,6 +1512,16 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
 		return &pc->temp[dst->DstRegister.Index * 4 + c];
 	case TGSI_FILE_OUTPUT:
 		return &pc->result[dst->DstRegister.Index * 4 + c];
+	case TGSI_FILE_ADDRESS:
+	{
+		struct nv50_reg *r = pc->addr[dst->DstRegister.Index * 4 + c];
+		if (!r) {
+			r = alloc_addr(pc, NULL);
+			pc->addr[dst->DstRegister.Index * 4 + c] = r;
+		}
+		assert(r);
+		return r;
+	}
 	case TGSI_FILE_NULL:
 		return NULL;
 	default:
@@ -1418,7 +1537,10 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 {
 	struct nv50_reg *r = NULL;
 	struct nv50_reg *temp;
-	unsigned sgn, c;
+	unsigned sgn, c, swz;
+
+	if (src->SrcRegister.File != TGSI_FILE_CONSTANT)
+		assert(!src->SrcRegister.Indirect);
 
 	sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
 
@@ -1436,13 +1558,29 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 			r = &pc->temp[src->SrcRegister.Index * 4 + c];
 			break;
 		case TGSI_FILE_CONSTANT:
-			r = &pc->param[src->SrcRegister.Index * 4 + c];
+			if (!src->SrcRegister.Indirect) {
+				r = &pc->param[src->SrcRegister.Index * 4 + c];
+				break;
+			}
+			/* Indicate indirection by setting r->acc < 0 and
+			 * use the index field to select the address reg.
+			 */
+			r = MALLOC_STRUCT(nv50_reg);
+			swz = tgsi_util_get_src_register_swizzle(
+						 &src->SrcRegisterInd, 0);
+			ctor_reg(r, P_CONST,
+				 src->SrcRegisterInd.Index * 4 + swz, c);
+			r->acc = -1;
 			break;
 		case TGSI_FILE_IMMEDIATE:
 			r = &pc->immd[src->SrcRegister.Index * 4 + c];
 			break;
 		case TGSI_FILE_SAMPLER:
 			break;
+		case TGSI_FILE_ADDRESS:
+			r = pc->addr[src->SrcRegister.Index * 4 + c];
+			assert(r);
+			break;
 		default:
 			assert(0);
 			break;
@@ -1678,8 +1816,15 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			emit_add(pc, dst[c], src[0][c], src[1][c]);
 		}
 		break;
+	case TGSI_OPCODE_ARL:
+		assert(src[0][0]);
+		temp = temp_temp(pc);
+		emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32);
+		emit_arl(pc, dst[0], temp, 4);
+		break;
 	case TGSI_OPCODE_BGNLOOP:
 		pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size;
+		terminate_mbb(pc);
 		break;
 	case TGSI_OPCODE_BRK:
 		emit_branch(pc, -1, 0, NULL);
@@ -1763,6 +1908,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		emit_branch(pc, -1, 0, NULL);
 		pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
 		pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+		terminate_mbb(pc);
 		break;
 	case TGSI_OPCODE_ENDIF:
 		pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
@@ -1775,6 +1921,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size;
 			pc->br_join[pc->if_lvl] = NULL;
 		}
+		terminate_mbb(pc);
 		/* emit a NOP as join point, we could set it on the next
 		 * one, but would have to make sure it is long and !immd
 		 */
@@ -1785,6 +1932,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		emit_branch(pc, -1, 0, NULL);
 		pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl];
 		pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size;
+		terminate_mbb(pc);
 		break;
 	case TGSI_OPCODE_EX2:
 		emit_preex2(pc, temp, src[0][0]);
@@ -1812,6 +1960,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		set_pred_wr(pc, 1, 0, pc->if_cond);
 		emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]);
 		pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+		terminate_mbb(pc);
 		break;
 	case TGSI_OPCODE_KIL:
 		emit_kil(pc, src[0][0]);
@@ -1989,6 +2138,9 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			src[i][c]->neg = 0;
 			if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
 				FREE(src[i][c]);
+			else
+			if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST)
+				FREE(src[i][c]); /* indirect constant */
 		}
 	}
 
@@ -2332,8 +2484,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 					pc->interp_mode[i] = mode;
 			}
 				break;
+			case TGSI_FILE_ADDRESS:
 			case TGSI_FILE_CONSTANT:
-				break;
 			case TGSI_FILE_SAMPLER:
 				break;
 			default:
@@ -2527,6 +2679,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
 	pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1;
 	pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1;
 	pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1;
+	pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1;
+	assert(pc->addr_nr <= 2);
 
 	p->cfg.high_temp = 4;
 
@@ -2595,6 +2749,14 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
 				ctor_reg(&pc->param[rid], P_CONST, i, rid);
 	}
 
+	if (pc->addr_nr) {
+		pc->addr = CALLOC(pc->addr_nr * 4, sizeof(struct nv50_reg *));
+		if (!pc->addr)
+			return FALSE;
+	}
+	for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
+		ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1);
+
 	return TRUE;
 }
 
@@ -2774,7 +2936,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
 					 p->immd_nr, NV50_CB_PMISC);
 	}
 
-	assert(p->param_nr <= 128);
+	assert(p->param_nr <= 512);
 
 	if (p->param_nr) {
 		unsigned cb;
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 66361dc..0bd5487 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -301,7 +301,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_data  (so, 8);
 
 	/* constant buffers for immediates and VP/FP parameters */
-	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
+	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4,
 			     &screen->constbuf_misc[0]);
 	if (ret) {
 		nv50_screen_destroy(pscreen);
@@ -309,7 +309,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	}
 
 	for (i = 0; i < 2; i++) {
-		ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
+		ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4,
 				     &screen->constbuf_parm[i]);
 		if (ret) {
 			nv50_screen_destroy(pscreen);
@@ -318,8 +318,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	}
 
 	if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) ||
-		nouveau_resource_init(&screen->parm_heap[0], 0, 128) ||
-		nouveau_resource_init(&screen->parm_heap[1], 0, 128))
+	    nouveau_resource_init(&screen->parm_heap[0], 0, 512) ||
+	    nouveau_resource_init(&screen->parm_heap[1], 0, 512))
 	{
 		NOUVEAU_ERR("Error initialising constant buffers.\n");
 		nv50_screen_destroy(pscreen);
@@ -340,7 +340,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
 	so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
 		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, (NV50_CB_PMISC << 16) | 0x00000800);
+	so_data  (so, (NV50_CB_PMISC << 16) | 0x00000200);
 	so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
 	so_data  (so, 0x00000001 | (NV50_CB_PMISC << 12));
 	so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);




More information about the mesa-commit mailing list