[PATCH] nv50: defer FP attribute loading
Christoph Bumiller
e0425955 at student.tuwien.ac.at
Sun Jun 21 07:54:43 PDT 2009
Interpolate fragment program attributes on first use instead of loading them all up front. This might keep the number of used TEMPs down.
---
src/gallium/drivers/nv50/nv50_program.c | 59 ++++++++++++++++++++-----------
1 files changed, 38 insertions(+), 21 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 28a9f2a..249f069 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -132,6 +132,7 @@ struct nv50_pc {
boolean allow32;
boolean join_on;
+ boolean preload;
};
static inline void
@@ -1242,6 +1243,23 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
e->inst[1] |= q;
}
+static void /* Allocate a register for r and, when pc->preload is set, emit its interpolation. */
+load_interpolant(struct nv50_pc *pc, struct nv50_reg *r)
+{
+ struct nv50_reg *iv = pc->iv_p; /* default interpolation source is pc->iv_p */
+ int rhw = r->rhw; /* saved so it can be restored after alloc_reg() */
+
+ if (pc->interp_mode[r->index] & INTERP_CENTROID)
+ iv = pc->iv_c; /* INTERP_CENTROID flag set: use pc->iv_c instead */
+
+ r->rhw = -1; /* NOTE(review): presumably alloc_reg() treats rhw >= 0 specially -- confirm */
+ alloc_reg(pc, r);
+ r->rhw = rhw;
+
+ if (pc->preload) /* nv50_program_tx_insn() sets this to false for TGSI_OPCODE_TXP */
+ emit_interp(pc, r, iv, pc->interp_mode[r->index]);
+}
+
static boolean
negate_supported(const struct tgsi_full_instruction *insn, int i)
{
@@ -1297,6 +1315,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
switch (src->SrcRegister.File) {
case TGSI_FILE_INPUT:
r = &pc->attr[src->SrcRegister.Index * 4 + c];
+ if (r->hw == -1 && r->rhw >= 0)
+ load_interpolant(pc, r);
break;
case TGSI_FILE_TEMPORARY:
r = &pc->temp[src->SrcRegister.Index * 4 + c];
@@ -1416,6 +1436,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
if (*pp_rtmp && (*pp_rtmp)->type != P_TEMP && (nr_dst > 1 || sat))
pp_rtmp = &temp;
+ pc->preload = (inst->Instruction.Opcode != TGSI_OPCODE_TXP);
+
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fs =
&inst->FullSrcRegisters[i];
@@ -1860,18 +1882,15 @@ prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok,
}
static unsigned
-load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid,
+prep_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *p_mid,
int *aid, int *p_oid)
{
- struct nv50_reg *iv;
- int oid, c, n;
+ int c, n, oid = *p_oid, mid = *p_mid;
unsigned mask = 0;
- iv = (pc->interp_mode[i] & INTERP_CENTROID) ? pc->iv_c : pc->iv_p;
-
for (c = 0, n = i * 4; c < 4; c++, n++) {
- oid = (*p_oid)++;
pc->attr[n].type = P_TEMP;
+ pc->attr[n].hw = -1;
pc->attr[n].index = i;
if (pc->attr[n].acc == acc[n])
@@ -1879,17 +1898,15 @@ load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid,
mask |= (1 << c);
pc->attr[n].acc = acc[n];
- pc->attr[n].rhw = pc->attr[n].hw = -1;
- alloc_reg(pc, &pc->attr[n]);
-
pc->attr[n].rhw = (*aid)++;
- emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]);
- pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4));
- (*mid)++;
+ pc->p->cfg.fp.map[mid / 4] |= (oid + c) << (8 * (mid % 4));
+ mid++;
pc->p->cfg.fp.regs[1] += 0x00010001;
}
+ *p_mid = mid;
+ *p_oid = oid + 4;
return mask;
}
@@ -2063,7 +2080,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
if (fcrd != 0xffff) {
unsigned mask;
oid = mid = 0;
- mask = load_fp_attrib(pc, fcrd, r_usage[1],
+ mask = prep_fp_attrib(pc, fcrd, r_usage[1],
&mid, &aid, &oid);
pc->p->cfg.fp.regs[1] |= (mask << 24);
pc->p->cfg.fp.map[0] += 0x04040404 * fcrd;
@@ -2103,10 +2120,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
i = mid;
if (fcol[0] != 0xffff)
- load_fp_attrib(pc, fcol[0], r_usage[1],
+ prep_fp_attrib(pc, fcol[0], r_usage[1],
&mid, &aid, &oid);
if (fcol[1] != 0xffff)
- load_fp_attrib(pc, fcol[1], r_usage[1],
+ prep_fp_attrib(pc, fcol[1], r_usage[1],
&mid, &aid, &oid);
/* set count of mapped color components */
@@ -2115,14 +2132,9 @@ nv50_program_tx_prep(struct nv50_pc *pc)
/* reset oid and load remaining attrs */
oid = (fcrd == 0xffff) ? 4 : 0;
for (i = 0; i < pc->attr_nr; i++)
- load_fp_attrib(pc, i, r_usage[1],
+ prep_fp_attrib(pc, i, r_usage[1],
&mid, &aid, &oid);
- if (pc->iv_p)
- free_temp(pc, pc->iv_p);
- if (pc->iv_c)
- free_temp(pc, pc->iv_c);
-
pc->p->cfg.fp.high_map = mid;
} else {
/* vertex program */
@@ -2228,6 +2240,11 @@ free_nv50_pc(struct nv50_pc *pc)
if (pc->temp)
FREE(pc->temp);
+ if (pc->iv_p)
+ free_temp(pc, pc->iv_p);
+ if (pc->iv_c)
+ free_temp(pc, pc->iv_c);
+
FREE(pc);
}
--
1.6.0.6
--------------090503050107050804030002
Content-Type: text/plain;
name="0016-nv50-update-comments.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="0016-nv50-update-comments.patch"
More information about the Nouveau mailing list