Mesa (master): freedreno/ir3: use nopN encoding when possible
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Feb 26 18:23:25 UTC 2019
Module: Mesa
Branch: master
Commit: cb884d8ab210b4793eb55852b4f07642c71a99a5
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb884d8ab210b4793eb55852b4f07642c71a99a5
Author: Rob Clark <robdclark at gmail.com>
Date: Sat Feb 23 11:14:32 2019 -0500
freedreno/ir3: use nopN encoding when possible
Use the (nopN) encoding for slightly denser shaders. This lets us fold
nop instructions into the previous ALU instruction in certain cases.
This shouldn't change the number of cycles a shader takes to execute, but it
reduces the size (e.g. glmark2 refract goes from 168 to 116 instructions).
Currently only enabled for a6xx, but I think we could enable this for
a5xx and possibly a4xx.
Signed-off-by: Rob Clark <robdclark at gmail.com>
---
src/freedreno/ir3/ir3.c | 28 +++++++++++++++++++++++-----
src/freedreno/ir3/ir3.h | 3 ++-
src/freedreno/ir3/ir3_legalize.c | 10 ++++++++++
3 files changed, 35 insertions(+), 6 deletions(-)
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index a4de1d3ab49..ed14c343faa 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -211,6 +211,18 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
iassert((instr->regs_count == 2) || (instr->regs_count == 3));
+ if (instr->nop) {
+ iassert(!instr->repeat);
+ iassert(instr->nop <= 3);
+
+ cat2->src1_r = instr->nop & 0x1;
+ cat2->src2_r = (instr->nop >> 1) & 0x1;
+ } else {
+ cat2->src1_r = !!(src1->flags & IR3_REG_R);
+ if (src2)
+ cat2->src2_r = !!(src2->flags & IR3_REG_R);
+ }
+
if (src1->flags & IR3_REG_RELATIV) {
iassert(src1->array.offset < (1 << 10));
cat2->rel1.src1 = reg(src1, info, instr->repeat,
@@ -232,7 +244,6 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
cat2->src1_im = !!(src1->flags & IR3_REG_IMMED);
cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS));
- cat2->src1_r = !!(src1->flags & IR3_REG_R);
if (src2) {
iassert((src2->flags & IR3_REG_IMMED) ||
@@ -260,7 +271,6 @@ static int emit_cat2(struct ir3_instruction *instr, void *ptr,
cat2->src2_im = !!(src2->flags & IR3_REG_IMMED);
cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS));
- cat2->src2_r = !!(src2->flags & IR3_REG_R);
}
cat2->dst = reg(dst, info, instr->repeat,
@@ -312,6 +322,17 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF));
iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
+ if (instr->nop) {
+ iassert(!instr->repeat);
+ iassert(instr->nop <= 3);
+
+ cat3->src1_r = instr->nop & 0x1;
+ cat3->src2_r = (instr->nop >> 1) & 0x1;
+ } else {
+ cat3->src1_r = !!(src1->flags & IR3_REG_R);
+ cat3->src2_r = !!(src2->flags & IR3_REG_R);
+ }
+
if (src1->flags & IR3_REG_RELATIV) {
iassert(src1->array.offset < (1 << 10));
cat3->rel1.src1 = reg(src1, info, instr->repeat,
@@ -331,14 +352,11 @@ static int emit_cat3(struct ir3_instruction *instr, void *ptr,
}
cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
- cat3->src1_r = !!(src1->flags & IR3_REG_R);
cat3->src2 = reg(src2, info, instr->repeat,
IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg);
cat3->src2_c = !!(src2->flags & IR3_REG_CONST);
cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT));
- cat3->src2_r = !!(src2->flags & IR3_REG_R);
-
if (src3->flags & IR3_REG_RELATIV) {
iassert(src3->array.offset < (1 << 10));
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 36d645f0b50..27a52c54ad4 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -212,7 +212,8 @@ struct ir3_instruction {
IR3_INSTR_MARK = 0x1000,
IR3_INSTR_UNUSED= 0x2000,
} flags;
- int repeat;
+ uint8_t repeat;
+ uint8_t nop;
#ifdef DEBUG
unsigned regs_max;
#endif
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index 00f0c9c9b8d..b14a789efb2 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -200,6 +200,16 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
last->flags |= n->flags;
continue;
}
+
+ /* NOTE: I think the nopN encoding works for a5xx and
+ * probably a4xx, but not a3xx. So far only tested on
+ * a6xx.
+ */
+ if ((ctx->compiler->gpu_id >= 600) && !n->flags && (last->nop < 3) &&
+ ((opc_cat(last->opc) == 2) || (opc_cat(last->opc) == 3))) {
+ last->nop++;
+ continue;
+ }
}
list_addtail(&n->node, &block->instr_list);
More information about the mesa-commit
mailing list