[Mesa-dev] [PATCH] squash: Fix up VPM read optimization.
Eric Anholt
eric at anholt.net
Thu Mar 10 20:21:52 UTC 2016
- There's no reason there would be only 64 operations that read from the
output of a mov from VPM, so we might smash the stack (fixes etqw trace)
- Fixes segfault where we assumed that a single-use temp had a def (fixes
2 piglit tests)
- We need to only mark progress when we actually did the optimization, or
we'll infinite loop (0ad trace).
- Misc style fixes.
- No reordering sampler instructions (fixes a glean test)
shader-db results:
total instructions in shared programs: 78513 -> 78071 (-0.56%)
instructions in affected programs: 10406 -> 9964 (-4.25%)
total estimated cycles in shared programs: 234674 -> 234274 (-0.17%)
estimated cycles in affected programs: 35188 -> 34788 (-1.14%)
---
Varad, here's what I came up with trying to test your patch. If these
changes look good to you, I can squash them in and push.
src/gallium/drivers/vc4/vc4_opt_vpm.c | 45 +++++++++++++++++------------------
1 file changed, 22 insertions(+), 23 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_opt_vpm.c b/src/gallium/drivers/vc4/vc4_opt_vpm.c
index 277b345..a4ee6af 100644
--- a/src/gallium/drivers/vc4/vc4_opt_vpm.c
+++ b/src/gallium/drivers/vc4/vc4_opt_vpm.c
@@ -40,10 +40,8 @@ qir_opt_vpm(struct vc4_compile *c)
bool progress = false;
struct qinst *vpm_writes[64] = { 0 };
- struct qinst *vpm_reads[64] = { 0 };
uint32_t use_count[c->num_temps];
uint32_t vpm_write_count = 0;
- uint32_t vpm_read_count = 0;
memset(&use_count, 0, sizeof(use_count));
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
@@ -59,24 +57,14 @@ qir_opt_vpm(struct vc4_compile *c)
if (inst->src[i].file == QFILE_TEMP) {
uint32_t temp = inst->src[i].index;
use_count[temp]++;
-
- struct qinst *mov = c->defs[temp];
- if (!mov ||
- (mov->op != QOP_MOV &&
- mov->op != QOP_FMOV &&
- mov->op != QOP_MMOV)) {
- continue;
- }
-
- if (mov->src[0].file == QFILE_VPM)
- vpm_reads[vpm_read_count++] = inst;
}
}
}
- for (int i = 0; i < vpm_read_count; i++) {
- struct qinst *inst = vpm_reads[i];
-
+ /* For instructions reading from a temporary that contains a VPM read
+ * result, try to move the instruction up in place of the VPM read.
+ */
+ list_for_each_entry(struct qinst, inst, &c->instructions, link) {
if (!inst || qir_is_multi_instruction(inst))
continue;
@@ -84,21 +72,32 @@ qir_opt_vpm(struct vc4_compile *c)
continue;
if (qir_has_side_effects(c, inst) ||
- qir_has_side_effect_reads(c, inst))
+ qir_has_side_effect_reads(c, inst) ||
+ qir_is_tex(inst))
continue;
for (int j = 0; j < qir_get_op_nsrc(inst->op); j++) {
- if(inst->src[j].file != QFILE_TEMP)
+ if (inst->src[j].file != QFILE_TEMP)
continue;
uint32_t temp = inst->src[j].index;
+
+ /* Since VPM reads pull from a FIFO, we only get to
+ * read each VPM entry once (unless we reset the read
+ * pointer). That means we can't copy-propagate a VPM
+ * read to multiple locations.
+ */
if (use_count[temp] != 1)
continue;
struct qinst *mov = c->defs[temp];
-
- if (mov->src[0].file != QFILE_VPM)
+ if (!mov ||
+ (mov->op != QOP_MOV &&
+ mov->op != QOP_FMOV &&
+ mov->op != QOP_MMOV) ||
+ mov->src[0].file != QFILE_VPM) {
continue;
+ }
uint32_t temps = 0;
for (int k = 0; k < qir_get_op_nsrc(inst->op); k++) {
@@ -109,15 +108,15 @@ qir_opt_vpm(struct vc4_compile *c)
/* The instruction is safe to reorder if its other
* sources are independent of previous instructions
*/
- if (temps == 1 ) {
+ if (temps == 1) {
list_del(&inst->link);
inst->src[j] = mov->src[0];
list_replace(&mov->link, &inst->link);
c->defs[temp] = NULL;
free(mov);
+ progress = true;
+ break;
}
-
- progress = true;
}
}
--
2.7.0
More information about the mesa-dev
mailing list