Mesa (nv50-compiler): nv50: simple reload elimination and local CSE

Christoph Bumiller chrisbmr at kemper.freedesktop.org
Mon Jul 26 09:21:43 UTC 2010


Module: Mesa
Branch: nv50-compiler
Commit: 5811c6926450c4aafd2f9c87a2c6fe73b517f2c6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5811c6926450c4aafd2f9c87a2c6fe73b517f2c6

Author: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
Date:   Sun Jul 25 22:21:38 2010 +0200

nv50: simple reload elimination and local CSE

---

 src/gallium/drivers/nv50/nv50_pc.c          |   18 +++
 src/gallium/drivers/nv50/nv50_pc.h          |   10 +-
 src/gallium/drivers/nv50/nv50_pc_optimize.c |  168 ++++++++++++++++++++++++---
 src/gallium/drivers/nv50/nv50_pc_print.c    |    6 +-
 4 files changed, 178 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c
index 89dbc7a..e09f940 100644
--- a/src/gallium/drivers/nv50/nv50_pc.c
+++ b/src/gallium/drivers/nv50/nv50_pc.c
@@ -163,6 +163,24 @@ nv_nvi_refcount(struct nv_instruction *nvi)
    return rc;
 }
 
+int
+nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
+                   struct nv_value *new_val)
+{
+   int i, n;
+
+   if (old_val == new_val)
+      return old_val->refc;
+
+   for (i = 0, n = 0; i < pc->num_refs; ++i) {
+      if (pc->refs[i]->value == old_val) {
+         ++n;
+         nv_reference(pc, &pc->refs[i], new_val);
+      }
+   }
+   return n;
+}
+
 static void
 nv_pc_free_refs(struct nv_pc *pc)
 {
diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
index 3db300d..ffcdaf4 100644
--- a/src/gallium/drivers/nv50/nv50_pc.h
+++ b/src/gallium/drivers/nv50/nv50_pc.h
@@ -363,11 +363,11 @@ new_ref(struct nv_pc *pc, struct nv_value *val)
       const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *);
       const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *);
 
-	   pc->refs = REALLOC(pc->refs, old_size, new_size);
+      pc->refs = REALLOC(pc->refs, old_size, new_size);
 
-	   ref = CALLOC(64, sizeof(struct nv_ref));
-	   for (i = 0; i < 64; ++i)
-		   pc->refs[pc->num_refs + i] = &ref[i];
+      ref = CALLOC(64, sizeof(struct nv_ref));
+      for (i = 0; i < 64; ++i)
+         pc->refs[pc->num_refs + i] = &ref[i];
    }
 
    ref = pc->refs[pc->num_refs++];
@@ -426,6 +426,8 @@ int nv_nvi_refcount(struct nv_instruction *);
 void nv_nvi_delete(struct nv_instruction *);
 void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *);
 void nvbb_attach_block(struct nv_basic_block *parent, struct nv_basic_block *);
+int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
+                       struct nv_value *new_val);
 
 int nv_pc_exec_pass0(struct nv_pc *pc);
 int nv_pc_exec_pass1(struct nv_pc *pc);
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index a514c59..0018131 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -570,31 +570,99 @@ nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b)
 }
 #endif
 
-/* TODO: reload elimination, redundant store elimination */
+/* TODO: redundant store elimination */
 
-struct nv_pass_reldelim {
+struct load_record {
+   struct load_record *next;
+   uint64_t data;
+   struct nv_value *value;
+};
+
+#define LOAD_RECORD_POOL_SIZE 1024
+
+struct nv_pass_reld_elim {
    struct nv_pc *pc;
+
+   struct load_record *imm;
+   struct load_record *mem_s;
+   struct load_record *mem_v;
+   struct load_record *mem_c[16];
+   struct load_record *mem_l;
+
+   struct load_record pool[LOAD_RECORD_POOL_SIZE];
+   int alloc;
 };
 
 static int
-nv_pass_reload_elim(struct nv_pass_reldelim *ctx, struct nv_basic_block *b)
+nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
 {
-   int j;
+   struct load_record **rec, *it;
    struct nv_instruction *ld, *next;
+   uint64_t data;
+   struct nv_value *val;
+   int j;
 
    for (ld = b->entry; ld; ld = next) {
       next = ld->next;
+      if (!ld->src[0])
+         continue;
+      val = ld->src[0]->value;
+      rec = NULL;
 
       if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
-
+         data = val->reg.id;
+         rec = &ctx->mem_v;
       } else
       if (ld->opcode == NV_OP_LDA) {
-         
+         data = val->reg.id;
+         if (val->reg.file >= NV_FILE_MEM_C(0) &&
+             val->reg.file <= NV_FILE_MEM_C(15))
+            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
+         else
+         if (val->reg.file == NV_FILE_MEM_S)
+            rec = &ctx->mem_s;
+         else
+         if (val->reg.file == NV_FILE_MEM_L)
+            rec = &ctx->mem_l;
       } else
-      if (ld->opcode == NV_OP_MOV) {
-         
+      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
+         data = val->reg.imm.u32;
+         rec = &ctx->imm;
+      }
+
+      if (!rec || !ld->def[0]->refc)
+         continue;
+
+      for (it = *rec; it; it = it->next)
+         if (it->data == data)
+            break;
+
+      if (it) {
+#if 1
+         nvcg_replace_value(ctx->pc, ld->def[0], it->value);
+#else
+         ld->opcode = NV_OP_MOV;
+         nv_reference(ctx->pc, &ld->src[0], it->value);
+#endif
+      } else {
+         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
+            continue;
+         it = &ctx->pool[ctx->alloc++];
+         it->next = *rec;
+         it->data = data;
+         it->value = ld->def[0];
+         *rec = it;
       }
    }
+
+   ctx->imm = NULL;
+   ctx->mem_s = NULL;
+   ctx->mem_v = NULL;
+   for (j = 0; j < 16; ++j)
+      ctx->mem_c[j] = NULL;
+   ctx->mem_l = NULL;
+   ctx->alloc = 0;
+
    DESCEND_ARBITRARY(j, nv_pass_reload_elim);
 
    return 0;
@@ -678,23 +746,74 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
    return 0;
 }
 
+/* local common subexpression elimination, stupid O(n^2) implementation */
+static int
+nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *ir, *ik, *next;
+   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
+   int s;
+   unsigned int reps;
+
+   do {
+      reps = 0;
+      for (ir = entry; ir; ir = next) {
+         next = ir->next;
+         for (ik = entry; ik != ir; ik = ik->next) {
+            if (ir->opcode != ik->opcode)
+               continue;
+
+            if (ik->opcode == NV_OP_LDA ||
+                ik->opcode == NV_OP_STA ||
+                ik->opcode == NV_OP_MOV ||
+                nv_is_vector_op(ik->opcode))
+               continue; /* ignore loads, stores & moves */
+
+            if (ik->src[4] || ir->src[4])
+               continue; /* don't mess with address registers */
+
+            for (s = 0; s < 3; ++s) {
+               struct nv_value *a, *b;
+
+               if (!ik->src[s]) {
+                  if (ir->src[s])
+                     break;
+                  continue;
+               }
+               if (ik->src[s]->mod != ir->src[s]->mod)
+                  break;
+               a = ik->src[s]->value;
+               b = ir->src[s]->value;
+               if (a == b)
+                  continue;
+               if (a->reg.file != b->reg.file ||
+                   a->reg.id < 0 ||
+                   a->reg.id != b->reg.id)
+                  break;
+            }
+            if (s == 3) {
+               nv_nvi_delete(ir);
+               ++reps;
+               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
+               break;
+            }
+         }
+      }
+   } while(reps);
+
+   DESCEND_ARBITRARY(s, nv_pass_cse);
+
+   return 0;
+}
+
 int
 nv_pc_exec_pass0(struct nv_pc *pc)
 {
-   struct nv_pass_reldelim *reldelim;
+   struct nv_pass_reld_elim *reldelim;
    struct nv_pass pass;
    struct nv_pass_dce dce;
    int ret;
 
-   reldelim = CALLOC_STRUCT(nv_pass_reldelim);
-   reldelim->pc = pc;
-
-   ret = nv_pass_reload_elim(reldelim, pc->root);
-
-   FREE(reldelim);
-   if (ret)
-      return ret;
-
    pass.pc = pc;
 
    pc->pass_seq++;
@@ -720,6 +839,19 @@ nv_pc_exec_pass0(struct nv_pc *pc)
    if (ret)
       return ret;
 
+   reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
+   reldelim->pc = pc;
+   pc->pass_seq++;
+   ret = nv_pass_reload_elim(reldelim, pc->root);
+   FREE(reldelim);
+   if (ret)
+      return ret;
+
+   pc->pass_seq++;
+   ret = nv_pass_cse(&pass, pc->root);
+   if (ret)
+      return ret;
+
    pc->pass_seq++;
    ret = nv_pass_lower_mods(&pass, pc->root);
    if (ret)
diff --git a/src/gallium/drivers/nv50/nv50_pc_print.c b/src/gallium/drivers/nv50/nv50_pc_print.c
index 00b50b4..8208077 100644
--- a/src/gallium/drivers/nv50/nv50_pc_print.c
+++ b/src/gallium/drivers/nv50/nv50_pc_print.c
@@ -181,7 +181,7 @@ nv_print_address(const char c, int buf, struct nv_value *a, int offset)
 static INLINE void
 nv_print_cond(struct nv_instruction *nvi)
 {
-   PRINT("%s%s%s$c%i ",
+   PRINT("%s%s %s$c%i ",
          gree, nv_cond_name(nvi->cc),
          mgta, nv_value_id(nvi->flags_src->value));
 }
@@ -198,7 +198,7 @@ nv_print_value(struct nv_value *value, struct nv_value *ind, ubyte type)
       PRINT(" %s%s", gree, nv_type_name(type));
 
    if (!nv_value_allocated(value))
-      reg_pfx = '%';
+      reg_pfx = nv_value_allocated(value->join) ? '&' : '%';
 
    switch (value->reg.file) {
    case NV_FILE_GPR:
@@ -268,6 +268,8 @@ nv_print_instruction(struct nv_instruction *i)
 {
    int j;
 
+   PRINT("%i: ", i->serial);
+
    if (i->flags_src)
       nv_print_cond(i);
 




More information about the mesa-commit mailing list