Mesa (nv50-compiler): nv50: loops part 2

Christoph Bumiller chrisbmr at kemper.freedesktop.org
Sun Aug 15 19:39:28 UTC 2010


Module: Mesa
Branch: nv50-compiler
Commit: 4de293bb9acd1ecda683f735af32f7485a0f213e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=4de293bb9acd1ecda683f735af32f7485a0f213e

Author: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
Date:   Sun Aug 15 21:37:50 2010 +0200

nv50: loops part 2

At least the mesa demo glsl/mandelbrot should work now.

---

 src/gallium/drivers/nv50/nv50_pc.h          |    8 +-
 src/gallium/drivers/nv50/nv50_pc_emit.c     |    1 +
 src/gallium/drivers/nv50/nv50_pc_optimize.c |    4 +-
 src/gallium/drivers/nv50/nv50_pc_print.c    |    2 +-
 src/gallium/drivers/nv50/nv50_screen.c      |   27 +++
 src/gallium/drivers/nv50/nv50_screen.h      |    4 +-
 src/gallium/drivers/nv50/nv50_tgsi_to_nc.c  |  233 +++++++++++++++++----------
 7 files changed, 189 insertions(+), 90 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
index 8b1c9b3..b24a306 100644
--- a/src/gallium/drivers/nv50/nv50_pc.h
+++ b/src/gallium/drivers/nv50/nv50_pc.h
@@ -47,7 +47,7 @@
 #define NV_OP_SHL       17
 #define NV_OP_SHR       18
 #define NV_OP_RCP       19
-/* gap */
+#define NV_OP_UNDEF     20
 #define NV_OP_RSQ       21
 #define NV_OP_LG2       22
 #define NV_OP_SIN       23
@@ -360,6 +360,12 @@ new_value(struct nv_pc *pc, ubyte file, ubyte type)
    return value;
 }
 
+static INLINE struct nv_value * /* new value with the same reg.file and reg.type as 'like' */
+new_value_like(struct nv_pc *pc, struct nv_value *like)
+{
+   return new_value(pc, like->reg.file, like->reg.type);
+}
+
 static INLINE struct nv_ref *
 new_ref(struct nv_pc *pc, struct nv_value *val)
 {
diff --git a/src/gallium/drivers/nv50/nv50_pc_emit.c b/src/gallium/drivers/nv50/nv50_pc_emit.c
index 35bd5ff..fe44b32 100644
--- a/src/gallium/drivers/nv50/nv50_pc_emit.c
+++ b/src/gallium/drivers/nv50/nv50_pc_emit.c
@@ -1130,6 +1130,7 @@ nv50_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
       pc->emit[1] = 0xe0000000;
       break;
    case NV_OP_PHI:
+   case NV_OP_UNDEF:
    case NV_OP_SUB:
       NOUVEAU_ERR("operation \"%s\" should have been eliminated\n",
 		  nv_opcode_name(i->opcode));
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index 64ffeaf..daf63a3 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -89,7 +89,7 @@ inst_cullable(struct nv_instruction *nvi)
 static INLINE boolean
 nvi_isnop(struct nv_instruction *nvi)
 {
-   if (nvi->opcode == NV_OP_EXPORT)
+   if (nvi->opcode == NV_OP_EXPORT || nvi->opcode == NV_OP_UNDEF)
       return TRUE;
 
    if (nvi->fixed ||
@@ -849,7 +849,7 @@ nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
    int j;
    struct nv_instruction *nvi, *next;
 
-   for (nvi = b->entry; nvi; nvi = next) {
+   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
       next = nvi->next;
 
       if (inst_cullable(nvi)) {
diff --git a/src/gallium/drivers/nv50/nv50_pc_print.c b/src/gallium/drivers/nv50/nv50_pc_print.c
index c812dbd..a4f567b 100644
--- a/src/gallium/drivers/nv50/nv50_pc_print.c
+++ b/src/gallium/drivers/nv50/nv50_pc_print.c
@@ -59,7 +59,7 @@ static const char *nv_opcode_names[NV_OP_COUNT + 1] = {
    "shl",
    "shr",
    "rcp",
-   "(undefined)",
+   "undef",
    "rsqrt",
    "lg2",
    "sin",
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index e0c06c2..78137d6 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -253,14 +253,23 @@ nv50_screen_relocs(struct nv50_screen *screen)
 	}
 }
 
+#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
+# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
+#endif
+
+extern int nouveau_device_get_param(struct nouveau_device *dev,
+                                    uint64_t param, uint64_t *value);
+
 struct pipe_screen *
 nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 {
 	struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen);
 	struct nouveau_channel *chan;
 	struct pipe_screen *pscreen;
+	uint64_t value;
 	unsigned chipset = dev->chipset;
 	unsigned tesla_class = 0;
+	unsigned stack_size;
 	int ret, i;
 	const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
 
@@ -478,6 +487,24 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	OUT_RING  (chan, 0x121 | (NV50_CB_PGP << 12));
 	OUT_RING  (chan, 0x131 | (NV50_CB_PFP << 12));
 
+	/* shader stack */
+	nouveau_device_get_param(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
+
+	stack_size  = util_bitcount(value & 0xffff);
+	stack_size *= util_bitcount((value >> 24) & 0xf);
+	stack_size *= 32 * 64 * 8;
+
+	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
+			     stack_size, &screen->stack_bo);
+	if (ret) {
+		nv50_screen_destroy(pscreen);
+		return NULL;
+	}
+	BEGIN_RING(chan, screen->tesla, NV50TCL_STACK_ADDRESS_HIGH, 3);
+	OUT_RELOCh(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	OUT_RELOCl(chan, screen->stack_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	OUT_RING  (chan, 4);
+
 	/* Vertex array limits - max them out */
 	for (i = 0; i < 16; i++) {
 		BEGIN_RING(chan, screen->tesla,
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index a491ba3..1517f56 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -22,11 +22,11 @@ struct nv50_screen {
 
 	struct nouveau_resource *immd_heap;
 
-	struct pipe_resource *strm_vbuf[16];
-
 	struct nouveau_bo *tic;
 	struct nouveau_bo *tsc;
 
+	struct nouveau_bo *stack_bo;
+
 	boolean force_push;
 };
 
diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
index da33adc..7e77ed6 100644
--- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
+++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
@@ -22,19 +22,6 @@
 
 /* XXX: need to clean this up so we get the typecasting right more naturally */
 
-/* LOOP FIXME 1
- * In bld_store_loop_var, only replace values that belong to the TGSI register
- * written.
- * For TGSI MOV, we only associate the source value with the value tracker of
- * the destination, instead of generating an actual MOV.
- *
- * Possible solution: generate PHI functions in loop headers in advance.
- */
-/* LOOP FIXME 2:
- * In fetch_by_bb, when going back through a break-block, we miss all of the
- * definitions from inside the loop.
- */
-
 #include <unistd.h>
 
 #include "nv50_context.h"
@@ -78,6 +65,24 @@ bld_vals_push_val(struct bld_value_stack *stk, struct nv_value *val)
    stk->body[stk->size++] = val;
 }
 
+static INLINE boolean /* TRUE if val was found and removed, else FALSE */
+bld_vals_del_val(struct bld_value_stack *stk, struct nv_value *val)
+{
+   unsigned i; /* 1-based slot, so 0 means "not found" without going negative */
+
+   for (i = stk->size; i > 0; --i) /* newest entry first; no-op when empty */
+      if (stk->body[i - 1] == val)
+         break;
+   if (!i)
+      return FALSE;
+
+   if (i != stk->size) /* plug the hole with the last entry (order not kept) */
+      stk->body[i - 1] = stk->body[stk->size - 1];
+
+   --stk->size; /* XXX: old size in REALLOC */
+   return TRUE;
+}
+
 static INLINE void
 bld_vals_push(struct bld_value_stack *stk)
 {
@@ -118,7 +123,7 @@ struct bld_context {
    struct bld_value_stack pvs[BLD_MAX_PREDS][4]; /* TGSI_FILE_PREDICATE */
    struct bld_value_stack ovs[PIPE_MAX_SHADER_OUTPUTS][4];
 
-   uint32_t outputs_written[PIPE_MAX_SHADER_OUTPUTS / 32];
+   uint32_t outputs_written[(PIPE_MAX_SHADER_OUTPUTS + 31) / 32];
 
    struct nv_value *frgcrd[4];
    struct nv_value *sysval[4];
@@ -130,6 +135,21 @@ struct bld_context {
    uint num_immds;
 };
 
+static INLINE ubyte /* maps a value stack to an NV_FILE_* by its address in *bld */
+bld_stack_file(struct bld_context *bld, struct bld_value_stack *stk)
+{
+   if (stk < &bld->avs[0][0]) /* NOTE(review): assumes tvs precedes avs - confirm */
+      return NV_FILE_GPR;
+   else
+   if (stk < &bld->pvs[0][0]) /* at or past avs, before pvs */
+      return NV_FILE_ADDR;
+   else
+   if (stk < &bld->ovs[0][0]) /* at or past pvs, before ovs */
+      return NV_FILE_FLAGS;
+   else
+      return NV_FILE_OUT; /* everything from ovs onwards */
+}
+
 static INLINE struct nv_value *
 bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c)
 {
@@ -138,16 +158,29 @@ bld_fetch(struct bld_context *bld, struct bld_value_stack *stk, int i, int c)
    return stk[i * 4 + c].top;
 }
 
-static void
-bld_store_loop_var(struct bld_context *, struct bld_value_stack *);
+static struct nv_value *
+bld_loop_phi(struct bld_context *, struct bld_value_stack *, struct nv_value *);
 
+/* If a variable is defined in a loop without prior use, we don't need
+ * a phi in the loop header to account for backwards flow.
+ *
+ * However, if this variable is then also used outside the loop, we do
+ * need a phi after all. But we must not use this phi's def inside the
+ * loop, so we can eliminate the phi if it is unused later.
+ */
 static INLINE void
 bld_store(struct bld_context *bld, struct bld_value_stack *stk, int i, int c,
           struct nv_value *val)
 {
-   bld_store_loop_var(bld, &stk[i * 4 + c]);
+   const uint16_t m = 1 << bld->loop_lvl;
+
+   stk = &stk[i * 4 + c];
 
-   stk[i * 4 + c].top = val;
+   if (bld->loop_lvl && !(m & (stk->loop_def | stk->loop_use)))
+      bld_loop_phi(bld, stk, val);
+
+   stk->top = val;
+   stk->loop_def |= 1 << bld->loop_lvl;
 }
 
 static INLINE void
@@ -182,6 +215,9 @@ bld_warn_uninitialized(struct bld_context *bld, int kind,
    long i = (stk - &bld->tvs[0][0]) / 4;
    long c = (stk - &bld->tvs[0][0]) & 3;
 
+   if (c == 3)
+      c = -1;
+
    debug_printf("WARNING: TEMP[%li].%c %s used uninitialized in BB:%i\n",
                 i, (int)('x' + c), kind ? "may be" : "is", b->id);
 }
@@ -237,6 +273,14 @@ fetch_by_bb(struct bld_value_stack *stack,
 static INLINE struct nv_value *
 bld_load_imm_u32(struct bld_context *bld, uint32_t u);
 
+static INLINE struct nv_value * /* result of bld_def for a new NV_OP_UNDEF insn */
+bld_undef(struct bld_context *bld, ubyte file)
+{
+   struct nv_instruction *nvi = new_instruction(bld->pc, NV_OP_UNDEF);
+
+   return bld_def(nvi, 0, new_value(bld->pc, file, NV_TYPE_U32)); /* always U32 */
+}
+
 static struct nv_value *
 bld_phi(struct bld_context *bld, struct nv_basic_block *b,
         struct bld_value_stack *stack)
@@ -267,21 +311,19 @@ bld_phi(struct bld_context *bld, struct nv_basic_block *b,
             if (in->num_in == 1) {
                in = in->in[0];
             } else {
-               if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b)) {
+               if (!nvbb_reachable_by(in->in[0], vals[0]->insn->bb, b))
                   in = in->in[0];
-                  break;
-               }
-               if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b)) {
+               else
+               if (!nvbb_reachable_by(in->in[1], vals[0]->insn->bb, b))
                   in = in->in[1];
-                  break;
-               }
-               in = in->in[0];
+               else
+                  in = in->in[0];
             }
          }
          bld->pc->current_block = in;
 
          /* should make this a no-op */
-         bld_vals_push_val(stack, bld_load_imm_u32(bld, 0));
+         bld_vals_push_val(stack, bld_undef(bld, vals[0]->reg.file));
          continue;
       }
 
@@ -318,10 +360,55 @@ bld_phi(struct bld_context *bld, struct nv_basic_block *b,
    return phi->def[0];
 }
 
+static struct nv_value * /* returns def 0 of a phi built in loop_bb[loop_lvl - 1] */
+bld_loop_phi(struct bld_context *bld, struct bld_value_stack *stack,
+             struct nv_value *def)
+{
+   struct nv_basic_block *bb = bld->pc->current_block; /* restored on exit */
+   struct nv_instruction *phi;
+   struct nv_value *val;
+
+   val = bld_phi(bld, bld->pc->current_block, stack); /* becomes phi source 0 */
+   if (!val) { /* bld_phi gave nothing: fall back to an undef of the stack's file */
+      bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]->in[0];
+
+      val = bld_undef(bld, bld_stack_file(bld, stack)); /* presumably emitted in in[0] */
+   }
+
+   bld->pc->current_block = bld->loop_bb[bld->loop_lvl - 1]; /* caller ensures loop_lvl > 0 */
+
+   phi = new_instruction(bld->pc, NV_OP_PHI);
+
+   bld_def(phi, 0, new_value_like(bld->pc, val)); /* same file/type as source 0 */
+   if (!def) /* no in-loop definition supplied: phi references itself for now */
+      def = phi->def[0];
+
+   bld_vals_push_val(stack, phi->def[0]);
+
+   phi->target = (struct nv_basic_block *)stack; /* cheat: bld_loop_end casts it back */
+
+   nv_reference(bld->pc, &phi->src[0], val);
+   nv_reference(bld->pc, &phi->src[1], def);
+
+   bld->pc->current_block = bb;
+
+   return phi->def[0];
+}
+
 static INLINE struct nv_value *
 bld_fetch_global(struct bld_context *bld, struct bld_value_stack *stack)
 {
-   stack->loop_use |= 1 << bld->loop_lvl;
+   const uint16_t m = 1 << bld->loop_lvl;
+   const uint16_t use = stack->loop_use;
+
+   stack->loop_use |= m;
+
+   /* If neither used nor def'd inside the loop, build a phi in foresight,
+    * so we don't have to replace stuff later on, which requires tracking.
+    */
+   if (bld->loop_lvl && !((use | stack->loop_def) & m))
+      return bld_loop_phi(bld, stack, NULL);
+
    return bld_phi(bld, bld->pc->current_block, stack);
 }
 
@@ -347,72 +434,50 @@ static void
 bld_replace_value(struct nv_pc *, struct nv_basic_block *, struct nv_value *,
                   struct nv_value *);
 
-/* When setting a variable inside a loop, and we have used it before in the
- * loop, we need to insert a phi function in the loop header.
+/* Replace the source of the phi in the loop header by the last assignment,
+ * or eliminate the phi function if there is no assignment inside the loop.
+ *
+ * Redundancy situation 1 - (used) but (not redefined) value:
+ *  %3 = phi %0, %3 = %3 is used
+ *  %3 = phi %0, %4 = is new definition
+ *
+ * Redundancy situation 2 - (not used) but (redefined) value:
+ *  %3 = phi %0, %2 = %2 is used, %3 could be used outside, deleted by DCE
  */
 static void
-bld_store_loop_var(struct bld_context *bld, struct bld_value_stack *stk)
+bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
 {
-   struct nv_basic_block *bb;
-   struct nv_instruction *phi;
+   struct nv_instruction *phi, *next;
    struct nv_value *val;
-   int ll;
-   uint16_t loop_def = stk->loop_def;
-
-   if (!(ll = bld->loop_lvl))
-      return;
-   stk->loop_def |= 1 << ll;
-
-   if ((~stk->loop_use | loop_def) & (1 << ll))
-      return;
-
-#if 0
-   debug_printf("TEMP[%li].%c used before loop redef (def=%x/use=%x)\n",
-                (stk - &bld->tvs[0][0]) / 4,
-                (int)('x' + ((stk - &bld->tvs[0][0]) & 3)),
-                loop_def, stk->loop_use);
-#endif
+   struct bld_value_stack *stk;
+   int s;
 
-   stk->loop_def |= 1 << ll;
+   for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = next) {
+      next = phi->next;
 
-   assert(bld->loop_bb[ll - 1]->num_in == 1);
-
-   /* get last assignment from outside this loop, could be from bld_phi */
-   val = stk->body[stk->size - 1];
-
-   /* create the phi in the loop entry block */
-
-   bb = bld->pc->current_block;
-   bld->pc->current_block = bld->loop_bb[ll - 1];
-
-   phi = new_instruction(bld->pc, NV_OP_PHI);
+      stk = (struct bld_value_stack *)phi->target;
+      phi->target = NULL;
 
-   bld_def(phi, 0, new_value(bld->pc, val->reg.file, val->reg.type));
+      val = bld_fetch_global(bld, stk);
 
-   bld->pc->pass_seq++;
-   bld_replace_value(bld->pc, bld->loop_bb[ll - 1], val, phi->def[0]);
+      nv_reference(bld->pc, &phi->src[1], val);
 
-   assert(!stk->top);
-   bld_vals_push_val(stk, phi->def[0]);
+      s = -1;
+      if (phi->src[0]->value == phi->def[0] ||
+          phi->src[0]->value == phi->src[1]->value)
+         s = 1;
+      else
+      if (phi->src[1]->value == phi->def[0])
+         s = 0;
 
-   phi->target = (struct nv_basic_block *)stk; /* cheat */
+      if (s >= 0) {
+         bld_vals_del_val(stk, phi->def[0]);
 
-   nv_reference(bld->pc, &phi->src[0], val);
-   nv_reference(bld->pc, &phi->src[1], phi->def[0]);
+         ++bld->pc->pass_seq;
+         bld_replace_value(bld->pc, bb, phi->def[0], phi->src[s]->value);
 
-   bld->pc->current_block = bb;
-}
-
-static void
-bld_loop_end(struct bld_context *bld, struct nv_basic_block *bb)
-{
-   struct nv_instruction *phi;
-   struct nv_value *val;
-
-   for (phi = bb->phi; phi && phi->opcode == NV_OP_PHI; phi = phi->next) {
-      val = bld_fetch_global(bld, (struct bld_value_stack *)phi->target);
-      nv_reference(bld->pc, &phi->src[1], val);
-      phi->target = NULL;
+         nv_nvi_delete(phi);
+      }
    }
 }
 
@@ -437,7 +502,7 @@ bld_insn_1(struct bld_context *bld, uint opcode, struct nv_value *src0)
 
 static struct nv_value *
 bld_insn_2(struct bld_context *bld, uint opcode,
-	      struct nv_value *src0, struct nv_value *src1)
+           struct nv_value *src0, struct nv_value *src1)
 {
    struct nv_instruction *insn = new_instruction(bld->pc, opcode);
 
@@ -449,8 +514,8 @@ bld_insn_2(struct bld_context *bld, uint opcode,
 
 static struct nv_value *
 bld_insn_3(struct bld_context *bld, uint opcode,
-              struct nv_value *src0, struct nv_value *src1,
-              struct nv_value *src2)
+           struct nv_value *src0, struct nv_value *src1,
+           struct nv_value *src2)
 {
    struct nv_instruction *insn = new_instruction(bld->pc, opcode);
 




More information about the mesa-commit mailing list