[Mesa-dev] [PATCH 2/8] i965: Don't make consumers of brw_DO()/brw_WHILE() track loop start

Yuanhan Liu yuanhan.liu at linux.intel.com
Wed Dec 21 01:33:37 PST 2011


From: Eric Anholt <eric at anholt.net>

This is a similar cleanup to what we did for brw_IF(), brw_ELSE(),
brw_ENDIF() handling.

Reviewed-by: Yuanhan Liu <yuanhan.liu at linux.intel.com>
---
 src/mesa/drivers/dri/i965/brw_clip_line.c     |    5 +--
 src/mesa/drivers/dri/i965/brw_clip_tri.c      |   15 +++++-------
 src/mesa/drivers/dri/i965/brw_clip_unfilled.c |   14 +++++------
 src/mesa/drivers/dri/i965/brw_eu.c            |    4 +++
 src/mesa/drivers/dri/i965/brw_eu.h            |   15 +++++++++---
 src/mesa/drivers/dri/i965/brw_eu_emit.c       |   30 ++++++++++++++++++++++--
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp     |    2 +-
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp   |    2 +-
 src/mesa/drivers/dri/i965/brw_vs_emit.c       |    2 +-
 9 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c
index 614849a..6cf2bd2 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_line.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_line.c
@@ -132,7 +132,6 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
    struct brw_indirect newvtx0   = brw_indirect(2, 0);
    struct brw_indirect newvtx1   = brw_indirect(3, 0);
    struct brw_indirect plane_ptr = brw_indirect(4, 0);
-   struct brw_instruction *plane_loop;
    struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
    GLuint hpos_offset = brw_vert_result_to_offset(&c->vue_map,
                                                   VERT_RESULT_HPOS);
@@ -160,7 +159,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
-   plane_loop = brw_DO(p, BRW_EXECUTE_1);
+   brw_DO(p, BRW_EXECUTE_1);
    {
       /* if (planemask & 1)
        */
@@ -245,7 +244,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
       brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
       brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
    }
-   brw_WHILE(p, plane_loop);
+   brw_WHILE(p);
 
    brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
    brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c
index 12d6724..a29f8e0 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_tri.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@@ -232,8 +232,6 @@ void brw_clip_tri( struct brw_clip_compile *c )
    struct brw_indirect inlist_ptr = brw_indirect(4, 0);
    struct brw_indirect outlist_ptr = brw_indirect(5, 0);
    struct brw_indirect freelist_ptr = brw_indirect(6, 0);
-   struct brw_instruction *plane_loop;
-   struct brw_instruction *vertex_loop;
    GLuint hpos_offset = brw_vert_result_to_offset(&c->vue_map,
                                                   VERT_RESULT_HPOS);
    
@@ -244,7 +242,7 @@ void brw_clip_tri( struct brw_clip_compile *c )
 
    brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );
 
-   plane_loop = brw_DO(p, BRW_EXECUTE_1);
+   brw_DO(p, BRW_EXECUTE_1);
    {
       /* if (planemask & 1)
        */
@@ -266,7 +264,7 @@ void brw_clip_tri( struct brw_clip_compile *c )
 	 brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
 	 brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
 
-	 vertex_loop = brw_DO(p, BRW_EXECUTE_1);
+	 brw_DO(p, BRW_EXECUTE_1);
 	 {
 	    /* vtx = *input_ptr;
 	     */
@@ -364,7 +362,7 @@ void brw_clip_tri( struct brw_clip_compile *c )
 	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
 	    brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
 	 } 
-	 brw_WHILE(p, vertex_loop);
+	 brw_WHILE(p);
 
 	 /* vtxPrev = *(outlist_ptr-1)  OR: outlist[nr_verts-1]
 	  * inlist = outlist
@@ -396,7 +394,7 @@ void brw_clip_tri( struct brw_clip_compile *c )
       brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
       brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
    }
-   brw_WHILE(p, plane_loop);
+   brw_WHILE(p);
 }
 
 
@@ -404,7 +402,6 @@ void brw_clip_tri( struct brw_clip_compile *c )
 void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
 {
    struct brw_compile *p = &c->func;
-   struct brw_instruction *loop;
 
    /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
     */
@@ -429,7 +426,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
       brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
       brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
 
-      loop = brw_DO(p, BRW_EXECUTE_1);
+      brw_DO(p, BRW_EXECUTE_1);
       {
 	 brw_clip_emit_vue(c, v0, 1, 0,
                            (_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT));
@@ -440,7 +437,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
 	 brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
 	 brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
       }
-      brw_WHILE(p, loop);
+      brw_WHILE(p);
 
       brw_clip_emit_vue(c, v0, 0, 1,
                         ((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT)
diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
index 01f14b0..03c7d42 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
@@ -273,7 +273,6 @@ static void emit_lines(struct brw_clip_compile *c,
 		       bool do_offset)
 {
    struct brw_compile *p = &c->func;
-   struct brw_instruction *loop;
    struct brw_indirect v0 = brw_indirect(0, 0);
    struct brw_indirect v1 = brw_indirect(1, 0);
    struct brw_indirect v0ptr = brw_indirect(2, 0);
@@ -285,7 +284,7 @@ static void emit_lines(struct brw_clip_compile *c,
       brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
       brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
 
-      loop = brw_DO(p, BRW_EXECUTE_1);
+      brw_DO(p, BRW_EXECUTE_1);
       {
 	 brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
 	 brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
@@ -295,7 +294,7 @@ static void emit_lines(struct brw_clip_compile *c,
 	 brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
 	 brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
       }
-      brw_WHILE(p, loop);
+      brw_WHILE(p);
    }
 
    /* v1ptr = &inlist[nr_verts]
@@ -307,7 +306,7 @@ static void emit_lines(struct brw_clip_compile *c,
    brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
    brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0));
 
-   loop = brw_DO(p, BRW_EXECUTE_1);
+   brw_DO(p, BRW_EXECUTE_1);
    {
       brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
       brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2));
@@ -333,7 +332,7 @@ static void emit_lines(struct brw_clip_compile *c,
       brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
       brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
    }
-   brw_WHILE(p, loop);
+   brw_WHILE(p);
 }
 
 
@@ -342,7 +341,6 @@ static void emit_points(struct brw_clip_compile *c,
 			bool do_offset )
 {
    struct brw_compile *p = &c->func;
-   struct brw_instruction *loop;
 
    struct brw_indirect v0 = brw_indirect(0, 0);
    struct brw_indirect v0ptr = brw_indirect(2, 0);
@@ -350,7 +348,7 @@ static void emit_points(struct brw_clip_compile *c,
    brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
    brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
 
-   loop = brw_DO(p, BRW_EXECUTE_1);
+   brw_DO(p, BRW_EXECUTE_1);
    {
       brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
       brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
@@ -376,7 +374,7 @@ static void emit_points(struct brw_clip_compile *c,
       brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
       brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
    }
-   brw_WHILE(p, loop);
+   brw_WHILE(p);
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index b5a858b..c0126ff 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -193,6 +193,10 @@ brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx)
    p->if_stack_array_size = 16;
    p->if_stack =
       rzalloc_array(mem_ctx, struct brw_instruction *, p->if_stack_array_size);
+
+   p->loop_stack_depth = 0;
+   p->loop_stack_array_size = 16;
+   p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 596be02..321924e 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -128,6 +128,15 @@ struct brw_compile {
    int if_stack_depth;
    int if_stack_array_size;
 
+   /**
+    * loop_stack contains the instruction pointers of the starts of loops which
+    * must be patched (and popped) once the matching WHILE instruction is
+    * encountered.
+    */
+   int *loop_stack;
+   int loop_stack_depth;
+   int loop_stack_array_size;
+
    struct brw_glsl_label *first_label;  /**< linked list of labels */
    struct brw_glsl_call *first_call;    /**< linked list of CALs */
 };
@@ -1008,13 +1017,11 @@ void brw_ENDIF(struct brw_compile *p);
 struct brw_instruction *brw_DO(struct brw_compile *p,
 			       GLuint execute_size);
 
-struct brw_instruction *brw_WHILE(struct brw_compile *p, 
-	       struct brw_instruction *patch_insn);
+struct brw_instruction *brw_WHILE(struct brw_compile *p);
 
 struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
 struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
-struct brw_instruction *gen6_CONT(struct brw_compile *p,
-				  struct brw_instruction *do_insn);
+struct brw_instruction *gen6_CONT(struct brw_compile *p);
 /* Forward jumps:
  */
 void brw_land_fwd_jump(struct brw_compile *p, 
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index c4f3a37..0e708d2 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -911,6 +911,25 @@ push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
    }
 }
 
+static void
+push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
+{
+   if (p->loop_stack_array_size < p->loop_stack_depth) {
+      p->loop_stack_array_size *= 2;
+      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
+			       p->loop_stack_array_size);
+   }
+
+   p->loop_stack[p->loop_stack_depth] = inst - p->store;
+   p->loop_stack_depth++;
+}
+
+static struct brw_instruction *
+get_inner_do_insn(struct brw_compile *p)
+{
+   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
+}
+
 /* EU takes the value from the flag register and pushes it onto some
  * sort of a stack (presumably merging with any flag value already on
  * the stack).  Within an if block, the flags at the top of the stack
@@ -1301,10 +1320,13 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
    struct intel_context *intel = &p->brw->intel;
 
    if (intel->gen >= 6 || p->single_program_flow) {
+      push_loop_stack(p, &p->store[p->nr_insn]);
       return &p->store[p->nr_insn];
    } else {
       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
 
+      push_loop_stack(p, insn);
+
       /* Override the defaults for this instruction:
        */
       brw_set_dest(p, insn, brw_null_reg());
@@ -1323,13 +1345,15 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
 
 
 
-struct brw_instruction *brw_WHILE(struct brw_compile *p, 
-                                  struct brw_instruction *do_insn)
+struct brw_instruction *brw_WHILE(struct brw_compile *p)
 {
    struct intel_context *intel = &p->brw->intel;
-   struct brw_instruction *insn;
+   struct brw_instruction *insn, *do_insn;
    GLuint br = 1;
 
+   do_insn = get_inner_do_insn(p);
+   p->loop_stack_depth--;
+
    if (intel->gen >= 5)
       br = 2;
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 2f5a026..ded58a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -828,7 +828,7 @@ fs_visitor::generate_code()
 
 	 assert(loop_stack_depth > 0);
 	 loop_stack_depth--;
-	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
+	 inst0 = inst1 = brw_WHILE(p);
 	 if (intel->gen < 6) {
 	    /* patch all the BREAK/CONT instructions from last BGNLOOP */
 	    while (inst0 > loop_stack[loop_stack_depth]) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index becb89e..cd1d228 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -842,7 +842,7 @@ vec4_visitor::generate_code()
 
 	 assert(loop_stack_depth > 0);
 	 loop_stack_depth--;
-	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
+	 inst0 = inst1 = brw_WHILE(p);
 	 if (intel->gen < 6) {
 	    /* patch all the BREAK/CONT instructions from last BGNLOOP */
 	    while (inst0 > loop_stack[loop_stack_depth]) {
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 6c96a48..05f3ecb 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -2123,7 +2123,7 @@ void brw_old_vs_emit(struct brw_vs_compile *c )
 	 if (intel->gen == 5)
 	    br = 2;
 
-	 inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+	 inst0 = inst1 = brw_WHILE(p);
 
 	 if (intel->gen < 6) {
 	    /* patch all the BREAK/CONT instructions from last BEGINLOOP */
-- 
1.7.4.4



More information about the mesa-dev mailing list