[Mesa-dev] [PATCH] mesa: allow unlimited temporaries

Luca Barbieri luca at luca-barbieri.com
Sat Sep 4 16:59:47 PDT 2010


Currently Mesa has a limit of 1024 temporaries.

Unfortunately, some GLSL shaders, such as an SSAO fragment
post-processing shader in Unigine Tropics, exceed this limit, at
least before program optimizations are applied.

Instead of just raising the limit, remove it completely and
replace every array sized by it with dynamically allocated storage.

If no one objects, I'll push this.
---
 src/mesa/program/ir_to_mesa.cpp      |    1 -
 src/mesa/program/prog_instruction.h  |   24 +++-----
 src/mesa/program/prog_optimize.c     |  105 ++++++++++++++++++++--------------
 src/mesa/program/prog_optimize.h     |    5 +-
 src/mesa/program/program.c           |   14 -----
 src/mesa/program/program_parse.tab.c |   14 +----
 src/mesa/program/program_parse.y     |   14 +----
 7 files changed, 79 insertions(+), 98 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index d61698b..bc90365 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2166,7 +2166,6 @@ mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
    struct prog_src_register mesa_reg;
 
    mesa_reg.File = reg.file;
-   assert(reg.index < (1 << INST_INDEX_BITS) - 1);
    mesa_reg.Index = reg.index;
    mesa_reg.Swizzle = reg.swizzle;
    mesa_reg.RelAddr = reg.reladdr != NULL;
diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h
index ca90de7..b2c0027 100644
--- a/src/mesa/program/prog_instruction.h
+++ b/src/mesa/program/prog_instruction.h
@@ -242,23 +242,20 @@ typedef enum prog_opcode {
    MAX_OPCODE
 } gl_inst_opcode;
 
-
-/**
- * Number of bits for the src/dst register Index field.
- * This limits the size of temp/uniform register files.
- */
-#define INST_INDEX_BITS 10
-
-
 /**
  * Instruction source register.
  */
 struct prog_src_register
 {
+   GLint Index; /**< Extra bit here for sign bit.
+	         * May be negative for relative addressing.
+	         */
+
+   GLint Index2; /**< Extra bit here for sign bit.
+                  * May be negative for relative
+                  * addressing. */
+
    GLuint File:4;	/**< One of the PROGRAM_* register file values. */
-   GLint Index:(INST_INDEX_BITS+1); /**< Extra bit here for sign bit.
-                                     * May be negative for relative addressing.
-                                     */
    GLuint Swizzle:12;
    GLuint RelAddr:1;
 
@@ -284,9 +281,6 @@ struct prog_src_register
     */
    GLuint HasIndex2:1;
    GLuint RelAddr2:1;
-   GLint Index2:(INST_INDEX_BITS+1); /**< Extra bit here for sign bit.
-                                       * May be negative for relative
-                                       * addressing. */
 };
 
 
@@ -295,8 +289,8 @@ struct prog_src_register
  */
 struct prog_dst_register
 {
+   GLuint Index;  /**< Unsigned, never negative */
    GLuint File:4;      /**< One of the PROGRAM_* register file values */
-   GLuint Index:INST_INDEX_BITS;  /**< Unsigned, never negative */
    GLuint WriteMask:4;
    GLuint RelAddr:1;
 
diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index 0dc7790..72a5ffe 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -34,12 +34,6 @@
 
 
 #define MAX_LOOP_NESTING 50
-/* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
- * register allocate many temporary values into that small number of
- * temps.  So allow large temporary indices coming into the register
- * allocator.
- */
-#define REG_ALLOCATE_MAX_PROGRAM_TEMPS	((1 << INST_INDEX_BITS) - 1)
 
 static GLboolean dbg = GL_FALSE;
 
@@ -238,17 +232,18 @@ replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
 static GLboolean
 _mesa_remove_dead_code_global(struct gl_program *prog)
 {
-   GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
+   GLboolean *tempRead;
    GLboolean *removeInst; /* per-instruction removal flag */
    GLuint i, rem = 0, comp;
-
-   memset(tempRead, 0, sizeof(tempRead));
+   GLuint numRegs = prog->NumTemporaries;
 
    if (dbg) {
       printf("Optimize: Begin dead code removal\n");
       /*_mesa_print_program(prog);*/
    }
 
+   tempRead =
+      calloc(numRegs, sizeof(GLboolean) * 4);
    removeInst = (GLboolean *)
       calloc(1, prog->NumInstructions * sizeof(GLboolean));
 
@@ -263,7 +258,7 @@ _mesa_remove_dead_code_global(struct gl_program *prog)
          if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
             const GLuint index = inst->SrcReg[j].Index;
             GLuint read_mask;
-            ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
+            ASSERT(index < numRegs);
 	    read_mask = get_src_arg_mask(inst, j, NO_MASK);
 
             if (inst->SrcReg[j].RelAddr) {
@@ -278,7 +273,7 @@ _mesa_remove_dead_code_global(struct gl_program *prog)
                if ((read_mask & (1 << swz)) == 0)
 		  continue;
                if (swz <= SWIZZLE_W)
-                  tempRead[index][swz] = GL_TRUE;
+                  tempRead[index * 4 + swz] = GL_TRUE;
 	    }
          }
       }
@@ -286,7 +281,7 @@ _mesa_remove_dead_code_global(struct gl_program *prog)
       /* check dst reg */
       if (inst->DstReg.File == PROGRAM_TEMPORARY) {
          const GLuint index = inst->DstReg.Index;
-         ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
+         ASSERT(index < prog->NumTemporaries);
 
          if (inst->DstReg.RelAddr) {
             if (dbg)
@@ -299,10 +294,10 @@ _mesa_remove_dead_code_global(struct gl_program *prog)
              * codes we cannot remove the instruction.  Prevent removal
              * by setting the 'read' flag.
              */
-            tempRead[index][0] = GL_TRUE;
-            tempRead[index][1] = GL_TRUE;
-            tempRead[index][2] = GL_TRUE;
-            tempRead[index][3] = GL_TRUE;
+            tempRead[index * 4 + 0] = GL_TRUE;
+            tempRead[index * 4 + 1] = GL_TRUE;
+            tempRead[index * 4 + 2] = GL_TRUE;
+            tempRead[index * 4 + 3] = GL_TRUE;
          }
       }
    }
@@ -316,7 +311,7 @@ _mesa_remove_dead_code_global(struct gl_program *prog)
          GLint chan, index = inst->DstReg.Index;
 
 	 for (chan = 0; chan < 4; chan++) {
-	    if (!tempRead[index][chan] &&
+	    if (!tempRead[index * 4 + chan] &&
 		inst->DstReg.WriteMask & (1 << chan)) {
 	       if (dbg) {
 		  printf("Remove writemask on %u.%c\n", i,
@@ -348,6 +343,7 @@ _mesa_remove_dead_code_global(struct gl_program *prog)
 
 done:
    free(removeInst);
+   free(tempRead);
    return rem != 0;
 }
 
@@ -835,7 +831,7 @@ struct interval
 struct interval_list
 {
    GLuint Num;
-   struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+   struct interval* Intervals;
 };
 
 
@@ -848,10 +844,11 @@ append_interval(struct interval_list *list, const struct interval *inv)
 
 /** Insert interval inv into list, sorted by interval end */
 static void
-insert_interval_by_end(struct interval_list *list, const struct interval *inv)
+insert_interval_by_end(struct interval_list *list, const struct interval *inv, GLuint numRegs)
 {
    /* XXX we could do a binary search insertion here since list is sorted */
    GLint i = list->Num - 1;
+   ASSERT(list->Num < numRegs);
    while (i >= 0 && list->Intervals[i].End > inv->End) {
       list->Intervals[i + 1] = list->Intervals[i];
       i--;
@@ -932,7 +929,7 @@ struct loop_info
  * instruction 'ic'.
  */
 static void
-update_interval(GLint intBegin[], GLint intEnd[],
+update_interval(GLint intBegin[], GLint intEnd[], GLuint numRegs,
 		struct loop_info *loopStack, GLuint loopStackDepth,
 		GLuint index, GLuint ic)
 {
@@ -948,7 +945,7 @@ update_interval(GLint intBegin[], GLint intEnd[],
       }
    }
 
-   ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
+   ASSERT(index < numRegs);
    if (intBegin[index] == -1) {
       ASSERT(intEnd[index] == -1);
       intBegin[index] = intEnd[index] = ic;
@@ -965,14 +962,15 @@ update_interval(GLint intBegin[], GLint intEnd[],
 GLboolean
 _mesa_find_temp_intervals(const struct prog_instruction *instructions,
                           GLuint numInstructions,
-                          GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
-                          GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
+                          GLint *intBegin,
+                          GLint *intEnd,
+                          GLuint numRegs)
 {
    struct loop_info loopStack[MAX_LOOP_NESTING];
    GLuint loopStackDepth = 0;
    GLuint i;
 
-   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
+   for (i = 0; i < numRegs; i++){
       intBegin[i] = intEnd[i] = -1;
    }
 
@@ -998,7 +996,7 @@ _mesa_find_temp_intervals(const struct prog_instruction *instructions,
                const GLuint index = inst->SrcReg[j].Index;
                if (inst->SrcReg[j].RelAddr)
                   return GL_FALSE;
-               update_interval(intBegin, intEnd, loopStack, loopStackDepth,
+               update_interval(intBegin, intEnd, numRegs, loopStack, loopStackDepth,
 			       index, i);
             }
          }
@@ -1006,7 +1004,7 @@ _mesa_find_temp_intervals(const struct prog_instruction *instructions,
             const GLuint index = inst->DstReg.Index;
             if (inst->DstReg.RelAddr)
                return GL_FALSE;
-            update_interval(intBegin, intEnd, loopStack, loopStackDepth,
+            update_interval(intBegin, intEnd, numRegs, loopStack, loopStackDepth,
 			    index, i);
          }
       }
@@ -1027,9 +1025,14 @@ static GLboolean
 find_live_intervals(struct gl_program *prog,
                     struct interval_list *liveIntervals)
 {
-   GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
-   GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+   GLint* intBegin;
+   GLint* intEnd;
    GLuint i;
+   GLuint numRegs = prog->NumTemporaries;
+   GLboolean ret = GL_FALSE;
+
+   intBegin = calloc(numRegs, sizeof(GLint));
+   intEnd = calloc(numRegs, sizeof(GLint));
 
    /*
     * Note: we'll return GL_FALSE below if we find relative indexing
@@ -1043,12 +1046,12 @@ find_live_intervals(struct gl_program *prog,
 
    /* build intermediate arrays */
    if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
-                                  intBegin, intEnd))
-      return GL_FALSE;
+                                  intBegin, intEnd, numRegs))
+      goto out;
 
    /* Build live intervals list from intermediate arrays */
    liveIntervals->Num = 0;
-   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
+   for (i = 0; i < numRegs; i++) {
       if (intBegin[i] >= 0) {
          struct interval inv;
          inv.Reg = i;
@@ -1078,16 +1081,20 @@ find_live_intervals(struct gl_program *prog,
       }
    }
 
-   return GL_TRUE;
+   ret = GL_TRUE;
+out:
+   free(intBegin);
+   free(intEnd);
+   return ret;
 }
 
 
 /** Scan the array of used register flags to find free entry */
 static GLint
-alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
+alloc_register(GLboolean* usedRegs, GLuint numRegs)
 {
    GLuint k;
-   for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
+   for (k = 0; k < numRegs; k++) {
       if (!usedRegs[k]) {
          usedRegs[k] = GL_TRUE;
          return k;
@@ -1109,17 +1116,24 @@ static void
 _mesa_reallocate_registers(struct gl_program *prog)
 {
    struct interval_list liveIntervals;
-   GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
-   GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+   struct interval_list activeIntervals;
+   GLint* registerMap;
+   GLboolean* usedRegs;
    GLuint i;
    GLint maxTemp = -1;
+   GLuint numRegs = prog->NumTemporaries;
 
    if (dbg) {
       printf("Optimize: Begin live-interval register reallocation\n");
       _mesa_print_program(prog);
    }
 
-   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
+   registerMap = calloc(numRegs, sizeof(GLint));
+   usedRegs = calloc(numRegs, sizeof(GLboolean));
+   liveIntervals.Intervals = calloc(numRegs, sizeof(struct interval));
+   activeIntervals.Intervals = 0;
+
+   for (i = 0; i < numRegs; i++){
       registerMap[i] = -1;
       usedRegs[i] = GL_FALSE;
    }
@@ -1127,11 +1141,11 @@ _mesa_reallocate_registers(struct gl_program *prog)
    if (!find_live_intervals(prog, &liveIntervals)) {
       if (dbg)
          printf("Aborting register reallocation\n");
-      return;
+      goto out;
    }
 
    {
-      struct interval_list activeIntervals;
+      activeIntervals.Intervals = calloc(numRegs, sizeof(struct interval));
       activeIntervals.Num = 0;
 
       /* loop over live intervals, allocating a new register for each */
@@ -1177,10 +1191,11 @@ _mesa_reallocate_registers(struct gl_program *prog)
 
          /* find a free register for this live interval */
          {
-            const GLint k = alloc_register(usedRegs);
+            /* TODO: use an O(1)/O(log n) algorithm rather than this linear scan */
+            const GLint k = alloc_register(usedRegs, numRegs);
             if (k < 0) {
                /* out of registers, give up */
-               return;
+               goto out;
             }
             registerMap[live->Reg] = k;
             maxTemp = MAX2(maxTemp, k);
@@ -1191,7 +1206,7 @@ _mesa_reallocate_registers(struct gl_program *prog)
          /* Insert this live interval into the active list which is sorted
           * by increasing end points.
           */
-         insert_interval_by_end(&activeIntervals, live);
+         insert_interval_by_end(&activeIntervals, live, numRegs);
       }
    }
 
@@ -1211,6 +1226,12 @@ _mesa_reallocate_registers(struct gl_program *prog)
                    liveIntervals.Num, maxTemp + 1);
       _mesa_print_program(prog);
    }
+
+out:
+   free(usedRegs);
+   free(registerMap);
+   free(liveIntervals.Intervals);
+   free(activeIntervals.Intervals);
 }
 
 
diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h
index 06cd9cb..e8cd5e4 100644
--- a/src/mesa/program/prog_optimize.h
+++ b/src/mesa/program/prog_optimize.h
@@ -37,8 +37,9 @@ struct prog_instruction;
 extern GLboolean
 _mesa_find_temp_intervals(const struct prog_instruction *instructions,
                           GLuint numInstructions,
-                          GLint intBegin[MAX_PROGRAM_TEMPS],
-                          GLint intEnd[MAX_PROGRAM_TEMPS]);
+                          GLint* intBegin,
+                          GLint* intEnd,
+                          GLuint numRegs);
 
 extern void
 _mesa_optimize_program(GLcontext *ctx, struct gl_program *program);
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index fd59026..8f65d44 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -53,20 +53,6 @@ _mesa_init_program(GLcontext *ctx)
 {
    GLuint i;
 
-   /*
-    * If this assertion fails, we need to increase the field
-    * size for register indexes (see INST_INDEX_BITS).
-    */
-   ASSERT(ctx->Const.VertexProgram.MaxUniformComponents / 4
-          <= (1 << INST_INDEX_BITS));
-   ASSERT(ctx->Const.FragmentProgram.MaxUniformComponents / 4
-          <= (1 << INST_INDEX_BITS));
-
-   ASSERT(ctx->Const.VertexProgram.MaxTemps <= (1 << INST_INDEX_BITS));
-   ASSERT(ctx->Const.VertexProgram.MaxLocalParams <= (1 << INST_INDEX_BITS));
-   ASSERT(ctx->Const.FragmentProgram.MaxTemps <= (1 << INST_INDEX_BITS));
-   ASSERT(ctx->Const.FragmentProgram.MaxLocalParams <= (1 << INST_INDEX_BITS));
-
    ASSERT(ctx->Const.VertexProgram.MaxUniformComponents <= 4 * MAX_UNIFORMS);
    ASSERT(ctx->Const.FragmentProgram.MaxUniformComponents <= 4 * MAX_UNIFORMS);
 
diff --git a/src/mesa/program/program_parse.tab.c b/src/mesa/program/program_parse.tab.c
index 08ead30..b119998 100644
--- a/src/mesa/program/program_parse.tab.c
+++ b/src/mesa/program/program_parse.tab.c
@@ -5257,12 +5257,7 @@ init_dst_reg(struct prog_dst_register *r)
 void
 set_dst_reg(struct prog_dst_register *r, gl_register_file file, GLint index)
 {
-   const GLint maxIndex = 1 << INST_INDEX_BITS;
-   const GLint minIndex = 0;
-   ASSERT(index >= minIndex);
-   (void) minIndex;
-   ASSERT(index <= maxIndex);
-   (void) maxIndex;
+   ASSERT(index >= 0);
    ASSERT(file == PROGRAM_TEMPORARY ||
 	  file == PROGRAM_ADDRESS ||
 	  file == PROGRAM_OUTPUT);
@@ -5299,13 +5294,8 @@ void
 set_src_reg_swz(struct asm_src_register *r, gl_register_file file, GLint index,
                 GLuint swizzle)
 {
-   const GLint maxIndex = (1 << INST_INDEX_BITS) - 1;
-   const GLint minIndex = -(1 << INST_INDEX_BITS);
    ASSERT(file < PROGRAM_FILE_MAX);
-   ASSERT(index >= minIndex);
-   (void) minIndex;
-   ASSERT(index <= maxIndex);
-   (void) maxIndex;
+   ASSERT(index >= 0);
    memset(r, 0, sizeof(*r));
    r->Base.File = file;
    r->Base.Index = index;
diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y
index cf621ae..227dd00 100644
--- a/src/mesa/program/program_parse.y
+++ b/src/mesa/program/program_parse.y
@@ -2296,12 +2296,7 @@ init_dst_reg(struct prog_dst_register *r)
 void
 set_dst_reg(struct prog_dst_register *r, gl_register_file file, GLint index)
 {
-   const GLint maxIndex = 1 << INST_INDEX_BITS;
-   const GLint minIndex = 0;
-   ASSERT(index >= minIndex);
-   (void) minIndex;
-   ASSERT(index <= maxIndex);
-   (void) maxIndex;
+   ASSERT(index >= 0);
    ASSERT(file == PROGRAM_TEMPORARY ||
 	  file == PROGRAM_ADDRESS ||
 	  file == PROGRAM_OUTPUT);
@@ -2338,13 +2333,8 @@ void
 set_src_reg_swz(struct asm_src_register *r, gl_register_file file, GLint index,
                 GLuint swizzle)
 {
-   const GLint maxIndex = (1 << INST_INDEX_BITS) - 1;
-   const GLint minIndex = -(1 << INST_INDEX_BITS);
    ASSERT(file < PROGRAM_FILE_MAX);
-   ASSERT(index >= minIndex);
-   (void) minIndex;
-   ASSERT(index <= maxIndex);
-   (void) maxIndex;
+   ASSERT(index >= 0);
    memset(r, 0, sizeof(*r));
    r->Base.File = file;
    r->Base.Index = index;
-- 
1.7.0.4



More information about the mesa-dev mailing list