[Mesa-dev] [PATCH 1/3] glsl_to_tgsi: Create a new variable_store class replacing variables field in glsl_to_tgsi_visitor

Vincent Lejeune vljn at ovi.com
Sat Jan 7 10:12:29 PST 2012


---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  414 +++++++++++++++++++++-------
 1 files changed, 309 insertions(+), 105 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index cecceca..17ae525 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -230,14 +230,16 @@ public:
 class variable_storage : public exec_node {
 public:
    variable_storage(ir_variable *var, gl_register_file file, int index)
-      : file(file), index(index), var(var)
+      : file(file), index(index), type(var->type), is_array(var->type->is_array() || var->type->is_record() || var->type->is_matrix()), is_reladdressed(false)
    {
       /* empty */
    }
 
    gl_register_file file;
    int index;
-   ir_variable *var; /* variable that maps to this, if any */
+   const glsl_type *type; /* variable that maps to this, if any */
+   bool is_array;
+   bool is_reladdressed;
 };
 
 class immediate_storage : public exec_node {
@@ -286,6 +288,242 @@ public:
    st_src_reg return_reg;
 };
 
+static int type_size(const glsl_type *type);
+static int swizzle_for_size(int size);
+
+
+/**
+ * Single place to store all temporary values (either explicit - ir_variable* -
+ * or implicit - returned by get_temp -).
+ *
+ * Explicit temps are stored in variables hash_table.
+ * Implicit temps are stored in rvalue_regs array.
+ */
+class variable_store {
+   friend class glsl_to_tgsi_variable_allocator;
+protected:
+   void *mem_ctx;
+   hash_table* variables;
+   unsigned next_temp;
+   unsigned next_temp_array;
+   static void reindex_reladdress(const void *, void *, void *);
+   static void reindex_non_reladdress(const void *, void *, void *);
+   void reindex_rvalue();
+   void reindex_rvalue_reladdressed();
+   variable_storage* rvalue_regs;
+   unsigned rvalue_regs_count;
+
+public:
+   bool native_integers;
+   unsigned temp_amount() const;
+   unsigned temp_array_amount() const;
+   variable_store();
+   ~variable_store();
+   variable_storage *find_variable_storage(class ir_variable *var) const;
+   variable_storage *push(class ir_variable *, gl_register_file, int);
+   variable_storage *push(class ir_variable *);
+   variable_storage *retrieve_anonymous_temp(unsigned);
+   st_src_reg get_temp(const glsl_type *type);
+   void optimise_access(void);
+   unsigned *reindex_table;
+};
+
+unsigned
+variable_store::temp_amount() const
+{
+   return next_temp;
+}
+
+unsigned
+variable_store::temp_array_amount() const
+{
+   return next_temp_array;
+}
+
+variable_store::variable_store():mem_ctx(ralloc_context(NULL)),next_temp(1),next_temp_array(1),rvalue_regs(NULL),rvalue_regs_count(0)
+{
+   variables = hash_table_ctor(0,hash_table_pointer_hash,hash_table_pointer_compare);
+}
+
+variable_store::~variable_store()
+{
+   hash_table_dtor(variables);
+   ralloc_free(mem_ctx);
+}
+
+variable_storage *
+variable_store::find_variable_storage(ir_variable *var) const
+{
+   return (class variable_storage *) hash_table_find(variables,var);
+}
+
+variable_storage*
+variable_store::push(class ir_variable *var, gl_register_file file, int index)
+{
+   variable_storage *storage = new (mem_ctx) variable_storage(var,file,index);
+   hash_table_insert(variables,storage,var);
+   return storage;
+}
+
+variable_storage*
+variable_store::push(ir_variable *ir)
+{
+   variable_storage* retval = push(ir, PROGRAM_TEMPORARY, next_temp);
+   next_temp += type_size(ir->type);
+   if (ir->type->is_array() || ir->type->is_record() || ir->type->is_matrix()) {
+      retval->is_array = true;
+   }
+   return retval;
+}
+
+variable_storage*
+variable_store::retrieve_anonymous_temp(unsigned reg)
+{
+   for (unsigned i = 0; i < rvalue_regs_count; i++) {
+      unsigned range_start = rvalue_regs[i].index;
+      unsigned range_end = range_start + type_size(rvalue_regs[i].type) - 1;
+      if (reg >= range_start && reg <= range_end) {
+         return rvalue_regs + i;
+         }
+   }
+   printf ("Failed to get storage");
+   exit(1);
+}
+
+/**
+ * In the initial pass of codegen, we assign temporary numbers to
+ * intermediate results.  (not SSA -- variable assignments will reuse
+ * storage).
+ */
+st_src_reg
+variable_store::get_temp(const glsl_type *type)
+{
+   st_src_reg src;
+   rvalue_regs_count++;
+   rvalue_regs = reralloc(mem_ctx,rvalue_regs,variable_storage,rvalue_regs_count);
+   variable_storage &entry = rvalue_regs[rvalue_regs_count - 1];
+
+   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
+   src.file = PROGRAM_TEMPORARY;
+   src.index = next_temp;
+   src.reladdr = NULL;
+   next_temp += type_size(type);
+
+   entry.file = PROGRAM_TEMPORARY;
+   entry.index = src.index;
+   entry.type = type;
+   entry.is_reladdressed = false;
+
+   if (type->is_array() || type->is_record() || type->is_matrix()) {
+      entry.is_array = true;
+   }
+
+   if (type->is_array() || type->is_record()) {
+      src.swizzle = SWIZZLE_NOOP;
+   } else {
+      src.swizzle = swizzle_for_size(type->vector_elements);
+   }
+   src.negate = 0;
+
+   return src;
+}
+
+void variable_store::reindex_reladdress(const void *key, void *data, void *closure)
+{
+   ir_variable *var = (ir_variable *) key;
+   variable_storage *storage = (variable_storage *) data;
+   variable_store *store = (variable_store *) closure;
+
+   if (storage->file == PROGRAM_TEMPORARY && storage->is_array) {
+
+      if (storage->is_reladdressed) {
+         unsigned old_index = storage->index;
+         storage->index = store->next_temp_array;
+         size_t sz = type_size(storage->type);
+         store->next_temp_array += sz;
+         for (unsigned i = 0; i < sz; i++) {
+            store->reindex_table[old_index + i] = storage->index + i;
+         }
+      }
+   }
+}
+
+void variable_store::reindex_non_reladdress(const void *key, void *data, void *closure)
+{
+   ir_variable *var = (ir_variable *) key;
+   variable_storage *storage = (variable_storage *) data;
+   variable_store *store = (variable_store *) closure;
+
+   if (storage->file == PROGRAM_TEMPORARY && !storage->is_reladdressed) {
+      unsigned old_index = storage->index;
+      size_t sz = type_size(storage->type);
+      storage->index = store->next_temp;
+      store->next_temp += sz;
+      for (unsigned i = 0; i < sz; i++) {
+         store->reindex_table[old_index + i] = storage->index + i;
+      }
+   }
+}
+
+void
+variable_store::reindex_rvalue_reladdressed()
+{
+   for (unsigned i = 0; i < rvalue_regs_count; i++) {
+      variable_storage &storage = rvalue_regs[i];
+      if (storage.is_reladdressed) {
+         unsigned old_index = storage.index;
+         size_t sz = type_size(storage.type);
+         storage.index = next_temp_array;
+         next_temp_array += sz;
+         for (unsigned i = 0; i < sz; i++) {
+            reindex_table[old_index + i] = storage.index + i;
+         }
+      }
+   }
+}
+
+void
+variable_store::reindex_rvalue()
+{
+   for (unsigned i = 0; i < rvalue_regs_count; i++) {
+      variable_storage &storage = rvalue_regs[i];
+      if (!storage.is_reladdressed) {
+         unsigned old_index = storage.index;
+         size_t sz = type_size(storage.type);
+         storage.index = next_temp;
+         next_temp += sz;
+         for (unsigned i = 0; i < sz; i++) {
+            reindex_table[old_index + i] = storage.index + i;
+         }
+      }
+   }
+}
+
+
+/**
+ * This function will generate a reindex table for every temp registers to pass to
+ * renumber_temp_regs.
+ * This will move every registers associated to an indirectly addressed variables
+ * to the beginning of TGSI_FILE_TEMPORARY (ie gives them lowest index) ;
+ * thus every registers that have an index >= next_temp_array after this function call
+ * are sure to be directly addressed.
+ *
+ * TODO : In fact some registers before next_temp_array might be directly addressed too
+ * (for instance in a struct type that holds an array indirectly accessed), we can put them
+ * after next_temp_array too.
+ */
+void
+variable_store::optimise_access(void)
+{
+   reindex_table = rzalloc_array(mem_ctx,unsigned,next_temp);
+   next_temp_array = 1;
+   hash_table_call_foreach(variables,variable_store::reindex_reladdress,this);
+   reindex_rvalue_reladdressed();
+   next_temp = next_temp_array + 1;
+   hash_table_call_foreach(variables,variable_store::reindex_non_reladdress,this);
+   reindex_rvalue();
+}
+
 class glsl_to_tgsi_visitor : public ir_visitor {
 public:
    glsl_to_tgsi_visitor();
@@ -298,8 +536,6 @@ public:
    struct gl_shader_program *shader_program;
    struct gl_shader_compiler_options *options;
 
-   int next_temp;
-
    int num_address_regs;
    int samplers_used;
    bool indirect_addr_temps;
@@ -308,14 +544,11 @@ public:
    int glsl_version;
    bool native_integers;
 
-   variable_storage *find_variable_storage(ir_variable *var);
-
    int add_constant(gl_register_file file, gl_constant_value values[4],
                     int size, int datatype, GLuint *swizzle_out);
 
    function_entry *get_function_signature(ir_function_signature *sig);
 
-   st_src_reg get_temp(const glsl_type *type);
    void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
 
    st_src_reg st_src_reg_for_float(float val);
@@ -352,7 +585,7 @@ public:
    st_src_reg result;
 
    /** List of variable_storage */
-   exec_list variables;
+   variable_store store;
 
    /** List of immediate_storage */
    exec_list immediates;
@@ -427,6 +660,7 @@ public:
    int eliminate_dead_code_advanced(void);
    void merge_registers(void);
    void renumber_registers(void);
+   void renumber_temp_regs(unsigned*);
 
    void *mem_ctx;
 };
@@ -797,7 +1031,7 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
     * that will be written by the SCS instrution, we'll need a temporary.
     */
    if (scs_mask != unsigned(dst.writemask)) {
-      tmp = get_temp(glsl_type::vec4_type);
+      tmp = store.get_temp(glsl_type::vec4_type);
    }
 
    for (unsigned i = 0; i < 4; i++) {
@@ -967,48 +1201,6 @@ type_size(const struct glsl_type *type)
    }
 }
 
-/**
- * In the initial pass of codegen, we assign temporary numbers to
- * intermediate results.  (not SSA -- variable assignments will reuse
- * storage).
- */
-st_src_reg
-glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
-{
-   st_src_reg src;
-
-   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
-   src.file = PROGRAM_TEMPORARY;
-   src.index = next_temp;
-   src.reladdr = NULL;
-   next_temp += type_size(type);
-
-   if (type->is_array() || type->is_record()) {
-      src.swizzle = SWIZZLE_NOOP;
-   } else {
-      src.swizzle = swizzle_for_size(type->vector_elements);
-   }
-   src.negate = 0;
-
-   return src;
-}
-
-variable_storage *
-glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
-{
-   
-   variable_storage *entry;
-
-   foreach_iter(exec_list_iterator, iter, this->variables) {
-      entry = (variable_storage *)iter.get();
-
-      if (entry->var == var)
-         return entry;
-   }
-
-   return NULL;
-}
-
 void
 glsl_to_tgsi_visitor::visit(ir_variable *ir)
 {
@@ -1040,8 +1232,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
       st_dst_reg dst;
       if (i == ir->num_state_slots) {
          /* We'll set the index later. */
-         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
-         this->variables.push_tail(storage);
+         storage = store.push(ir, PROGRAM_STATE_VAR, -1);
 
          dst = undef_dst;
       } else {
@@ -1051,10 +1242,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
           */
          assert((int) ir->num_state_slots == type_size(ir->type));
 
-         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
-        					 this->next_temp);
-         this->variables.push_tail(storage);
-         this->next_temp += type_size(ir->type);
+         storage = store.push(ir);
 
          dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
                native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
@@ -1205,7 +1393,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    ir->operands[nonmul_operand]->accept(this);
    c = this->result;
 
-   this->result = get_temp(ir->type);
+   this->result = store.get_temp(ir->type);
    result_dst = st_dst_reg(this->result);
    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
    emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
@@ -1247,7 +1435,7 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan
 
    b.negate = ~b.negate;
 
-   this->result = get_temp(ir->type);
+   this->result = store.get_temp(ir->type);
    emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
 
    return true;
@@ -1288,7 +1476,7 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
       new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
       new_inst->saturate = true;
    } else {
-      this->result = get_temp(ir->type);
+      this->result = store.get_temp(ir->type);
       st_dst_reg result_dst = st_dst_reg(this->result);
       result_dst.writemask = (1 << ir->type->vector_elements) - 1;
       glsl_to_tgsi_instruction *inst;
@@ -1309,7 +1497,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
    emit_arl(ir, address_reg, *reg->reladdr);
 
    if (*num_reladdr != 1) {
-      st_src_reg temp = get_temp(glsl_type::vec4_type);
+      st_src_reg temp = store.get_temp(glsl_type::vec4_type);
 
       emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
       *reg = temp;
@@ -1378,7 +1566,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    /* Storage for our result.  Ideally for an assignment we'd be using
     * the actual storage for the result here, instead.
     */
-   result_src = get_temp(ir->type);
+   result_src = store.get_temp(ir->type);
    /* convenience for the emit functions below. */
    result_dst = st_dst_reg(result_src);
    /* Limit writes to the channels that will be used by result_src later.
@@ -1508,7 +1696,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       /* "==" operator producing a scalar boolean. */
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(native_integers ?
+         st_src_reg temp = store.get_temp(native_integers ?
                glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                glsl_type::vec4_type);
          
@@ -1566,7 +1754,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       /* "!=" operator producing a scalar boolean. */
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(native_integers ?
+         st_src_reg temp = store.get_temp(native_integers ?
                glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                glsl_type::vec4_type);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
@@ -2220,7 +2408,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
              * If TGSI had a UCMP instruction or similar, this extra
              * instruction would not be necessary.
              */
-            condition_temp = get_temp(glsl_type::vec4_type);
+            condition_temp = store.get_temp(glsl_type::vec4_type);
             condition.negate = 0;
             emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
             condition_temp.swizzle = condition.swizzle;
@@ -2277,7 +2465,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
     * get lucky, copy propagation will eliminate the extra moves.
     */
    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
-      st_src_reg temp_base = get_temp(ir->type);
+      st_src_reg temp_base = store.get_temp(ir->type);
       st_dst_reg temp = st_dst_reg(temp_base);
 
       foreach_iter(exec_list_iterator, iter, ir->components) {
@@ -2301,7 +2489,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
    }
 
    if (ir->type->is_array()) {
-      st_src_reg temp_base = get_temp(ir->type);
+      st_src_reg temp_base = store.get_temp(ir->type);
       st_dst_reg temp = st_dst_reg(temp_base);
       int size = type_size(ir->type->fields.array);
 
@@ -2324,7 +2512,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
    }
 
    if (ir->type->is_matrix()) {
-      st_src_reg mat = get_temp(ir->type);
+      st_src_reg mat = store.get_temp(ir->type);
       st_dst_reg mat_column = st_dst_reg(mat);
 
       for (i = 0; i < ir->type->matrix_columns; i++) {
@@ -2414,18 +2602,14 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
       ir_variable *param = (ir_variable *)iter.get();
       variable_storage *storage;
 
-      storage = find_variable_storage(param);
+      storage = store.find_variable_storage(param);
       assert(!storage);
 
-      storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
-        				      this->next_temp);
-      this->variables.push_tail(storage);
-
-      this->next_temp += type_size(param->type);
+      storage =store.push(param);
    }
 
    if (!sig->return_type->is_void()) {
-      entry->return_reg = get_temp(sig->return_type);
+      entry->return_reg = store.get_temp(sig->return_type);
    } else {
       entry->return_reg = undef_src;
    }
@@ -2450,7 +2634,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
 
       if (param->mode == ir_var_in ||
           param->mode == ir_var_inout) {
-         variable_storage *storage = find_variable_storage(param);
+         variable_storage *storage = store.find_variable_storage(param);
          assert(storage);
 
          param_rval->accept(this);
@@ -2486,7 +2670,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
 
       if (param->mode == ir_var_out ||
           param->mode == ir_var_inout) {
-         variable_storage *storage = find_variable_storage(param);
+         variable_storage *storage = store.find_variable_storage(param);
          assert(storage);
 
          st_src_reg r;
@@ -2530,7 +2714,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
        * we're doing plain old texturing.  The optimization passes on
        * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
        */
-      coord = get_temp(glsl_type::vec4_type);
+      coord = store.get_temp(glsl_type::vec4_type);
       coord_dst = st_dst_reg(coord);
       emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
    }
@@ -2543,7 +2727,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
    /* Storage for our result.  Ideally for an assignment we'd be using
     * the actual storage for the result here, instead.
     */
-   result_src = get_temp(glsl_type::vec4_type);
+   result_src = store.get_temp(glsl_type::vec4_type);
    result_dst = st_dst_reg(result_src);
 
    switch (ir->op) {
@@ -2613,7 +2797,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
              */
             ir->shadow_comparitor->accept(this);
 
-            tmp_src = get_temp(glsl_type::vec4_type);
+            tmp_src = store.get_temp(glsl_type::vec4_type);
             st_dst_reg tmp_dst = st_dst_reg(tmp_src);
 
 	    /* Projective division not allowed for array samplers. */
@@ -2779,7 +2963,7 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
        * have something to set cond_update on.
        */
       if (cond_inst == prev_inst) {
-         st_src_reg temp = get_temp(glsl_type::bool_type);
+         st_src_reg temp = store.get_temp(glsl_type::bool_type);
          cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
       }
       cond_inst->cond_update = GL_TRUE;
@@ -2805,7 +2989,6 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
 {
    result.file = PROGRAM_UNDEFINED;
-   next_temp = 1;
    next_signature_id = 1;
    num_immediates = 0;
    current_function = NULL;
@@ -3266,17 +3449,18 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index)
 void
 glsl_to_tgsi_visitor::copy_propagate(void)
 {
+   int next_temp = store.temp_amount();
    glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
         					    glsl_to_tgsi_instruction *,
-        					    this->next_temp * 4);
-   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+                                        next_temp * 4);
+   int *acp_level = rzalloc_array(mem_ctx, int, next_temp * 4);
    int level = 0;
 
    foreach_iter(exec_list_iterator, iter, this->instructions) {
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
 
       assert(inst->dst.file != PROGRAM_TEMPORARY
-             || inst->dst.index < this->next_temp);
+             || inst->dst.index < next_temp);
 
       /* First, do any copy propagation possible into the src regs. */
       for (int r = 0; r < 3; r++) {
@@ -3336,7 +3520,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
       case TGSI_OPCODE_BGNLOOP:
       case TGSI_OPCODE_ENDLOOP:
          /* End of a basic block, clear the ACP entirely. */
-         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         memset(acp, 0, sizeof(*acp) * next_temp * 4);
          break;
 
       case TGSI_OPCODE_IF:
@@ -3348,7 +3532,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
          /* Clear all channels written inside the block from the ACP, but
           * leaving those that were not touched.
           */
-         for (int r = 0; r < this->next_temp; r++) {
+         for (int r = 0; r < next_temp; r++) {
             for (int c = 0; c < 4; c++) {
                if (!acp[4 * r + c])
         	  continue;
@@ -3369,13 +3553,13 @@ glsl_to_tgsi_visitor::copy_propagate(void)
             /* Any temporary might be written, so no copy propagation
              * across this instruction.
              */
-            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+            memset(acp, 0, sizeof(*acp) * next_temp * 4);
          } else if (inst->dst.file == PROGRAM_OUTPUT &&
         	    inst->dst.reladdr) {
             /* Any output might be written, so no copy propagation
              * from outputs across this instruction.
              */
-            for (int r = 0; r < this->next_temp; r++) {
+            for (int r = 0; r < next_temp; r++) {
                for (int c = 0; c < 4; c++) {
         	  if (!acp[4 * r + c])
         	     continue;
@@ -3396,7 +3580,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
             }
 
             /* Clear where it's used as src. */
-            for (int r = 0; r < this->next_temp; r++) {
+            for (int r = 0; r < next_temp; r++) {
                for (int c = 0; c < 4; c++) {
         	  if (!acp[4 * r + c])
         	     continue;
@@ -3457,8 +3641,8 @@ void
 glsl_to_tgsi_visitor::eliminate_dead_code(void)
 {
    int i;
-   
-   for (i=0; i < this->next_temp; i++) {
+   int next_temp = store.temp_amount();
+   for (i=0; i < next_temp; i++) {
       int last_read = get_last_temp_read(i);
       int j = 0;
       
@@ -3492,10 +3676,11 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
 int
 glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
 {
+   int next_temp = store.temp_amount();
    glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
                                                      glsl_to_tgsi_instruction *,
-                                                     this->next_temp * 4);
-   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+                                                     next_temp * 4);
+   int *write_level = rzalloc_array(mem_ctx, int, next_temp * 4);
    int level = 0;
    int removed = 0;
 
@@ -3503,7 +3688,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
 
       assert(inst->dst.file != PROGRAM_TEMPORARY
-             || inst->dst.index < this->next_temp);
+             || inst->dst.index < next_temp);
       
       switch (inst->op) {
       case TGSI_OPCODE_BGNLOOP:
@@ -3518,7 +3703,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
           * dead code of this type, so it shouldn't make a difference as long as
           * the dead code elimination pass in the GLSL compiler does its job.
           */
-         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+         memset(writes, 0, sizeof(*writes) * next_temp * 4);
          break;
 
       case TGSI_OPCODE_ENDIF:
@@ -3526,7 +3711,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
          /* Promote the recorded level of all channels written inside the
           * preceding if or else block to the level above the if/else block.
           */
-         for (int r = 0; r < this->next_temp; r++) {
+         for (int r = 0; r < next_temp; r++) {
             for (int c = 0; c < 4; c++) {
                if (!writes[4 * r + c])
         	         continue;
@@ -3554,7 +3739,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
                /* Any temporary might be read, so no dead code elimination 
                 * across this instruction.
                 */
-               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+               memset(writes, 0, sizeof(*writes) * next_temp * 4);
             } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
                /* Clear where it's used as src. */
                int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
@@ -3595,7 +3780,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
    }
 
    /* Anything still in the write array at this point is dead code. */
-   for (int r = 0; r < this->next_temp; r++) {
+   for (int r = 0; r < next_temp; r++) {
       for (int c = 0; c < 4; c++) {
          glsl_to_tgsi_instruction *inst = writes[4 * r + c];
          if (inst)
@@ -3682,6 +3867,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
  * by optimization passes. */
 void
 glsl_to_tgsi_visitor::renumber_registers(void)
+void glsl_to_tgsi_visitor::renumber_temp_regs(unsigned *reindex_table)
 {
    int i = 0;
    int new_index = 0;
@@ -3691,6 +3877,21 @@ glsl_to_tgsi_visitor::renumber_registers(void)
       if (i != new_index)
          rename_temp_register(i, new_index);
       new_index++;
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      unsigned j;
+
+      for (j=0; j < num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY) {
+            unsigned index = inst->src[j].index;
+            inst->src[j].index = reindex_table[index];
+         }
+      }
+
+      if (inst->dst.file == PROGRAM_TEMPORARY) {
+         unsigned index = inst->dst.index;
+         inst->dst.index = reindex_table[index];
+      }
    }
    
    this->next_temp = new_index;
@@ -3720,7 +3921,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    v->glsl_version = original->glsl_version;
    v->native_integers = original->native_integers;
    v->options = original->options;
-   v->next_temp = original->next_temp;
    v->num_address_regs = original->num_address_regs;
    v->samplers_used = prog->SamplersUsed = original->samplers_used;
    v->indirect_addr_temps = original->indirect_addr_temps;
@@ -3732,7 +3932,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
     * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
     */
    coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
-   src0 = v->get_temp(glsl_type::vec4_type);
+   src0 = v->store.get_temp(glsl_type::vec4_type);
    dst0 = st_dst_reg(src0);
    inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
    inst->sampler = 0;
@@ -3762,7 +3962,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    }
 
    if (pixel_maps) {
-      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
+      st_src_reg temp = v->store.get_temp(glsl_type::vec4_type);
       st_dst_reg temp_dst = st_dst_reg(temp);
 
       assert(st->pixel_xfer.pixelmap_texture);
@@ -3850,7 +4050,6 @@ get_bitmap_visitor(struct st_fragment_program *fp,
    v->glsl_version = original->glsl_version;
    v->native_integers = original->native_integers;
    v->options = original->options;
-   v->next_temp = original->next_temp;
    v->num_address_regs = original->num_address_regs;
    v->samplers_used = prog->SamplersUsed = original->samplers_used;
    v->indirect_addr_temps = original->indirect_addr_temps;
@@ -3859,7 +4058,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
 
    /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
    coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
-   src0 = v->get_temp(glsl_type::vec4_type);
+   src0 = v->store.get_temp(glsl_type::vec4_type);
    dst0 = st_dst_reg(src0);
    inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
    inst->sampler = samplerIndex;
@@ -4542,6 +4741,7 @@ st_translate_program(
    unsigned i;
    enum pipe_error ret = PIPE_OK;
 
+   int next_temp = program->store.temp_amount();
    assert(numInputs <= Elements(t->inputs));
    assert(numOutputs <= Elements(t->outputs));
 
@@ -4695,7 +4895,7 @@ st_translate_program(
        * in sequential order.  Else, we declare them on demand elsewhere.
        * (Note: the number of temporaries is equal to program->next_temp)
        */
-      for (i = 0; i < (unsigned)program->next_temp; i++) {
+      for (i = 0; i < (unsigned)next_temp; i++) {
          /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
          t->temps[i] = ureg_DECL_temporary(t->ureg);
       }
@@ -4879,6 +5079,7 @@ get_mesa_program(struct gl_context *ctx,
    v->options = options;
    v->glsl_version = ctx->Const.GLSLVersion;
    v->native_integers = ctx->Const.NativeIntegers;
+   v->store.native_integers = v->native_integers;
 
    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
 					       prog->Parameters);
@@ -4929,6 +5130,9 @@ get_mesa_program(struct gl_context *ctx,
    }
 #endif
 
+   v->store.optimise_access();
+   v->renumber_temp_regs(v->store.reindex_table);
+
    if (!screen->get_shader_param(screen, pipe_shader_type,
                                  PIPE_SHADER_CAP_OUTPUT_READ)) {
       /* Remove reads to output registers, and to varyings in vertex shaders. */
-- 
1.7.7



More information about the mesa-dev mailing list