[Mesa-dev] [PATCH 1/3] glsl_to_tgsi: Create a new variable_store class replacing variables field in glsl_to_tgsi_visitor

Bryan Cain bryancain3 at gmail.com
Sat Jan 7 12:51:29 PST 2012


This is good work.  I just have a few suggested changes.

On 01/07/2012 12:26 PM, Vincent Lejeune wrote:
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  414 +++++++++++++++++++++-------
>  1 files changed, 309 insertions(+), 105 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index cecceca..17ae525 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -230,14 +230,16 @@ public:
>  class variable_storage : public exec_node {
>  public:
>     variable_storage(ir_variable *var, gl_register_file file, int index)
> -      : file(file), index(index), var(var)
> +      : file(file), index(index), type(var->type), is_array(var->type->is_array() || var->type->is_record() || var->type->is_matrix()), is_reladdressed(false)
>     {
>        /* empty */
>     }
>  
>     gl_register_file file;
>     int index;
> -   ir_variable *var; /* variable that maps to this, if any */
> +   const glsl_type *type; /* variable that maps to this, if any */

The comment there doesn't make sense anymore.

> +   bool is_array;
> +   bool is_reladdressed;
>  };
>  
>  class immediate_storage : public exec_node {
> @@ -286,6 +288,242 @@ public:
>     st_src_reg return_reg;
>  };
>  
> +static int type_size(const glsl_type *type);
> +static int swizzle_for_size(int size);
> +
> +
> +/**
> + * Single place to store all temporary values (either explicit - ir_variable* -
> + * or implicit - returned by get_temp -).

I don't think the last " -" is supposed to be there.

> + *
> + * Explicit temps are stored in variables hash_table.
> + * Implicit temps are stored in rvalue_regs array.
> + */
> +class variable_store {
> +   friend class glsl_to_tgsi_variable_allocator;
> +protected:
> +   void *mem_ctx;
> +   hash_table* variables;
> +   unsigned next_temp;
> +   unsigned next_temp_array;
> +   static void reindex_reladdress(const void *, void *, void *);
> +   static void reindex_non_reladdress(const void *, void *, void *);
> +   void reindex_rvalue();
> +   void reindex_rvalue_reladdressed();
> +   variable_storage* rvalue_regs;
> +   unsigned rvalue_regs_count;
> +
> +public:
> +   bool native_integers;
> +   unsigned temp_amount() const;
> +   unsigned temp_array_amount() const;
> +   variable_store();
> +   ~variable_store();
> +   variable_storage *find_variable_storage(class ir_variable *var) const;
> +   variable_storage *push(class ir_variable *, gl_register_file, int);
> +   variable_storage *push(class ir_variable *);
> +   variable_storage *retrieve_anonymous_temp(unsigned);
> +   st_src_reg get_temp(const glsl_type *type);
> +   void optimise_access(void);
> +   unsigned *reindex_table;
> +};
> +
> +unsigned
> +variable_store::temp_amount() const
> +{
> +   return next_temp;
> +}
> +
> +unsigned
> +variable_store::temp_array_amount() const
> +{
> +   return next_temp_array;
> +}
> +
> +variable_store::variable_store():mem_ctx(ralloc_context(NULL)),next_temp(1),next_temp_array(1),rvalue_regs(NULL),rvalue_regs_count(0)
> +{
> +   variables = hash_table_ctor(0,hash_table_pointer_hash,hash_table_pointer_compare);
> +}
> +
> +variable_store::~variable_store()
> +{
> +   hash_table_dtor(variables);
> +   ralloc_free(mem_ctx);
> +}
> +
> +variable_storage *
> +variable_store::find_variable_storage(ir_variable *var) const
> +{
> +   return (class variable_storage *) hash_table_find(variables,var);

The convention in the rest of glsl_to_tgsi (and Mesa) is to put a space
after the comma separating arguments.  I'd prefer it if this patch
followed the same convention.

> +}
> +
> +variable_storage*
> +variable_store::push(class ir_variable *var, gl_register_file file, int index)
> +{
> +   variable_storage *storage = new (mem_ctx) variable_storage(var,file,index);
> +   hash_table_insert(variables,storage,var);
> +   return storage;
> +}
> +
> +variable_storage*
> +variable_store::push(ir_variable *ir)
> +{
> +   variable_storage* retval = push(ir, PROGRAM_TEMPORARY, next_temp);
> +   next_temp += type_size(ir->type);
> +   if (ir->type->is_array() || ir->type->is_record() || ir->type->is_matrix()) {
> +      retval->is_array = true;
> +   }
> +   return retval;
> +}
> +
> +variable_storage*
> +variable_store::retrieve_anonymous_temp(unsigned reg)
> +{
> +   for (unsigned i = 0; i < rvalue_regs_count; i++) {
> +      unsigned range_start = rvalue_regs[i].index;
> +      unsigned range_end = range_start + type_size(rvalue_regs[i].type) - 1;
> +      if (reg >= range_start && reg <= range_end) {
> +         return rvalue_regs + i;
> +         }
> +   }
> +   printf ("Failed to get storage");
> +   exit(1);

I don't think writing to stdout and exiting cleanly like this is the
right way to handle a fatal error.  It should probably write the error
message to stderr (making clear that it's a fatal error in Mesa, not an
error in the application) and assert (so that a debugger can catch it),
then use call exit(1).

> +}
> +
> +/**
> + * In the initial pass of codegen, we assign temporary numbers to
> + * intermediate results.  (not SSA -- variable assignments will reuse
> + * storage).
> + */
> +st_src_reg
> +variable_store::get_temp(const glsl_type *type)
> +{
> +   st_src_reg src;
> +   rvalue_regs_count++;
> +   rvalue_regs = reralloc(mem_ctx,rvalue_regs,variable_storage,rvalue_regs_count);
> +   variable_storage &entry = rvalue_regs[rvalue_regs_count - 1];
> +
> +   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
> +   src.file = PROGRAM_TEMPORARY;
> +   src.index = next_temp;
> +   src.reladdr = NULL;
> +   next_temp += type_size(type);
> +
> +   entry.file = PROGRAM_TEMPORARY;
> +   entry.index = src.index;
> +   entry.type = type;
> +   entry.is_reladdressed = false;
> +
> +   if (type->is_array() || type->is_record() || type->is_matrix()) {
> +      entry.is_array = true;
> +   }
> +
> +   if (type->is_array() || type->is_record()) {
> +      src.swizzle = SWIZZLE_NOOP;
> +   } else {
> +      src.swizzle = swizzle_for_size(type->vector_elements);
> +   }
> +   src.negate = 0;
> +
> +   return src;
> +}
> +
> +void variable_store::reindex_reladdress(const void *key, void *data, void *closure)
> +{
> +   ir_variable *var = (ir_variable *) key;
> +   variable_storage *storage = (variable_storage *) data;
> +   variable_store *store = (variable_store *) closure;
> +
> +   if (storage->file == PROGRAM_TEMPORARY && storage->is_array) {
> +
> +      if (storage->is_reladdressed) {
> +         unsigned old_index = storage->index;
> +         storage->index = store->next_temp_array;
> +         size_t sz = type_size(storage->type);
> +         store->next_temp_array += sz;
> +         for (unsigned i = 0; i < sz; i++) {
> +            store->reindex_table[old_index + i] = storage->index + i;
> +         }
> +      }
> +   }
> +}
> +
> +void variable_store::reindex_non_reladdress(const void *key, void *data, void *closure)
> +{
> +   ir_variable *var = (ir_variable *) key;
> +   variable_storage *storage = (variable_storage *) data;
> +   variable_store *store = (variable_store *) closure;
> +
> +   if (storage->file == PROGRAM_TEMPORARY && !storage->is_reladdressed) {
> +      unsigned old_index = storage->index;
> +      size_t sz = type_size(storage->type);
> +      storage->index = store->next_temp;
> +      store->next_temp += sz;
> +      for (unsigned i = 0; i < sz; i++) {
> +         store->reindex_table[old_index + i] = storage->index + i;
> +      }
> +   }
> +}
> +
> +void
> +variable_store::reindex_rvalue_reladdressed()
> +{
> +   for (unsigned i = 0; i < rvalue_regs_count; i++) {
> +      variable_storage &storage = rvalue_regs[i];
> +      if (storage.is_reladdressed) {
> +         unsigned old_index = storage.index;
> +         size_t sz = type_size(storage.type);
> +         storage.index = next_temp_array;
> +         next_temp_array += sz;
> +         for (unsigned i = 0; i < sz; i++) {
> +            reindex_table[old_index + i] = storage.index + i;
> +         }
> +      }
> +   }
> +}
> +
> +void
> +variable_store::reindex_rvalue()
> +{
> +   for (unsigned i = 0; i < rvalue_regs_count; i++) {
> +      variable_storage &storage = rvalue_regs[i];
> +      if (!storage.is_reladdressed) {
> +         unsigned old_index = storage.index;
> +         size_t sz = type_size(storage.type);
> +         storage.index = next_temp;
> +         next_temp += sz;
> +         for (unsigned i = 0; i < sz; i++) {
> +            reindex_table[old_index + i] = storage.index + i;
> +         }
> +      }
> +   }
> +}
> +
> +
> +/**
> + * This function will generate a reindex table for every temp registers to pass to
> + * renumber_temp_regs.
> + * This will move every registers associated to an indirectly addressed variables
> + * to the beginning of TGSI_FILE_TEMPORARY (ie gives them lowest index) ;
> + * thus every registers that have an index >= next_temp_array after this function call
> + * are sure to be directly addressed.
> + *
> + * TODO : In fact some registers before next_temp_array might be directly addressed too
> + * (for instance in a struct type that holds an array indirectly accessed), we can put them
> + * after next_temp_array too.
> + */
> +void
> +variable_store::optimise_access(void)
> +{
> +   reindex_table = rzalloc_array(mem_ctx,unsigned,next_temp);
> +   next_temp_array = 1;
> +   hash_table_call_foreach(variables,variable_store::reindex_reladdress,this);
> +   reindex_rvalue_reladdressed();
> +   next_temp = next_temp_array + 1;
> +   hash_table_call_foreach(variables,variable_store::reindex_non_reladdress,this);
> +   reindex_rvalue();
> +}
> +
>  class glsl_to_tgsi_visitor : public ir_visitor {
>  public:
>     glsl_to_tgsi_visitor();
> @@ -298,8 +536,6 @@ public:
>     struct gl_shader_program *shader_program;
>     struct gl_shader_compiler_options *options;
>  
> -   int next_temp;
> -
>     int num_address_regs;
>     int samplers_used;
>     bool indirect_addr_temps;
> @@ -308,14 +544,11 @@ public:
>     int glsl_version;
>     bool native_integers;
>  
> -   variable_storage *find_variable_storage(ir_variable *var);
> -
>     int add_constant(gl_register_file file, gl_constant_value values[4],
>                      int size, int datatype, GLuint *swizzle_out);
>  
>     function_entry *get_function_signature(ir_function_signature *sig);
>  
> -   st_src_reg get_temp(const glsl_type *type);
>     void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
>  
>     st_src_reg st_src_reg_for_float(float val);
> @@ -352,7 +585,7 @@ public:
>     st_src_reg result;
>  
>     /** List of variable_storage */
> -   exec_list variables;
> +   variable_store store;
>  
>     /** List of immediate_storage */
>     exec_list immediates;
> @@ -427,6 +660,7 @@ public:
>     int eliminate_dead_code_advanced(void);
>     void merge_registers(void);
>     void renumber_registers(void);
> +   void renumber_temp_regs(unsigned*);
>  
>     void *mem_ctx;
>  };
> @@ -797,7 +1031,7 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
>      * that will be written by the SCS instrution, we'll need a temporary.
>      */
>     if (scs_mask != unsigned(dst.writemask)) {
> -      tmp = get_temp(glsl_type::vec4_type);
> +      tmp = store.get_temp(glsl_type::vec4_type);
>     }
>  
>     for (unsigned i = 0; i < 4; i++) {
> @@ -967,48 +1201,6 @@ type_size(const struct glsl_type *type)
>     }
>  }
>  
> -/**
> - * In the initial pass of codegen, we assign temporary numbers to
> - * intermediate results.  (not SSA -- variable assignments will reuse
> - * storage).
> - */
> -st_src_reg
> -glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
> -{
> -   st_src_reg src;
> -
> -   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
> -   src.file = PROGRAM_TEMPORARY;
> -   src.index = next_temp;
> -   src.reladdr = NULL;
> -   next_temp += type_size(type);
> -
> -   if (type->is_array() || type->is_record()) {
> -      src.swizzle = SWIZZLE_NOOP;
> -   } else {
> -      src.swizzle = swizzle_for_size(type->vector_elements);
> -   }
> -   src.negate = 0;
> -
> -   return src;
> -}
> -
> -variable_storage *
> -glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
> -{
> -   
> -   variable_storage *entry;
> -
> -   foreach_iter(exec_list_iterator, iter, this->variables) {
> -      entry = (variable_storage *)iter.get();
> -
> -      if (entry->var == var)
> -         return entry;
> -   }
> -
> -   return NULL;
> -}
> -
>  void
>  glsl_to_tgsi_visitor::visit(ir_variable *ir)
>  {
> @@ -1040,8 +1232,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
>        st_dst_reg dst;
>        if (i == ir->num_state_slots) {
>           /* We'll set the index later. */
> -         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
> -         this->variables.push_tail(storage);
> +         storage = store.push(ir, PROGRAM_STATE_VAR, -1);
>  
>           dst = undef_dst;
>        } else {
> @@ -1051,10 +1242,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
>            */
>           assert((int) ir->num_state_slots == type_size(ir->type));
>  
> -         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
> -        					 this->next_temp);
> -         this->variables.push_tail(storage);
> -         this->next_temp += type_size(ir->type);
> +         storage = store.push(ir);
>  
>           dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
>                 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
> @@ -1205,7 +1393,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
>     ir->operands[nonmul_operand]->accept(this);
>     c = this->result;
>  
> -   this->result = get_temp(ir->type);
> +   this->result = store.get_temp(ir->type);
>     result_dst = st_dst_reg(this->result);
>     result_dst.writemask = (1 << ir->type->vector_elements) - 1;
>     emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
> @@ -1247,7 +1435,7 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan
>  
>     b.negate = ~b.negate;
>  
> -   this->result = get_temp(ir->type);
> +   this->result = store.get_temp(ir->type);
>     emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
>  
>     return true;
> @@ -1288,7 +1476,7 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
>        new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
>        new_inst->saturate = true;
>     } else {
> -      this->result = get_temp(ir->type);
> +      this->result = store.get_temp(ir->type);
>        st_dst_reg result_dst = st_dst_reg(this->result);
>        result_dst.writemask = (1 << ir->type->vector_elements) - 1;
>        glsl_to_tgsi_instruction *inst;
> @@ -1309,7 +1497,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
>     emit_arl(ir, address_reg, *reg->reladdr);
>  
>     if (*num_reladdr != 1) {
> -      st_src_reg temp = get_temp(glsl_type::vec4_type);
> +      st_src_reg temp = store.get_temp(glsl_type::vec4_type);
>  
>        emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
>        *reg = temp;
> @@ -1378,7 +1566,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>     /* Storage for our result.  Ideally for an assignment we'd be using
>      * the actual storage for the result here, instead.
>      */
> -   result_src = get_temp(ir->type);
> +   result_src = store.get_temp(ir->type);
>     /* convenience for the emit functions below. */
>     result_dst = st_dst_reg(result_src);
>     /* Limit writes to the channels that will be used by result_src later.
> @@ -1508,7 +1696,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>        /* "==" operator producing a scalar boolean. */
>        if (ir->operands[0]->type->is_vector() ||
>            ir->operands[1]->type->is_vector()) {
> -         st_src_reg temp = get_temp(native_integers ?
> +         st_src_reg temp = store.get_temp(native_integers ?
>                 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
>                 glsl_type::vec4_type);
>           
> @@ -1566,7 +1754,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>        /* "!=" operator producing a scalar boolean. */
>        if (ir->operands[0]->type->is_vector() ||
>            ir->operands[1]->type->is_vector()) {
> -         st_src_reg temp = get_temp(native_integers ?
> +         st_src_reg temp = store.get_temp(native_integers ?
>                 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
>                 glsl_type::vec4_type);
>           emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
> @@ -2220,7 +2408,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
>               * If TGSI had a UCMP instruction or similar, this extra
>               * instruction would not be necessary.
>               */
> -            condition_temp = get_temp(glsl_type::vec4_type);
> +            condition_temp = store.get_temp(glsl_type::vec4_type);
>              condition.negate = 0;
>              emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
>              condition_temp.swizzle = condition.swizzle;
> @@ -2277,7 +2465,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
>      * get lucky, copy propagation will eliminate the extra moves.
>      */
>     if (ir->type->base_type == GLSL_TYPE_STRUCT) {
> -      st_src_reg temp_base = get_temp(ir->type);
> +      st_src_reg temp_base = store.get_temp(ir->type);
>        st_dst_reg temp = st_dst_reg(temp_base);
>  
>        foreach_iter(exec_list_iterator, iter, ir->components) {
> @@ -2301,7 +2489,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
>     }
>  
>     if (ir->type->is_array()) {
> -      st_src_reg temp_base = get_temp(ir->type);
> +      st_src_reg temp_base = store.get_temp(ir->type);
>        st_dst_reg temp = st_dst_reg(temp_base);
>        int size = type_size(ir->type->fields.array);
>  
> @@ -2324,7 +2512,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
>     }
>  
>     if (ir->type->is_matrix()) {
> -      st_src_reg mat = get_temp(ir->type);
> +      st_src_reg mat = store.get_temp(ir->type);
>        st_dst_reg mat_column = st_dst_reg(mat);
>  
>        for (i = 0; i < ir->type->matrix_columns; i++) {
> @@ -2414,18 +2602,14 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
>        ir_variable *param = (ir_variable *)iter.get();
>        variable_storage *storage;
>  
> -      storage = find_variable_storage(param);
> +      storage = store.find_variable_storage(param);
>        assert(!storage);
>  
> -      storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
> -        				      this->next_temp);
> -      this->variables.push_tail(storage);
> -
> -      this->next_temp += type_size(param->type);
> +      storage =store.push(param);

There should be whitespace after the "=".

>     }
>  
>     if (!sig->return_type->is_void()) {
> -      entry->return_reg = get_temp(sig->return_type);
> +      entry->return_reg = store.get_temp(sig->return_type);
>     } else {
>        entry->return_reg = undef_src;
>     }
> @@ -2450,7 +2634,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
>  
>        if (param->mode == ir_var_in ||
>            param->mode == ir_var_inout) {
> -         variable_storage *storage = find_variable_storage(param);
> +         variable_storage *storage = store.find_variable_storage(param);
>           assert(storage);
>  
>           param_rval->accept(this);
> @@ -2486,7 +2670,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
>  
>        if (param->mode == ir_var_out ||
>            param->mode == ir_var_inout) {
> -         variable_storage *storage = find_variable_storage(param);
> +         variable_storage *storage = store.find_variable_storage(param);
>           assert(storage);
>  
>           st_src_reg r;
> @@ -2530,7 +2714,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
>         * we're doing plain old texturing.  The optimization passes on
>         * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
>         */
> -      coord = get_temp(glsl_type::vec4_type);
> +      coord = store.get_temp(glsl_type::vec4_type);
>        coord_dst = st_dst_reg(coord);
>        emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
>     }
> @@ -2543,7 +2727,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
>     /* Storage for our result.  Ideally for an assignment we'd be using
>      * the actual storage for the result here, instead.
>      */
> -   result_src = get_temp(glsl_type::vec4_type);
> +   result_src = store.get_temp(glsl_type::vec4_type);
>     result_dst = st_dst_reg(result_src);
>  
>     switch (ir->op) {
> @@ -2613,7 +2797,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
>               */
>              ir->shadow_comparitor->accept(this);
>  
> -            tmp_src = get_temp(glsl_type::vec4_type);
> +            tmp_src = store.get_temp(glsl_type::vec4_type);
>              st_dst_reg tmp_dst = st_dst_reg(tmp_src);
>  
>  	    /* Projective division not allowed for array samplers. */
> @@ -2779,7 +2963,7 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
>         * have something to set cond_update on.
>         */
>        if (cond_inst == prev_inst) {
> -         st_src_reg temp = get_temp(glsl_type::bool_type);
> +         st_src_reg temp = store.get_temp(glsl_type::bool_type);
>           cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
>        }
>        cond_inst->cond_update = GL_TRUE;
> @@ -2805,7 +2989,6 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
>  glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
>  {
>     result.file = PROGRAM_UNDEFINED;
> -   next_temp = 1;
>     next_signature_id = 1;
>     num_immediates = 0;
>     current_function = NULL;
> @@ -3266,17 +3449,18 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index)
>  void
>  glsl_to_tgsi_visitor::copy_propagate(void)
>  {
> +   int next_temp = store.temp_amount();

This is nitpicking a bit, but since you're no longer bound to the name
"next_temp" it would make more sense to call this "num_temps".

>     glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
>          					    glsl_to_tgsi_instruction *,
> -        					    this->next_temp * 4);
> -   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
> +                                        next_temp * 4);
> +   int *acp_level = rzalloc_array(mem_ctx, int, next_temp * 4);
>     int level = 0;
>  
>     foreach_iter(exec_list_iterator, iter, this->instructions) {
>        glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
>  
>        assert(inst->dst.file != PROGRAM_TEMPORARY
> -             || inst->dst.index < this->next_temp);
> +             || inst->dst.index < next_temp);
>  
>        /* First, do any copy propagation possible into the src regs. */
>        for (int r = 0; r < 3; r++) {
> @@ -3336,7 +3520,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>        case TGSI_OPCODE_BGNLOOP:
>        case TGSI_OPCODE_ENDLOOP:
>           /* End of a basic block, clear the ACP entirely. */
> -         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
> +         memset(acp, 0, sizeof(*acp) * next_temp * 4);
>           break;
>  
>        case TGSI_OPCODE_IF:
> @@ -3348,7 +3532,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>           /* Clear all channels written inside the block from the ACP, but
>            * leaving those that were not touched.
>            */
> -         for (int r = 0; r < this->next_temp; r++) {
> +         for (int r = 0; r < next_temp; r++) {
>              for (int c = 0; c < 4; c++) {
>                 if (!acp[4 * r + c])
>          	  continue;
> @@ -3369,13 +3553,13 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>              /* Any temporary might be written, so no copy propagation
>               * across this instruction.
>               */
> -            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
> +            memset(acp, 0, sizeof(*acp) * next_temp * 4);
>           } else if (inst->dst.file == PROGRAM_OUTPUT &&
>          	    inst->dst.reladdr) {
>              /* Any output might be written, so no copy propagation
>               * from outputs across this instruction.
>               */
> -            for (int r = 0; r < this->next_temp; r++) {
> +            for (int r = 0; r < next_temp; r++) {
>                 for (int c = 0; c < 4; c++) {
>          	  if (!acp[4 * r + c])
>          	     continue;
> @@ -3396,7 +3580,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>              }
>  
>              /* Clear where it's used as src. */
> -            for (int r = 0; r < this->next_temp; r++) {
> +            for (int r = 0; r < next_temp; r++) {
>                 for (int c = 0; c < 4; c++) {
>          	  if (!acp[4 * r + c])
>          	     continue;
> @@ -3457,8 +3641,8 @@ void
>  glsl_to_tgsi_visitor::eliminate_dead_code(void)
>  {
>     int i;
> -   
> -   for (i=0; i < this->next_temp; i++) {
> +   int next_temp = store.temp_amount();

Same comment about the variable name.

> +   for (i=0; i < next_temp; i++) {
>        int last_read = get_last_temp_read(i);
>        int j = 0;
>        
> @@ -3492,10 +3676,11 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
>  int
>  glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
>  {
> +   int next_temp = store.temp_amount();
>     glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
>                                                       glsl_to_tgsi_instruction *,
> -                                                     this->next_temp * 4);
> -   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
> +                                                     next_temp * 4);
> +   int *write_level = rzalloc_array(mem_ctx, int, next_temp * 4);
>     int level = 0;
>     int removed = 0;
>  
> @@ -3503,7 +3688,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
>        glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
>  
>        assert(inst->dst.file != PROGRAM_TEMPORARY
> -             || inst->dst.index < this->next_temp);
> +             || inst->dst.index < next_temp);
>        
>        switch (inst->op) {
>        case TGSI_OPCODE_BGNLOOP:
> @@ -3518,7 +3703,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
>            * dead code of this type, so it shouldn't make a difference as long as
>            * the dead code elimination pass in the GLSL compiler does its job.
>            */
> -         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
> +         memset(writes, 0, sizeof(*writes) * next_temp * 4);
>           break;
>  
>        case TGSI_OPCODE_ENDIF:
> @@ -3526,7 +3711,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
>           /* Promote the recorded level of all channels written inside the
>            * preceding if or else block to the level above the if/else block.
>            */
> -         for (int r = 0; r < this->next_temp; r++) {
> +         for (int r = 0; r < next_temp; r++) {
>              for (int c = 0; c < 4; c++) {
>                 if (!writes[4 * r + c])
>          	         continue;
> @@ -3554,7 +3739,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
>                 /* Any temporary might be read, so no dead code elimination 
>                  * across this instruction.
>                  */
> -               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
> +               memset(writes, 0, sizeof(*writes) * next_temp * 4);
>              } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
>                 /* Clear where it's used as src. */
>                 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
> @@ -3595,7 +3780,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
>     }
>  
>     /* Anything still in the write array at this point is dead code. */
> -   for (int r = 0; r < this->next_temp; r++) {
> +   for (int r = 0; r < next_temp; r++) {
>        for (int c = 0; c < 4; c++) {
>           glsl_to_tgsi_instruction *inst = writes[4 * r + c];
>           if (inst)
> @@ -3682,6 +3867,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
>   * by optimization passes. */
>  void
>  glsl_to_tgsi_visitor::renumber_registers(void)
> +void glsl_to_tgsi_visitor::renumber_temp_regs(unsigned *reindex_table)
>  {
>     int i = 0;
>     int new_index = 0;
> @@ -3691,6 +3877,21 @@ glsl_to_tgsi_visitor::renumber_registers(void)
>        if (i != new_index)
>           rename_temp_register(i, new_index);
>        new_index++;
> +   foreach_iter(exec_list_iterator, iter, this->instructions) {
> +      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
> +      unsigned j;
> +
> +      for (j=0; j < num_inst_src_regs(inst->op); j++) {
> +         if (inst->src[j].file == PROGRAM_TEMPORARY) {
> +            unsigned index = inst->src[j].index;
> +            inst->src[j].index = reindex_table[index];
> +         }
> +      }
> +
> +      if (inst->dst.file == PROGRAM_TEMPORARY) {
> +         unsigned index = inst->dst.index;
> +         inst->dst.index = reindex_table[index];
> +      }
>     }
>     
>     this->next_temp = new_index;
> @@ -3720,7 +3921,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
>     v->glsl_version = original->glsl_version;
>     v->native_integers = original->native_integers;
>     v->options = original->options;
> -   v->next_temp = original->next_temp;
>     v->num_address_regs = original->num_address_regs;
>     v->samplers_used = prog->SamplersUsed = original->samplers_used;
>     v->indirect_addr_temps = original->indirect_addr_temps;
> @@ -3732,7 +3932,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
>      * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
>      */
>     coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
> -   src0 = v->get_temp(glsl_type::vec4_type);
> +   src0 = v->store.get_temp(glsl_type::vec4_type);
>     dst0 = st_dst_reg(src0);
>     inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
>     inst->sampler = 0;
> @@ -3762,7 +3962,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
>     }
>  
>     if (pixel_maps) {
> -      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
> +      st_src_reg temp = v->store.get_temp(glsl_type::vec4_type);
>        st_dst_reg temp_dst = st_dst_reg(temp);
>  
>        assert(st->pixel_xfer.pixelmap_texture);
> @@ -3850,7 +4050,6 @@ get_bitmap_visitor(struct st_fragment_program *fp,
>     v->glsl_version = original->glsl_version;
>     v->native_integers = original->native_integers;
>     v->options = original->options;
> -   v->next_temp = original->next_temp;
>     v->num_address_regs = original->num_address_regs;
>     v->samplers_used = prog->SamplersUsed = original->samplers_used;
>     v->indirect_addr_temps = original->indirect_addr_temps;
> @@ -3859,7 +4058,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
>  
>     /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
>     coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
> -   src0 = v->get_temp(glsl_type::vec4_type);
> +   src0 = v->store.get_temp(glsl_type::vec4_type);
>     dst0 = st_dst_reg(src0);
>     inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
>     inst->sampler = samplerIndex;
> @@ -4542,6 +4741,7 @@ st_translate_program(
>     unsigned i;
>     enum pipe_error ret = PIPE_OK;
>  
> +   int next_temp = program->store.temp_amount();

Same comment about the variable name.

>     assert(numInputs <= Elements(t->inputs));
>     assert(numOutputs <= Elements(t->outputs));
>  
> @@ -4695,7 +4895,7 @@ st_translate_program(
>         * in sequential order.  Else, we declare them on demand elsewhere.
>         * (Note: the number of temporaries is equal to program->next_temp)
>         */
> -      for (i = 0; i < (unsigned)program->next_temp; i++) {
> +      for (i = 0; i < (unsigned)next_temp; i++) {
>           /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
>           t->temps[i] = ureg_DECL_temporary(t->ureg);
>        }
> @@ -4879,6 +5079,7 @@ get_mesa_program(struct gl_context *ctx,
>     v->options = options;
>     v->glsl_version = ctx->Const.GLSLVersion;
>     v->native_integers = ctx->Const.NativeIntegers;
> +   v->store.native_integers = v->native_integers;
>  
>     _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
>  					       prog->Parameters);
> @@ -4929,6 +5130,9 @@ get_mesa_program(struct gl_context *ctx,
>     }
>  #endif
>  
> +   v->store.optimise_access();
> +   v->renumber_temp_regs(v->store.reindex_table);
> +
>     if (!screen->get_shader_param(screen, pipe_shader_type,
>                                   PIPE_SHADER_CAP_OUTPUT_READ)) {
>        /* Remove reads to output registers, and to varyings in vertex shaders. */



More information about the mesa-dev mailing list