[Mesa-dev] [PATCH 1/3] glsl_to_tgsi: Create a new variable_store class replacing variables field in glsl_to_tgsi_visitor
Bryan Cain
bryancain3 at gmail.com
Sat Jan 7 12:51:29 PST 2012
This is good work. I just have a few suggested changes.
On 01/07/2012 12:26 PM, Vincent Lejeune wrote:
> ---
> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 414 +++++++++++++++++++++-------
> 1 files changed, 309 insertions(+), 105 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index cecceca..17ae525 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -230,14 +230,16 @@ public:
> class variable_storage : public exec_node {
> public:
> variable_storage(ir_variable *var, gl_register_file file, int index)
> - : file(file), index(index), var(var)
> + : file(file), index(index), type(var->type), is_array(var->type->is_array() || var->type->is_record() || var->type->is_matrix()), is_reladdressed(false)
> {
> /* empty */
> }
>
> gl_register_file file;
> int index;
> - ir_variable *var; /* variable that maps to this, if any */
> + const glsl_type *type; /* variable that maps to this, if any */
The comment there doesn't make sense anymore.
> + bool is_array;
> + bool is_reladdressed;
> };
>
> class immediate_storage : public exec_node {
> @@ -286,6 +288,242 @@ public:
> st_src_reg return_reg;
> };
>
> +static int type_size(const glsl_type *type);
> +static int swizzle_for_size(int size);
> +
> +
> +/**
> + * Single place to store all temporary values (either explicit - ir_variable* -
> + * or implicit - returned by get_temp -).
I don't think the last " -" is supposed to be there.
> + *
> + * Explicit temps are stored in variables hash_table.
> + * Implicit temps are stored in rvalue_regs array.
> + */
> +class variable_store {
> + friend class glsl_to_tgsi_variable_allocator;
> +protected:
> + void *mem_ctx;
> + hash_table* variables;
> + unsigned next_temp;
> + unsigned next_temp_array;
> + static void reindex_reladdress(const void *, void *, void *);
> + static void reindex_non_reladdress(const void *, void *, void *);
> + void reindex_rvalue();
> + void reindex_rvalue_reladdressed();
> + variable_storage* rvalue_regs;
> + unsigned rvalue_regs_count;
> +
> +public:
> + bool native_integers;
> + unsigned temp_amount() const;
> + unsigned temp_array_amount() const;
> + variable_store();
> + ~variable_store();
> + variable_storage *find_variable_storage(class ir_variable *var) const;
> + variable_storage *push(class ir_variable *, gl_register_file, int);
> + variable_storage *push(class ir_variable *);
> + variable_storage *retrieve_anonymous_temp(unsigned);
> + st_src_reg get_temp(const glsl_type *type);
> + void optimise_access(void);
> + unsigned *reindex_table;
> +};
> +
> +unsigned
> +variable_store::temp_amount() const
> +{
> + return next_temp;
> +}
> +
> +unsigned
> +variable_store::temp_array_amount() const
> +{
> + return next_temp_array;
> +}
> +
> +variable_store::variable_store():mem_ctx(ralloc_context(NULL)),next_temp(1),next_temp_array(1),rvalue_regs(NULL),rvalue_regs_count(0)
> +{
> + variables = hash_table_ctor(0,hash_table_pointer_hash,hash_table_pointer_compare);
> +}
> +
> +variable_store::~variable_store()
> +{
> + hash_table_dtor(variables);
> + ralloc_free(mem_ctx);
> +}
> +
> +variable_storage *
> +variable_store::find_variable_storage(ir_variable *var) const
> +{
> + return (class variable_storage *) hash_table_find(variables,var);
The convention in the rest of glsl_to_tgsi (and Mesa) is to put a space
after the comma separating arguments. I'd prefer it if this patch
followed the same convention.
> +}
> +
> +variable_storage*
> +variable_store::push(class ir_variable *var, gl_register_file file, int index)
> +{
> + variable_storage *storage = new (mem_ctx) variable_storage(var,file,index);
> + hash_table_insert(variables,storage,var);
> + return storage;
> +}
> +
> +variable_storage*
> +variable_store::push(ir_variable *ir)
> +{
> + variable_storage* retval = push(ir, PROGRAM_TEMPORARY, next_temp);
> + next_temp += type_size(ir->type);
> + if (ir->type->is_array() || ir->type->is_record() || ir->type->is_matrix()) {
> + retval->is_array = true;
> + }
> + return retval;
> +}
> +
> +variable_storage*
> +variable_store::retrieve_anonymous_temp(unsigned reg)
> +{
> + for (unsigned i = 0; i < rvalue_regs_count; i++) {
> + unsigned range_start = rvalue_regs[i].index;
> + unsigned range_end = range_start + type_size(rvalue_regs[i].type) - 1;
> + if (reg >= range_start && reg <= range_end) {
> + return rvalue_regs + i;
> + }
> + }
> + printf ("Failed to get storage");
> + exit(1);
I don't think writing to stdout and exiting cleanly like this is the
right way to handle a fatal error. It should probably write the error
message to stderr (making clear that it's a fatal error in Mesa, not an
error in the application) and assert (so that a debugger can catch it),
then use call exit(1).
> +}
> +
> +/**
> + * In the initial pass of codegen, we assign temporary numbers to
> + * intermediate results. (not SSA -- variable assignments will reuse
> + * storage).
> + */
> +st_src_reg
> +variable_store::get_temp(const glsl_type *type)
> +{
> + st_src_reg src;
> + rvalue_regs_count++;
> + rvalue_regs = reralloc(mem_ctx,rvalue_regs,variable_storage,rvalue_regs_count);
> + variable_storage &entry = rvalue_regs[rvalue_regs_count - 1];
> +
> + src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
> + src.file = PROGRAM_TEMPORARY;
> + src.index = next_temp;
> + src.reladdr = NULL;
> + next_temp += type_size(type);
> +
> + entry.file = PROGRAM_TEMPORARY;
> + entry.index = src.index;
> + entry.type = type;
> + entry.is_reladdressed = false;
> +
> + if (type->is_array() || type->is_record() || type->is_matrix()) {
> + entry.is_array = true;
> + }
> +
> + if (type->is_array() || type->is_record()) {
> + src.swizzle = SWIZZLE_NOOP;
> + } else {
> + src.swizzle = swizzle_for_size(type->vector_elements);
> + }
> + src.negate = 0;
> +
> + return src;
> +}
> +
> +void variable_store::reindex_reladdress(const void *key, void *data, void *closure)
> +{
> + ir_variable *var = (ir_variable *) key;
> + variable_storage *storage = (variable_storage *) data;
> + variable_store *store = (variable_store *) closure;
> +
> + if (storage->file == PROGRAM_TEMPORARY && storage->is_array) {
> +
> + if (storage->is_reladdressed) {
> + unsigned old_index = storage->index;
> + storage->index = store->next_temp_array;
> + size_t sz = type_size(storage->type);
> + store->next_temp_array += sz;
> + for (unsigned i = 0; i < sz; i++) {
> + store->reindex_table[old_index + i] = storage->index + i;
> + }
> + }
> + }
> +}
> +
> +void variable_store::reindex_non_reladdress(const void *key, void *data, void *closure)
> +{
> + ir_variable *var = (ir_variable *) key;
> + variable_storage *storage = (variable_storage *) data;
> + variable_store *store = (variable_store *) closure;
> +
> + if (storage->file == PROGRAM_TEMPORARY && !storage->is_reladdressed) {
> + unsigned old_index = storage->index;
> + size_t sz = type_size(storage->type);
> + storage->index = store->next_temp;
> + store->next_temp += sz;
> + for (unsigned i = 0; i < sz; i++) {
> + store->reindex_table[old_index + i] = storage->index + i;
> + }
> + }
> +}
> +
> +void
> +variable_store::reindex_rvalue_reladdressed()
> +{
> + for (unsigned i = 0; i < rvalue_regs_count; i++) {
> + variable_storage &storage = rvalue_regs[i];
> + if (storage.is_reladdressed) {
> + unsigned old_index = storage.index;
> + size_t sz = type_size(storage.type);
> + storage.index = next_temp_array;
> + next_temp_array += sz;
> + for (unsigned i = 0; i < sz; i++) {
> + reindex_table[old_index + i] = storage.index + i;
> + }
> + }
> + }
> +}
> +
> +void
> +variable_store::reindex_rvalue()
> +{
> + for (unsigned i = 0; i < rvalue_regs_count; i++) {
> + variable_storage &storage = rvalue_regs[i];
> + if (!storage.is_reladdressed) {
> + unsigned old_index = storage.index;
> + size_t sz = type_size(storage.type);
> + storage.index = next_temp;
> + next_temp += sz;
> + for (unsigned i = 0; i < sz; i++) {
> + reindex_table[old_index + i] = storage.index + i;
> + }
> + }
> + }
> +}
> +
> +
> +/**
> + * This function will generate a reindex table for every temp registers to pass to
> + * renumber_temp_regs.
> + * This will move every registers associated to an indirectly addressed variables
> + * to the beginning of TGSI_FILE_TEMPORARY (ie gives them lowest index) ;
> + * thus every registers that have an index >= next_temp_array after this function call
> + * are sure to be directly addressed.
> + *
> + * TODO : In fact some registers before next_temp_array might be directly addressed too
> + * (for instance in a struct type that holds an array indirectly accessed), we can put them
> + * after next_temp_array too.
> + */
> +void
> +variable_store::optimise_access(void)
> +{
> + reindex_table = rzalloc_array(mem_ctx,unsigned,next_temp);
> + next_temp_array = 1;
> + hash_table_call_foreach(variables,variable_store::reindex_reladdress,this);
> + reindex_rvalue_reladdressed();
> + next_temp = next_temp_array + 1;
> + hash_table_call_foreach(variables,variable_store::reindex_non_reladdress,this);
> + reindex_rvalue();
> +}
> +
> class glsl_to_tgsi_visitor : public ir_visitor {
> public:
> glsl_to_tgsi_visitor();
> @@ -298,8 +536,6 @@ public:
> struct gl_shader_program *shader_program;
> struct gl_shader_compiler_options *options;
>
> - int next_temp;
> -
> int num_address_regs;
> int samplers_used;
> bool indirect_addr_temps;
> @@ -308,14 +544,11 @@ public:
> int glsl_version;
> bool native_integers;
>
> - variable_storage *find_variable_storage(ir_variable *var);
> -
> int add_constant(gl_register_file file, gl_constant_value values[4],
> int size, int datatype, GLuint *swizzle_out);
>
> function_entry *get_function_signature(ir_function_signature *sig);
>
> - st_src_reg get_temp(const glsl_type *type);
> void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
>
> st_src_reg st_src_reg_for_float(float val);
> @@ -352,7 +585,7 @@ public:
> st_src_reg result;
>
> /** List of variable_storage */
> - exec_list variables;
> + variable_store store;
>
> /** List of immediate_storage */
> exec_list immediates;
> @@ -427,6 +660,7 @@ public:
> int eliminate_dead_code_advanced(void);
> void merge_registers(void);
> void renumber_registers(void);
> + void renumber_temp_regs(unsigned*);
>
> void *mem_ctx;
> };
> @@ -797,7 +1031,7 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
> * that will be written by the SCS instrution, we'll need a temporary.
> */
> if (scs_mask != unsigned(dst.writemask)) {
> - tmp = get_temp(glsl_type::vec4_type);
> + tmp = store.get_temp(glsl_type::vec4_type);
> }
>
> for (unsigned i = 0; i < 4; i++) {
> @@ -967,48 +1201,6 @@ type_size(const struct glsl_type *type)
> }
> }
>
> -/**
> - * In the initial pass of codegen, we assign temporary numbers to
> - * intermediate results. (not SSA -- variable assignments will reuse
> - * storage).
> - */
> -st_src_reg
> -glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
> -{
> - st_src_reg src;
> -
> - src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
> - src.file = PROGRAM_TEMPORARY;
> - src.index = next_temp;
> - src.reladdr = NULL;
> - next_temp += type_size(type);
> -
> - if (type->is_array() || type->is_record()) {
> - src.swizzle = SWIZZLE_NOOP;
> - } else {
> - src.swizzle = swizzle_for_size(type->vector_elements);
> - }
> - src.negate = 0;
> -
> - return src;
> -}
> -
> -variable_storage *
> -glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
> -{
> -
> - variable_storage *entry;
> -
> - foreach_iter(exec_list_iterator, iter, this->variables) {
> - entry = (variable_storage *)iter.get();
> -
> - if (entry->var == var)
> - return entry;
> - }
> -
> - return NULL;
> -}
> -
> void
> glsl_to_tgsi_visitor::visit(ir_variable *ir)
> {
> @@ -1040,8 +1232,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
> st_dst_reg dst;
> if (i == ir->num_state_slots) {
> /* We'll set the index later. */
> - storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
> - this->variables.push_tail(storage);
> + storage = store.push(ir, PROGRAM_STATE_VAR, -1);
>
> dst = undef_dst;
> } else {
> @@ -1051,10 +1242,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
> */
> assert((int) ir->num_state_slots == type_size(ir->type));
>
> - storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
> - this->next_temp);
> - this->variables.push_tail(storage);
> - this->next_temp += type_size(ir->type);
> + storage = store.push(ir);
>
> dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
> native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
> @@ -1205,7 +1393,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
> ir->operands[nonmul_operand]->accept(this);
> c = this->result;
>
> - this->result = get_temp(ir->type);
> + this->result = store.get_temp(ir->type);
> result_dst = st_dst_reg(this->result);
> result_dst.writemask = (1 << ir->type->vector_elements) - 1;
> emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
> @@ -1247,7 +1435,7 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan
>
> b.negate = ~b.negate;
>
> - this->result = get_temp(ir->type);
> + this->result = store.get_temp(ir->type);
> emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
>
> return true;
> @@ -1288,7 +1476,7 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
> new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
> new_inst->saturate = true;
> } else {
> - this->result = get_temp(ir->type);
> + this->result = store.get_temp(ir->type);
> st_dst_reg result_dst = st_dst_reg(this->result);
> result_dst.writemask = (1 << ir->type->vector_elements) - 1;
> glsl_to_tgsi_instruction *inst;
> @@ -1309,7 +1497,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
> emit_arl(ir, address_reg, *reg->reladdr);
>
> if (*num_reladdr != 1) {
> - st_src_reg temp = get_temp(glsl_type::vec4_type);
> + st_src_reg temp = store.get_temp(glsl_type::vec4_type);
>
> emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
> *reg = temp;
> @@ -1378,7 +1566,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
> /* Storage for our result. Ideally for an assignment we'd be using
> * the actual storage for the result here, instead.
> */
> - result_src = get_temp(ir->type);
> + result_src = store.get_temp(ir->type);
> /* convenience for the emit functions below. */
> result_dst = st_dst_reg(result_src);
> /* Limit writes to the channels that will be used by result_src later.
> @@ -1508,7 +1696,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
> /* "==" operator producing a scalar boolean. */
> if (ir->operands[0]->type->is_vector() ||
> ir->operands[1]->type->is_vector()) {
> - st_src_reg temp = get_temp(native_integers ?
> + st_src_reg temp = store.get_temp(native_integers ?
> glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
> glsl_type::vec4_type);
>
> @@ -1566,7 +1754,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
> /* "!=" operator producing a scalar boolean. */
> if (ir->operands[0]->type->is_vector() ||
> ir->operands[1]->type->is_vector()) {
> - st_src_reg temp = get_temp(native_integers ?
> + st_src_reg temp = store.get_temp(native_integers ?
> glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
> glsl_type::vec4_type);
> emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
> @@ -2220,7 +2408,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
> * If TGSI had a UCMP instruction or similar, this extra
> * instruction would not be necessary.
> */
> - condition_temp = get_temp(glsl_type::vec4_type);
> + condition_temp = store.get_temp(glsl_type::vec4_type);
> condition.negate = 0;
> emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
> condition_temp.swizzle = condition.swizzle;
> @@ -2277,7 +2465,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
> * get lucky, copy propagation will eliminate the extra moves.
> */
> if (ir->type->base_type == GLSL_TYPE_STRUCT) {
> - st_src_reg temp_base = get_temp(ir->type);
> + st_src_reg temp_base = store.get_temp(ir->type);
> st_dst_reg temp = st_dst_reg(temp_base);
>
> foreach_iter(exec_list_iterator, iter, ir->components) {
> @@ -2301,7 +2489,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
> }
>
> if (ir->type->is_array()) {
> - st_src_reg temp_base = get_temp(ir->type);
> + st_src_reg temp_base = store.get_temp(ir->type);
> st_dst_reg temp = st_dst_reg(temp_base);
> int size = type_size(ir->type->fields.array);
>
> @@ -2324,7 +2512,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
> }
>
> if (ir->type->is_matrix()) {
> - st_src_reg mat = get_temp(ir->type);
> + st_src_reg mat = store.get_temp(ir->type);
> st_dst_reg mat_column = st_dst_reg(mat);
>
> for (i = 0; i < ir->type->matrix_columns; i++) {
> @@ -2414,18 +2602,14 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
> ir_variable *param = (ir_variable *)iter.get();
> variable_storage *storage;
>
> - storage = find_variable_storage(param);
> + storage = store.find_variable_storage(param);
> assert(!storage);
>
> - storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
> - this->next_temp);
> - this->variables.push_tail(storage);
> -
> - this->next_temp += type_size(param->type);
> + storage =store.push(param);
There should be whitespace after the "=".
> }
>
> if (!sig->return_type->is_void()) {
> - entry->return_reg = get_temp(sig->return_type);
> + entry->return_reg = store.get_temp(sig->return_type);
> } else {
> entry->return_reg = undef_src;
> }
> @@ -2450,7 +2634,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
>
> if (param->mode == ir_var_in ||
> param->mode == ir_var_inout) {
> - variable_storage *storage = find_variable_storage(param);
> + variable_storage *storage = store.find_variable_storage(param);
> assert(storage);
>
> param_rval->accept(this);
> @@ -2486,7 +2670,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
>
> if (param->mode == ir_var_out ||
> param->mode == ir_var_inout) {
> - variable_storage *storage = find_variable_storage(param);
> + variable_storage *storage = store.find_variable_storage(param);
> assert(storage);
>
> st_src_reg r;
> @@ -2530,7 +2714,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
> * we're doing plain old texturing. The optimization passes on
> * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
> */
> - coord = get_temp(glsl_type::vec4_type);
> + coord = store.get_temp(glsl_type::vec4_type);
> coord_dst = st_dst_reg(coord);
> emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
> }
> @@ -2543,7 +2727,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
> /* Storage for our result. Ideally for an assignment we'd be using
> * the actual storage for the result here, instead.
> */
> - result_src = get_temp(glsl_type::vec4_type);
> + result_src = store.get_temp(glsl_type::vec4_type);
> result_dst = st_dst_reg(result_src);
>
> switch (ir->op) {
> @@ -2613,7 +2797,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
> */
> ir->shadow_comparitor->accept(this);
>
> - tmp_src = get_temp(glsl_type::vec4_type);
> + tmp_src = store.get_temp(glsl_type::vec4_type);
> st_dst_reg tmp_dst = st_dst_reg(tmp_src);
>
> /* Projective division not allowed for array samplers. */
> @@ -2779,7 +2963,7 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
> * have something to set cond_update on.
> */
> if (cond_inst == prev_inst) {
> - st_src_reg temp = get_temp(glsl_type::bool_type);
> + st_src_reg temp = store.get_temp(glsl_type::bool_type);
> cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
> }
> cond_inst->cond_update = GL_TRUE;
> @@ -2805,7 +2989,6 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
> glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
> {
> result.file = PROGRAM_UNDEFINED;
> - next_temp = 1;
> next_signature_id = 1;
> num_immediates = 0;
> current_function = NULL;
> @@ -3266,17 +3449,18 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index)
> void
> glsl_to_tgsi_visitor::copy_propagate(void)
> {
> + int next_temp = store.temp_amount();
This is nitpicking a bit, but since you're no longer bound to the name
"next_temp" it would make more sense to call this "num_temps".
> glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
> glsl_to_tgsi_instruction *,
> - this->next_temp * 4);
> - int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
> + next_temp * 4);
> + int *acp_level = rzalloc_array(mem_ctx, int, next_temp * 4);
> int level = 0;
>
> foreach_iter(exec_list_iterator, iter, this->instructions) {
> glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
>
> assert(inst->dst.file != PROGRAM_TEMPORARY
> - || inst->dst.index < this->next_temp);
> + || inst->dst.index < next_temp);
>
> /* First, do any copy propagation possible into the src regs. */
> for (int r = 0; r < 3; r++) {
> @@ -3336,7 +3520,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> case TGSI_OPCODE_BGNLOOP:
> case TGSI_OPCODE_ENDLOOP:
> /* End of a basic block, clear the ACP entirely. */
> - memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
> + memset(acp, 0, sizeof(*acp) * next_temp * 4);
> break;
>
> case TGSI_OPCODE_IF:
> @@ -3348,7 +3532,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> /* Clear all channels written inside the block from the ACP, but
> * leaving those that were not touched.
> */
> - for (int r = 0; r < this->next_temp; r++) {
> + for (int r = 0; r < next_temp; r++) {
> for (int c = 0; c < 4; c++) {
> if (!acp[4 * r + c])
> continue;
> @@ -3369,13 +3553,13 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> /* Any temporary might be written, so no copy propagation
> * across this instruction.
> */
> - memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
> + memset(acp, 0, sizeof(*acp) * next_temp * 4);
> } else if (inst->dst.file == PROGRAM_OUTPUT &&
> inst->dst.reladdr) {
> /* Any output might be written, so no copy propagation
> * from outputs across this instruction.
> */
> - for (int r = 0; r < this->next_temp; r++) {
> + for (int r = 0; r < next_temp; r++) {
> for (int c = 0; c < 4; c++) {
> if (!acp[4 * r + c])
> continue;
> @@ -3396,7 +3580,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> }
>
> /* Clear where it's used as src. */
> - for (int r = 0; r < this->next_temp; r++) {
> + for (int r = 0; r < next_temp; r++) {
> for (int c = 0; c < 4; c++) {
> if (!acp[4 * r + c])
> continue;
> @@ -3457,8 +3641,8 @@ void
> glsl_to_tgsi_visitor::eliminate_dead_code(void)
> {
> int i;
> -
> - for (i=0; i < this->next_temp; i++) {
> + int next_temp = store.temp_amount();
Same comment about the variable name.
> + for (i=0; i < next_temp; i++) {
> int last_read = get_last_temp_read(i);
> int j = 0;
>
> @@ -3492,10 +3676,11 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
> int
> glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
> {
> + int next_temp = store.temp_amount();
> glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
> glsl_to_tgsi_instruction *,
> - this->next_temp * 4);
> - int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
> + next_temp * 4);
> + int *write_level = rzalloc_array(mem_ctx, int, next_temp * 4);
> int level = 0;
> int removed = 0;
>
> @@ -3503,7 +3688,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
> glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
>
> assert(inst->dst.file != PROGRAM_TEMPORARY
> - || inst->dst.index < this->next_temp);
> + || inst->dst.index < next_temp);
>
> switch (inst->op) {
> case TGSI_OPCODE_BGNLOOP:
> @@ -3518,7 +3703,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
> * dead code of this type, so it shouldn't make a difference as long as
> * the dead code elimination pass in the GLSL compiler does its job.
> */
> - memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
> + memset(writes, 0, sizeof(*writes) * next_temp * 4);
> break;
>
> case TGSI_OPCODE_ENDIF:
> @@ -3526,7 +3711,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
> /* Promote the recorded level of all channels written inside the
> * preceding if or else block to the level above the if/else block.
> */
> - for (int r = 0; r < this->next_temp; r++) {
> + for (int r = 0; r < next_temp; r++) {
> for (int c = 0; c < 4; c++) {
> if (!writes[4 * r + c])
> continue;
> @@ -3554,7 +3739,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
> /* Any temporary might be read, so no dead code elimination
> * across this instruction.
> */
> - memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
> + memset(writes, 0, sizeof(*writes) * next_temp * 4);
> } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
> /* Clear where it's used as src. */
> int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
> @@ -3595,7 +3780,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
> }
>
> /* Anything still in the write array at this point is dead code. */
> - for (int r = 0; r < this->next_temp; r++) {
> + for (int r = 0; r < next_temp; r++) {
> for (int c = 0; c < 4; c++) {
> glsl_to_tgsi_instruction *inst = writes[4 * r + c];
> if (inst)
> @@ -3682,6 +3867,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
> * by optimization passes. */
> void
> glsl_to_tgsi_visitor::renumber_registers(void)
> +void glsl_to_tgsi_visitor::renumber_temp_regs(unsigned *reindex_table)
> {
> int i = 0;
> int new_index = 0;
> @@ -3691,6 +3877,21 @@ glsl_to_tgsi_visitor::renumber_registers(void)
> if (i != new_index)
> rename_temp_register(i, new_index);
> new_index++;
> + foreach_iter(exec_list_iterator, iter, this->instructions) {
> + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
> + unsigned j;
> +
> + for (j=0; j < num_inst_src_regs(inst->op); j++) {
> + if (inst->src[j].file == PROGRAM_TEMPORARY) {
> + unsigned index = inst->src[j].index;
> + inst->src[j].index = reindex_table[index];
> + }
> + }
> +
> + if (inst->dst.file == PROGRAM_TEMPORARY) {
> + unsigned index = inst->dst.index;
> + inst->dst.index = reindex_table[index];
> + }
> }
>
> this->next_temp = new_index;
> @@ -3720,7 +3921,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
> v->glsl_version = original->glsl_version;
> v->native_integers = original->native_integers;
> v->options = original->options;
> - v->next_temp = original->next_temp;
> v->num_address_regs = original->num_address_regs;
> v->samplers_used = prog->SamplersUsed = original->samplers_used;
> v->indirect_addr_temps = original->indirect_addr_temps;
> @@ -3732,7 +3932,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
> * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
> */
> coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
> - src0 = v->get_temp(glsl_type::vec4_type);
> + src0 = v->store.get_temp(glsl_type::vec4_type);
> dst0 = st_dst_reg(src0);
> inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
> inst->sampler = 0;
> @@ -3762,7 +3962,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
> }
>
> if (pixel_maps) {
> - st_src_reg temp = v->get_temp(glsl_type::vec4_type);
> + st_src_reg temp = v->store.get_temp(glsl_type::vec4_type);
> st_dst_reg temp_dst = st_dst_reg(temp);
>
> assert(st->pixel_xfer.pixelmap_texture);
> @@ -3850,7 +4050,6 @@ get_bitmap_visitor(struct st_fragment_program *fp,
> v->glsl_version = original->glsl_version;
> v->native_integers = original->native_integers;
> v->options = original->options;
> - v->next_temp = original->next_temp;
> v->num_address_regs = original->num_address_regs;
> v->samplers_used = prog->SamplersUsed = original->samplers_used;
> v->indirect_addr_temps = original->indirect_addr_temps;
> @@ -3859,7 +4058,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
>
> /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
> coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
> - src0 = v->get_temp(glsl_type::vec4_type);
> + src0 = v->store.get_temp(glsl_type::vec4_type);
> dst0 = st_dst_reg(src0);
> inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
> inst->sampler = samplerIndex;
> @@ -4542,6 +4741,7 @@ st_translate_program(
> unsigned i;
> enum pipe_error ret = PIPE_OK;
>
> + int next_temp = program->store.temp_amount();
Same comment about the variable name.
> assert(numInputs <= Elements(t->inputs));
> assert(numOutputs <= Elements(t->outputs));
>
> @@ -4695,7 +4895,7 @@ st_translate_program(
> * in sequential order. Else, we declare them on demand elsewhere.
> * (Note: the number of temporaries is equal to program->next_temp)
> */
> - for (i = 0; i < (unsigned)program->next_temp; i++) {
> + for (i = 0; i < (unsigned)next_temp; i++) {
> /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
> t->temps[i] = ureg_DECL_temporary(t->ureg);
> }
> @@ -4879,6 +5079,7 @@ get_mesa_program(struct gl_context *ctx,
> v->options = options;
> v->glsl_version = ctx->Const.GLSLVersion;
> v->native_integers = ctx->Const.NativeIntegers;
> + v->store.native_integers = v->native_integers;
>
> _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
> prog->Parameters);
> @@ -4929,6 +5130,9 @@ get_mesa_program(struct gl_context *ctx,
> }
> #endif
>
> + v->store.optimise_access();
> + v->renumber_temp_regs(v->store.reindex_table);
> +
> if (!screen->get_shader_param(screen, pipe_shader_type,
> PIPE_SHADER_CAP_OUTPUT_READ)) {
> /* Remove reads to output registers, and to varyings in vertex shaders. */
More information about the mesa-dev
mailing list