[Mesa-dev] [PATCH 1/3] glsl_to_tgsi: Create a new variable_store class replacing variables field in glsl_to_tgsi_visitor
Vincent Lejeune
vljn at ovi.com
Fri Jan 6 11:15:24 PST 2012
---
src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 392 ++++++++++++++++++++--------
1 files changed, 287 insertions(+), 105 deletions(-)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index cecceca..b4cf76e 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -230,14 +230,16 @@ public:
class variable_storage : public exec_node {
public:
+ /* Records the register file and index supplied by the caller and captures
+  * var->type. is_array is set when that type is an array, record or matrix;
+  * is_reladdressed always starts out false. var is dereferenced here, so it
+  * must not be NULL. */
variable_storage(ir_variable *var, gl_register_file file, int index)
- : file(file), index(index), var(var)
+ : file(file), index(index), type(var->type), is_array(var->type->is_array() || var->type->is_record() || var->type->is_matrix()), is_reladdressed(false)
{
/* empty */
}
gl_register_file file;
int index;
- ir_variable *var; /* variable that maps to this, if any */
+ const glsl_type *type; /* GLSL type of the value kept in this storage */
+ bool is_array;
+ bool is_reladdressed;
};
class immediate_storage : public exec_node {
@@ -286,6 +288,220 @@ public:
st_src_reg return_reg;
};
+static int type_size(const glsl_type *type);
+static int swizzle_for_size(int size);
+
+/**
+ * Register bookkeeping for PROGRAM_TEMPORARY (and other) storage.
+ *
+ * Named variables are kept in a hash table keyed by their ir_variable
+ * pointer; anonymous temporaries handed out by get_temp() are kept in the
+ * rvalue_regs array. Both are allocated out of mem_ctx, which the
+ * destructor frees.
+ */
+class variable_store {
+   friend class glsl_to_tgsi_variable_allocator;
+protected:
+   void *mem_ctx;               /* ralloc context owning every allocation below */
+   hash_table* variables;       /* ir_variable * -> variable_storage * */
+   unsigned next_temp;          /* index the next temporary will receive */
+   unsigned next_temp_array;    /* next index in the range used by optimise_access()
+                                 * for relatively-addressed storage */
+   static void reindex_reladdress(const void *, void *, void *);
+   static void reindex_non_reladdress(const void *, void *, void *);
+   void reindex_rvalue();
+   void reindex_rvalue_reladdressed();
+   variable_storage* rvalue_regs;   /* one entry per get_temp() call */
+   unsigned rvalue_regs_count;      /* number of valid entries in rvalue_regs */
+
+public:
+   bool native_integers;        /* selects the st_src_reg type chosen by get_temp() */
+   unsigned temp_amount() const;
+   unsigned temp_array_amount() const;
+   variable_store();
+   ~variable_store();
+   variable_storage *find_variable_storage(class ir_variable *var) const;
+   variable_storage *push(class ir_variable *, gl_register_file, int);
+   variable_storage *push(class ir_variable *);
+   variable_storage *retrieve_anonymous_temp(unsigned);
+   st_src_reg get_temp(const glsl_type *type);
+   void optimise_access(void);
+   unsigned *reindex_table;     /* old index -> new index, built by optimise_access() */
+
+private:
+   /* The destructor frees mem_ctx and the hash table, so a member-wise copy
+    * would release both twice. Copying is therefore forbidden: these are
+    * declared but intentionally never defined. */
+   variable_store(const variable_store &);
+   variable_store &operator=(const variable_store &);
+};
+
+/* Accessor for next_temp, i.e. the index the next temporary would receive. */
+unsigned
+variable_store::temp_amount() const
+{
+   return this->next_temp;
+}
+
+/* Accessor for next_temp_array. */
+unsigned
+variable_store::temp_array_amount() const
+{
+   return this->next_temp_array;
+}
+
+/**
+ * Create an empty store: a fresh ralloc context, an empty pointer-keyed
+ * hash table, and both register counters starting at 1.
+ *
+ * Initializers are listed in member declaration order (which is the order
+ * they actually run in), and every member gets a defined value:
+ * native_integers is read by get_temp() and reindex_table is public, so
+ * neither may be left indeterminate for callers that never set them.
+ */
+variable_store::variable_store()
+   : mem_ctx(ralloc_context(NULL)),
+     variables(hash_table_ctor(0, hash_table_pointer_hash,
+                               hash_table_pointer_compare)),
+     next_temp(1),
+     next_temp_array(1),
+     rvalue_regs(NULL),
+     rvalue_regs_count(0),
+     native_integers(false),
+     reindex_table(NULL)
+{
+}
+
+/* Tear down the hash table first, then release the ralloc context. */
+variable_store::~variable_store()
+{
+   hash_table_dtor(this->variables);
+   ralloc_free(this->mem_ctx);
+}
+
+/* Look up the storage registered for var; yields whatever hash_table_find()
+ * reports for that key, converted to a variable_storage pointer. */
+variable_storage *
+variable_store::find_variable_storage(ir_variable *var) const
+{
+   void *const found = hash_table_find(variables, var);
+   return static_cast<variable_storage *>(found);
+}
+
+/* Allocate storage for var in the given register file at the given index,
+ * register it in the hash table under var, and hand it back. */
+variable_storage*
+variable_store::push(class ir_variable *var, gl_register_file file, int index)
+{
+   variable_storage *const created =
+      new (mem_ctx) variable_storage(var, file, index);
+
+   hash_table_insert(variables, created, var);
+   return created;
+}
+
+/* Give ir a PROGRAM_TEMPORARY slot at the current next_temp and advance
+ * next_temp past the registers its type occupies. */
+variable_storage*
+variable_store::push(ir_variable *ir)
+{
+   const glsl_type *const var_type = ir->type;
+   variable_storage *const slot = push(ir, PROGRAM_TEMPORARY, next_temp);
+
+   next_temp += type_size(var_type);
+
+   const bool aggregate =
+      var_type->is_array() || var_type->is_record() || var_type->is_matrix();
+   if (aggregate)
+      slot->is_array = true;
+
+   return slot;
+}
+
+/**
+ * Find the anonymous temporary (created by get_temp()) whose register range
+ * contains reg.
+ *
+ * The returned pointer points into rvalue_regs, which get_temp() resizes with
+ * reralloc; do not keep it across a later get_temp() call.
+ *
+ * If no entry covers reg the process is terminated with exit(1) after a
+ * diagnostic on stderr.
+ */
+variable_storage*
+variable_store::retrieve_anonymous_temp(unsigned reg)
+{
+   for (unsigned i = 0; i < rvalue_regs_count; i++) {
+      const unsigned first = rvalue_regs[i].index;
+      const unsigned size = type_size(rvalue_regs[i].type);
+
+      /* Half-open range test: unlike "first + size - 1" it cannot wrap
+       * around when an entry occupies no registers. */
+      if (reg >= first && reg < first + size) {
+         return rvalue_regs + i;
+      }
+   }
+
+   /* Diagnostics belong on stderr, newline-terminated, and should say which
+    * register could not be resolved. */
+   fprintf(stderr, "Failed to get storage for temporary %u\n", reg);
+   exit(1);
+}
+
+/**
+ * In the initial pass of codegen, we assign temporary numbers to
+ * intermediate results. (not SSA -- variable assignments will reuse
+ * storage).
+ *
+ * Besides returning the source register, this appends a matching entry to
+ * rvalue_regs so the temporary can later be found by
+ * retrieve_anonymous_temp() and renumbered by optimise_access().
+ *
+ * \param type  GLSL type of the value; determines how many registers are
+ *              reserved (type_size) and the swizzle of the result.
+ * \return a PROGRAM_TEMPORARY source register starting at the old next_temp.
+ */
+st_src_reg
+variable_store::get_temp(const glsl_type *type)
+{
+   const bool is_aggregate =
+      type->is_array() || type->is_record() || type->is_matrix();
+   st_src_reg src;
+
+   /* Grow the array by one slot. The resize may move the array, so pointers
+    * previously obtained into rvalue_regs must not be used afterwards. */
+   rvalue_regs_count++;
+   rvalue_regs = reralloc(mem_ctx, rvalue_regs, variable_storage,
+                          rvalue_regs_count);
+   variable_storage &entry = rvalue_regs[rvalue_regs_count - 1];
+
+   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
+   src.file = PROGRAM_TEMPORARY;
+   src.index = next_temp;
+   src.reladdr = NULL;
+   next_temp += type_size(type);
+
+   /* No constructor runs on the new slot, so every field that is read later
+    * has to be written here. In particular both flags must get a value for
+    * every type, not only for aggregates: reindex_rvalue() and
+    * reindex_rvalue_reladdressed() test is_reladdressed on each entry. */
+   entry.file = PROGRAM_TEMPORARY;
+   entry.index = src.index;
+   entry.type = type;
+   entry.is_array = is_aggregate;
+   entry.is_reladdressed = false;
+
+   if (type->is_array() || type->is_record()) {
+      src.swizzle = SWIZZLE_NOOP;
+   } else {
+      src.swizzle = swizzle_for_size(type->vector_elements);
+   }
+   src.negate = 0;
+
+   return src;
+}
+
+/**
+ * hash_table_call_foreach() callback: move one relatively-addressed
+ * PROGRAM_TEMPORARY array to the next free slot of the array range.
+ *
+ * \param key      the ir_variable the storage belongs to (unused)
+ * \param data     the variable_storage to renumber
+ * \param closure  the owning variable_store
+ *
+ * Storage that is not a temporary, not flagged is_array, or not flagged
+ * is_reladdressed is left untouched. For storage that is moved, every old
+ * register index is mapped to its new index in store->reindex_table.
+ */
+void variable_store::reindex_reladdress(const void *key, void *data, void *closure)
+{
+   (void) key;
+   variable_storage *storage = (variable_storage *) data;
+   variable_store *store = (variable_store *) closure;
+
+   if (storage->file != PROGRAM_TEMPORARY || !storage->is_array)
+      return;
+   if (!storage->is_reladdressed)
+      return;
+
+   const unsigned old_index = storage->index;
+   const unsigned sz = type_size(storage->type);
+
+   storage->index = store->next_temp_array;
+   store->next_temp_array += sz;
+   for (unsigned i = 0; i < sz; i++) {
+      store->reindex_table[old_index + i] = storage->index + i;
+   }
+}
+
+/**
+ * hash_table_call_foreach() callback: move one PROGRAM_TEMPORARY that is not
+ * relatively addressed to the next free slot at store->next_temp.
+ *
+ * \param key      the ir_variable the storage belongs to (unused)
+ * \param data     the variable_storage to renumber
+ * \param closure  the owning variable_store
+ *
+ * Every old register index of the moved storage is mapped to its new index
+ * in store->reindex_table. Other storage is left untouched.
+ */
+void variable_store::reindex_non_reladdress(const void *key, void *data, void *closure)
+{
+   (void) key;
+   variable_storage *storage = (variable_storage *) data;
+   variable_store *store = (variable_store *) closure;
+
+   if (storage->file != PROGRAM_TEMPORARY || storage->is_reladdressed)
+      return;
+
+   const unsigned old_index = storage->index;
+   const unsigned sz = type_size(storage->type);
+
+   storage->index = store->next_temp;
+   store->next_temp += sz;
+   for (unsigned i = 0; i < sz; i++) {
+      store->reindex_table[old_index + i] = storage->index + i;
+   }
+}
+
+/* Renumber the anonymous temporaries flagged is_reladdressed: each one is
+ * moved to next_temp_array, and its old register indices are mapped to the
+ * new ones in reindex_table. */
+void
+variable_store::reindex_rvalue_reladdressed()
+{
+   for (unsigned slot = 0; slot < rvalue_regs_count; slot++) {
+      variable_storage &entry = rvalue_regs[slot];
+      if (!entry.is_reladdressed)
+         continue;
+
+      const unsigned previous = entry.index;
+      const size_t count = type_size(entry.type);
+
+      entry.index = next_temp_array;
+      next_temp_array += count;
+      for (unsigned offset = 0; offset < count; offset++)
+         reindex_table[previous + offset] = entry.index + offset;
+   }
+}
+
+/* Renumber the anonymous temporaries that are not flagged is_reladdressed:
+ * each one is moved to next_temp, and its old register indices are mapped to
+ * the new ones in reindex_table. */
+void
+variable_store::reindex_rvalue()
+{
+   for (unsigned slot = 0; slot < rvalue_regs_count; slot++) {
+      variable_storage &entry = rvalue_regs[slot];
+      if (entry.is_reladdressed)
+         continue;
+
+      const unsigned previous = entry.index;
+      const size_t count = type_size(entry.type);
+
+      entry.index = next_temp;
+      next_temp += count;
+      for (unsigned offset = 0; offset < count; offset++)
+         reindex_table[previous + offset] = entry.index + offset;
+   }
+}
+
+/* Renumber the temporaries tracked by this store and record the
+ * old-index -> new-index mapping in reindex_table.
+ *
+ * Relatively-addressed storage (named variables first, then anonymous
+ * temporaries) is numbered from 1 upwards; all remaining temporaries are
+ * numbered after that range. The statement order below matters: each step
+ * consumes the counter left behind by the previous one. */
+void
+variable_store::optimise_access(void)
+{
+ /* One slot per pre-renumbering register index (next_temp still holds the
+  * old count here). Allocated with ralloc's zeroing allocator, so an index
+  * that none of the passes below visits maps to 0. */
+ reindex_table = rzalloc_array(mem_ctx,unsigned,next_temp);
+ next_temp_array = 1;
+ hash_table_call_foreach(variables,variable_store::reindex_reladdress,this);
+ reindex_rvalue_reladdressed();
+ /* NOTE(review): next_temp_array is already the first unused index, so the
+  * "+ 1" leaves one register index unused between the two ranges --
+  * confirm whether that gap is intended. */
+ next_temp = next_temp_array + 1;
+ hash_table_call_foreach(variables,variable_store::reindex_non_reladdress,this);
+ reindex_rvalue();
+}
+
class glsl_to_tgsi_visitor : public ir_visitor {
public:
glsl_to_tgsi_visitor();
@@ -298,8 +514,6 @@ public:
struct gl_shader_program *shader_program;
struct gl_shader_compiler_options *options;
- int next_temp;
-
int num_address_regs;
int samplers_used;
bool indirect_addr_temps;
@@ -308,14 +522,11 @@ public:
int glsl_version;
bool native_integers;
- variable_storage *find_variable_storage(ir_variable *var);
-
int add_constant(gl_register_file file, gl_constant_value values[4],
int size, int datatype, GLuint *swizzle_out);
function_entry *get_function_signature(ir_function_signature *sig);
- st_src_reg get_temp(const glsl_type *type);
void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
st_src_reg st_src_reg_for_float(float val);
@@ -352,7 +563,7 @@ public:
st_src_reg result;
/** List of variable_storage */
- exec_list variables;
+ variable_store store;
/** List of immediate_storage */
exec_list immediates;
@@ -427,6 +638,7 @@ public:
int eliminate_dead_code_advanced(void);
void merge_registers(void);
void renumber_registers(void);
+ void renumber_temp_regs(unsigned*);
void *mem_ctx;
};
@@ -797,7 +1009,7 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
* that will be written by the SCS instrution, we'll need a temporary.
*/
if (scs_mask != unsigned(dst.writemask)) {
- tmp = get_temp(glsl_type::vec4_type);
+ tmp = store.get_temp(glsl_type::vec4_type);
}
for (unsigned i = 0; i < 4; i++) {
@@ -967,48 +1179,6 @@ type_size(const struct glsl_type *type)
}
}
-/**
- * In the initial pass of codegen, we assign temporary numbers to
- * intermediate results. (not SSA -- variable assignments will reuse
- * storage).
- */
-st_src_reg
-glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
-{
- st_src_reg src;
-
- src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
- src.file = PROGRAM_TEMPORARY;
- src.index = next_temp;
- src.reladdr = NULL;
- next_temp += type_size(type);
-
- if (type->is_array() || type->is_record()) {
- src.swizzle = SWIZZLE_NOOP;
- } else {
- src.swizzle = swizzle_for_size(type->vector_elements);
- }
- src.negate = 0;
-
- return src;
-}
-
-variable_storage *
-glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
-{
-
- variable_storage *entry;
-
- foreach_iter(exec_list_iterator, iter, this->variables) {
- entry = (variable_storage *)iter.get();
-
- if (entry->var == var)
- return entry;
- }
-
- return NULL;
-}
-
void
glsl_to_tgsi_visitor::visit(ir_variable *ir)
{
@@ -1040,8 +1210,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
st_dst_reg dst;
if (i == ir->num_state_slots) {
/* We'll set the index later. */
- storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
- this->variables.push_tail(storage);
+ storage = store.push(ir, PROGRAM_STATE_VAR, -1);
dst = undef_dst;
} else {
@@ -1051,10 +1220,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
*/
assert((int) ir->num_state_slots == type_size(ir->type));
- storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
- this->next_temp);
- this->variables.push_tail(storage);
- this->next_temp += type_size(ir->type);
+ storage = store.push(ir);
dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
@@ -1205,7 +1371,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
ir->operands[nonmul_operand]->accept(this);
c = this->result;
- this->result = get_temp(ir->type);
+ this->result = store.get_temp(ir->type);
result_dst = st_dst_reg(this->result);
result_dst.writemask = (1 << ir->type->vector_elements) - 1;
emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
@@ -1247,7 +1413,7 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan
b.negate = ~b.negate;
- this->result = get_temp(ir->type);
+ this->result = store.get_temp(ir->type);
emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
return true;
@@ -1288,7 +1454,7 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
new_inst->saturate = true;
} else {
- this->result = get_temp(ir->type);
+ this->result = store.get_temp(ir->type);
st_dst_reg result_dst = st_dst_reg(this->result);
result_dst.writemask = (1 << ir->type->vector_elements) - 1;
glsl_to_tgsi_instruction *inst;
@@ -1309,7 +1475,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
emit_arl(ir, address_reg, *reg->reladdr);
if (*num_reladdr != 1) {
- st_src_reg temp = get_temp(glsl_type::vec4_type);
+ st_src_reg temp = store.get_temp(glsl_type::vec4_type);
emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
*reg = temp;
@@ -1378,7 +1544,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
/* Storage for our result. Ideally for an assignment we'd be using
* the actual storage for the result here, instead.
*/
- result_src = get_temp(ir->type);
+ result_src = store.get_temp(ir->type);
/* convenience for the emit functions below. */
result_dst = st_dst_reg(result_src);
/* Limit writes to the channels that will be used by result_src later.
@@ -1508,7 +1674,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
/* "==" operator producing a scalar boolean. */
if (ir->operands[0]->type->is_vector() ||
ir->operands[1]->type->is_vector()) {
- st_src_reg temp = get_temp(native_integers ?
+ st_src_reg temp = store.get_temp(native_integers ?
glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
glsl_type::vec4_type);
@@ -1566,7 +1732,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
/* "!=" operator producing a scalar boolean. */
if (ir->operands[0]->type->is_vector() ||
ir->operands[1]->type->is_vector()) {
- st_src_reg temp = get_temp(native_integers ?
+ st_src_reg temp = store.get_temp(native_integers ?
glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
glsl_type::vec4_type);
emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
@@ -2220,7 +2386,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
* If TGSI had a UCMP instruction or similar, this extra
* instruction would not be necessary.
*/
- condition_temp = get_temp(glsl_type::vec4_type);
+ condition_temp = store.get_temp(glsl_type::vec4_type);
condition.negate = 0;
emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
condition_temp.swizzle = condition.swizzle;
@@ -2277,7 +2443,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
* get lucky, copy propagation will eliminate the extra moves.
*/
if (ir->type->base_type == GLSL_TYPE_STRUCT) {
- st_src_reg temp_base = get_temp(ir->type);
+ st_src_reg temp_base = store.get_temp(ir->type);
st_dst_reg temp = st_dst_reg(temp_base);
foreach_iter(exec_list_iterator, iter, ir->components) {
@@ -2301,7 +2467,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
}
if (ir->type->is_array()) {
- st_src_reg temp_base = get_temp(ir->type);
+ st_src_reg temp_base = store.get_temp(ir->type);
st_dst_reg temp = st_dst_reg(temp_base);
int size = type_size(ir->type->fields.array);
@@ -2324,7 +2490,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
}
if (ir->type->is_matrix()) {
- st_src_reg mat = get_temp(ir->type);
+ st_src_reg mat = store.get_temp(ir->type);
st_dst_reg mat_column = st_dst_reg(mat);
for (i = 0; i < ir->type->matrix_columns; i++) {
@@ -2414,18 +2580,14 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
ir_variable *param = (ir_variable *)iter.get();
variable_storage *storage;
- storage = find_variable_storage(param);
+ storage = store.find_variable_storage(param);
assert(!storage);
- storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
- this->next_temp);
- this->variables.push_tail(storage);
-
- this->next_temp += type_size(param->type);
+ storage =store.push(param);
}
if (!sig->return_type->is_void()) {
- entry->return_reg = get_temp(sig->return_type);
+ entry->return_reg = store.get_temp(sig->return_type);
} else {
entry->return_reg = undef_src;
}
@@ -2450,7 +2612,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
if (param->mode == ir_var_in ||
param->mode == ir_var_inout) {
- variable_storage *storage = find_variable_storage(param);
+ variable_storage *storage = store.find_variable_storage(param);
assert(storage);
param_rval->accept(this);
@@ -2486,7 +2648,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
if (param->mode == ir_var_out ||
param->mode == ir_var_inout) {
- variable_storage *storage = find_variable_storage(param);
+ variable_storage *storage = store.find_variable_storage(param);
assert(storage);
st_src_reg r;
@@ -2530,7 +2692,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
* we're doing plain old texturing. The optimization passes on
* glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
*/
- coord = get_temp(glsl_type::vec4_type);
+ coord = store.get_temp(glsl_type::vec4_type);
coord_dst = st_dst_reg(coord);
emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
}
@@ -2543,7 +2705,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
/* Storage for our result. Ideally for an assignment we'd be using
* the actual storage for the result here, instead.
*/
- result_src = get_temp(glsl_type::vec4_type);
+ result_src = store.get_temp(glsl_type::vec4_type);
result_dst = st_dst_reg(result_src);
switch (ir->op) {
@@ -2613,7 +2775,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
*/
ir->shadow_comparitor->accept(this);
- tmp_src = get_temp(glsl_type::vec4_type);
+ tmp_src = store.get_temp(glsl_type::vec4_type);
st_dst_reg tmp_dst = st_dst_reg(tmp_src);
/* Projective division not allowed for array samplers. */
@@ -2779,7 +2941,7 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
* have something to set cond_update on.
*/
if (cond_inst == prev_inst) {
- st_src_reg temp = get_temp(glsl_type::bool_type);
+ st_src_reg temp = store.get_temp(glsl_type::bool_type);
cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
}
cond_inst->cond_update = GL_TRUE;
@@ -2805,7 +2967,6 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
{
result.file = PROGRAM_UNDEFINED;
- next_temp = 1;
next_signature_id = 1;
num_immediates = 0;
current_function = NULL;
@@ -3266,17 +3427,18 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index)
void
glsl_to_tgsi_visitor::copy_propagate(void)
{
+ int next_temp = store.temp_amount();
glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
glsl_to_tgsi_instruction *,
- this->next_temp * 4);
- int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+ next_temp * 4);
+ int *acp_level = rzalloc_array(mem_ctx, int, next_temp * 4);
int level = 0;
foreach_iter(exec_list_iterator, iter, this->instructions) {
glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
assert(inst->dst.file != PROGRAM_TEMPORARY
- || inst->dst.index < this->next_temp);
+ || inst->dst.index < next_temp);
/* First, do any copy propagation possible into the src regs. */
for (int r = 0; r < 3; r++) {
@@ -3336,7 +3498,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
case TGSI_OPCODE_BGNLOOP:
case TGSI_OPCODE_ENDLOOP:
/* End of a basic block, clear the ACP entirely. */
- memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+ memset(acp, 0, sizeof(*acp) * next_temp * 4);
break;
case TGSI_OPCODE_IF:
@@ -3348,7 +3510,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
/* Clear all channels written inside the block from the ACP, but
* leaving those that were not touched.
*/
- for (int r = 0; r < this->next_temp; r++) {
+ for (int r = 0; r < next_temp; r++) {
for (int c = 0; c < 4; c++) {
if (!acp[4 * r + c])
continue;
@@ -3369,13 +3531,13 @@ glsl_to_tgsi_visitor::copy_propagate(void)
/* Any temporary might be written, so no copy propagation
* across this instruction.
*/
- memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+ memset(acp, 0, sizeof(*acp) * next_temp * 4);
} else if (inst->dst.file == PROGRAM_OUTPUT &&
inst->dst.reladdr) {
/* Any output might be written, so no copy propagation
* from outputs across this instruction.
*/
- for (int r = 0; r < this->next_temp; r++) {
+ for (int r = 0; r < next_temp; r++) {
for (int c = 0; c < 4; c++) {
if (!acp[4 * r + c])
continue;
@@ -3396,7 +3558,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
}
/* Clear where it's used as src. */
- for (int r = 0; r < this->next_temp; r++) {
+ for (int r = 0; r < next_temp; r++) {
for (int c = 0; c < 4; c++) {
if (!acp[4 * r + c])
continue;
@@ -3457,8 +3619,8 @@ void
glsl_to_tgsi_visitor::eliminate_dead_code(void)
{
int i;
-
- for (i=0; i < this->next_temp; i++) {
+ int next_temp = store.temp_amount();
+ for (i=0; i < next_temp; i++) {
int last_read = get_last_temp_read(i);
int j = 0;
@@ -3492,10 +3654,11 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
int
glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
{
+ int next_temp = store.temp_amount();
glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
glsl_to_tgsi_instruction *,
- this->next_temp * 4);
- int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+ next_temp * 4);
+ int *write_level = rzalloc_array(mem_ctx, int, next_temp * 4);
int level = 0;
int removed = 0;
@@ -3503,7 +3666,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
assert(inst->dst.file != PROGRAM_TEMPORARY
- || inst->dst.index < this->next_temp);
+ || inst->dst.index < next_temp);
switch (inst->op) {
case TGSI_OPCODE_BGNLOOP:
@@ -3518,7 +3681,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
* dead code of this type, so it shouldn't make a difference as long as
* the dead code elimination pass in the GLSL compiler does its job.
*/
- memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+ memset(writes, 0, sizeof(*writes) * next_temp * 4);
break;
case TGSI_OPCODE_ENDIF:
@@ -3526,7 +3689,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
/* Promote the recorded level of all channels written inside the
* preceding if or else block to the level above the if/else block.
*/
- for (int r = 0; r < this->next_temp; r++) {
+ for (int r = 0; r < next_temp; r++) {
for (int c = 0; c < 4; c++) {
if (!writes[4 * r + c])
continue;
@@ -3554,7 +3717,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
/* Any temporary might be read, so no dead code elimination
* across this instruction.
*/
- memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+ memset(writes, 0, sizeof(*writes) * next_temp * 4);
} else if (inst->src[i].file == PROGRAM_TEMPORARY) {
/* Clear where it's used as src. */
int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
@@ -3595,7 +3758,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
}
/* Anything still in the write array at this point is dead code. */
- for (int r = 0; r < this->next_temp; r++) {
+ for (int r = 0; r < next_temp; r++) {
for (int c = 0; c < 4; c++) {
glsl_to_tgsi_instruction *inst = writes[4 * r + c];
if (inst)
@@ -3682,6 +3845,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
* by optimization passes. */
void
glsl_to_tgsi_visitor::renumber_registers(void)
+void glsl_to_tgsi_visitor::renumber_temp_regs(unsigned *reindex_table)
{
int i = 0;
int new_index = 0;
@@ -3691,6 +3855,21 @@ glsl_to_tgsi_visitor::renumber_registers(void)
if (i != new_index)
rename_temp_register(i, new_index);
new_index++;
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+ unsigned j;
+
+ for (j=0; j < num_inst_src_regs(inst->op); j++) {
+ if (inst->src[j].file == PROGRAM_TEMPORARY) {
+ unsigned index = inst->src[j].index;
+ inst->src[j].index = reindex_table[index];
+ }
+ }
+
+ if (inst->dst.file == PROGRAM_TEMPORARY) {
+ unsigned index = inst->dst.index;
+ inst->dst.index = reindex_table[index];
+ }
}
this->next_temp = new_index;
@@ -3720,7 +3899,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
v->glsl_version = original->glsl_version;
v->native_integers = original->native_integers;
v->options = original->options;
- v->next_temp = original->next_temp;
v->num_address_regs = original->num_address_regs;
v->samplers_used = prog->SamplersUsed = original->samplers_used;
v->indirect_addr_temps = original->indirect_addr_temps;
@@ -3732,7 +3910,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
* TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
*/
coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
- src0 = v->get_temp(glsl_type::vec4_type);
+ src0 = v->store.get_temp(glsl_type::vec4_type);
dst0 = st_dst_reg(src0);
inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
inst->sampler = 0;
@@ -3762,7 +3940,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
}
if (pixel_maps) {
- st_src_reg temp = v->get_temp(glsl_type::vec4_type);
+ st_src_reg temp = v->store.get_temp(glsl_type::vec4_type);
st_dst_reg temp_dst = st_dst_reg(temp);
assert(st->pixel_xfer.pixelmap_texture);
@@ -3850,7 +4028,6 @@ get_bitmap_visitor(struct st_fragment_program *fp,
v->glsl_version = original->glsl_version;
v->native_integers = original->native_integers;
v->options = original->options;
- v->next_temp = original->next_temp;
v->num_address_regs = original->num_address_regs;
v->samplers_used = prog->SamplersUsed = original->samplers_used;
v->indirect_addr_temps = original->indirect_addr_temps;
@@ -3859,7 +4036,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
/* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
- src0 = v->get_temp(glsl_type::vec4_type);
+ src0 = v->store.get_temp(glsl_type::vec4_type);
dst0 = st_dst_reg(src0);
inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
inst->sampler = samplerIndex;
@@ -4542,6 +4719,7 @@ st_translate_program(
unsigned i;
enum pipe_error ret = PIPE_OK;
+ int next_temp = program->store.temp_amount();
assert(numInputs <= Elements(t->inputs));
assert(numOutputs <= Elements(t->outputs));
@@ -4695,7 +4873,7 @@ st_translate_program(
* in sequential order. Else, we declare them on demand elsewhere.
* (Note: the number of temporaries is equal to program->next_temp)
*/
- for (i = 0; i < (unsigned)program->next_temp; i++) {
+ for (i = 0; i < (unsigned)next_temp; i++) {
/* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
t->temps[i] = ureg_DECL_temporary(t->ureg);
}
@@ -4879,6 +5057,7 @@ get_mesa_program(struct gl_context *ctx,
v->options = options;
v->glsl_version = ctx->Const.GLSLVersion;
v->native_integers = ctx->Const.NativeIntegers;
+ v->store.native_integers = v->native_integers;
_mesa_generate_parameters_list_for_uniforms(shader_program, shader,
prog->Parameters);
@@ -4929,6 +5108,9 @@ get_mesa_program(struct gl_context *ctx,
}
#endif
+ v->store.optimise_access();
+ v->renumber_temp_regs(v->store.reindex_table);
+
if (!screen->get_shader_param(screen, pipe_shader_type,
PIPE_SHADER_CAP_OUTPUT_READ)) {
/* Remove reads to output registers, and to varyings in vertex shaders. */
--
1.7.7
More information about the mesa-dev
mailing list