[Mesa-dev] [PATCH 01/32] i965: Factor out virtual GRF allocation to a separate object.
Connor Abbott
cwabbott0 at gmail.com
Fri Feb 6 12:58:10 PST 2015
On Fri, Feb 6, 2015 at 9:42 AM, Francisco Jerez <currojerez at riseup.net> wrote:
> Right now virtual GRF book-keeping and allocation are performed in each
> visitor class separately (among a hundred other things),
> leading to duplicated logic in each visitor and preventing layering as
> it forces any code that manipulates i965 IR and needs to allocate
> virtual registers to depend on the specific visitor that happens to be
> used to translate from GLSL IR.
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 77 ++++++++-----------
> src/mesa/drivers/dri/i965/brw_fs.h | 8 +-
> src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 +-
> .../drivers/dri/i965/brw_fs_live_variables.cpp | 8 +-
> src/mesa/drivers/dri/i965/brw_fs_live_variables.h | 2 +-
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 +-
> src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 50 ++++++------
> .../drivers/dri/i965/brw_fs_register_coalesce.cpp | 8 +-
> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 49 ++++++------
> src/mesa/drivers/dri/i965/brw_ir_allocator.h | 89 ++++++++++++++++++++++
> .../drivers/dri/i965/brw_schedule_instructions.cpp | 10 +--
> src/mesa/drivers/dri/i965/brw_shader.h | 6 ++
> src/mesa/drivers/dri/i965/brw_vec4.cpp | 20 ++---
> src/mesa/drivers/dri/i965/brw_vec4.h | 12 ---
> .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 8 +-
> src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 2 +-
> .../drivers/dri/i965/brw_vec4_live_variables.cpp | 10 +--
> .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 43 +++++------
> src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 36 ++-------
> 19 files changed, 239 insertions(+), 203 deletions(-)
> create mode 100644 src/mesa/drivers/dri/i965/brw_ir_allocator.h
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 83c09e1..3acbb0b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -424,7 +424,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
>
> assert(dst.width % 8 == 0);
> int regs_written = 4 * (dst.width / 8) * scale;
> - fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(regs_written),
> + fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written),
> dst.type, dst.width);
> inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset);
> inst->regs_written = regs_written;
> @@ -688,7 +688,7 @@ fs_visitor::get_timestamp()
> 0),
> BRW_REGISTER_TYPE_UD));
>
> - fs_reg dst = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 4);
> + fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4);
>
> fs_inst *mov = emit(MOV(dst, ts));
> /* We want to read the 3 fields we care about even if it's not enabled in
> @@ -764,7 +764,7 @@ fs_visitor::emit_shader_time_end()
>
> fs_reg start = shader_start_time;
> start.negate = true;
> - fs_reg diff = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 1);
> + fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1);
> emit(ADD(diff, start, shader_end_time));
>
> /* If there were no instructions between the two timestamp gets, the diff
> @@ -1029,26 +1029,11 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
> }
> }
>
> -int
> -fs_visitor::virtual_grf_alloc(int size)
> -{
> - if (virtual_grf_array_size <= virtual_grf_count) {
> - if (virtual_grf_array_size == 0)
> - virtual_grf_array_size = 16;
> - else
> - virtual_grf_array_size *= 2;
> - virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
> - virtual_grf_array_size);
> - }
> - virtual_grf_sizes[virtual_grf_count] = size;
> - return virtual_grf_count++;
> -}
> -
> fs_reg
> fs_visitor::vgrf(const glsl_type *const type)
> {
> int reg_width = dispatch_width / 8;
> - return fs_reg(GRF, virtual_grf_alloc(type_size(type) * reg_width),
> + return fs_reg(GRF, alloc.allocate(type_size(type) * reg_width),
> brw_type_for_base_type(type), dispatch_width);
> }
>
> @@ -1056,7 +1041,7 @@ fs_reg
> fs_visitor::vgrf(int num_components)
> {
> int reg_width = dispatch_width / 8;
> - return fs_reg(GRF, virtual_grf_alloc(num_components * reg_width),
> + return fs_reg(GRF, alloc.allocate(num_components * reg_width),
> BRW_REGISTER_TYPE_F, dispatch_width);
> }
>
> @@ -1912,14 +1897,14 @@ fs_visitor::assign_vs_urb_setup()
> void
> fs_visitor::split_virtual_grfs()
> {
> - int num_vars = this->virtual_grf_count;
> + int num_vars = this->alloc.count;
>
> /* Count the total number of registers */
> int reg_count = 0;
> int vgrf_to_reg[num_vars];
> for (int i = 0; i < num_vars; i++) {
> vgrf_to_reg[i] = reg_count;
> - reg_count += virtual_grf_sizes[i];
> + reg_count += alloc.sizes[i];
> }
>
> /* An array of "split points". For each register slot, this indicates
> @@ -1935,14 +1920,14 @@ fs_visitor::split_virtual_grfs()
> foreach_block_and_inst(block, fs_inst, inst, cfg) {
> if (inst->dst.file == GRF) {
> int reg = vgrf_to_reg[inst->dst.reg];
> - for (int j = 1; j < this->virtual_grf_sizes[inst->dst.reg]; j++)
> + for (unsigned j = 1; j < this->alloc.sizes[inst->dst.reg]; j++)
> split_points[reg + j] = true;
> }
>
> for (int i = 0; i < inst->sources; i++) {
> if (inst->src[i].file == GRF) {
> int reg = vgrf_to_reg[inst->src[i].reg];
> - for (int j = 1; j < this->virtual_grf_sizes[inst->src[i].reg]; j++)
> + for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].reg]; j++)
> split_points[reg + j] = true;
> }
> }
> @@ -1988,13 +1973,13 @@ fs_visitor::split_virtual_grfs()
> int offset = 1;
>
> /* j > 0 case */
> - for (int j = 1; j < virtual_grf_sizes[i]; j++) {
> + for (unsigned j = 1; j < alloc.sizes[i]; j++) {
> /* If this is a split point, reset the offset to 0 and allocate a
> * new virtual GRF for the previous offset many registers
> */
> if (split_points[reg]) {
> assert(offset <= MAX_VGRF_SIZE);
> - int grf = virtual_grf_alloc(offset);
> + int grf = alloc.allocate(offset);
> for (int k = reg - offset; k < reg; k++)
> new_virtual_grf[k] = grf;
> offset = 0;
> @@ -2006,7 +1991,7 @@ fs_visitor::split_virtual_grfs()
>
> /* The last one gets the original register number */
> assert(offset <= MAX_VGRF_SIZE);
> - virtual_grf_sizes[i] = offset;
> + alloc.sizes[i] = offset;
> for (int k = reg - offset; k < reg; k++)
> new_virtual_grf[k] = i;
> }
> @@ -2017,14 +2002,14 @@ fs_visitor::split_virtual_grfs()
> reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
> inst->dst.reg = new_virtual_grf[reg];
> inst->dst.reg_offset = new_reg_offset[reg];
> - assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]);
> + assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
> }
> for (int i = 0; i < inst->sources; i++) {
> if (inst->src[i].file == GRF) {
> reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
> inst->src[i].reg = new_virtual_grf[reg];
> inst->src[i].reg_offset = new_reg_offset[reg];
> - assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]);
> + assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
> }
> }
> }
> @@ -2044,7 +2029,7 @@ bool
> fs_visitor::compact_virtual_grfs()
> {
> bool progress = false;
> - int remap_table[this->virtual_grf_count];
> + int remap_table[this->alloc.count];
> memset(remap_table, -1, sizeof(remap_table));
>
> /* Mark which virtual GRFs are used. */
> @@ -2060,7 +2045,7 @@ fs_visitor::compact_virtual_grfs()
>
> /* Compact the GRF arrays. */
> int new_index = 0;
> - for (int i = 0; i < this->virtual_grf_count; i++) {
> + for (unsigned i = 0; i < this->alloc.count; i++) {
> if (remap_table[i] == -1) {
> /* We just found an unused register. This means that we are
> * actually going to compact something.
> @@ -2068,13 +2053,13 @@ fs_visitor::compact_virtual_grfs()
> progress = true;
> } else {
> remap_table[i] = new_index;
> - virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
> + alloc.sizes[new_index] = alloc.sizes[i];
> invalidate_live_intervals();
> ++new_index;
> }
> }
>
> - this->virtual_grf_count = new_index;
> + this->alloc.count = new_index;
>
> /* Patch all the instructions to use the newly renumbered registers */
> foreach_block_and_inst(block, fs_inst, inst, cfg) {
> @@ -2293,7 +2278,7 @@ fs_visitor::demote_pull_constants()
> } else {
> const unsigned num_regs = 2; /* Fetch 4 owords at a time. */
> const unsigned base = (pull_index * 4) & ~(32 * num_regs - 1);
> - const fs_reg dst(GRF, virtual_grf_alloc(num_regs),
> + const fs_reg dst(GRF, alloc.allocate(num_regs),
> BRW_REGISTER_TYPE_F, dispatch_width);
> fs_inst *pull =
> new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
> @@ -2469,8 +2454,8 @@ fs_visitor::opt_register_renaming()
> bool progress = false;
> int depth = 0;
>
> - int remap[virtual_grf_count];
> - memset(remap, -1, sizeof(int) * virtual_grf_count);
> + int remap[alloc.count];
> + memset(remap, -1, sizeof(int) * alloc.count);
>
> foreach_block_and_inst(block, fs_inst, inst, cfg) {
> if (inst->opcode == BRW_OPCODE_IF || inst->opcode == BRW_OPCODE_DO) {
> @@ -2494,12 +2479,12 @@ fs_visitor::opt_register_renaming()
>
> if (depth == 0 &&
> inst->dst.file == GRF &&
> - virtual_grf_sizes[inst->dst.reg] == inst->dst.width / 8 &&
> + alloc.sizes[inst->dst.reg] == inst->dst.width / 8 &&
> !inst->is_partial_write()) {
> if (remap[dst] == -1) {
> remap[dst] = dst;
> } else {
> - remap[dst] = virtual_grf_alloc(inst->dst.width / 8);
> + remap[dst] = alloc.allocate(inst->dst.width / 8);
> inst->dst.reg = remap[dst];
> progress = true;
> }
> @@ -3068,11 +3053,11 @@ fs_visitor::lower_load_payload()
> {
> bool progress = false;
>
> - int vgrf_to_reg[virtual_grf_count];
> + int vgrf_to_reg[alloc.count];
> int reg_count = 16; /* Leave room for MRF */
> - for (int i = 0; i < virtual_grf_count; ++i) {
> + for (unsigned i = 0; i < alloc.count; ++i) {
> vgrf_to_reg[i] = reg_count;
> - reg_count += virtual_grf_sizes[i];
> + reg_count += alloc.sizes[i];
> }
>
> struct {
> @@ -3242,7 +3227,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
> fprintf(file, "vgrf%d", inst->dst.reg);
> if (inst->dst.width != dispatch_width)
> fprintf(file, "@%d", inst->dst.width);
> - if (virtual_grf_sizes[inst->dst.reg] != inst->dst.width / 8 ||
> + if (alloc.sizes[inst->dst.reg] != inst->dst.width / 8 ||
> inst->dst.subreg_offset)
> fprintf(file, "+%d.%d",
> inst->dst.reg_offset, inst->dst.subreg_offset);
> @@ -3302,7 +3287,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
> fprintf(file, "vgrf%d", inst->src[i].reg);
> if (inst->src[i].width != dispatch_width)
> fprintf(file, "@%d", inst->src[i].width);
> - if (virtual_grf_sizes[inst->src[i].reg] != inst->src[i].width / 8 ||
> + if (alloc.sizes[inst->src[i].reg] != inst->src[i].width / 8 ||
> inst->src[i].subreg_offset)
> fprintf(file, "+%d.%d", inst->src[i].reg_offset,
> inst->src[i].subreg_offset);
> @@ -3553,9 +3538,9 @@ fs_visitor::calculate_register_pressure()
>
> regs_live_at_ip = rzalloc_array(mem_ctx, int, num_instructions);
>
> - for (int reg = 0; reg < virtual_grf_count; reg++) {
> + for (unsigned reg = 0; reg < alloc.count; reg++) {
> for (int ip = virtual_grf_start[reg]; ip <= virtual_grf_end[reg]; ip++)
> - regs_live_at_ip[ip] += virtual_grf_sizes[reg];
> + regs_live_at_ip[ip] += alloc.sizes[reg];
> }
> }
>
> @@ -3643,7 +3628,7 @@ fs_visitor::fixup_3src_null_dest()
> {
> foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
> if (inst->is_3src() && inst->dst.is_null()) {
> - inst->dst = fs_reg(GRF, virtual_grf_alloc(dispatch_width / 8),
> + inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
> inst->dst.type);
> }
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 11514d3..449faae 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -321,7 +321,6 @@ public:
> void init();
>
> fs_reg *variable_storage(ir_variable *var);
> - int virtual_grf_alloc(int size);
> fs_reg vgrf(const glsl_type *const type);
> fs_reg vgrf(int num_components);
> void import_uniforms(fs_visitor *v);
> @@ -643,9 +642,6 @@ public:
>
> int *param_size;
>
> - int *virtual_grf_sizes;
> - int virtual_grf_count;
> - int virtual_grf_array_size;
> int *virtual_grf_start;
> int *virtual_grf_end;
> brw::fs_live_variables *live_intervals;
> @@ -679,7 +675,7 @@ public:
> bool do_dual_src;
> int first_non_payload_grf;
> /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
> - int max_grf;
> + unsigned max_grf;
>
> fs_reg *fp_temp_regs;
> fs_reg *fp_input_regs;
> @@ -730,7 +726,7 @@ public:
> fs_reg shader_start_time;
> fs_reg userplane[MAX_CLIP_PLANES];
>
> - int grf_used;
> + unsigned grf_used;
> bool spilled_any_registers;
>
> const unsigned dispatch_width; /**< 8 or 16 */
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> index 11cb327..ae069bb 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> @@ -224,7 +224,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
> assert(written % dst_width == 0);
>
> fs_reg orig_dst = entry->generator->dst;
> - fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
> + fs_reg tmp = fs_reg(GRF, alloc.allocate(written),
> orig_dst.type, orig_dst.width);
> entry->tmp = tmp;
> entry->generator->dst = tmp;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> index 189a119..968219b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> @@ -278,17 +278,17 @@ fs_live_variables::fs_live_variables(fs_visitor *v, const cfg_t *cfg)
> {
> mem_ctx = ralloc_context(NULL);
>
> - num_vgrfs = v->virtual_grf_count;
> + num_vgrfs = v->alloc.count;
> num_vars = 0;
> var_from_vgrf = rzalloc_array(mem_ctx, int, num_vgrfs);
> for (int i = 0; i < num_vgrfs; i++) {
> var_from_vgrf[i] = num_vars;
> - num_vars += v->virtual_grf_sizes[i];
> + num_vars += v->alloc.sizes[i];
> }
>
> vgrf_from_var = rzalloc_array(mem_ctx, int, num_vars);
> for (int i = 0; i < num_vgrfs; i++) {
> - for (int j = 0; j < v->virtual_grf_sizes[i]; j++) {
> + for (unsigned j = 0; j < v->alloc.sizes[i]; j++) {
> vgrf_from_var[var_from_vgrf[i] + j] = i;
> }
> }
> @@ -344,7 +344,7 @@ fs_visitor::calculate_live_intervals()
> if (this->live_intervals)
> return;
>
> - int num_vgrfs = this->virtual_grf_count;
> + int num_vgrfs = this->alloc.count;
> ralloc_free(this->virtual_grf_start);
> ralloc_free(this->virtual_grf_end);
> virtual_grf_start = ralloc_array(mem_ctx, int, num_vgrfs);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
> index a52f922..a9b61aa 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
> @@ -77,7 +77,7 @@ public:
> /**
> * Map from any index in block_data to the virtual GRF containing it.
> *
> - * For virtual_grf_sizes of [1, 2, 3], vgrf_from_var would contain
> + * For alloc.sizes of [1, 2, 3], vgrf_from_var would contain
> * [0, 1, 1, 2, 2, 2].
> */
> int *vgrf_from_var;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 586b0d5..ee2fd1b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -1294,7 +1294,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
> const unsigned num_regs = 2; /* Fetch 4 owords at a time. */
> const unsigned base = instr->const_index[0] & ~(32 * num_regs - 1);
> const unsigned delta = instr->const_index[0] & (32 * num_regs - 1);
> - fs_reg packed_consts(GRF, virtual_grf_alloc(num_regs),
> + fs_reg packed_consts(GRF, alloc.allocate(num_regs),
> dest.type, dispatch_width);
> fs_inst *pull = emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
> packed_consts, surf_index, fs_reg(base));
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> index bcd657b..ebe0b12 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> @@ -31,7 +31,7 @@
> #include "glsl/ir_optimization.h"
>
> static void
> -assign_reg(int *reg_hw_locations, fs_reg *reg)
> +assign_reg(unsigned *reg_hw_locations, fs_reg *reg)
> {
> if (reg->file == GRF) {
> assert(reg->reg_offset >= 0);
> @@ -43,17 +43,17 @@ assign_reg(int *reg_hw_locations, fs_reg *reg)
> void
> fs_visitor::assign_regs_trivial()
> {
> - int hw_reg_mapping[this->virtual_grf_count + 1];
> - int i;
> + unsigned hw_reg_mapping[this->alloc.count + 1];
> + unsigned i;
> int reg_width = dispatch_width / 8;
>
> /* Note that compressed instructions require alignment to 2 registers. */
> hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
> - for (i = 1; i <= this->virtual_grf_count; i++) {
> + for (i = 1; i <= this->alloc.count; i++) {
> hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
> - this->virtual_grf_sizes[i - 1]);
> + this->alloc.sizes[i - 1]);
> }
> - this->grf_used = hw_reg_mapping[this->virtual_grf_count];
> + this->grf_used = hw_reg_mapping[this->alloc.count];
>
> foreach_block_and_inst(block, fs_inst, inst, cfg) {
> assign_reg(hw_reg_mapping, &inst->dst);
> @@ -66,7 +66,7 @@ fs_visitor::assign_regs_trivial()
> fail("Ran out of regs on trivial allocator (%d/%d)\n",
> this->grf_used, max_grf);
> } else {
> - this->virtual_grf_count = this->grf_used;
> + this->alloc.count = this->grf_used;
> }
>
> }
> @@ -427,7 +427,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
> * live between the start of the program and our last use of the payload
> * node.
> */
> - for (int j = 0; j < this->virtual_grf_count; j++) {
> + for (unsigned j = 0; j < this->alloc.count; j++) {
> /* Note that we use a <= comparison, unlike virtual_grf_interferes(),
> * in order to not have to worry about the uniform issue described in
> * calculate_live_intervals().
> @@ -515,7 +515,7 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
> * that are used as conflicting with all virtual GRFs.
> */
> if (mrf_used[i]) {
> - for (int j = 0; j < this->virtual_grf_count; j++) {
> + for (unsigned j = 0; j < this->alloc.count; j++) {
> ra_add_node_interference(g, first_mrf_node + i, j);
> }
> }
> @@ -533,12 +533,12 @@ fs_visitor::assign_regs(bool allow_spilling)
> * for reg_width == 2.
> */
> int reg_width = dispatch_width / 8;
> - int hw_reg_mapping[this->virtual_grf_count];
> + unsigned hw_reg_mapping[this->alloc.count];
> int payload_node_count = ALIGN(this->first_non_payload_grf, reg_width);
> int rsi = reg_width - 1; /* Which screen->wm_reg_sets[] to use */
> calculate_live_intervals();
>
> - int node_count = this->virtual_grf_count;
> + int node_count = this->alloc.count;
> int first_payload_node = node_count;
> node_count += payload_node_count;
> int first_mrf_hack_node = node_count;
> @@ -547,8 +547,8 @@ fs_visitor::assign_regs(bool allow_spilling)
> struct ra_graph *g =
> ra_alloc_interference_graph(screen->wm_reg_sets[rsi].regs, node_count);
>
> - for (int i = 0; i < this->virtual_grf_count; i++) {
> - unsigned size = this->virtual_grf_sizes[i];
> + for (unsigned i = 0; i < this->alloc.count; i++) {
> + unsigned size = this->alloc.sizes[i];
> int c;
>
> assert(size <= ARRAY_SIZE(screen->wm_reg_sets[rsi].classes) &&
> @@ -572,7 +572,7 @@ fs_visitor::assign_regs(bool allow_spilling)
>
> ra_set_node_class(g, i, c);
>
> - for (int j = 0; j < i; j++) {
> + for (unsigned j = 0; j < i; j++) {
> if (virtual_grf_interferes(i, j)) {
> ra_add_node_interference(g, i, j);
> }
> @@ -595,7 +595,7 @@ fs_visitor::assign_regs(bool allow_spilling)
> * highest register that works.
> */
> if (inst->eot) {
> - int size = virtual_grf_sizes[inst->src[0].reg];
> + int size = alloc.sizes[inst->src[0].reg];
> int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
> ra_set_node_reg(g, inst->src[0].reg, reg);
> break;
> @@ -661,12 +661,12 @@ fs_visitor::assign_regs(bool allow_spilling)
> * numbers.
> */
> this->grf_used = payload_node_count;
> - for (int i = 0; i < this->virtual_grf_count; i++) {
> + for (unsigned i = 0; i < this->alloc.count; i++) {
> int reg = ra_get_node_reg(g, i);
>
> hw_reg_mapping[i] = screen->wm_reg_sets[rsi].ra_reg_to_grf[reg];
> this->grf_used = MAX2(this->grf_used,
> - hw_reg_mapping[i] + this->virtual_grf_sizes[i]);
> + hw_reg_mapping[i] + this->alloc.sizes[i]);
> }
>
> foreach_block_and_inst(block, fs_inst, inst, cfg) {
> @@ -676,7 +676,7 @@ fs_visitor::assign_regs(bool allow_spilling)
> }
> }
>
> - this->virtual_grf_count = this->grf_used;
> + this->alloc.count = this->grf_used;
>
> ralloc_free(g);
>
> @@ -747,10 +747,10 @@ int
> fs_visitor::choose_spill_reg(struct ra_graph *g)
> {
> float loop_scale = 1.0;
> - float spill_costs[this->virtual_grf_count];
> - bool no_spill[this->virtual_grf_count];
> + float spill_costs[this->alloc.count];
> + bool no_spill[this->alloc.count];
>
> - for (int i = 0; i < this->virtual_grf_count; i++) {
> + for (unsigned i = 0; i < this->alloc.count; i++) {
> spill_costs[i] = 0.0;
> no_spill[i] = false;
> }
> @@ -811,7 +811,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
> }
> }
>
> - for (int i = 0; i < this->virtual_grf_count; i++) {
> + for (unsigned i = 0; i < this->alloc.count; i++) {
> if (!no_spill[i])
> ra_set_node_spill_cost(g, i, spill_costs[i]);
> }
> @@ -822,7 +822,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
> void
> fs_visitor::spill_reg(int spill_reg)
> {
> - int size = virtual_grf_sizes[spill_reg];
> + int size = alloc.sizes[spill_reg];
> unsigned int spill_offset = last_scratch;
> assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
> int spill_base_mrf = dispatch_width > 8 ? 13 : 14;
> @@ -862,7 +862,7 @@ fs_visitor::spill_reg(int spill_reg)
> int regs_read = inst->regs_read(this, i);
> int subset_spill_offset = (spill_offset +
> REG_SIZE * inst->src[i].reg_offset);
> - fs_reg unspill_dst(GRF, virtual_grf_alloc(regs_read));
> + fs_reg unspill_dst(GRF, alloc.allocate(regs_read));
>
> inst->src[i].reg = unspill_dst.reg;
> inst->src[i].reg_offset = 0;
> @@ -876,7 +876,7 @@ fs_visitor::spill_reg(int spill_reg)
> inst->dst.reg == spill_reg) {
> int subset_spill_offset = (spill_offset +
> REG_SIZE * inst->dst.reg_offset);
> - fs_reg spill_src(GRF, virtual_grf_alloc(inst->regs_written));
> + fs_reg spill_src(GRF, alloc.allocate(inst->regs_written));
>
> inst->dst.reg = spill_src.reg;
> inst->dst.reg_offset = 0;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
> index 62788cd..09f0fad 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
> @@ -66,7 +66,7 @@ is_nop_mov(const fs_inst *inst)
> static bool
> is_copy_payload(const fs_visitor *v, const fs_inst *inst)
> {
> - if (v->virtual_grf_sizes[inst->src[0].reg] != inst->regs_written)
> + if (v->alloc.sizes[inst->src[0].reg] != inst->regs_written)
> return false;
>
> fs_reg reg = inst->src[0];
> @@ -94,8 +94,8 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
> return false;
> }
>
> - if (v->virtual_grf_sizes[inst->src[0].reg] >
> - v->virtual_grf_sizes[inst->dst.reg])
> + if (v->alloc.sizes[inst->src[0].reg] >
> + v->alloc.sizes[inst->dst.reg])
> return false;
>
> if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
> @@ -179,7 +179,7 @@ fs_visitor::register_coalesce()
> if (reg_from != inst->src[0].reg) {
> reg_from = inst->src[0].reg;
>
> - src_size = virtual_grf_sizes[inst->src[0].reg];
> + src_size = alloc.sizes[inst->src[0].reg];
> assert(src_size <= MAX_VGRF_SIZE);
>
> assert(inst->src[0].width % 8 == 0);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 5d0ce58..9fbad94 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -1138,7 +1138,7 @@ fs_visitor::visit(ir_expression *ir)
> const unsigned num_regs = 2; /* Fetch 4 owords at a time. */
> const unsigned base = const_offset->value.u[0] & ~(32 * num_regs - 1);
> const unsigned delta = const_offset->value.u[0] & (32 * num_regs - 1);
> - const fs_reg packed_consts(GRF, virtual_grf_alloc(num_regs),
> + const fs_reg packed_consts(GRF, alloc.allocate(num_regs),
> result.type, dispatch_width);
> fs_inst *pull = emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
> packed_consts, surf_index, fs_reg(base));
> @@ -1283,7 +1283,7 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
> /* If last_rhs_inst wrote a different number of components than our LHS,
> * we can't safely rewrite it.
> */
> - if (virtual_grf_sizes[dst.reg] != modify->regs_written)
> + if (alloc.sizes[dst.reg] != modify->regs_written)
> return false;
>
> /* Success! Rewrite the instruction. */
> @@ -1466,7 +1466,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
> * this weirdness around to the expected layout.
> */
> orig_dst = dst;
> - dst = fs_reg(GRF, virtual_grf_alloc(8), orig_dst.type);
> + dst = fs_reg(GRF, alloc.allocate(8), orig_dst.type);
> }
>
> enum opcode opcode;
> @@ -1677,7 +1677,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
> * need to offset the Sampler State Pointer in the header.
> */
> header_present = true;
> - sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
> + sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
> length++;
> }
>
> @@ -1819,7 +1819,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
> else
> mlen = length * reg_width;
>
> - fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen),
> + fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
> BRW_REGISTER_TYPE_F);
> emit(LOAD_PAYLOAD(src_payload, sources, length));
>
> @@ -1931,7 +1931,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
> * tracking to get the scaling factor.
> */
> if (brw->gen < 6 && is_rect) {
> - fs_reg dst = fs_reg(GRF, virtual_grf_alloc(coord_components));
> + fs_reg dst = fs_reg(GRF, alloc.allocate(coord_components));
> fs_reg src = coordinate;
> coordinate = dst;
>
> @@ -1990,7 +1990,7 @@ fs_reg
> fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler)
> {
> int reg_width = dispatch_width / 8;
> - fs_reg payload = fs_reg(GRF, virtual_grf_alloc(components * reg_width),
> + fs_reg payload = fs_reg(GRF, alloc.allocate(components * reg_width),
> BRW_REGISTER_TYPE_F);
> fs_reg dest = vgrf(glsl_type::uvec4_type);
> fs_reg *sources = ralloc_array(mem_ctx, fs_reg, components);
> @@ -2991,7 +2991,7 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
>
> fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 4);
>
> - sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
> + sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
> /* Initialize the sample mask in the message header. */
> emit(MOV(sources[0], fs_reg(0u)))
> ->force_writemask_all = true;
> @@ -3025,7 +3025,7 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
> }
>
> int mlen = 1 + (length - 1) * reg_width;
> - fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen),
> + fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
> BRW_REGISTER_TYPE_UD);
> emit(LOAD_PAYLOAD(src_payload, sources, length));
>
> @@ -3046,7 +3046,7 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
>
> fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2);
>
> - sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
> + sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
> /* Initialize the sample mask in the message header. */
> emit(MOV(sources[0], fs_reg(0u)))
> ->force_writemask_all = true;
> @@ -3065,7 +3065,7 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
> emit(MOV(sources[1], offset));
>
> int mlen = 1 + reg_width;
> - fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen),
> + fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
> BRW_REGISTER_TYPE_UD);
> fs_inst *inst = emit(LOAD_PAYLOAD(src_payload, sources, 2));
>
> @@ -3285,7 +3285,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)
> int len = 0;
> for (unsigned i = 0; i < 4; ++i) {
> if (colors_enabled & (1 << i)) {
> - dst[len] = fs_reg(GRF, virtual_grf_alloc(color.width / 8),
> + dst[len] = fs_reg(GRF, alloc.allocate(color.width / 8),
> color.type, color.width);
> inst = emit(MOV(dst[len], offset(color, i)));
> inst->saturate = key->clamp_fragment_color;
> @@ -3309,11 +3309,11 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)
> */
> for (unsigned i = 0; i < 4; ++i) {
> if (colors_enabled & (1 << i)) {
> - dst[i] = fs_reg(GRF, virtual_grf_alloc(1), color.type);
> + dst[i] = fs_reg(GRF, alloc.allocate(1), color.type);
> inst = emit(MOV(dst[i], half(offset(color, i), 0)));
> inst->saturate = key->clamp_fragment_color;
>
> - dst[i + 4] = fs_reg(GRF, virtual_grf_alloc(1), color.type);
> + dst[i + 4] = fs_reg(GRF, alloc.allocate(1), color.type);
> inst = emit(MOV(dst[i + 4], half(offset(color, i), 1)));
> inst->saturate = key->clamp_fragment_color;
> inst->force_sechalf = true;
> @@ -3414,7 +3414,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
> length += 2;
>
> if (payload.aa_dest_stencil_reg) {
> - sources[length] = fs_reg(GRF, virtual_grf_alloc(1));
> + sources[length] = fs_reg(GRF, alloc.allocate(1));
> emit(MOV(sources[length],
> fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))));
> length++;
> @@ -3428,7 +3428,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
> /* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since
> * it's unsinged single words, one vgrf is always 16-wide.
> */
> - sources[length] = fs_reg(GRF, virtual_grf_alloc(1),
> + sources[length] = fs_reg(GRF, alloc.allocate(1),
> BRW_REGISTER_TYPE_UW, 16);
> emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask);
> length++;
> @@ -3442,7 +3442,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
> length += setup_color_payload(sources + length, this->outputs[0], 0);
> } else if (color1.file == BAD_FILE) {
> if (src0_alpha.file != BAD_FILE) {
> - sources[length] = fs_reg(GRF, virtual_grf_alloc(reg_size),
> + sources[length] = fs_reg(GRF, alloc.allocate(reg_size),
> src0_alpha.type, src0_alpha.width);
> fs_inst *inst = emit(MOV(sources[length], src0_alpha));
> inst->saturate = key->clamp_fragment_color;
> @@ -3491,7 +3491,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
> /* Send from the GRF */
> fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
> load = emit(LOAD_PAYLOAD(payload, sources, length));
> - payload.reg = virtual_grf_alloc(load->regs_written);
> + payload.reg = alloc.allocate(load->regs_written);
> payload.width = dispatch_width;
> load->dst = payload;
> write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload);
> @@ -3660,7 +3660,7 @@ fs_visitor::emit_urb_writes()
> * send to terminate the shader. */
> if (vue_map->slots_valid == 0) {
>
> - fs_reg payload = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
> + fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
> fs_inst *inst = emit(MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
> BRW_REGISTER_TYPE_UD))));
> inst->force_writemask_all = true;
> @@ -3693,7 +3693,7 @@ fs_visitor::emit_urb_writes()
> break;
> }
>
> - zero = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
> + zero = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
> emit(MOV(zero, fs_reg(0u)));
>
> sources[length++] = zero;
> @@ -3747,7 +3747,7 @@ fs_visitor::emit_urb_writes()
> * temp register and use that for the payload.
> */
> for (int i = 0; i < 4; i++) {
> - reg = fs_reg(GRF, virtual_grf_alloc(1), outputs[varying].type);
> + reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
> src = offset(this->outputs[varying], i);
> fs_inst *inst = emit(MOV(reg, src));
> inst->saturate = true;
> @@ -3774,14 +3774,14 @@ fs_visitor::emit_urb_writes()
> emit_shader_time_end();
>
> fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1);
> - fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length + 1),
> + fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1),
> BRW_REGISTER_TYPE_F);
>
> /* We need WE_all on the MOV for the message header (the URB handles)
> * so do a MOV to a dummy register and set force_writemask_all on the
> * MOV. LOAD_PAYLOAD will preserve that.
> */
> - fs_reg dummy = fs_reg(GRF, virtual_grf_alloc(1),
> + fs_reg dummy = fs_reg(GRF, alloc.allocate(1),
> BRW_REGISTER_TYPE_UD);
> fs_inst *inst = emit(MOV(dummy, fs_reg(retype(brw_vec8_grf(1, 0),
> BRW_REGISTER_TYPE_UD))));
> @@ -3897,9 +3897,6 @@ fs_visitor::init()
> this->current_annotation = NULL;
> this->base_ir = NULL;
>
> - this->virtual_grf_sizes = NULL;
> - this->virtual_grf_count = 0;
> - this->virtual_grf_array_size = 0;
> this->virtual_grf_start = NULL;
> this->virtual_grf_end = NULL;
> this->live_intervals = NULL;
> diff --git a/src/mesa/drivers/dri/i965/brw_ir_allocator.h b/src/mesa/drivers/dri/i965/brw_ir_allocator.h
> new file mode 100644
> index 0000000..318185a
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_ir_allocator.h
> @@ -0,0 +1,89 @@
> +/* -*- c++ -*- */
> +/*
> + * Copyright © 2010-2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#ifndef BRW_IR_ALLOCATOR_H
> +#define BRW_IR_ALLOCATOR_H
> +
> +#include "main/macros.h"
> +#include "util/ralloc.h"
> +
> +namespace brw {
> + /**
> + * Simple allocator used to keep track of virtual GRFs.
> + */
> + class simple_allocator {
> + public:
> + simple_allocator() :
> + sizes(NULL), offsets(NULL), count(0), total_size(0),
> + mem_ctx(ralloc_context(NULL)), capacity(0)
> + {
> + }
> +
> + ~simple_allocator()
> + {
> + ralloc_free(mem_ctx);
> + }
> +
> + unsigned
> + allocate(unsigned size)
> + {
> + if (capacity <= count) {
> + capacity = MAX2(16, capacity * 2);
> + sizes = reralloc(mem_ctx, sizes, unsigned, capacity);
> + offsets = reralloc(mem_ctx, offsets, unsigned, capacity);
> + }
> +
> + sizes[count] = size;
> + offsets[count] = total_size;
> + total_size += size;
> +
> + return count++;
> + }
> +
> + /**
> + * Array of sizes for each allocation. The allocation unit is up to the
> + * back-end, but it's expected to be one scalar value in the FS back-end
> + * and one vec4 in the VEC4 back-end.
> + */
> + unsigned *sizes;
> +
> + /**
> + * Array of offsets from the start of the VGRF space in allocation
> + * units.
> + */
> + unsigned *offsets;
> +
> + /** Total number of VGRFs allocated. */
> + unsigned count;
> +
> + /** Cumulative size in allocation units. */
> + unsigned total_size;
> +
> + private:
> + void *mem_ctx;
> + unsigned capacity;
> + };
> +}
Just a suggestion: since this class already creates and frees its own
ralloc context, maybe it could simply manage its own memory with malloc,
realloc, and free and avoid the ralloc overhead entirely. It doesn't
seem like we're gaining much from using ralloc here.
> +
> +#endif
> diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> index 40b5715..78666fd 100644
> --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> @@ -544,9 +544,9 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
>
> if (inst->dst.file == GRF) {
> if (remaining_grf_uses[inst->dst.reg] == 1)
> - benefit += v->virtual_grf_sizes[inst->dst.reg];
> + benefit += v->alloc.sizes[inst->dst.reg];
> if (!grf_active[inst->dst.reg])
> - benefit -= v->virtual_grf_sizes[inst->dst.reg];
> + benefit -= v->alloc.sizes[inst->dst.reg];
> }
>
> for (int i = 0; i < inst->sources; i++) {
> @@ -554,9 +554,9 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
> continue;
>
> if (remaining_grf_uses[inst->src[i].reg] == 1)
> - benefit += v->virtual_grf_sizes[inst->src[i].reg];
> + benefit += v->alloc.sizes[inst->src[i].reg];
> if (!grf_active[inst->src[i].reg])
> - benefit -= v->virtual_grf_sizes[inst->src[i].reg];
> + benefit -= v->alloc.sizes[inst->src[i].reg];
> }
>
> return benefit;
> @@ -1503,7 +1503,7 @@ fs_visitor::schedule_instructions(instruction_scheduler_mode mode)
> if (mode == SCHEDULE_POST)
> grf_count = grf_used;
> else
> - grf_count = virtual_grf_count;
> + grf_count = alloc.count;
>
> fs_instruction_scheduler sched(this, grf_count, mode);
> sched.run(cfg);
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
> index 5ad87d6..ab3ad60 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.h
> +++ b/src/mesa/drivers/dri/i965/brw_shader.h
> @@ -27,6 +27,10 @@
> #include "main/compiler.h"
> #include "glsl/ir.h"
>
> +#ifdef __cplusplus
> +#include "brw_ir_allocator.h"
> +#endif
> +
> #pragma once
>
> enum PACKED register_file {
> @@ -172,6 +176,8 @@ public:
>
> gl_shader_stage stage;
>
> + brw::simple_allocator alloc;
> +
> virtual void dump_instruction(backend_instruction *inst) = 0;
> virtual void dump_instruction(backend_instruction *inst, FILE *file) = 0;
> virtual void dump_instructions();
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 8988196..206567c 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -1209,7 +1209,7 @@ vec4_visitor::opt_register_coalesce()
> void
> vec4_visitor::split_virtual_grfs()
> {
> - int num_vars = this->virtual_grf_count;
> + int num_vars = this->alloc.count;
> int new_virtual_grf[num_vars];
> bool split_grf[num_vars];
>
> @@ -1217,7 +1217,7 @@ vec4_visitor::split_virtual_grfs()
>
> /* Try to split anything > 0 sized. */
> for (int i = 0; i < num_vars; i++) {
> - split_grf[i] = this->virtual_grf_sizes[i] != 1;
> + split_grf[i] = this->alloc.sizes[i] != 1;
> }
>
> /* Check that the instructions are compatible with the registers we're trying
> @@ -1243,13 +1243,13 @@ vec4_visitor::split_virtual_grfs()
> if (!split_grf[i])
> continue;
>
> - new_virtual_grf[i] = virtual_grf_alloc(1);
> - for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
> - int reg = virtual_grf_alloc(1);
> + new_virtual_grf[i] = alloc.allocate(1);
> + for (unsigned j = 2; j < this->alloc.sizes[i]; j++) {
> + unsigned reg = alloc.allocate(1);
> assert(reg == new_virtual_grf[i] + j - 1);
> (void) reg;
> }
> - this->virtual_grf_sizes[i] = 1;
> + this->alloc.sizes[i] = 1;
> }
>
> foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
> @@ -1432,7 +1432,7 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
> /* Don't print .0; and only VGRFs have reg_offsets and sizes */
> if (inst->src[i].reg_offset != 0 &&
> inst->src[i].file == GRF &&
> - virtual_grf_sizes[inst->src[i].reg] != 1)
> + alloc.sizes[inst->src[i].reg] != 1)
> fprintf(file, ".%d", inst->src[i].reg_offset);
>
> if (inst->src[i].file != IMM) {
> @@ -1834,9 +1834,9 @@ vec4_visitor::run()
>
> if (false) {
> /* Debug of register spilling: Go spill everything. */
> - const int grf_count = virtual_grf_count;
> - float spill_costs[virtual_grf_count];
> - bool no_spill[virtual_grf_count];
> + const int grf_count = alloc.count;
> + float spill_costs[alloc.count];
> + bool no_spill[alloc.count];
> evaluate_spill_costs(spill_costs, no_spill);
> for (int i = 0; i < grf_count; i++) {
> if (no_spill[i])
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 980544d..6b710c9 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -275,9 +275,6 @@ public:
> const void *base_ir;
> const char *current_annotation;
>
> - int *virtual_grf_sizes;
> - int virtual_grf_count;
> - int virtual_grf_array_size;
> int first_non_payload_grf;
> unsigned int max_grf;
> int *virtual_grf_start;
> @@ -285,14 +282,6 @@ public:
> brw::vec4_live_variables *live_intervals;
> dst_reg userplane[MAX_CLIP_PLANES];
>
> - /**
> - * This is the size to be used for an array with an element per
> - * reg_offset
> - */
> - int virtual_grf_reg_count;
> - /** Per-virtual-grf indices into an array of size virtual_grf_reg_count */
> - int *virtual_grf_reg_map;
> -
> dst_reg *variable_storage(ir_variable *var);
>
> void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
> @@ -347,7 +336,6 @@ public:
> bool run(void);
> void fail(const char *msg, ...);
>
> - int virtual_grf_alloc(int size);
> void setup_uniform_clipplane_values();
> void setup_uniform_values(ir_variable *ir);
> void setup_builtin_uniform_values(ir_variable *ir);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
> index 638d99a..81567d2 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
> @@ -346,7 +346,7 @@ bool
> vec4_visitor::opt_copy_propagation(bool do_constant_prop)
> {
> bool progress = false;
> - struct copy_entry entries[virtual_grf_reg_count];
> + struct copy_entry entries[alloc.total_size];
>
> memset(&entries, 0, sizeof(entries));
>
> @@ -375,7 +375,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
> inst->src[i].reladdr)
> continue;
>
> - int reg = (virtual_grf_reg_map[inst->src[i].reg] +
> + int reg = (alloc.offsets[inst->src[i].reg] +
> inst->src[i].reg_offset);
>
> /* Find the regs that each swizzle component came from.
> @@ -418,7 +418,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
> /* Track available source registers. */
> if (inst->dst.file == GRF) {
> const int reg =
> - virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset;
> + alloc.offsets[inst->dst.reg] + inst->dst.reg_offset;
>
> /* Update our destination's current channel values. For a direct copy,
> * the value is the newly propagated source. Otherwise, we don't know
> @@ -439,7 +439,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
> if (inst->dst.reladdr)
> memset(&entries, 0, sizeof(entries));
> else {
> - for (int i = 0; i < virtual_grf_reg_count; i++) {
> + for (unsigned i = 0; i < alloc.total_size; i++) {
> for (int j = 0; j < 4; j++) {
> if (is_channel_updated(inst, entries[i].value, j)){
> entries[i].value[j] = NULL;
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
> index ee50419..5fb8f31 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
> @@ -241,7 +241,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
> * more -- a sure sign they'll fail operands_match().
> */
> if (src->file == GRF) {
> - assert((src->reg * 4 + 3) < (virtual_grf_count * 4));
> + assert((unsigned)(src->reg * 4 + 3) < (alloc.count * 4));
>
> int last_reg_use = MAX2(MAX2(virtual_grf_end[src->reg * 4 + 0],
> virtual_grf_end[src->reg * 4 + 1]),
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> index 9835069..c562b2e 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
> @@ -96,7 +96,7 @@ vec4_live_variables::setup_def_use()
> * variable, and thus qualify for being in def[].
> */
> if (inst->dst.file == GRF &&
> - v->virtual_grf_sizes[inst->dst.reg] == 1 &&
> + v->alloc.sizes[inst->dst.reg] == 1 &&
> !inst->predicate) {
> for (int c = 0; c < 4; c++) {
> if (inst->dst.writemask & (1 << c)) {
> @@ -180,7 +180,7 @@ vec4_live_variables::vec4_live_variables(vec4_visitor *v, cfg_t *cfg)
> {
> mem_ctx = ralloc_context(NULL);
>
> - num_vars = v->virtual_grf_count * 4;
> + num_vars = v->alloc.count * 4;
> block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
>
> bitset_words = BITSET_WORDS(num_vars);
> @@ -230,14 +230,14 @@ vec4_visitor::calculate_live_intervals()
> if (this->live_intervals)
> return;
>
> - int *start = ralloc_array(mem_ctx, int, this->virtual_grf_count * 4);
> - int *end = ralloc_array(mem_ctx, int, this->virtual_grf_count * 4);
> + int *start = ralloc_array(mem_ctx, int, this->alloc.count * 4);
> + int *end = ralloc_array(mem_ctx, int, this->alloc.count * 4);
> ralloc_free(this->virtual_grf_start);
> ralloc_free(this->virtual_grf_end);
> this->virtual_grf_start = start;
> this->virtual_grf_end = end;
>
> - for (int i = 0; i < this->virtual_grf_count * 4; i++) {
> + for (unsigned i = 0; i < this->alloc.count * 4; i++) {
> start[i] = MAX_INSTRUCTION;
> end[i] = -1;
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> index e8e2185..b944d45 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
> @@ -45,15 +45,14 @@ assign(unsigned int *reg_hw_locations, backend_reg *reg)
> bool
> vec4_visitor::reg_allocate_trivial()
> {
> - unsigned int hw_reg_mapping[this->virtual_grf_count];
> - bool virtual_grf_used[this->virtual_grf_count];
> - int i;
> + unsigned int hw_reg_mapping[this->alloc.count];
> + bool virtual_grf_used[this->alloc.count];
> int next;
>
> /* Calculate which virtual GRFs are actually in use after whatever
> * optimization passes have occurred.
> */
> - for (int i = 0; i < this->virtual_grf_count; i++) {
> + for (unsigned i = 0; i < this->alloc.count; i++) {
> virtual_grf_used[i] = false;
> }
>
> @@ -61,18 +60,18 @@ vec4_visitor::reg_allocate_trivial()
> if (inst->dst.file == GRF)
> virtual_grf_used[inst->dst.reg] = true;
>
> - for (int i = 0; i < 3; i++) {
> + for (unsigned i = 0; i < 3; i++) {
> if (inst->src[i].file == GRF)
> virtual_grf_used[inst->src[i].reg] = true;
> }
> }
>
> hw_reg_mapping[0] = this->first_non_payload_grf;
> - next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
> - for (i = 1; i < this->virtual_grf_count; i++) {
> + next = hw_reg_mapping[0] + this->alloc.sizes[0];
> + for (unsigned i = 1; i < this->alloc.count; i++) {
> if (virtual_grf_used[i]) {
> hw_reg_mapping[i] = next;
> - next += this->virtual_grf_sizes[i];
> + next += this->alloc.sizes[i];
> }
> }
> prog_data->total_grf = next;
> @@ -176,7 +175,7 @@ bool
> vec4_visitor::reg_allocate()
> {
> struct intel_screen *screen = brw->intelScreen;
> - unsigned int hw_reg_mapping[virtual_grf_count];
> + unsigned int hw_reg_mapping[alloc.count];
> int payload_reg_count = this->first_non_payload_grf;
>
> /* Using the trivial allocator can be useful in debugging undefined
> @@ -187,19 +186,19 @@ vec4_visitor::reg_allocate()
>
> calculate_live_intervals();
>
> - int node_count = virtual_grf_count;
> + int node_count = alloc.count;
> int first_payload_node = node_count;
> node_count += payload_reg_count;
> struct ra_graph *g =
> ra_alloc_interference_graph(screen->vec4_reg_set.regs, node_count);
>
> - for (int i = 0; i < virtual_grf_count; i++) {
> - int size = this->virtual_grf_sizes[i];
> + for (unsigned i = 0; i < alloc.count; i++) {
> + int size = this->alloc.sizes[i];
> assert(size >= 1 && size <= 2 &&
> "Register allocation relies on split_virtual_grfs().");
> ra_set_node_class(g, i, screen->vec4_reg_set.classes[size - 1]);
>
> - for (int j = 0; j < i; j++) {
> + for (unsigned j = 0; j < i; j++) {
> if (virtual_grf_interferes(i, j)) {
> ra_add_node_interference(g, i, j);
> }
> @@ -230,12 +229,12 @@ vec4_visitor::reg_allocate()
> * numbers.
> */
> prog_data->total_grf = payload_reg_count;
> - for (int i = 0; i < virtual_grf_count; i++) {
> + for (unsigned i = 0; i < alloc.count; i++) {
> int reg = ra_get_node_reg(g, i);
>
> hw_reg_mapping[i] = screen->vec4_reg_set.ra_reg_to_grf[reg];
> prog_data->total_grf = MAX2(prog_data->total_grf,
> - hw_reg_mapping[i] + virtual_grf_sizes[i]);
> + hw_reg_mapping[i] + alloc.sizes[i]);
> }
>
> foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
> @@ -255,9 +254,9 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
> {
> float loop_scale = 1.0;
>
> - for (int i = 0; i < this->virtual_grf_count; i++) {
> + for (unsigned i = 0; i < this->alloc.count; i++) {
> spill_costs[i] = 0.0;
> - no_spill[i] = virtual_grf_sizes[i] != 1;
> + no_spill[i] = alloc.sizes[i] != 1;
> }
>
> /* Calculate costs for spilling nodes. Call it a cost of 1 per
> @@ -308,12 +307,12 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
> int
> vec4_visitor::choose_spill_reg(struct ra_graph *g)
> {
> - float spill_costs[this->virtual_grf_count];
> - bool no_spill[this->virtual_grf_count];
> + float spill_costs[this->alloc.count];
> + bool no_spill[this->alloc.count];
>
> evaluate_spill_costs(spill_costs, no_spill);
>
> - for (int i = 0; i < this->virtual_grf_count; i++) {
> + for (unsigned i = 0; i < this->alloc.count; i++) {
> if (!no_spill[i])
> ra_set_node_spill_cost(g, i, spill_costs[i]);
> }
> @@ -324,7 +323,7 @@ vec4_visitor::choose_spill_reg(struct ra_graph *g)
> void
> vec4_visitor::spill_reg(int spill_reg_nr)
> {
> - assert(virtual_grf_sizes[spill_reg_nr] == 1);
> + assert(alloc.sizes[spill_reg_nr] == 1);
> unsigned int spill_offset = c->last_scratch++;
>
> /* Generate spill/unspill instructions for the objects being spilled. */
> @@ -332,7 +331,7 @@ vec4_visitor::spill_reg(int spill_reg_nr)
> for (unsigned int i = 0; i < 3; i++) {
> if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
> src_reg spill_reg = inst->src[i];
> - inst->src[i].reg = virtual_grf_alloc(1);
> + inst->src[i].reg = alloc.allocate(1);
> dst_reg temp = dst_reg(inst->src[i]);
>
> emit_scratch_read(block, inst, temp, spill_reg, spill_offset);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index fe05110..4211d0f 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -616,31 +616,12 @@ type_size(const struct glsl_type *type)
> return 0;
> }
>
> -int
> -vec4_visitor::virtual_grf_alloc(int size)
> -{
> - if (virtual_grf_array_size <= virtual_grf_count) {
> - if (virtual_grf_array_size == 0)
> - virtual_grf_array_size = 16;
> - else
> - virtual_grf_array_size *= 2;
> - virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
> - virtual_grf_array_size);
> - virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
> - virtual_grf_array_size);
> - }
> - virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
> - virtual_grf_reg_count += size;
> - virtual_grf_sizes[virtual_grf_count] = size;
> - return virtual_grf_count++;
> -}
> -
> src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
> {
> init();
>
> this->file = GRF;
> - this->reg = v->virtual_grf_alloc(type_size(type));
> + this->reg = v->alloc.allocate(type_size(type));
>
> if (type->is_array() || type->is_record()) {
> this->swizzle = BRW_SWIZZLE_NOOP;
> @@ -658,7 +639,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
> init();
>
> this->file = GRF;
> - this->reg = v->virtual_grf_alloc(type_size(type) * size);
> + this->reg = v->alloc.allocate(type_size(type) * size);
>
> this->swizzle = BRW_SWIZZLE_NOOP;
>
> @@ -670,7 +651,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
> init();
>
> this->file = GRF;
> - this->reg = v->virtual_grf_alloc(type_size(type));
> + this->reg = v->alloc.allocate(type_size(type));
>
> if (type->is_array() || type->is_record()) {
> this->writemask = WRITEMASK_XYZW;
> @@ -3372,7 +3353,7 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
> void
> vec4_visitor::move_grf_array_access_to_scratch()
> {
> - int scratch_loc[this->virtual_grf_count];
> + int scratch_loc[this->alloc.count];
> memset(scratch_loc, -1, sizeof(scratch_loc));
>
> /* First, calculate the set of virtual GRFs that need to be punted
> @@ -3383,7 +3364,7 @@ vec4_visitor::move_grf_array_access_to_scratch()
> if (inst->dst.file == GRF && inst->dst.reladdr &&
> scratch_loc[inst->dst.reg] == -1) {
> scratch_loc[inst->dst.reg] = c->last_scratch;
> - c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
> + c->last_scratch += this->alloc.sizes[inst->dst.reg];
> }
>
> for (int i = 0 ; i < 3; i++) {
> @@ -3392,7 +3373,7 @@ vec4_visitor::move_grf_array_access_to_scratch()
> if (src->file == GRF && src->reladdr &&
> scratch_loc[src->reg] == -1) {
> scratch_loc[src->reg] = c->last_scratch;
> - c->last_scratch += this->virtual_grf_sizes[src->reg];
> + c->last_scratch += this->alloc.sizes[src->reg];
> }
> }
> }
> @@ -3612,11 +3593,6 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
>
> this->virtual_grf_start = NULL;
> this->virtual_grf_end = NULL;
> - this->virtual_grf_sizes = NULL;
> - this->virtual_grf_count = 0;
> - this->virtual_grf_reg_map = NULL;
> - this->virtual_grf_reg_count = 0;
> - this->virtual_grf_array_size = 0;
> this->live_intervals = NULL;
>
> this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
> --
> 2.1.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list