[Mesa-dev] [PATCH 1/2] st/mesa: reduce time spent in calculating temp read/writes
Marek Olšák
maraeo at gmail.com
Thu Aug 27 04:57:39 PDT 2015
We could just skip this for radeonsi, or for any driver that does its own
register allocation (regalloc), because it's useless there.
Marek
On Thu, Aug 27, 2015 at 5:30 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> The glsl->tgsi converter does some temporary register reduction;
> however, when profiling shader-db this shows up quite high in the profile.
>
> So optimise things to reduce the number of passes we make over
> all the instructions. This drops merge_registers
> from 4-5% on the profile to 1%. I think this can be reduced
> further by possibly optimising the renumber pass.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 153 +++++++++++++++--------------
> 1 file changed, 79 insertions(+), 74 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 65aae40..e07db11 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -480,10 +480,9 @@ public:
> void simplify_cmp(void);
>
> void rename_temp_register(int index, int new_index);
> - int get_first_temp_read(int index);
> - int get_first_temp_write(int index);
> - int get_last_temp_read(int index);
> - int get_last_temp_write(int index);
> + void get_first_temp_read(int *first_reads);
> + void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
> + void get_last_temp_write(int *last_writes);
>
> void copy_propagate(void);
> int eliminate_dead_code(void);
> @@ -3688,8 +3687,8 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
> }
> }
>
> -int
> -glsl_to_tgsi_visitor::get_first_temp_read(int index)
> +void
> +glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
> {
> int depth = 0; /* loop depth */
> int loop_start = -1; /* index of the first active BGNLOOP (if any) */
> @@ -3697,15 +3696,15 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
>
> foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
> for (j = 0; j < num_inst_src_regs(inst); j++) {
> - if (inst->src[j].file == PROGRAM_TEMPORARY &&
> - inst->src[j].index == index) {
> - return (depth == 0) ? i : loop_start;
> + if (inst->src[j].file == PROGRAM_TEMPORARY) {
> + if (first_reads[inst->src[j].index] == -1)
> + first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start;
> }
> }
> for (j = 0; j < inst->tex_offset_num_offset; j++) {
> - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
> - inst->tex_offsets[j].index == index) {
> - return (depth == 0) ? i : loop_start;
> + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
> + if (first_reads[inst->tex_offsets[j].index] == -1)
> + first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start;
> }
> }
> if (inst->op == TGSI_OPCODE_BGNLOOP) {
> @@ -3718,91 +3717,73 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
> assert(depth >= 0);
> i++;
> }
> - return -1;
> }
>
> -int
> -glsl_to_tgsi_visitor::get_first_temp_write(int index)
> +void
> +glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes)
> {
> int depth = 0; /* loop depth */
> int loop_start = -1; /* index of the first active BGNLOOP (if any) */
> - int i = 0;
> - unsigned j;
> -
> + unsigned i = 0, j;
> + int k;
> foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
> + for (j = 0; j < num_inst_src_regs(inst); j++) {
> + if (inst->src[j].file == PROGRAM_TEMPORARY)
> + last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
> + }
> for (j = 0; j < num_inst_dst_regs(inst); j++) {
> - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) {
> - return (depth == 0) ? i : loop_start;
> - }
> + if (inst->dst[j].file == PROGRAM_TEMPORARY)
> + if (first_writes[inst->dst[j].index] == -1)
> + first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
> + }
> + for (j = 0; j < inst->tex_offset_num_offset; j++) {
> + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
> + last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
> }
> if (inst->op == TGSI_OPCODE_BGNLOOP) {
> if(depth++ == 0)
> loop_start = i;
> } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
> - if (--depth == 0)
> + if (--depth == 0) {
> loop_start = -1;
> - }
> - assert(depth >= 0);
> - i++;
> - }
> - return -1;
> -}
> -
> -int
> -glsl_to_tgsi_visitor::get_last_temp_read(int index)
> -{
> - int depth = 0; /* loop depth */
> - int last = -1; /* index of last instruction that reads the temporary */
> - unsigned i = 0, j;
> -
> - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
> - for (j = 0; j < num_inst_src_regs(inst); j++) {
> - if (inst->src[j].file == PROGRAM_TEMPORARY &&
> - inst->src[j].index == index) {
> - last = (depth == 0) ? i : -2;
> + for (k = 0; k < this->next_temp; k++) {
> + if (last_reads[k] == -2) {
> + last_reads[k] = i;
> + }
> + }
> }
> }
> - for (j = 0; j < inst->tex_offset_num_offset; j++) {
> - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
> - inst->tex_offsets[j].index == index)
> - last = (depth == 0) ? i : -2;
> - }
> - if (inst->op == TGSI_OPCODE_BGNLOOP)
> - depth++;
> - else if (inst->op == TGSI_OPCODE_ENDLOOP)
> - if (--depth == 0 && last == -2)
> - last = i;
> assert(depth >= 0);
> i++;
> }
> - assert(last >= -1);
> - return last;
> }
>
> -int
> -glsl_to_tgsi_visitor::get_last_temp_write(int index)
> +void
> +glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
> {
> int depth = 0; /* loop depth */
> - int last = -1; /* index of last instruction that writes to the temporary */
> - int i = 0;
> + int i = 0, k;
> unsigned j;
>
> foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
> for (j = 0; j < num_inst_dst_regs(inst); j++) {
> - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index)
> - last = (depth == 0) ? i : -2;
> + if (inst->dst[j].file == PROGRAM_TEMPORARY)
> + last_writes[inst->dst[j].index] = (depth == 0) ? i : -2;
> }
>
> if (inst->op == TGSI_OPCODE_BGNLOOP)
> depth++;
> else if (inst->op == TGSI_OPCODE_ENDLOOP)
> - if (--depth == 0 && last == -2)
> - last = i;
> + if (--depth == 0) {
> + for (k = 0; k < this->next_temp; k++) {
> + if (last_writes[k] == -2) {
> + last_writes[k] = i;
> + }
> + }
> + }
> assert(depth >= 0);
> i++;
> }
> - assert(last >= -1);
> - return last;
> }
>
> /*
> @@ -4238,9 +4219,10 @@ glsl_to_tgsi_visitor::merge_registers(void)
> * into an array so that we don't have to traverse the instruction list as
> * much. */
> for (i = 0; i < this->next_temp; i++) {
> - last_reads[i] = get_last_temp_read(i);
> - first_writes[i] = get_first_temp_write(i);
> + last_reads[i] = -1;
> + first_writes[i] = -1;
> }
> + get_last_temp_read_first_temp_write(last_reads, first_writes);
>
> /* Start looking for registers with non-overlapping usages that can be
> * merged together. */
> @@ -4281,15 +4263,21 @@ glsl_to_tgsi_visitor::renumber_registers(void)
> {
> int i = 0;
> int new_index = 0;
> + int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
> +
> + for (i = 0; i < this->next_temp; i++)
> + first_reads[i] = -1;
> + get_first_temp_read(first_reads);
>
> for (i = 0; i < this->next_temp; i++) {
> - if (get_first_temp_read(i) < 0) continue;
> + if (first_reads[i] < 0) continue;
> if (i != new_index)
> rename_temp_register(i, new_index);
> new_index++;
> }
>
> this->next_temp = new_index;
> + ralloc_free(first_reads);
> }
>
> /**
> @@ -5764,14 +5752,31 @@ get_mesa_program(struct gl_context *ctx,
> #if 0
> /* Print out some information (for debugging purposes) used by the
> * optimization passes. */
> - for (i = 0; i < v->next_temp; i++) {
> - int fr = v->get_first_temp_read(i);
> - int fw = v->get_first_temp_write(i);
> - int lr = v->get_last_temp_read(i);
> - int lw = v->get_last_temp_write(i);
> -
> - printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
> - assert(fw <= fr);
> + {
> + int i;
> + int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
> + int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
> + int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
> + int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
> +
> + for (i = 0; i < v->next_temp; i++) {
> + first_writes[i] = -1;
> + first_reads[i] = -1;
> + last_writes[i] = -1;
> + last_reads[i] = -1;
> + }
> + v->get_first_temp_read(first_reads);
> + v->get_last_temp_read_first_temp_write(last_reads, first_writes);
> + v->get_last_temp_write(last_writes);
> + for (i = 0; i < v->next_temp; i++)
> + printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
> + first_writes[i],
> + last_reads[i],
> + last_writes[i]);
> + ralloc_free(first_writes);
> + ralloc_free(first_reads);
> + ralloc_free(last_writes);
> + ralloc_free(last_reads);
> }
> #endif
>
> --
> 2.4.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list