[Mesa-dev] [PATCH 1/2] st/mesa: reduce time spent in calculating temp read/writes
Marek Olšák
maraeo at gmail.com
Thu Aug 27 13:26:17 PDT 2015
Yes, I think it's useful too. I'm not familiar with this code, so you
can add
Acked-by: Marek Olšák <marek.olsak at amd.com>
and commit if there is no reviewer.
Marek
On Thu, Aug 27, 2015 at 9:48 PM, Dave Airlie <airlied at gmail.com> wrote:
> On 27 August 2015 at 21:57, Marek Olšák <maraeo at gmail.com> wrote:
>> We could just skip this for radeonsi or any driver that does regalloc,
>> because it's useless there.
>
> I did expect this comment from Ilia, though adding a CAP and
> piping it through is an option I'd look into later (or someone who
> cares can do it sooner).
>
> It doesn't change what this patch does, however; it looks useful for r600
> as is.
>
> Dave.
>
>>
>> Marek
>>
>> On Thu, Aug 27, 2015 at 5:30 AM, Dave Airlie <airlied at gmail.com> wrote:
>>> From: Dave Airlie <airlied at redhat.com>
>>>
>>> The glsl->tgsi converter does some temporary register reduction;
>>> however, in profiling shader-db this shows up quite highly,
>>>
>>> so optimise things to reduce the number of loops through
>>> all the instructions we do. This drops merge_registers
>>> from 4-5% on the profile to 1%. I think this can be reduced
>>> further by possibly optimising the renumber pass.
>>>
>>> Signed-off-by: Dave Airlie <airlied at redhat.com>
>>> ---
>>> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 153 +++++++++++++++--------------
>>> 1 file changed, 79 insertions(+), 74 deletions(-)
>>>
>>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>> index 65aae40..e07db11 100644
>>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>> @@ -480,10 +480,9 @@ public:
>>> void simplify_cmp(void);
>>>
>>> void rename_temp_register(int index, int new_index);
>>> - int get_first_temp_read(int index);
>>> - int get_first_temp_write(int index);
>>> - int get_last_temp_read(int index);
>>> - int get_last_temp_write(int index);
>>> + void get_first_temp_read(int *first_reads);
>>> + void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
>>> + void get_last_temp_write(int *last_writes);
>>>
>>> void copy_propagate(void);
>>> int eliminate_dead_code(void);
>>> @@ -3688,8 +3687,8 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
>>> }
>>> }
>>>
>>> -int
>>> -glsl_to_tgsi_visitor::get_first_temp_read(int index)
>>> +void
>>> +glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
>>> {
>>> int depth = 0; /* loop depth */
>>> int loop_start = -1; /* index of the first active BGNLOOP (if any) */
>>> @@ -3697,15 +3696,15 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
>>>
>>> foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
>>> for (j = 0; j < num_inst_src_regs(inst); j++) {
>>> - if (inst->src[j].file == PROGRAM_TEMPORARY &&
>>> - inst->src[j].index == index) {
>>> - return (depth == 0) ? i : loop_start;
>>> + if (inst->src[j].file == PROGRAM_TEMPORARY) {
>>> + if (first_reads[inst->src[j].index] == -1)
>>> + first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start;
>>> }
>>> }
>>> for (j = 0; j < inst->tex_offset_num_offset; j++) {
>>> - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
>>> - inst->tex_offsets[j].index == index) {
>>> - return (depth == 0) ? i : loop_start;
>>> + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
>>> + if (first_reads[inst->tex_offsets[j].index] == -1)
>>> + first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start;
>>> }
>>> }
>>> if (inst->op == TGSI_OPCODE_BGNLOOP) {
>>> @@ -3718,91 +3717,73 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
>>> assert(depth >= 0);
>>> i++;
>>> }
>>> - return -1;
>>> }
>>>
>>> -int
>>> -glsl_to_tgsi_visitor::get_first_temp_write(int index)
>>> +void
>>> +glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes)
>>> {
>>> int depth = 0; /* loop depth */
>>> int loop_start = -1; /* index of the first active BGNLOOP (if any) */
>>> - int i = 0;
>>> - unsigned j;
>>> -
>>> + unsigned i = 0, j;
>>> + int k;
>>> foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
>>> + for (j = 0; j < num_inst_src_regs(inst); j++) {
>>> + if (inst->src[j].file == PROGRAM_TEMPORARY)
>>> + last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
>>> + }
>>> for (j = 0; j < num_inst_dst_regs(inst); j++) {
>>> - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) {
>>> - return (depth == 0) ? i : loop_start;
>>> - }
>>> + if (inst->dst[j].file == PROGRAM_TEMPORARY)
>>> + if (first_writes[inst->dst[j].index] == -1)
>>> + first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
>>> + }
>>> + for (j = 0; j < inst->tex_offset_num_offset; j++) {
>>> + if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
>>> + last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
>>> }
>>> if (inst->op == TGSI_OPCODE_BGNLOOP) {
>>> if(depth++ == 0)
>>> loop_start = i;
>>> } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
>>> - if (--depth == 0)
>>> + if (--depth == 0) {
>>> loop_start = -1;
>>> - }
>>> - assert(depth >= 0);
>>> - i++;
>>> - }
>>> - return -1;
>>> -}
>>> -
>>> -int
>>> -glsl_to_tgsi_visitor::get_last_temp_read(int index)
>>> -{
>>> - int depth = 0; /* loop depth */
>>> - int last = -1; /* index of last instruction that reads the temporary */
>>> - unsigned i = 0, j;
>>> -
>>> - foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
>>> - for (j = 0; j < num_inst_src_regs(inst); j++) {
>>> - if (inst->src[j].file == PROGRAM_TEMPORARY &&
>>> - inst->src[j].index == index) {
>>> - last = (depth == 0) ? i : -2;
>>> + for (k = 0; k < this->next_temp; k++) {
>>> + if (last_reads[k] == -2) {
>>> + last_reads[k] = i;
>>> + }
>>> + }
>>> }
>>> }
>>> - for (j = 0; j < inst->tex_offset_num_offset; j++) {
>>> - if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
>>> - inst->tex_offsets[j].index == index)
>>> - last = (depth == 0) ? i : -2;
>>> - }
>>> - if (inst->op == TGSI_OPCODE_BGNLOOP)
>>> - depth++;
>>> - else if (inst->op == TGSI_OPCODE_ENDLOOP)
>>> - if (--depth == 0 && last == -2)
>>> - last = i;
>>> assert(depth >= 0);
>>> i++;
>>> }
>>> - assert(last >= -1);
>>> - return last;
>>> }
>>>
>>> -int
>>> -glsl_to_tgsi_visitor::get_last_temp_write(int index)
>>> +void
>>> +glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
>>> {
>>> int depth = 0; /* loop depth */
>>> - int last = -1; /* index of last instruction that writes to the temporary */
>>> - int i = 0;
>>> + int i = 0, k;
>>> unsigned j;
>>>
>>> foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
>>> for (j = 0; j < num_inst_dst_regs(inst); j++) {
>>> - if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index)
>>> - last = (depth == 0) ? i : -2;
>>> + if (inst->dst[j].file == PROGRAM_TEMPORARY)
>>> + last_writes[inst->dst[j].index] = (depth == 0) ? i : -2;
>>> }
>>>
>>> if (inst->op == TGSI_OPCODE_BGNLOOP)
>>> depth++;
>>> else if (inst->op == TGSI_OPCODE_ENDLOOP)
>>> - if (--depth == 0 && last == -2)
>>> - last = i;
>>> + if (--depth == 0) {
>>> + for (k = 0; k < this->next_temp; k++) {
>>> + if (last_writes[k] == -2) {
>>> + last_writes[k] = i;
>>> + }
>>> + }
>>> + }
>>> assert(depth >= 0);
>>> i++;
>>> }
>>> - assert(last >= -1);
>>> - return last;
>>> }
>>>
>>> /*
>>> @@ -4238,9 +4219,10 @@ glsl_to_tgsi_visitor::merge_registers(void)
>>> * into an array so that we don't have to traverse the instruction list as
>>> * much. */
>>> for (i = 0; i < this->next_temp; i++) {
>>> - last_reads[i] = get_last_temp_read(i);
>>> - first_writes[i] = get_first_temp_write(i);
>>> + last_reads[i] = -1;
>>> + first_writes[i] = -1;
>>> }
>>> + get_last_temp_read_first_temp_write(last_reads, first_writes);
>>>
>>> /* Start looking for registers with non-overlapping usages that can be
>>> * merged together. */
>>> @@ -4281,15 +4263,21 @@ glsl_to_tgsi_visitor::renumber_registers(void)
>>> {
>>> int i = 0;
>>> int new_index = 0;
>>> + int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
>>> +
>>> + for (i = 0; i < this->next_temp; i++)
>>> + first_reads[i] = -1;
>>> + get_first_temp_read(first_reads);
>>>
>>> for (i = 0; i < this->next_temp; i++) {
>>> - if (get_first_temp_read(i) < 0) continue;
>>> + if (first_reads[i] < 0) continue;
>>> if (i != new_index)
>>> rename_temp_register(i, new_index);
>>> new_index++;
>>> }
>>>
>>> this->next_temp = new_index;
>>> + ralloc_free(first_reads);
>>> }
>>>
>>> /**
>>> @@ -5764,14 +5752,31 @@ get_mesa_program(struct gl_context *ctx,
>>> #if 0
>>> /* Print out some information (for debugging purposes) used by the
>>> * optimization passes. */
>>> - for (i = 0; i < v->next_temp; i++) {
>>> - int fr = v->get_first_temp_read(i);
>>> - int fw = v->get_first_temp_write(i);
>>> - int lr = v->get_last_temp_read(i);
>>> - int lw = v->get_last_temp_write(i);
>>> -
>>> - printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
>>> - assert(fw <= fr);
>>> + {
>>> + int i;
>>> + int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
>>> + int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
>>> + int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
>>> + int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
>>> +
>>> + for (i = 0; i < v->next_temp; i++) {
>>> + first_writes[i] = -1;
>>> + first_reads[i] = -1;
>>> + last_writes[i] = -1;
>>> + last_reads[i] = -1;
>>> + }
>>> + v->get_first_temp_read(first_reads);
>>> + v->get_last_temp_read_first_temp_write(last_reads, first_writes);
>>> + v->get_last_temp_write(last_writes);
>>> + for (i = 0; i < v->next_temp; i++)
>>> + printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
>>> + first_writes[i],
>>> + last_reads[i],
>>> + last_writes[i]);
>>> + ralloc_free(first_writes);
>>> + ralloc_free(first_reads);
>>> + ralloc_free(last_writes);
>>> + ralloc_free(last_reads);
>>> }
>>> #endif
>>>
>>> --
>>> 2.4.3
>>>
>>> _______________________________________________
>>> mesa-dev mailing list
>>> mesa-dev at lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list