[Mesa-dev] [PATCH] st_glsl_to_tgsi: track range for writes in a if/else/endif blocks. (v2)
Nicolai Hähnle
nhaehnle at gmail.com
Thu Jun 8 08:08:15 UTC 2017
On 08.06.2017 05:23, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This overhauls the copy prop and dead code passes to avoid
> major CPU overhead in some corner cases trigged by the fp64 patches
>
> v2: fix level not being updated correctly
> ---
> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 123 +++++++++++++++++++++++++----
> 1 file changed, 109 insertions(+), 14 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 97f60d3..48d48e8 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -4924,6 +4924,78 @@ glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
> *
> * which allows for dead code elimination on TEMP[1]'s writes.
> */
> +#define DEFAULT_LEVELS 8
> +
> +class per_level_info {
> +
> + struct per_level_range {
> + int32_t min_temp_idx;
> + int32_t max_temp_idx;
> + } *lvls;
> +
> + void *mem_ctx;
> + int num_alloced_levels;
> + int level;
> + int max_temps;
> +public:
> +
> + per_level_info(void *mem_ctx_in, int max_temps_in) {
> + num_alloced_levels = DEFAULT_LEVELS;
> + max_temps = max_temps_in;
> + mem_ctx = mem_ctx_in;
> + level = 0;
> + lvls = (struct per_level_range *)reralloc_array_size(mem_ctx,
> + NULL,
> + sizeof(struct per_level_range),
> + num_alloced_levels);
> + lvls[0].min_temp_idx = max_temps;
I believe you can just use INT_MAX here.
> + lvls[0].max_temp_idx = 0;
> + }
> +
> + ~per_level_info() {
> + ralloc_free(lvls);
> + }
> +
> + int get_level(void) {
> + return level;
> + }
> +
> + void push_level(void) {
> + level++;
> + if (level >= num_alloced_levels) {
> + num_alloced_levels += 4;
> + lvls = (struct per_level_range *)reralloc_array_size(mem_ctx,
> + (void *)lvls,
> + sizeof(struct per_level_range),
> + num_alloced_levels);
> + }
> + lvls[level].min_temp_idx = max_temps;
> + lvls[level].max_temp_idx = 0;
> + }
> +
> + void pop_level(void) {
> + if (lvls[level - 1].min_temp_idx > lvls[level].min_temp_idx)
> + lvls[level - 1].min_temp_idx = lvls[level].min_temp_idx;
> + if (lvls[level - 1].max_temp_idx < lvls[level].max_temp_idx)
> + lvls[level - 1].max_temp_idx = lvls[level].max_temp_idx;
> + level--;
> + }
> +
> + void get_level_range(int32_t *min, int32_t *max)
> + {
> + *min = lvls[level].min_temp_idx;
> + *max = lvls[level].max_temp_idx;
> + }
> +
> + void update_level_range(int32_t idx)
> + {
> + if (idx < lvls[level].min_temp_idx)
> + lvls[level].min_temp_idx = idx;
> + if ((idx + 1) > lvls[level].max_temp_idx)
> + lvls[level].max_temp_idx = idx + 1;
> + }
> +};
> +
> void
> glsl_to_tgsi_visitor::copy_propagate(void)
> {
> @@ -4931,7 +5003,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> glsl_to_tgsi_instruction *,
> this->next_temp * 4);
> int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
> - int level = 0;
> + class per_level_info lvl_info(mem_ctx, this->next_temp);
class is unnecessary here.
> + int min_lvl, max_lvl;
> + int level;
I'd feel more comfortable about the code if you replaced all occurrences
of level by lvl_info.get_level(), to completely eliminate the chance of
getting stale data. Same in eliminate_dead_code.
>
> foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
> assert(inst->dst[0].file != PROGRAM_TEMPORARY
> @@ -4955,13 +5029,12 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> for (int i = 0; i < 4; i++) {
> int src_chan = GET_SWZ(inst->src[r].swizzle, i);
> glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
> -
> if (!copy_chan) {
> good = false;
> break;
> }
>
> - assert(acp_level[acp_base + src_chan] <= level);
> + assert(acp_level[acp_base + src_chan] <= lvl_info.get_level());
>
> if (!first) {
> first = copy_chan;
> @@ -5006,7 +5079,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>
> case TGSI_OPCODE_IF:
> case TGSI_OPCODE_UIF:
> - ++level;
> + lvl_info.push_level();
> break;
>
> case TGSI_OPCODE_ENDIF:
> @@ -5014,7 +5087,9 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> /* Clear all channels written inside the block from the ACP, but
> * leaving those that were not touched.
> */
> - for (int r = 0; r < this->next_temp; r++) {
> + lvl_info.get_level_range(&min_lvl, &max_lvl);
> + level = lvl_info.get_level();
> + for (int r = min_lvl; r < max_lvl; r++) {
> for (int c = 0; c < 4; c++) {
> if (!acp[4 * r + c])
> continue;
> @@ -5023,8 +5098,11 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> acp[4 * r + c] = NULL;
> }
> }
> - if (inst->op == TGSI_OPCODE_ENDIF)
> - --level;
> + lvl_info.pop_level();
> +
> + if (inst->op != TGSI_OPCODE_ENDIF)
> + lvl_info.push_level();
> +
> break;
>
> default:
> @@ -5042,7 +5120,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> /* Any output might be written, so no copy propagation
> * from outputs across this instruction.
> */
> - for (int r = 0; r < this->next_temp; r++) {
> + lvl_info.get_level_range(&min_lvl, &max_lvl);
> + for (int r = min_lvl; r < max_lvl; r++) {
> for (int c = 0; c < 4; c++) {
> if (!acp[4 * r + c])
> continue;
> @@ -5062,7 +5141,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> }
>
> /* Clear where it's used as src. */
> - for (int r = 0; r < this->next_temp; r++) {
> + lvl_info.get_level_range(&min_lvl, &max_lvl);
> + for (int r = min_lvl; r < max_lvl; r++) {
> for (int c = 0; c < 4; c++) {
> if (!acp[4 * r + c])
> continue;
> @@ -5094,12 +5174,15 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> !inst->src[0].reladdr2 &&
> !inst->src[0].negate &&
> !inst->src[0].abs) {
> + level = lvl_info.get_level();
> for (int i = 0; i < 4; i++) {
> if (inst->dst[0].writemask & (1 << i)) {
> acp[4 * inst->dst[0].index + i] = inst;
> acp_level[4 * inst->dst[0].index + i] = level;
> }
> }
> +
> + lvl_info.update_level_range(inst->dst[0].index);
> }
> }
>
> @@ -5130,8 +5213,10 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
> glsl_to_tgsi_instruction *,
> this->next_temp * 4);
> int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
> - int level = 0;
> + int level;
> int removed = 0;
> + int min_lvl, max_lvl;
> + class per_level_info lvl_info(mem_ctx, this->next_temp);
class is unnecessary.
Apart from the comments above, the patch is
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
> assert(inst->dst[0].file != PROGRAM_TEMPORARY
> @@ -5158,7 +5243,9 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
> /* Promote the recorded level of all channels written inside the
> * preceding if or else block to the level above the if/else block.
> */
> - for (int r = 0; r < this->next_temp; r++) {
> + lvl_info.get_level_range(&min_lvl, &max_lvl);
> + level = lvl_info.get_level();
> + for (int r = min_lvl; r < max_lvl; r++) {
> for (int c = 0; c < 4; c++) {
> if (!writes[4 * r + c])
> continue;
> @@ -5167,13 +5254,18 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
> write_level[4 * r + c] = level-1;
> }
> }
> - if(inst->op == TGSI_OPCODE_ENDIF)
> - --level;
> +
> + lvl_info.pop_level();
> +
> + if(inst->op != TGSI_OPCODE_ENDIF) {
> + lvl_info.push_level();
> + }
> break;
>
> case TGSI_OPCODE_IF:
> case TGSI_OPCODE_UIF:
> - ++level;
> + lvl_info.push_level();
> +
> /* fallthrough to default case to mark the condition as read */
> default:
> /* Continuing the block, clear any channels from the write array that
> @@ -5227,6 +5319,8 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
> for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
> if (inst->dst[i].file == PROGRAM_TEMPORARY &&
> !inst->dst[i].reladdr) {
> + level = lvl_info.get_level();
> +
> for (int c = 0; c < 4; c++) {
> if (inst->dst[i].writemask & (1 << c)) {
> if (writes[4 * inst->dst[i].index + c]) {
> @@ -5237,6 +5331,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
> }
> writes[4 * inst->dst[i].index + c] = inst;
> write_level[4 * inst->dst[i].index + c] = level;
> + lvl_info.update_level_range(inst->dst[i].index);
> }
> }
> }
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev
mailing list