[Mesa-dev] [PATCH] radeonsi: expose performance counters as 64 bit

Marek Olšák maraeo at gmail.com
Sun May 8 09:47:28 UTC 2016


Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Sun, May 8, 2016 at 12:06 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> This is useful for shader-related counters, since they tend to quickly
> exceed 32 bits.
> ---
>  src/gallium/drivers/radeon/r600_perfcounter.c | 22 +++++++++++-----------
>  src/gallium/drivers/radeonsi/si_perfcounter.c | 13 ++++++++-----
>  2 files changed, 19 insertions(+), 16 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c b/src/gallium/drivers/radeon/r600_perfcounter.c
> index 9ab17d9..af9a692 100644
> --- a/src/gallium/drivers/radeon/r600_perfcounter.c
> +++ b/src/gallium/drivers/radeon/r600_perfcounter.c
> @@ -84,8 +84,8 @@ struct r600_pc_group {
>
>  struct r600_pc_counter {
>         unsigned base;
> -       unsigned dwords;
> -       unsigned stride;
> +       unsigned qwords;
> +       unsigned stride; /* in uint64s */
>  };
>
>  #define R600_PC_SHADERS_WINDOWING (1 << 31)
> @@ -172,7 +172,7 @@ static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
>                                 pc->emit_read(ctx, block,
>                                               group->num_counters, group->selectors,
>                                               buffer, va);
> -                               va += 4 * group->num_counters;
> +                               va += sizeof(uint64_t) * group->num_counters;
>                         } while (group->instance < 0 && ++instance < block->num_instances);
>                 } while (++se < se_end);
>         }
> @@ -194,15 +194,15 @@ static void r600_pc_query_add_result(struct r600_common_context *ctx,
>                                      union pipe_query_result *result)
>  {
>         struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
> -       uint32_t *results = buffer;
> +       uint64_t *results = buffer;
>         unsigned i, j;
>
>         for (i = 0; i < query->num_counters; ++i) {
>                 struct r600_pc_counter *counter = &query->counters[i];
>
> -               for (j = 0; j < counter->dwords; ++j) {
> +               for (j = 0; j < counter->qwords; ++j) {
>                         uint32_t value = results[counter->base + j * counter->stride];
> -                       result->batch[i].u32 += value;
> +                       result->batch[i].u64 += value;
>                 }
>         }
>  }
> @@ -361,7 +361,7 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
>                         instances *= block->num_instances;
>
>                 group->result_base = i;
> -               query->b.result_size += 4 * instances * group->num_counters;
> +               query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
>                 i += instances * group->num_counters;
>
>                 pc->get_size(block, group->num_counters, group->selectors,
> @@ -401,11 +401,11 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
>                 counter->base = group->result_base + j;
>                 counter->stride = group->num_counters;
>
> -               counter->dwords = 1;
> +               counter->qwords = 1;
>                 if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
> -                       counter->dwords = screen->info.max_se;
> +                       counter->qwords = screen->info.max_se;
>                 if (group->instance < 0)
> -                       counter->dwords *= block->num_instances;
> +                       counter->qwords *= block->num_instances;
>         }
>
>         if (!r600_query_hw_init(rctx, &query->b))
> @@ -535,7 +535,7 @@ int r600_get_perfcounter_info(struct r600_common_screen *screen,
>         info->name = block->selector_names + sub * block->selector_name_stride;
>         info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
>         info->max_value.u64 = 0;
> -       info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
> +       info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
>         info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
>         info->group_id = base_gid + sub / block->num_selectors;
>         info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
> diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
> index 04da197..96007a5 100644
> --- a/src/gallium/drivers/radeonsi/si_perfcounter.c
> +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
> @@ -208,6 +208,7 @@ static struct si_pc_block_base cik_PA_SC = {
>         .layout = SI_PC_MULTI_ALTERNATE,
>  };
>
> +/* According to docs, PA_SU counters are only 48 bits wide. */
>  static struct si_pc_block_base cik_PA_SU = {
>         .name = "PA_SU",
>         .num_counters = 4,
> @@ -651,24 +652,26 @@ static void si_pc_emit_read(struct r600_common_context *ctx,
>
>                         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
>                         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
> -                                       COPY_DATA_DST_SEL(COPY_DATA_MEM));
> +                                       COPY_DATA_DST_SEL(COPY_DATA_MEM) |
> +                                       COPY_DATA_COUNT_SEL); /* 64 bits */
>                         radeon_emit(cs, reg >> 2);
>                         radeon_emit(cs, 0); /* unused */
>                         radeon_emit(cs, va);
>                         radeon_emit(cs, va >> 32);
> -                       va += 4;
> +                       va += sizeof(uint64_t);
>                         reg += reg_delta;
>                 }
>         } else {
>                 for (idx = 0; idx < count; ++idx) {
>                         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
>                         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
> -                                       COPY_DATA_DST_SEL(COPY_DATA_MEM));
> +                                       COPY_DATA_DST_SEL(COPY_DATA_MEM) |
> +                                       COPY_DATA_COUNT_SEL);
>                         radeon_emit(cs, 0); /* immediate */
> -                       radeon_emit(cs, 0); /* unused */
> +                       radeon_emit(cs, 0);
>                         radeon_emit(cs, va);
>                         radeon_emit(cs, va >> 32);
> -                       va += 4;
> +                       va += sizeof(uint64_t);
>                 }
>         }
>  }
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list