[Mesa-dev] [PATCH] radeonsi: expose performance counters as 64 bit
Marek Olšák
maraeo at gmail.com
Sun May 8 09:47:28 UTC 2016
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Marek
On Sun, May 8, 2016 at 12:06 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> This is useful for shader-related counters, since they tend to quickly
> exceed 32 bits.
> ---
> src/gallium/drivers/radeon/r600_perfcounter.c | 22 +++++++++++-----------
> src/gallium/drivers/radeonsi/si_perfcounter.c | 13 ++++++++-----
> 2 files changed, 19 insertions(+), 16 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c b/src/gallium/drivers/radeon/r600_perfcounter.c
> index 9ab17d9..af9a692 100644
> --- a/src/gallium/drivers/radeon/r600_perfcounter.c
> +++ b/src/gallium/drivers/radeon/r600_perfcounter.c
> @@ -84,8 +84,8 @@ struct r600_pc_group {
>
> struct r600_pc_counter {
> unsigned base;
> - unsigned dwords;
> - unsigned stride;
> + unsigned qwords;
> + unsigned stride; /* in uint64s */
> };
>
> #define R600_PC_SHADERS_WINDOWING (1 << 31)
> @@ -172,7 +172,7 @@ static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
> pc->emit_read(ctx, block,
> group->num_counters, group->selectors,
> buffer, va);
> - va += 4 * group->num_counters;
> + va += sizeof(uint64_t) * group->num_counters;
> } while (group->instance < 0 && ++instance < block->num_instances);
> } while (++se < se_end);
> }
> @@ -194,15 +194,15 @@ static void r600_pc_query_add_result(struct r600_common_context *ctx,
> union pipe_query_result *result)
> {
> struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
> - uint32_t *results = buffer;
> + uint64_t *results = buffer;
> unsigned i, j;
>
> for (i = 0; i < query->num_counters; ++i) {
> struct r600_pc_counter *counter = &query->counters[i];
>
> - for (j = 0; j < counter->dwords; ++j) {
> + for (j = 0; j < counter->qwords; ++j) {
> uint32_t value = results[counter->base + j * counter->stride];
> - result->batch[i].u32 += value;
> + result->batch[i].u64 += value;
> }
> }
> }
> @@ -361,7 +361,7 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
> instances *= block->num_instances;
>
> group->result_base = i;
> - query->b.result_size += 4 * instances * group->num_counters;
> + query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
> i += instances * group->num_counters;
>
> pc->get_size(block, group->num_counters, group->selectors,
> @@ -401,11 +401,11 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
> counter->base = group->result_base + j;
> counter->stride = group->num_counters;
>
> - counter->dwords = 1;
> + counter->qwords = 1;
> if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
> - counter->dwords = screen->info.max_se;
> + counter->qwords = screen->info.max_se;
> if (group->instance < 0)
> - counter->dwords *= block->num_instances;
> + counter->qwords *= block->num_instances;
> }
>
> if (!r600_query_hw_init(rctx, &query->b))
> @@ -535,7 +535,7 @@ int r600_get_perfcounter_info(struct r600_common_screen *screen,
> info->name = block->selector_names + sub * block->selector_name_stride;
> info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
> info->max_value.u64 = 0;
> - info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
> + info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
> info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
> info->group_id = base_gid + sub / block->num_selectors;
> info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
> diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
> index 04da197..96007a5 100644
> --- a/src/gallium/drivers/radeonsi/si_perfcounter.c
> +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
> @@ -208,6 +208,7 @@ static struct si_pc_block_base cik_PA_SC = {
> .layout = SI_PC_MULTI_ALTERNATE,
> };
>
> +/* According to docs, PA_SU counters are only 48 bits wide. */
> static struct si_pc_block_base cik_PA_SU = {
> .name = "PA_SU",
> .num_counters = 4,
> @@ -651,24 +652,26 @@ static void si_pc_emit_read(struct r600_common_context *ctx,
>
> radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
> radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
> - COPY_DATA_DST_SEL(COPY_DATA_MEM));
> + COPY_DATA_DST_SEL(COPY_DATA_MEM) |
> + COPY_DATA_COUNT_SEL); /* 64 bits */
> radeon_emit(cs, reg >> 2);
> radeon_emit(cs, 0); /* unused */
> radeon_emit(cs, va);
> radeon_emit(cs, va >> 32);
> - va += 4;
> + va += sizeof(uint64_t);
> reg += reg_delta;
> }
> } else {
> for (idx = 0; idx < count; ++idx) {
> radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
> radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
> - COPY_DATA_DST_SEL(COPY_DATA_MEM));
> + COPY_DATA_DST_SEL(COPY_DATA_MEM) |
> + COPY_DATA_COUNT_SEL);
> radeon_emit(cs, 0); /* immediate */
> - radeon_emit(cs, 0); /* unused */
> + radeon_emit(cs, 0);
> radeon_emit(cs, va);
> radeon_emit(cs, va >> 32);
> - va += 4;
> + va += sizeof(uint64_t);
> }
> }
> }
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list