Mesa (main): ac/rgp: add support for dumping SPM data
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Nov 11 10:39:00 UTC 2021
Module: Mesa
Branch: main
Commit: 3e7bac80ce5cc1de7d13dfc6ae17526a03ddffcc
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3e7bac80ce5cc1de7d13dfc6ae17526a03ddffcc
Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date: Mon May 31 16:42:36 2021 +0200
ac/rgp: add support for dumping SPM data
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13704>
---
src/amd/common/ac_rgp.c | 120 +++++++++++++++++++++++++++++++-
src/amd/common/ac_rgp.h | 4 +-
src/amd/vulkan/layers/radv_sqtt_layer.c | 2 +-
src/gallium/drivers/radeonsi/si_sqtt.c | 2 +-
4 files changed, 123 insertions(+), 5 deletions(-)
diff --git a/src/amd/common/ac_rgp.c b/src/amd/common/ac_rgp.c
index 85fbff0f837..24ea488cd5b 100644
--- a/src/amd/common/ac_rgp.c
+++ b/src/amd/common/ac_rgp.c
@@ -29,6 +29,7 @@
#include "util/u_process.h"
#include "util/u_math.h"
+#include "ac_spm.h"
#include "ac_sqtt.h"
#include "ac_gpu_info.h"
#ifdef _WIN32
@@ -892,8 +893,118 @@ static enum elf_gfxip_level ac_chip_class_to_elf_gfxip_level(enum chip_class chi
}
}
+/**
+ * SQTT SPM DB info.
+ *
+ * Per-counter record of the SPM DB chunk. ac_sqtt_dump_spm() emits one of
+ * these for every SPM counter, after the timestamps and before the counter
+ * values. The struct is written to the file as-is with fwrite(), so the
+ * order and types of the fields define the layout in the file.
+ *
+ * NOTE(review): ac_sqtt_dump_spm() starts data_offset at the size of the
+ * timestamp array plus the size of all counter info records, i.e. it does
+ * not count sizeof(struct sqtt_file_chunk_spm_db) itself - confirm against
+ * the RGP spec which origin "beginning of the chunk" refers to.
+ */
+struct sqtt_spm_counter_info {
+ enum ac_pc_gpu_block block; /* filled from the counter's gpu_block */
+ uint32_t instance; /* filled from the counter's instance */
+ uint32_t data_offset; /* offset of counter from the beginning of the chunk */
+ uint32_t event_index; /* index of counter within the block (the counter's event_id) */
+};
+
+struct sqtt_file_chunk_spm_db {
+ struct sqtt_file_chunk_header header; /* chunk header: chunk id, version and size in bytes */
+ uint32_t flags; /* ac_sqtt_fill_spm_db() always stores 0 */
+ uint32_t num_timestamps; /* number of SPM samples; one 64-bit timestamp is written per sample */
+ uint32_t num_spm_counter_info; /* number of struct sqtt_spm_counter_info records in the chunk */
+ uint32_t sample_interval; /* copied from spm_trace->sample_interval */
+};
+
+static_assert(sizeof(struct sqtt_file_chunk_spm_db) == 32,
+ "sqtt_file_chunk_spm_db doesn't match RGP spec"); /* the struct is fwrite'n verbatim, so its size must stay 32 bytes */
+
+/**
+ * Populate the descriptor of the SPM DB chunk.
+ *
+ * \param spm_trace   SPM trace providing the counter count and sample interval.
+ * \param chunk       Descriptor to fill; only the fields assigned below are
+ *                    touched, everything else is left as the caller set it.
+ * \param num_samples Number of samples (and therefore timestamps) in the chunk.
+ * \param chunk_size  Total size of the chunk in bytes, descriptor included.
+ */
+static void ac_sqtt_fill_spm_db(const struct ac_spm_trace_data *spm_trace,
+                                struct sqtt_file_chunk_spm_db *chunk,
+                                uint32_t num_samples,
+                                uint32_t chunk_size)
+{
+   struct sqtt_file_chunk_header *hdr = &chunk->header;
+
+   /* Chunk header: first (index 0) SPM DB chunk, format version 1.3. */
+   hdr->chunk_id.type = SQTT_FILE_CHUNK_TYPE_SPM_DB;
+   hdr->chunk_id.index = 0;
+   hdr->major_version = 1;
+   hdr->minor_version = 3;
+   hdr->size_in_bytes = chunk_size;
+
+   /* Description of the payload that follows the descriptor. */
+   chunk->sample_interval = spm_trace->sample_interval;
+   chunk->num_spm_counter_info = spm_trace->num_counters;
+   chunk->num_timestamps = num_samples;
+   chunk->flags = 0;
+}
+
+/**
+ * Write the SPM DB chunk at the current position of \p output.
+ *
+ * The chunk is laid out as:
+ *   1. struct sqtt_file_chunk_spm_db (descriptor),
+ *   2. one 64-bit timestamp per sample,
+ *   3. one struct sqtt_spm_counter_info per counter,
+ *   4. for each counter, one 16-bit value per sample.
+ *
+ * The descriptor needs the final chunk size, so room for it is skipped
+ * first and it is written last by seeking back.
+ *
+ * \param spm_trace   SPM trace to dump; its buffer starts with 32 reserved
+ *                    bytes followed by the samples.
+ * \param file_offset Absolute offset in \p output where the chunk starts;
+ *                    the stream is expected to be positioned there already.
+ * \param output      Destination file. On return it is positioned right
+ *                    after the chunk. I/O errors are not reported.
+ */
+static void ac_sqtt_dump_spm(const struct ac_spm_trace_data *spm_trace,
+                             size_t file_offset,
+                             FILE *output)
+{
+   uint32_t sample_size_in_bytes = ac_spm_get_sample_size(spm_trace);
+   uint32_t num_samples = ac_spm_get_num_samples(spm_trace);
+   uint8_t *spm_data_ptr = (uint8_t *)spm_trace->ptr;
+   /* Zero-initialized: the whole struct is written to the file, and
+    * ac_sqtt_fill_spm_db() only assigns some of the header sub-fields.
+    */
+   struct sqtt_file_chunk_spm_db spm_db = {0};
+   size_t file_spm_db_offset = file_offset;
+
+   /* Leave room for the descriptor; it is written once the size is known. */
+   fseek(output, sizeof(struct sqtt_file_chunk_spm_db), SEEK_CUR);
+   file_offset += sizeof(struct sqtt_file_chunk_spm_db);
+
+   /* Skip the reserved 32 bytes of data at beginning. */
+   spm_data_ptr += 32;
+
+   /* SPM timestamps: the first qword of every sample. */
+   uint32_t sample_size_in_qwords = sample_size_in_bytes / sizeof(uint64_t);
+   uint64_t *timestamp_ptr = (uint64_t *)spm_data_ptr;
+
+   for (uint32_t s = 0; s < num_samples; s++) {
+      /* Multiply in 64 bits so that the index cannot wrap at 2^32. */
+      uint64_t index = (uint64_t)s * sample_size_in_qwords;
+      uint64_t timestamp = timestamp_ptr[index];
+
+      file_offset += sizeof(timestamp);
+      fwrite(&timestamp, sizeof(timestamp), 1, output);
+   }
+
+   /* SPM counter info. Each counter owns num_samples 16-bit values; the
+    * values of the first counter start right after the timestamps and the
+    * counter info records.
+    */
+   uint64_t counter_values_size = num_samples * sizeof(uint16_t);
+   uint64_t counter_values_offset = num_samples * sizeof(uint64_t) +
+      spm_trace->num_counters * sizeof(struct sqtt_spm_counter_info);
+
+   for (uint32_t c = 0; c < spm_trace->num_counters; c++) {
+      struct sqtt_spm_counter_info cntr_info = {
+         .block = spm_trace->counters[c].gpu_block,
+         .instance = spm_trace->counters[c].instance,
+         .data_offset = counter_values_offset,
+         .event_index = spm_trace->counters[c].event_id,
+      };
+
+      file_offset += sizeof(cntr_info);
+      fwrite(&cntr_info, sizeof(cntr_info), 1, output);
+
+      counter_values_offset += counter_values_size;
+   }
+
+   /* SPM counter values, grouped per counter (all samples of counter 0,
+    * then all samples of counter 1, ...). counters[c].offset is used as an
+    * index in 16-bit units inside one sample.
+    */
+   uint32_t sample_size_in_hwords = sample_size_in_bytes / sizeof(uint16_t);
+   uint16_t *counter_values_ptr = (uint16_t *)spm_data_ptr;
+
+   for (uint32_t c = 0; c < spm_trace->num_counters; c++) {
+      uint64_t offset = spm_trace->counters[c].offset;
+
+      for (uint32_t s = 0; s < num_samples; s++) {
+         /* Multiply in 64 bits so that the index cannot wrap at 2^32. */
+         uint64_t index = offset + ((uint64_t)s * sample_size_in_hwords);
+         uint16_t value = counter_values_ptr[index];
+
+         file_offset += sizeof(value);
+         fwrite(&value, sizeof(value), 1, output);
+      }
+   }
+
+   /* SQTT SPM DB chunk: go back, write the descriptor with the final chunk
+    * size, then restore the position to the end of the chunk.
+    */
+   ac_sqtt_fill_spm_db(spm_trace, &spm_db, num_samples,
+                       file_offset - file_spm_db_offset);
+   fseek(output, file_spm_db_offset, SEEK_SET);
+   fwrite(&spm_db, sizeof(struct sqtt_file_chunk_spm_db), 1, output);
+   fseek(output, file_offset, SEEK_SET);
+}
+
static void ac_sqtt_dump_data(struct radeon_info *rad_info,
struct ac_thread_trace *thread_trace,
+ const struct ac_spm_trace_data *spm_trace,
FILE *output)
{
struct ac_thread_trace_data *thread_trace_data = thread_trace->data;
@@ -1071,10 +1182,15 @@ static void ac_sqtt_dump_data(struct radeon_info *rad_info,
fwrite(se->data_ptr, size, 1, output);
}
}
+
+ if (spm_trace) {
+ ac_sqtt_dump_spm(spm_trace, file_offset, output);
+ }
}
int ac_dump_rgp_capture(struct radeon_info *info,
- struct ac_thread_trace *thread_trace)
+ struct ac_thread_trace *thread_trace,
+ const struct ac_spm_trace_data *spm_trace)
{
char filename[2048];
struct tm now;
@@ -1092,7 +1208,7 @@ int ac_dump_rgp_capture(struct radeon_info *info,
if (!f)
return -1;
- ac_sqtt_dump_data(info, thread_trace, f);
+ ac_sqtt_dump_data(info, thread_trace, spm_trace, f);
fprintf(stderr, "RGP capture saved to '%s'\n", filename);
diff --git a/src/amd/common/ac_rgp.h b/src/amd/common/ac_rgp.h
index 9e38556a090..b53bb02de81 100644
--- a/src/amd/common/ac_rgp.h
+++ b/src/amd/common/ac_rgp.h
@@ -34,6 +34,7 @@
struct radeon_info;
struct ac_thread_trace;
struct ac_thread_trace_data;
+struct ac_spm_trace_data;
enum rgp_hardware_stages {
RGP_HW_STAGE_VS = 0,
@@ -189,7 +190,8 @@ struct rgp_clock_calibration {
int
ac_dump_rgp_capture(struct radeon_info *info,
- struct ac_thread_trace *thread_trace);
+ struct ac_thread_trace *thread_trace,
+ const struct ac_spm_trace_data *spm_trace);
void
ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start,
diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c
index 2b9623a811f..5c8cda63809 100644
--- a/src/amd/vulkan/layers/radv_sqtt_layer.c
+++ b/src/amd/vulkan/layers/radv_sqtt_layer.c
@@ -363,7 +363,7 @@ radv_handle_thread_trace(VkQueue _queue)
radv_QueueWaitIdle(_queue);
if (radv_get_thread_trace(queue, &thread_trace)) {
- ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace);
+ ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace, NULL);
} else {
/* Trigger a new capture if the driver failed to get
* the trace because the buffer was too small.
diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c
index a5eb4f5851c..cad12160c14 100644
--- a/src/gallium/drivers/radeonsi/si_sqtt.c
+++ b/src/gallium/drivers/radeonsi/si_sqtt.c
@@ -710,7 +710,7 @@ si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
/* Wait for SQTT to finish and read back the bo */
if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence, PIPE_TIMEOUT_INFINITE) &&
si_get_thread_trace(sctx, &thread_trace)) {
- ac_dump_rgp_capture(&sctx->screen->info, &thread_trace);
+ ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, NULL);
} else {
fprintf(stderr, "Failed to read the trace\n");
}
More information about the mesa-commit
mailing list