Mesa (main): ac/rgp: add support for dumping SPM data

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Nov 11 10:39:00 UTC 2021


Module: Mesa
Branch: main
Commit: 3e7bac80ce5cc1de7d13dfc6ae17526a03ddffcc
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3e7bac80ce5cc1de7d13dfc6ae17526a03ddffcc

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Mon May 31 16:42:36 2021 +0200

ac/rgp: add support for dumping SPM data

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13704>

---

 src/amd/common/ac_rgp.c                 | 120 +++++++++++++++++++++++++++++++-
 src/amd/common/ac_rgp.h                 |   4 +-
 src/amd/vulkan/layers/radv_sqtt_layer.c |   2 +-
 src/gallium/drivers/radeonsi/si_sqtt.c  |   2 +-
 4 files changed, 123 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/ac_rgp.c b/src/amd/common/ac_rgp.c
index 85fbff0f837..24ea488cd5b 100644
--- a/src/amd/common/ac_rgp.c
+++ b/src/amd/common/ac_rgp.c
@@ -29,6 +29,7 @@
 #include "util/u_process.h"
 #include "util/u_math.h"
 
+#include "ac_spm.h"
 #include "ac_sqtt.h"
 #include "ac_gpu_info.h"
 #ifdef _WIN32
@@ -892,8 +893,118 @@ static enum elf_gfxip_level ac_chip_class_to_elf_gfxip_level(enum chip_class chi
    }
 }
 
+/**
+ * SQTT SPM DB info.
+ */
+struct sqtt_spm_counter_info {
+   enum ac_pc_gpu_block block;
+   uint32_t instance;
+   uint32_t data_offset; /* offset of counter from the beginning of the chunk */
+   uint32_t event_index; /* index of counter within the block */
+};
+
+struct sqtt_file_chunk_spm_db {
+   struct sqtt_file_chunk_header header;
+   uint32_t flags;
+   uint32_t num_timestamps;
+   uint32_t num_spm_counter_info;
+   uint32_t sample_interval;
+};
+
+static_assert(sizeof(struct sqtt_file_chunk_spm_db) == 32,
+              "sqtt_file_chunk_spm_db doesn't match RGP spec");
+
+static void ac_sqtt_fill_spm_db(const struct ac_spm_trace_data *spm_trace,
+                                struct sqtt_file_chunk_spm_db *chunk,
+                                uint32_t num_samples,
+                                uint32_t chunk_size)
+{
+   chunk->header.chunk_id.type = SQTT_FILE_CHUNK_TYPE_SPM_DB;
+   chunk->header.chunk_id.index = 0;
+   chunk->header.major_version = 1;
+   chunk->header.minor_version = 3;
+   chunk->header.size_in_bytes = chunk_size;
+
+   chunk->flags = 0;
+   chunk->num_timestamps = num_samples;
+   chunk->num_spm_counter_info = spm_trace->num_counters;
+   chunk->sample_interval = spm_trace->sample_interval;
+}
+
+static void ac_sqtt_dump_spm(const struct ac_spm_trace_data *spm_trace,
+                             size_t file_offset,
+                             FILE *output)
+{
+   uint32_t sample_size_in_bytes = ac_spm_get_sample_size(spm_trace);
+   uint32_t num_samples = ac_spm_get_num_samples(spm_trace);
+   uint8_t *spm_data_ptr = (uint8_t *)spm_trace->ptr;
+   struct sqtt_file_chunk_spm_db spm_db;
+   size_t file_spm_db_offset = file_offset;
+
+   fseek(output, sizeof(struct sqtt_file_chunk_spm_db), SEEK_CUR);
+   file_offset += sizeof(struct sqtt_file_chunk_spm_db);
+
+   /* Skip the reserved 32 bytes of data at beginning. */
+   spm_data_ptr += 32;
+
+   /* SPM timestamps. */
+   uint32_t sample_size_in_qwords = sample_size_in_bytes / sizeof(uint64_t);
+   uint64_t *timestamp_ptr = (uint64_t *)spm_data_ptr;
+
+   for (uint32_t s = 0; s < num_samples; s++) {
+      uint64_t index = s * sample_size_in_qwords;
+      uint64_t timestamp = timestamp_ptr[index];
+
+      file_offset += sizeof(timestamp);
+      fwrite(&timestamp, sizeof(timestamp), 1, output);
+   }
+
+   /* SPM counter info. */
+   uint64_t counter_values_size = num_samples * sizeof(uint16_t);
+   uint64_t counter_values_offset = num_samples * sizeof(uint64_t) +
+                                    spm_trace->num_counters * sizeof(struct sqtt_spm_counter_info);
+
+   for (uint32_t c = 0; c < spm_trace->num_counters; c++) {
+      struct sqtt_spm_counter_info cntr_info = {
+         .block = spm_trace->counters[c].gpu_block,
+         .instance = spm_trace->counters[c].instance,
+         .data_offset = counter_values_offset,
+         .event_index = spm_trace->counters[c].event_id,
+      };
+
+      file_offset += sizeof(cntr_info);
+      fwrite(&cntr_info, sizeof(cntr_info), 1, output);
+
+      counter_values_offset += counter_values_size;
+   }
+
+   /* SPM counter values. */
+   uint32_t sample_size_in_hwords = sample_size_in_bytes / sizeof(uint16_t);
+   uint16_t *counter_values_ptr = (uint16_t *)spm_data_ptr;
+
+   for (uint32_t c = 0; c < spm_trace->num_counters; c++) {
+      uint64_t offset = spm_trace->counters[c].offset;
+
+      for (uint32_t s = 0; s < num_samples; s++) {
+         uint64_t index = offset + (s * sample_size_in_hwords);
+         uint16_t value = counter_values_ptr[index];
+
+         file_offset += sizeof(value);
+         fwrite(&value, sizeof(value), 1, output);
+      }
+   }
+
+   /* SQTT SPM DB chunk. */
+   ac_sqtt_fill_spm_db(spm_trace, &spm_db, num_samples,
+                       file_offset - file_spm_db_offset);
+   fseek(output, file_spm_db_offset, SEEK_SET);
+   fwrite(&spm_db, sizeof(struct sqtt_file_chunk_spm_db), 1, output);
+   fseek(output, file_offset, SEEK_SET);
+}
+
 static void ac_sqtt_dump_data(struct radeon_info *rad_info,
                               struct ac_thread_trace *thread_trace,
+                              const struct ac_spm_trace_data *spm_trace,
                               FILE *output)
 {
    struct ac_thread_trace_data *thread_trace_data = thread_trace->data;
@@ -1071,10 +1182,15 @@ static void ac_sqtt_dump_data(struct radeon_info *rad_info,
          fwrite(se->data_ptr, size, 1, output);
       }
    }
+
+   if (spm_trace) {
+      ac_sqtt_dump_spm(spm_trace, file_offset, output);
+   }
 }
 
 int ac_dump_rgp_capture(struct radeon_info *info,
-                        struct ac_thread_trace *thread_trace)
+                        struct ac_thread_trace *thread_trace,
+                        const struct ac_spm_trace_data *spm_trace)
 {
    char filename[2048];
    struct tm now;
@@ -1092,7 +1208,7 @@ int ac_dump_rgp_capture(struct radeon_info *info,
    if (!f)
       return -1;
 
-   ac_sqtt_dump_data(info, thread_trace, f);
+   ac_sqtt_dump_data(info, thread_trace, spm_trace, f);
 
    fprintf(stderr, "RGP capture saved to '%s'\n", filename);
 
diff --git a/src/amd/common/ac_rgp.h b/src/amd/common/ac_rgp.h
index 9e38556a090..b53bb02de81 100644
--- a/src/amd/common/ac_rgp.h
+++ b/src/amd/common/ac_rgp.h
@@ -34,6 +34,7 @@
 struct radeon_info;
 struct ac_thread_trace;
 struct ac_thread_trace_data;
+struct ac_spm_trace_data;
 
 enum rgp_hardware_stages {
    RGP_HW_STAGE_VS = 0,
@@ -189,7 +190,8 @@ struct rgp_clock_calibration {
 
 int
 ac_dump_rgp_capture(struct radeon_info *info,
-                    struct ac_thread_trace *thread_trace);
+                    struct ac_thread_trace *thread_trace,
+                    const struct ac_spm_trace_data *spm_trace);
 
 void
 ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start,
diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c
index 2b9623a811f..5c8cda63809 100644
--- a/src/amd/vulkan/layers/radv_sqtt_layer.c
+++ b/src/amd/vulkan/layers/radv_sqtt_layer.c
@@ -363,7 +363,7 @@ radv_handle_thread_trace(VkQueue _queue)
       radv_QueueWaitIdle(_queue);
 
       if (radv_get_thread_trace(queue, &thread_trace)) {
-         ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace);
+         ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace, NULL);
       } else {
          /* Trigger a new capture if the driver failed to get
           * the trace because the buffer was too small.
diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c
index a5eb4f5851c..cad12160c14 100644
--- a/src/gallium/drivers/radeonsi/si_sqtt.c
+++ b/src/gallium/drivers/radeonsi/si_sqtt.c
@@ -710,7 +710,7 @@ si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
       /* Wait for SQTT to finish and read back the bo */
       if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence, PIPE_TIMEOUT_INFINITE) &&
           si_get_thread_trace(sctx, &thread_trace)) {
-         ac_dump_rgp_capture(&sctx->screen->info, &thread_trace);
+         ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, NULL);
       } else {
          fprintf(stderr, "Failed to read the trace\n");
       }



More information about the mesa-commit mailing list