Mesa (main): ac: add initial SPM support

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Nov 11 10:39:00 UTC 2021


Module: Mesa
Branch: main
Commit: e928f475ccf6534e09c2977314a5360cc4335c3c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e928f475ccf6534e09c2977314a5360cc4335c3c

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Mon May 31 16:38:34 2021 +0200

ac: add initial SPM support

SPM is a hardware feature that allows us to dump performance counters
at a sampling interval to a buffer. It is used by RGP to report cache
counters.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13704>

---

 src/amd/common/ac_perfcounter.c |  12 ++
 src/amd/common/ac_perfcounter.h |   3 +
 src/amd/common/ac_spm.c         | 368 ++++++++++++++++++++++++++++++++++++++++
 src/amd/common/ac_spm.h         | 125 ++++++++++++++
 src/amd/common/meson.build      |   2 +
 5 files changed, 510 insertions(+)

diff --git a/src/amd/common/ac_perfcounter.c b/src/amd/common/ac_perfcounter.c
index 42fd96b2944..139db14a4a8 100644
--- a/src/amd/common/ac_perfcounter.c
+++ b/src/amd/common/ac_perfcounter.c
@@ -1235,3 +1235,15 @@ void ac_destroy_perfcounters(struct ac_perfcounters *pc)
    }
    FREE(pc->blocks);
 }
+
+struct ac_pc_block *ac_pc_get_block(const struct ac_perfcounters *pc,
+                                    enum ac_pc_gpu_block gpu_block)
+{
+   /* Linear scan of pc->blocks; the first entry with a matching ID wins. */
+   struct ac_pc_block *found = NULL;
+   for (unsigned idx = 0; idx < pc->num_blocks && !found; idx++) {
+      if (pc->blocks[idx].b->b->gpu_block == gpu_block)
+         found = &pc->blocks[idx];
+   }
+   return found; /* NULL when no block matches */
+}
diff --git a/src/amd/common/ac_perfcounter.h b/src/amd/common/ac_perfcounter.h
index 6c109c9daf0..49416e420ad 100644
--- a/src/amd/common/ac_perfcounter.h
+++ b/src/amd/common/ac_perfcounter.h
@@ -193,6 +193,9 @@ struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,
 struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc,
                                     unsigned *index);
 
+struct ac_pc_block *ac_pc_get_block(const struct ac_perfcounters *pc,
+                                    enum ac_pc_gpu_block gpu_block);
+
 bool ac_init_block_names(const struct radeon_info *info,
                          const struct ac_perfcounters *pc,
                          struct ac_pc_block *block);
diff --git a/src/amd/common/ac_spm.c b/src/amd/common/ac_spm.c
new file mode 100644
index 00000000000..11d8fbc3705
--- /dev/null
+++ b/src/amd/common/ac_spm.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright 2021 Valve Corporation
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "ac_spm.h"
+
+#include "util/bitscan.h"
+#include "util/u_memory.h"
+
+static struct ac_spm_block_select *
+ac_spm_get_block_select(struct ac_spm_trace_data *spm_trace,
+                        const struct ac_pc_block *block)
+{
+   /* Reuse the existing entry when this GPU block was already selected. */
+   for (uint32_t idx = 0; idx < spm_trace->num_block_sel; idx++) {
+      struct ac_spm_block_select *cur = &spm_trace->block_sel[idx];
+      if (cur->b->b->b->gpu_block == block->b->b->gpu_block)
+         return cur;
+   }
+
+   /* Otherwise grow the array by one element; on realloc() failure the
+    * previous array is left untouched and NULL is returned. */
+   uint32_t grown_count = spm_trace->num_block_sel + 1;
+   struct ac_spm_block_select *grown =
+      realloc(spm_trace->block_sel, grown_count * sizeof(*grown));
+   if (!grown)
+      return NULL;
+
+   spm_trace->block_sel = grown;
+   spm_trace->num_block_sel = grown_count;
+
+   /* Zero the appended element, then bind it to the perfcounter block. */
+   struct ac_spm_block_select *fresh = &grown[grown_count - 1];
+   memset(fresh, 0, sizeof(*fresh));
+   fresh->b = block;
+   fresh->num_counters = block->b->b->num_spm_counters;
+
+   return fresh;
+}
+
+static void
+ac_spm_init_muxsel(const struct ac_pc_block *block,
+                   struct ac_spm_counter_info *counter,
+                   uint32_t spm_wire)
+{
+   /* Each wire has two slots: even counters take 2*wire, odd ones 2*wire+1. */
+   const uint32_t slot = counter->is_even ? 0 : 1;
+
+   counter->muxsel.shader_array = 0;
+   counter->muxsel.instance = 0;
+   counter->muxsel.block = block->b->b->spm_block_select;
+   counter->muxsel.counter = 2 * spm_wire + slot;
+}
+
+/* Reserve a free hardware counter slot for `counter` and program its event.
+ * On success, sets counter->is_even and *spm_wire and returns true; returns
+ * false when no slot is left for the counter's GPU block.
+ */
+static bool
+ac_spm_map_counter(struct ac_spm_trace_data *spm_trace,
+                   struct ac_spm_block_select *block_sel,
+                   struct ac_spm_counter_info *counter,
+                   uint32_t *spm_wire)
+{
+   if (block_sel->b->b->b->gpu_block == SQ) {
+      for (unsigned i = 0; i < ARRAY_SIZE(spm_trace->sq_block_sel); i++) {
+         struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[i];
+         struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0];
+         if (i < spm_trace->num_used_sq_block_sel)
+            continue; /* this SQ entry is already in use */
+
+         /* SQ doesn't support 16-bit counters. */
+         cntr_sel->sel0 |= S_036700_PERF_SEL(counter->event_id) |
+                           S_036700_SPM_MODE(3) | /* 32-bit clamp */
+                           S_036700_PERF_MODE(0);
+         cntr_sel->active |= 0x3;
+         /* 32-bit counters are always even. */
+         counter->is_even = true;
+         /* One wire per SQ module. */
+         *spm_wire = i;
+
+         spm_trace->num_used_sq_block_sel++;
+         return true;
+      }
+   } else {
+      /* Generic blocks. NOTE(review): every path of the loop body returns, so
+       * only counters[0] is ever examined -- confirm this is intended. */
+      for (unsigned i = 0; i < block_sel->num_counters; i++) {
+         struct ac_spm_counter_select *cntr_sel = &block_sel->counters[i];
+         int index = ffs(~cntr_sel->active) - 1; /* ffs() is 1-based */
+
+         switch (index) {
+         case 0: /* use S_037004_PERF_SEL */
+            cntr_sel->sel0 |= S_037004_PERF_SEL(counter->event_id) |
+                              S_037004_CNTR_MODE(1) | /* 16-bit clamp */
+                              S_037004_PERF_MODE(0); /* accum */
+            break;
+         case 1: /* use S_037004_PERF_SEL1 */
+            cntr_sel->sel0 |= S_037004_PERF_SEL1(counter->event_id) |
+                              S_037004_PERF_MODE1(0);
+            break;
+         case 2: /* use S_037008_PERF_SEL2 */
+            cntr_sel->sel1 |= S_037008_PERF_SEL2(counter->event_id) |
+                              S_037008_PERF_MODE2(0);
+            break;
+         case 3: /* use S_037008_PERF_SEL3 */
+            cntr_sel->sel1 |= S_037008_PERF_SEL3(counter->event_id) |
+                              S_037008_PERF_MODE3(0);
+            break;
+         default: /* all four 16-bit counters are already in use */
+            return false;
+         }
+
+         /* Mark this 16-bit counter as used. */
+         cntr_sel->active |= 1 << index;
+         /* Determine if the counter is even or odd. */
+         counter->is_even = !(index % 2);
+         /* Determine the SPM wire (lower 16-bits for even, upper for odd). */
+         *spm_wire = !counter->is_even + i;
+         return true;
+      }
+   }
+
+   return false;
+}
+
+/* Register one requested counter: validate it, store it in the next entry of
+ * spm_trace->counters, reserve a hardware counter slot and set up its muxsel.
+ *
+ * Returns false when a check or a later step fails; every failure except a
+ * failed block-select allocation is reported on stderr.
+ */
+static bool
+ac_spm_add_counter(const struct ac_perfcounters *pc,
+                   struct ac_spm_trace_data *spm_trace,
+                   const struct ac_spm_counter_create_info *info)
+{
+   struct ac_pc_block *pc_block = ac_pc_get_block(pc, info->gpu_block);
+   uint32_t wire;
+
+   /* The GPU block must exist in the perfcounter tables. */
+   if (!pc_block) {
+      fprintf(stderr, "ac/spm: Invalid GPU block.\n");
+      return false;
+   }
+
+   /* NOTE(review): a value equal to the limit passes both checks below. */
+   if (info->instance > pc_block->num_instances) {
+      fprintf(stderr, "ac/spm: Invalid instance ID.\n");
+      return false;
+   }
+
+   if (info->event_id > pc_block->b->selectors) {
+      fprintf(stderr, "ac/spm: Invalid event ID.\n");
+      return false;
+   }
+
+   /* Claim the next entry; it stays claimed even if a later step fails. */
+   struct ac_spm_counter_info *entry = &spm_trace->counters[spm_trace->num_counters++];
+
+   entry->event_id = info->event_id;
+   entry->instance = info->instance;
+   entry->gpu_block = info->gpu_block;
+
+   /* Look up (or create) the selection state of this GPU block. */
+   struct ac_spm_block_select *sel = ac_spm_get_block_select(spm_trace, pc_block);
+   if (!sel)
+      return false;
+
+   /* Reserve a hardware slot; this also sets entry->is_even and the wire. */
+   if (!ac_spm_map_counter(spm_trace, sel, entry, &wire)) {
+      fprintf(stderr, "ac/spm: No free slots available!\n");
+      return false;
+   }
+
+   /* Blocks flagged AC_PC_BLOCK_SE always use the SE0 segment for now (XXX). */
+   entry->segment_type = (pc_block->b->b->flags & AC_PC_BLOCK_SE)
+                            ? AC_SPM_SEGMENT_TYPE_SE0
+                            : AC_SPM_SEGMENT_TYPE_GLOBAL;
+
+   /* Configure the muxsel for SPM. */
+   ac_spm_init_muxsel(pc_block, entry, wire);
+
+   return true;
+}
+
+/* Build the SPM configuration for the requested counters: map each one to a
+ * hardware slot, size one muxsel RAM per segment and place every counter in
+ * it, storing its position (in 16-bit counter units) in counter->offset.
+ * `info` is unused here. Returns false on failure; nothing is freed then.
+ */
+bool ac_init_spm(const struct radeon_info *info,
+                 const struct ac_perfcounters *pc,
+                 unsigned num_counters,
+                 const struct ac_spm_counter_create_info *counters,
+                 struct ac_spm_trace_data *spm_trace)
+{
+   spm_trace->counters = CALLOC(num_counters, sizeof(*spm_trace->counters));
+   if (!spm_trace->counters)
+      return false;
+
+   for (unsigned i = 0; i < num_counters; i++) {
+      if (!ac_spm_add_counter(pc, spm_trace, &counters[i])) {
+         fprintf(stderr, "ac/spm: Failed to add SPM counter (%u).\n", i);
+         return false;
+      }
+   }
+
+   /* Determine the segment size and create a muxsel ram for every segment. */
+   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
+      unsigned num_even_counters = 0, num_odd_counters = 0;
+
+      if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
+         /* The global segment always start with a 64-bit timestamp. */
+         num_even_counters += AC_SPM_GLOBAL_TIMESTAMP_COUNTERS;
+      }
+
+      /* Count the number of even/odd counters for this segment. */
+      for (unsigned c = 0; c < spm_trace->num_counters; c++) {
+         struct ac_spm_counter_info *counter = &spm_trace->counters[c];
+         if (counter->segment_type != s)
+            continue;
+         if (counter->is_even) {
+            num_even_counters++;
+         } else {
+            num_odd_counters++;
+         }
+      }
+
+      /* Even counters fill lines 0, 2, 4, ... and odd ones lines 1, 3, 5, ... */
+      unsigned even_lines =
+         DIV_ROUND_UP(num_even_counters, AC_SPM_NUM_COUNTER_PER_MUXSEL);
+      unsigned odd_lines =
+         DIV_ROUND_UP(num_odd_counters, AC_SPM_NUM_COUNTER_PER_MUXSEL);
+      unsigned num_lines = (even_lines > odd_lines) ? (2 * even_lines - 1) : (2 * odd_lines);
+
+      /* A zero-sized calloc() may legitimately return NULL: only a non-empty
+       * segment that could not be allocated is an error. */
+      spm_trace->muxsel_lines[s] = CALLOC(num_lines, sizeof(*spm_trace->muxsel_lines[s]));
+      if (num_lines && !spm_trace->muxsel_lines[s])
+         return false;
+      spm_trace->num_muxsel_lines[s] = num_lines;
+   }
+
+   /* RLC uses the following order: Global, SE0, SE1, SE2, SE3. */
+   const enum ac_spm_segment_type ordered_segment[AC_SPM_SEGMENT_TYPE_COUNT] =
+   {
+      AC_SPM_SEGMENT_TYPE_GLOBAL,
+      AC_SPM_SEGMENT_TYPE_SE0,
+      AC_SPM_SEGMENT_TYPE_SE1,
+      AC_SPM_SEGMENT_TYPE_SE2,
+      AC_SPM_SEGMENT_TYPE_SE3,
+   };
+
+   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
+      if (!spm_trace->muxsel_lines[s])
+         continue;
+
+      uint32_t segment_offset = 0;
+      for (unsigned i = 0; s != ordered_segment[i]; i++) {
+         segment_offset += spm_trace->num_muxsel_lines[ordered_segment[i]] *
+                           AC_SPM_NUM_COUNTER_PER_MUXSEL;
+      }
+
+      uint32_t even_counter_idx = 0, even_line_idx = 0;
+      uint32_t odd_counter_idx = 0, odd_line_idx = 1;
+
+      /* Add the global timestamps first. */
+      if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
+         struct ac_spm_muxsel global_timestamp_muxsel = {
+            .counter = 0x30,
+            .block = 0x3,
+            .shader_array = 0,
+            .instance = 0x1e,
+         };
+         for (unsigned i = 0; i < AC_SPM_GLOBAL_TIMESTAMP_COUNTERS; i++) {
+            spm_trace->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx++] = global_timestamp_muxsel;
+         }
+      }
+
+      for (unsigned i = 0; i < spm_trace->num_counters; i++) {
+         struct ac_spm_counter_info *counter = &spm_trace->counters[i];
+         if (counter->segment_type != s)
+            continue;
+         if (counter->is_even) {
+            counter->offset = segment_offset + even_line_idx *
+                              AC_SPM_NUM_COUNTER_PER_MUXSEL + even_counter_idx;
+            spm_trace->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx] = spm_trace->counters[i].muxsel;
+            if (++even_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) {
+               even_counter_idx = 0;
+               even_line_idx += 2;
+            }
+         } else {
+            counter->offset = segment_offset + odd_line_idx *
+                              AC_SPM_NUM_COUNTER_PER_MUXSEL + odd_counter_idx;
+            spm_trace->muxsel_lines[s][odd_line_idx].muxsel[odd_counter_idx] = spm_trace->counters[i].muxsel;
+            if (++odd_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) {
+               odd_counter_idx = 0;
+               odd_line_idx += 2;
+            }
+         }
+      }
+   }
+
+   return true;
+}
+
+void ac_destroy_spm(struct ac_spm_trace_data *spm_trace)
+{
+   /* The allocations are independent, so they can be freed in any order. */
+   FREE(spm_trace->counters);
+   FREE(spm_trace->block_sel);
+   for (unsigned seg = 0; seg < AC_SPM_SEGMENT_TYPE_COUNT; seg++)
+      FREE(spm_trace->muxsel_lines[seg]);
+}
+
+uint32_t ac_spm_get_sample_size(const struct ac_spm_trace_data *spm_trace)
+{
+   /* Sum the muxsel lines of every segment, then convert lines to bytes. */
+   uint32_t total_lines = 0;
+   for (unsigned seg = 0; seg < AC_SPM_SEGMENT_TYPE_COUNT; seg++)
+      total_lines += spm_trace->num_muxsel_lines[seg];
+
+   /* AC_SPM_MUXSEL_LINE_SIZE is in dwords, hence the factor of 4. */
+   return total_lines * AC_SPM_MUXSEL_LINE_SIZE * 4;
+}
+
+/* Return the number of complete samples the hardware wrote to the buffer.
+ *
+ * The first dword at spm_trace->ptr is read as the byte count written by the
+ * hardware. Returns 0 when no muxsel line is configured (sample size of 0);
+ * aborts when the written data is not a whole number of samples.
+ */
+uint32_t ac_spm_get_num_samples(const struct ac_spm_trace_data *spm_trace)
+{
+   /* Bytes per 256-bit line: 16 counters of 16 bits each. */
+   const uint32_t line_size = 2 * AC_SPM_NUM_COUNTER_PER_MUXSEL;
+   const uint32_t lines_per_sample = ac_spm_get_sample_size(spm_trace) / line_size;
+   const uint32_t *ptr = (const uint32_t *)spm_trace->ptr;
+   /* Without this guard an unconfigured trace would divide by zero below. */
+   if (!lines_per_sample)
+      return 0;
+
+   /* ptr[0] holds the data size (in bytes) written to the ring buffer. */
+   uint32_t num_lines_written = ptr[0] / line_size;
+   if (num_lines_written % lines_per_sample)
+      abort(); /* partial sample: treated as an overflow */
+   return num_lines_written / lines_per_sample;
+}
diff --git a/src/amd/common/ac_spm.h b/src/amd/common/ac_spm.h
new file mode 100644
index 00000000000..8ce49f84923
--- /dev/null
+++ b/src/amd/common/ac_spm.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2021 Valve Corporation
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef AC_SPM_H
+#define AC_SPM_H
+
+#include <stdint.h>
+
+#include "ac_perfcounter.h"
+
+#define AC_SPM_MAX_COUNTER_PER_BLOCK 16
+#define AC_SPM_GLOBAL_TIMESTAMP_COUNTERS 4 /* in unit of 16-bit counters*/
+#define AC_SPM_NUM_COUNTER_PER_MUXSEL 16 /* 16 16-bit counters per muxsel */
+#define AC_SPM_MUXSEL_LINE_SIZE ((AC_SPM_NUM_COUNTER_PER_MUXSEL * 2) / 4) /* in dwords */
+#define AC_SPM_NUM_PERF_SEL 4
+
+enum ac_spm_segment_type { /* values index the per-segment arrays of ac_spm_trace_data */
+   AC_SPM_SEGMENT_TYPE_SE0,
+   AC_SPM_SEGMENT_TYPE_SE1,
+   AC_SPM_SEGMENT_TYPE_SE2,
+   AC_SPM_SEGMENT_TYPE_SE3,
+   AC_SPM_SEGMENT_TYPE_GLOBAL, /* last here, but first in the RLC order used by ac_init_spm() */
+   AC_SPM_SEGMENT_TYPE_COUNT, /* number of segment types, not a segment itself */
+};
+
+struct ac_spm_counter_create_info { /* one counter requested from ac_init_spm() */
+   enum ac_pc_gpu_block gpu_block; /* resolved with ac_pc_get_block() */
+   uint32_t instance; /* checked against the block's num_instances */
+   uint32_t event_id; /* checked against the block's selectors, then programmed as PERF_SEL */
+};
+
+struct ac_spm_muxsel { /* one muxsel entry; the four bit-fields total 16 bits */
+   uint16_t counter      : 6; /* ac_spm_init_muxsel(): 2 * wire, plus 1 for odd counters */
+   uint16_t block        : 4; /* the block's spm_block_select value */
+   uint16_t shader_array : 1; /* 0: SA0, 1: SA1 */
+   uint16_t instance     : 5;
+};
+
+struct ac_spm_muxsel_line { /* one muxsel line: AC_SPM_NUM_COUNTER_PER_MUXSEL entries */
+   struct ac_spm_muxsel muxsel[AC_SPM_NUM_COUNTER_PER_MUXSEL];
+};
+
+struct ac_spm_counter_info { /* state of one enabled SPM counter */
+   /* General info, copied from ac_spm_counter_create_info. */
+   enum ac_pc_gpu_block gpu_block;
+   uint32_t instance;
+   uint32_t event_id;
+
+   /* Muxsel info. */
+   enum ac_spm_segment_type segment_type;
+   bool is_even; /* even counters go to even muxsel lines, odd ones to odd lines */
+   struct ac_spm_muxsel muxsel;
+
+   /* Output info. */
+   uint64_t offset; /* position computed by ac_init_spm(), in 16-bit counter units */
+};
+
+struct ac_spm_counter_select { /* select values accumulated for one hardware counter */
+   uint8_t active; /* mask of used 16-bit counters. */
+   uint32_t sel0; /* PERF_SEL and PERF_SEL1 fields (or the SQ select fields) */
+   uint32_t sel1; /* PERF_SEL2 and PERF_SEL3 fields */
+};
+
+struct ac_spm_block_select { /* counter selection state of one GPU block */
+   const struct ac_pc_block *b;
+   uint32_t grbm_gfx_index; /* NOTE(review): only zeroed by ac_spm.c in this patch */
+
+   uint32_t num_counters; /* copied from the block's num_spm_counters */
+   struct ac_spm_counter_select counters[AC_SPM_MAX_COUNTER_PER_BLOCK];
+};
+
+struct ac_spm_trace_data { /* SPM trace state; ac_init_spm() fills the counter, selection and muxsel fields */
+   /* struct radeon_winsys_bo or struct pb_buffer */
+   void *bo;
+   void *ptr; /* ac_spm_get_num_samples() reads the written byte count from its first dword */
+   uint32_t buffer_size;
+   uint16_t sample_interval;
+
+   /* Enabled counters. */
+   unsigned num_counters;
+   struct ac_spm_counter_info *counters; /* allocated by ac_init_spm(), freed by ac_destroy_spm() */
+
+   /* Block/counters selection. */
+   uint32_t num_block_sel;
+   struct ac_spm_block_select *block_sel; /* grown with realloc(), one entry per GPU block */
+   uint32_t num_used_sq_block_sel;
+   struct ac_spm_block_select sq_block_sel[16]; /* each SQ counter takes one entry */
+
+   /* Muxsel lines. */
+   unsigned num_muxsel_lines[AC_SPM_SEGMENT_TYPE_COUNT];
+   struct ac_spm_muxsel_line *muxsel_lines[AC_SPM_SEGMENT_TYPE_COUNT];
+};
+
+bool ac_init_spm(const struct radeon_info *info,
+                 const struct ac_perfcounters *pc,
+                 unsigned num_counters,
+                 const struct ac_spm_counter_create_info *counters,
+                 struct ac_spm_trace_data *spm_trace);
+void ac_destroy_spm(struct ac_spm_trace_data *spm_trace);
+
+uint32_t ac_spm_get_sample_size(const struct ac_spm_trace_data *spm_trace);
+uint32_t ac_spm_get_num_samples(const struct ac_spm_trace_data *spm_trace);
+
+#endif
diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build
index 09a1ed19129..69b0bfb0716 100644
--- a/src/amd/common/meson.build
+++ b/src/amd/common/meson.build
@@ -80,6 +80,8 @@ amd_common_files = files(
   'ac_debug.h',
   'ac_shadowed_regs.c',
   'ac_shadowed_regs.h',
+  'ac_spm.c',
+  'ac_spm.h',
   'ac_sqtt.c',
   'ac_sqtt.h',
   'ac_rgp.c',



More information about the mesa-commit mailing list