Mesa (main): radeonsi: add spm counters setup code

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Apr 22 12:19:05 UTC 2022


Module: Mesa
Branch: main
Commit: a884f3694984fb2ee446351a1fad30d6d545d7dc
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a884f3694984fb2ee446351a1fad30d6d545d7dc

Author: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Date:   Tue Mar 29 14:30:53 2022 +0200

radeonsi: add spm counters setup code

Based on radv_spm.c and PAL.

Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15646>

---

 src/gallium/drivers/radeonsi/si_perfcounter.c | 182 ++++++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_pipe.h        |   5 +
 2 files changed, 187 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 0fb56718935..571424f7fad 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -712,3 +712,185 @@ void si_init_perfcounters(struct si_screen *screen)
       si_destroy_perfcounters(screen);
    }
 }
+
+static void
+si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs)
+{
+   /* Writes the select registers of every SPM counter listed in sctx->spm_trace into cs. */
+   struct ac_spm_trace_data *trace = &sctx->spm_trace;
+
+   radeon_begin(cs);
+
+   /* SQ blocks: one SELECT register per used block, taken from the block's first counter. */
+   for (uint32_t i = 0; i < trace->num_used_sq_block_sel; i++) {
+      const struct ac_spm_counter_select *sq_sel = &trace->sq_block_sel[i].counters[0];
+      uint32_t select_reg = R_036700_SQ_PERFCOUNTER0_SELECT + i * 4;
+
+      radeon_set_uconfig_reg_seq(select_reg, 1, false);
+      radeon_emit(sq_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
+   }
+
+   /* Other blocks: write the block's GRBM_GFX_INDEX, then each active counter's selects. */
+   for (uint32_t i = 0; i < trace->num_block_sel; i++) {
+      struct ac_spm_block_select *sel = &trace->block_sel[i];
+      struct ac_pc_block_base *block_regs = sel->b->b->b;
+
+      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, sel->grbm_gfx_index);
+
+      for (unsigned n = 0; n < sel->num_counters; n++) {
+         const struct ac_spm_counter_select *counter = &sel->counters[n];
+
+         if (counter->active) {
+            radeon_set_uconfig_reg_seq(block_regs->select0[n], 1, false);
+            radeon_emit(counter->sel0);
+            radeon_set_uconfig_reg_seq(block_regs->select1[n], 1, false);
+            radeon_emit(counter->sel1);
+         }
+      }
+   }
+
+   /* Restore global broadcasting (SE, SH and instance). */
+   const uint32_t broadcast_all = S_030800_SE_BROADCAST_WRITES(1) |
+                                  S_030800_SH_BROADCAST_WRITES(1) |
+                                  S_030800_INSTANCE_BROADCAST_WRITES(1);
+   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, broadcast_all);
+   radeon_end();
+}
+
+#define SPM_RING_BASE_ALIGN 32
+
+void
+si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs)
+{
+   /* Emits the SPM ring buffer setup, the muxsel lines and the counter selects into cs. */
+   struct ac_spm_trace_data *spm_trace = &sctx->spm_trace;
+   uint64_t ring_size = spm_trace->buffer_size;
+   uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm_trace->bo);
+   uint32_t line_total = 0; /* muxsel lines summed over every segment type */
+
+   /* Both the ring VA and the ring size have to be multiples of SPM_RING_BASE_ALIGN. */
+   assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
+   assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
+   assert(spm_trace->sample_interval >= 32);
+
+   for (unsigned seg = 0; seg < AC_SPM_SEGMENT_TYPE_COUNT; seg++)
+      line_total += spm_trace->num_muxsel_lines[seg];
+
+   radeon_begin(cs);
+
+   /* Ring buffer: control word, base address split over two registers, then the size. */
+   radeon_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
+                          /* Ring mode 0: no stall and no interrupt on overflow. */
+                          S_037200_PERFMON_RING_MODE(0) |
+                          /* The sample interval is expressed in sclk. */
+                          S_037200_PERFMON_SAMPLE_INTERVAL(spm_trace->sample_interval));
+   radeon_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
+   radeon_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
+                          S_037208_RING_BASE_HI(va >> 32));
+   radeon_set_uconfig_reg(R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);
+
+   /* Muxsel layout: line counts for segments 0-3, for segment 4 and in total. */
+   radeon_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);
+   radeon_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
+   radeon_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
+                          S_03727C_SE0_NUM_LINE(spm_trace->num_muxsel_lines[0]) |
+                          S_03727C_SE1_NUM_LINE(spm_trace->num_muxsel_lines[1]) |
+                          S_03727C_SE2_NUM_LINE(spm_trace->num_muxsel_lines[2]) |
+                          S_03727C_SE3_NUM_LINE(spm_trace->num_muxsel_lines[3]));
+   radeon_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
+                          S_037280_PERFMON_SEGMENT_SIZE(line_total) |
+                          S_037280_GLOBAL_NUM_LINE(spm_trace->num_muxsel_lines[4]));
+
+   /* Upload the muxsel lines of every non-empty segment to the RLC. */
+   for (unsigned seg = 0; seg < AC_SPM_SEGMENT_TYPE_COUNT; seg++) {
+      unsigned addr_reg, data_reg;
+      unsigned gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
+                           S_030800_INSTANCE_BROADCAST_WRITES(1);
+
+      if (spm_trace->num_muxsel_lines[seg] == 0)
+         continue;
+
+      if (seg != AC_SPM_SEGMENT_TYPE_GLOBAL) {
+         /* Per-SE segment: select SE index 'seg' and use the SE muxsel registers. */
+         gfx_index |= S_030800_SE_INDEX(seg);
+         addr_reg = R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
+         data_reg = R_037220_RLC_SPM_SE_MUXSEL_DATA;
+      } else {
+         /* Global segment: broadcast to every SE and use the global muxsel registers. */
+         gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
+         addr_reg = R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
+         data_reg = R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
+      }
+      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, gfx_index);
+
+      for (unsigned line = 0; line < spm_trace->num_muxsel_lines[seg]; line++) {
+         uint32_t *line_data = (uint32_t *)spm_trace->muxsel_lines[seg][line].muxsel;
+         const unsigned write_ctl = S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) |
+                                    S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME) |
+                                    S_370_WR_ONE_ADDR(1);
+
+         /* Point MUXSEL_ADDR at the line about to be written. */
+         radeon_set_uconfig_reg(addr_reg, line * AC_SPM_MUXSEL_LINE_SIZE);
+         /* Write the whole line through MUXSEL_DATA with one WRITE_DATA packet. */
+         radeon_emit(PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
+         radeon_emit(write_ctl);
+         radeon_emit(data_reg >> 2);
+         radeon_emit(0);
+         radeon_emit_array(line_data, AC_SPM_MUXSEL_LINE_SIZE);
+      }
+   }
+   radeon_end();
+
+   /* Select SPM counters. */
+   si_emit_spm_counters(sctx, cs);
+}
+
+bool
+si_spm_init(struct si_context *sctx)
+{
+   /* Allocates the screen's perf counter state and initializes SPM; returns false on failure. */
+   const struct radeon_info *info = &sctx->screen->info;
+
+   sctx->screen->perfcounters = CALLOC_STRUCT(si_perfcounters);
+   if (!sctx->screen->perfcounters)
+      return false; /* allocation failed: do not dereference NULL below */
+
+   sctx->screen->perfcounters->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(sctx->screen);
+   sctx->screen->perfcounters->num_instance_cs_dwords = 3;
+
+   struct ac_perfcounters *pc = &sctx->screen->perfcounters->base;
+   struct ac_spm_counter_create_info spm_counters[] = {
+      /* XXX: doesn't work */
+      {TCP, 0, 0x9},    /* Number of L2 requests. */
+      {TCP, 0, 0x12},   /* Number of L2 misses. */
+
+      /* Scalar cache hit */
+      {SQ, 0, 0x14f},   /* Number of SCACHE hits. */
+      {SQ, 0, 0x150},   /* Number of SCACHE misses. */
+      {SQ, 0, 0x151},   /* Number of SCACHE misses duplicate. */
+
+      /* Instruction cache hit */
+      {SQ, 0, 0x12c},   /* Number of ICACHE hits. */
+      {SQ, 0, 0x12d},   /* Number of ICACHE misses. */
+      {SQ, 0, 0x12e},   /* Number of ICACHE misses duplicate. */
+
+      /* XXX: doesn't work */
+      {GL1C, 0, 0xe},   /* Number of GL1C requests. */
+      {GL1C, 0, 0x12},  /* Number of GL1C misses. */
+
+      /* L2 cache hit */
+      {GL2C, 0, 0x3},   /* Number of GL2C requests. */
+      {GL2C, 0, info->chip_class >= GFX10_3 ? 0x2b : 0x23},  /* Number of GL2C misses. */
+   };
+
+   if (!ac_init_perfcounters(info, false, false, pc))
+      return false;
+
+   return ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &sctx->spm_trace);
+}
+
+void
+si_spm_finish(struct si_context *sctx)
+{
+   ac_destroy_spm(&sctx->spm_trace); /* same trace data si_spm_init() handed to ac_init_spm() */
+}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 82075cf9e4d..61c48f7300d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -33,6 +33,7 @@
 #include "util/u_threaded_context.h"
 #include "util/u_vertex_state_cache.h"
 #include "ac_sqtt.h"
+#include "ac_spm.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -1284,6 +1285,7 @@ struct si_context {
 
    /* SQTT */
    struct ac_thread_trace_data *thread_trace;
+   struct ac_spm_trace_data spm_trace;
    struct pipe_fence_handle *last_sqtt_fence;
    enum rgp_sqtt_marker_event_type sqtt_next_event;
    bool thread_trace_enabled;
@@ -1512,6 +1514,9 @@ void si_pc_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders);
 void si_pc_emit_spm_start(struct radeon_cmdbuf *cs);
 void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters);
 void si_pc_emit_spm_reset(struct radeon_cmdbuf *cs);
+void si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs); /* SPM setup into cs */
+bool si_spm_init(struct si_context *sctx); /* returns false on failure */
+void si_spm_finish(struct si_context *sctx); /* destroys sctx->spm_trace */
 
 /* si_query.c */
 void si_init_screen_query_functions(struct si_screen *sscreen);



More information about the mesa-commit mailing list