[PATCH umr] PM4 packet streaming support
Tom St Denis
tom.stdenis at amd.com
Sat May 19 14:47:33 UTC 2018
This adds the initial bits of PM4 stream decoding, which can be
used for other debugging purposes. The first use is loosely tying
ring data to shader data when printing waves: the --waves command
can now print information about the shader along with dumping the
WAVE_STATUS registers.
Signed-off-by: Tom St Denis <tom.stdenis at amd.com>
---
src/app/print_waves.c | 37 ++++++-
src/lib/CMakeLists.txt | 1 +
src/lib/umr_read_pm4_stream.c | 243 ++++++++++++++++++++++++++++++++++++++++++
src/umr.h | 19 ++++
4 files changed, 298 insertions(+), 2 deletions(-)
create mode 100644 src/lib/umr_read_pm4_stream.c
diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index 6965f7f31854..1800cca1abc0 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -41,10 +41,20 @@ void umr_print_waves(struct umr_asic *asic)
uint64_t pgm_addr;
struct umr_wave_data *wd, *owd;
int first = 1, col = 0;
+ struct umr_shaders_pgm *shader = NULL;
+ struct umr_pm4_stream *stream;
if (asic->options.halt_waves)
umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT);
+ // scan the gfx ring but don't trigger the halt/resume
+ // since it would have already been done
+ // TODO: Make which ring it scans optional
+ x = asic->options.halt_waves;
+ asic->options.halt_waves = 0;
+ stream = umr_pm4_decode_ring(asic, "gfx");
+ asic->options.halt_waves = x;
+
if (asic->family <= FAMILY_CIK)
shift = 3; // on SI..CIK allocations were done in 8-dword blocks
else
@@ -220,8 +230,28 @@ void umr_print_waves(struct umr_asic *asic)
}
}
- printf("\n\nPGM_MEM:\n");
- pgm_addr = (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
+ printf("\n\nPGM_MEM:");
+ pgm_addr = (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo);
+ if (stream)
+ shader = umr_find_shader_in_stream(stream, wd->ws.hw_id.vm_id, pgm_addr);
+ if (shader) {
+ printf(" (found shader at: %s%u%s at 0x%s%llx%s of %s%u%s bytes)\n",
+ BLUE, shader->vmid, RST,
+ YELLOW, shader->addr, RST,
+ BLUE, shader->size, RST);
+
+ // start decoding a bit before PC if possible
+ if (shader->addr + ((NUM_OPCODE_WORDS*4)/2) < pgm_addr)
+ pgm_addr -= (NUM_OPCODE_WORDS*4)/2;
+ else
+ pgm_addr = shader->addr;
+
+ free(shader);
+ } else {
+ pgm_addr -= (NUM_OPCODE_WORDS*4)/2;
+ printf("\n");
+ }
+
umr_vm_disasm(asic, wd->ws.hw_id.vm_id, pgm_addr, (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo), NUM_OPCODE_WORDS*4, NULL);
Hv("LDS_ALLOC", wd->ws.lds_alloc.value);
@@ -253,6 +283,9 @@ void umr_print_waves(struct umr_asic *asic)
wd = owd;
}
+ if (stream)
+ umr_free_pm4_stream(stream);
+
if (asic->options.halt_waves)
umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME);
}
diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
index 7a04540b58c8..3d4e376b1495 100644
--- a/src/lib/CMakeLists.txt
+++ b/src/lib/CMakeLists.txt
@@ -28,6 +28,7 @@ add_library(umrcore STATIC
umr_apply_bank_address.c
umr_llvm_disasm.c
umr_read_ring_data.c
+ umr_read_pm4_stream.c
update.c
version.c
$<TARGET_OBJECTS:asic> $<TARGET_OBJECTS:ip>
diff --git a/src/lib/umr_read_pm4_stream.c b/src/lib/umr_read_pm4_stream.c
new file mode 100644
index 000000000000..6c7645f4ebc6
--- /dev/null
+++ b/src/lib/umr_read_pm4_stream.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis <tom.stdenis at amd.com>
+ *
+ */
+#include "umr.h"
+
+struct umr_pm4_stream *umr_pm4_decode_stream(struct umr_asic *asic, int vmid, uint32_t *stream, uint32_t nwords);
+
+// TODO: make this a library function
+/*
+ * Look up the register at addr and return its name formatted as
+ * "<ipname>.<regname>", wrapped in the RED/RST colour strings.
+ *
+ * Returns the literal "<unknown>" when no register/IP block is found.
+ * The formatted name lives in a static buffer, so it is overwritten by
+ * the next call and must not be freed by the caller.
+ */
+static char *umr_reg_name(struct umr_asic *asic, uint64_t addr)
+{
+	static char name[512];
+	// start from NULL so a lookup that fails without writing ip is still detected
+	struct umr_ip_block *ip = NULL;
+	struct umr_reg *reg;
+
+	reg = umr_find_reg_by_addr(asic, addr, &ip);
+	if (!ip || !reg)
+		return "<unknown>";
+
+	// bounded write: never run past the static buffer
+	snprintf(name, sizeof name, "%s%s.%s%s", RED, ip->ipname, reg->regname, RST);
+	return name;
+}
+
+// process a packet for IB pointers or shader writes
+//
+// SET_SH_REG: if the packet writes both a *_PGM_LO* and a *_PGM_HI*
+// register, a shader descriptor is attached to ps->shader.
+// INDIRECT_BUFFER*: the IB contents are read through umr_read_vram()
+// and decoded recursively into ps->ib.
+// Packets that are too short to hold the words being inspected are
+// left untouched.
+static void parse_pm4(struct umr_asic *asic, int vmid, struct umr_pm4_stream *ps)
+{
+	uint64_t addr;
+	uint32_t size, tvmid;
+	void *buf;
+
+	switch (ps->opcode) {
+	case 0x76: // SET_SH_REG (looking for writes to shader registers);
+	{
+		unsigned n, na;
+		uint32_t reg_addr;
+		uint64_t shader_addr = 0;
+		char *tmp;
+
+		// words[0] carries the register offset; nothing to do without it
+		if (!ps->words || ps->n_words < 1)
+			break;
+		reg_addr = ps->words[0] + 0x2C00;
+
+		for (na = 0, n = 1; n < ps->n_words; n++) {
+			tmp = umr_reg_name(asic, reg_addr + n - 1);
+			if (strstr(tmp, "SPI_SHADER_PGM_LO_") || strstr(tmp, "COMPUTE_PGM_LO")) {
+				// LO register supplies address bits [39:8]
+				shader_addr = (shader_addr & ~0xFFFFFFFFFFULL) | ((uint64_t)ps->words[n] << 8);
+				na |= 1;
+			} else if (strstr(tmp, "SPI_SHADER_PGM_HI_") || strstr(tmp, "COMPUTE_PGM_HI")) {
+				// HI register supplies the bits above the 40 kept by the
+				// mask, so it must be shifted by 40 (not 32) to avoid
+				// overlapping the top byte contributed by the LO register
+				shader_addr = (shader_addr & 0xFFFFFFFFFFULL) | ((uint64_t)ps->words[n] << 40);
+				na |= 2;
+			}
+		}
+
+		if (na == 3) {
+			// we have a shader address
+			ps->shader = calloc(1, sizeof(ps->shader[0]));
+			if (!ps->shader) {
+				fprintf(stderr, "[ERROR]: Out of memory\n");
+				break;
+			}
+			ps->shader->vmid = vmid;
+			ps->shader->addr = shader_addr;
+			ps->shader->size = umr_compute_shader_size(asic, ps->shader);
+		}
+		break;
+	}
+	case 0x3f: // INDIRECT_BUFFER_CIK
+	case 0x33: // INDIRECT_BUFFER_CONST
+		// need address lo/hi and the size/vmid word
+		if (!ps->words || ps->n_words < 3)
+			break;
+
+		addr = (ps->words[0] & ~3ULL) | ((uint64_t)(ps->words[1] & 0xFFFF) << 32);
+		size = (ps->words[2] & ((1UL << 20) - 1)) * 4;
+		tvmid = ps->words[2] >> 24;
+		if (!tvmid)
+			tvmid = vmid; // no VMID in the packet: inherit the caller's
+
+		buf = calloc(1, size);
+		if (!buf && size) {
+			fprintf(stderr, "[ERROR]: Out of memory\n");
+			break;
+		}
+		// NOTE(review): the result of umr_read_vram() is not checked; on a
+		// failed read the zero-filled buffer is decoded instead
+		umr_read_vram(asic, tvmid, addr, size, buf);
+		ps->ib = umr_pm4_decode_stream(asic, tvmid, buf, size / 4);
+		free(buf);
+		break;
+	default:
+		break;
+	}
+}
+
+// Decode the named ring and look for a shader covering addr in vmid.
+//
+// When no_halt is non-zero the halt_waves option is cleared for the
+// duration of the ring decode (for callers that already halted the
+// waves themselves) and restored afterwards.
+//
+// Returns a copy of the matching shader object, or NULL if none matched.
+struct umr_shaders_pgm *umr_find_shader_in_ring(struct umr_asic *asic, char *ringname, unsigned vmid, uint64_t addr, int no_halt)
+{
+	struct umr_pm4_stream *ring_stream;
+	struct umr_shaders_pgm *match;
+	int saved_halt;
+
+	saved_halt = asic->options.halt_waves;
+	if (no_halt)
+		asic->options.halt_waves = 0;
+
+	ring_stream = umr_pm4_decode_ring(asic, ringname);
+
+	// put the caller's setting back before doing anything else
+	asic->options.halt_waves = saved_halt;
+
+	match = umr_find_shader_in_stream(ring_stream, vmid, addr);
+	umr_free_pm4_stream(ring_stream);
+
+	return match;
+}
+
+
+// Locate, without copying, the first shader in a stream (or in any IB
+// hanging off it) whose [addr, addr + size) range in vmid contains addr.
+static struct umr_shaders_pgm *find_shader_ref(
+	struct umr_pm4_stream *stream, unsigned vmid, uint64_t addr)
+{
+	for (; stream; stream = stream->next) {
+		struct umr_shaders_pgm *s = stream->shader;
+
+		// compare shader if any
+		if (s && s->vmid == vmid &&
+		    addr >= s->addr &&
+		    addr < (s->addr + s->size))
+			return s;
+
+		// recurse into IBs if any
+		if (stream->ib) {
+			s = find_shader_ref(stream->ib, vmid, addr);
+			if (s)
+				return s;
+		}
+	}
+
+	return NULL;
+}
+
+// return a copy of a shader object found in a stream
+//
+// The stream and all nested IBs are searched for a shader in vmid whose
+// address range contains addr.  The returned object is a single heap
+// copy; ownership passes to the caller.  Returns NULL when nothing
+// matches or the copy cannot be allocated.
+//
+// The search is done on references and only the final match is copied,
+// so nested IBs no longer leak one intermediate copy per recursion level.
+struct umr_shaders_pgm *umr_find_shader_in_stream(
+	struct umr_pm4_stream *stream, unsigned vmid, uint64_t addr)
+{
+	struct umr_shaders_pgm *found, *copy;
+
+	found = find_shader_ref(stream, vmid, addr);
+	if (!found)
+		return NULL;
+
+	copy = calloc(1, sizeof(*copy));
+	if (!copy) {
+		fprintf(stderr, "[ERROR]: Out of memory\n");
+		return NULL;
+	}
+
+	*copy = *found;
+	return copy;
+}
+
+
+// Release a decoded PM4 stream: every packet in the list, the IB
+// streams hanging off packets, and each packet's shader object and
+// payload words.  Passing NULL is a no-op.
+void umr_free_pm4_stream(struct umr_pm4_stream *stream)
+{
+	while (stream) {
+		struct umr_pm4_stream *n;
+		n = stream->next;
+		if (stream->ib)
+			umr_free_pm4_stream(stream->ib);
+		free(stream->shader);
+		// the payload copy made by the decoder belongs to the packet too
+		free(stream->words);
+		free(stream);
+		stream = n;
+	}
+}
+
+// decode a stream of packets into a linked list of packets
+//
+// stream points at nwords dwords of PM4 data; vmid is handed on to the
+// packet parser.  One list node is produced per packet, followed by one
+// trailing all-zero node.  The caller releases the list with
+// umr_free_pm4_stream().  Returns NULL if memory runs out.
+//
+// A final packet whose header claims more payload than is left in the
+// buffer is clamped to the remaining words and is not parsed further,
+// so the decoder never reads past the end of stream.
+struct umr_pm4_stream *umr_pm4_decode_stream(struct umr_asic *asic, int vmid, uint32_t *stream, uint32_t nwords)
+{
+	struct umr_pm4_stream *ops, *ps;
+
+	ps = ops = calloc(1, sizeof *ops);
+	if (!ps)
+		goto oom;
+
+	while (nwords) {
+		uint32_t hdr = *stream;
+		int truncated = 0;
+
+		// fetch basics out of header
+		ps->pkttype = hdr >> 30;
+		ps->n_words = ((hdr >> 16) + 1) & 0x3FFF;
+
+		// never trust the header beyond what the buffer still holds
+		if (ps->n_words > nwords - 1) {
+			ps->n_words = nwords - 1;
+			truncated = 1;
+		}
+
+		// grab type specific header data
+		if (ps->pkttype == 0)
+			ps->pkt0off = hdr & 0xFFFF;
+		else
+			ps->opcode = (hdr >> 8) & 0xFF;
+
+		// grab rest of words
+		ps->words = calloc(ps->n_words, sizeof(ps->words[0]));
+		if (!ps->words && ps->n_words)
+			goto oom;
+		if (ps->n_words)
+			memcpy(ps->words, &stream[1], ps->n_words * sizeof(stream[0]));
+
+		// decode specific packets (incomplete ones cannot be interpreted)
+		if (ps->pkttype == 3 && !truncated)
+			parse_pm4(asic, vmid, ps);
+
+		// advance stream
+		nwords -= 1 + ps->n_words;
+		stream += 1 + ps->n_words;
+		ps->next = calloc(1, sizeof(*ps));
+		if (!ps->next)
+			goto oom;
+		ps = ps->next;
+	}
+
+	return ops;
+
+oom:
+	fprintf(stderr, "[ERROR]: Out of memory\n");
+	umr_free_pm4_stream(ops);
+	return NULL;
+}
+
+// decode a stream of PM4 packets starting with ring
+//
+// Reads the named ring, copies the words between the read and write
+// pointers into a linear buffer and decodes them.  If the halt_waves
+// option is set the waves are halted before the read and resumed
+// afterwards.  Returns the decoded stream (release it with
+// umr_free_pm4_stream()), or NULL when the ring cannot be read, is
+// empty (rptr == wptr), or memory runs out.
+struct umr_pm4_stream *umr_pm4_decode_ring(struct umr_asic *asic, char *ringname)
+{
+	struct umr_pm4_stream *ps = NULL;
+	uint32_t *ringdata, *lineardata, ringsize = 0, linearsize;
+
+	if (asic->options.halt_waves)
+		umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT);
+
+	ringdata = umr_read_ring_data(asic, ringname, &ringsize);
+	// no data, or a zero size that would make the modulo below undefined
+	if (!ringdata || !ringsize)
+		goto out;
+
+	// NOTE(review): ringsize is used as a count of dwords here; confirm
+	// that umr_read_ring_data() reports dwords rather than bytes
+	ringdata[0] %= ringsize;
+	ringdata[1] %= ringsize;
+	if (ringdata[0] == ringdata[1]) // rptr == wptr, nothing pending
+		goto out;
+
+	// copy ring data into linear array
+	lineardata = calloc(ringsize, sizeof(*lineardata));
+	if (!lineardata) {
+		fprintf(stderr, "[ERROR]: Out of memory\n");
+		goto out;
+	}
+
+	linearsize = 0;
+	while (ringdata[0] != ringdata[1]) {
+		lineardata[linearsize++] = ringdata[3 + ringdata[0]]; // first 3 words are rptr/wptr/dwptr
+		ringdata[0] = (ringdata[0] + 1) % ringsize;
+	}
+
+	ps = umr_pm4_decode_stream(asic, 0, lineardata, linearsize);
+	free(lineardata);
+
+out:
+	// released on every path, including the empty-ring case
+	free(ringdata);
+
+	if (asic->options.halt_waves)
+		umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME);
+
+	return ps;
+}
+
diff --git a/src/umr.h b/src/umr.h
index 4c9f95e1b87b..67b8a2194492 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -621,7 +621,26 @@ int umr_grbm_select_index(struct umr_asic *asic, uint32_t se, uint32_t sh, uint3
int umr_sq_cmd_halt_waves(struct umr_asic *asic, enum umr_sq_cmd_halt_resume mode);
/* IB/ring decoding/dumping/etc */
+/* One decoded PM4 packet; packets are chained through next */
+struct umr_pm4_stream {
+	uint32_t pkttype;  // packet type (0==simple write, 3 == packet)
+	uint32_t pkt0off;  // base address for PKT0 writes
+	uint32_t opcode;   // opcode taken from the header of non-PKT0 packets
+	uint32_t n_words;  // number of words ignoring header
+	uint32_t *words;   // words following header word
+
+	struct umr_pm4_stream *next;  // adjacent PM4 packet if any
+	struct umr_pm4_stream *ib;    // IB this packet might point to
+
+	struct umr_shaders_pgm *shader;  // shader program if any
+};
+
void *umr_read_ring_data(struct umr_asic *asic, char *ringname, uint32_t *ringsize);
+struct umr_pm4_stream *umr_pm4_decode_ring(struct umr_asic *asic, char *ringname);
+struct umr_pm4_stream *umr_pm4_decode_stream(struct umr_asic *asic, int vmid, uint32_t *stream, uint32_t nwords);
+void umr_free_pm4_stream(struct umr_pm4_stream *stream);
+struct umr_shaders_pgm *umr_find_shader_in_stream(struct umr_pm4_stream *stream, unsigned vmid, uint64_t addr);
+struct umr_shaders_pgm *umr_find_shader_in_ring(struct umr_asic *asic, char *ringname, unsigned vmid, uint64_t addr, int no_halt);
+
void umr_print_decode(struct umr_asic *asic, struct umr_ring_decoder *decoder, uint32_t ib);
void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder);
void umr_dump_shaders(struct umr_asic *asic, struct umr_ring_decoder *decoder, struct umr_wave_data *wd);
--
2.14.3
More information about the amd-gfx
mailing list