[PATCH umr] PM4 packet streaming support (v2)

Tom St Denis tom.stdenis at amd.com
Sun May 20 14:27:28 UTC 2018


This adds the initial bits of PM4 stream decoding, which can be
used for other debugging purposes.  The first of these is loosely
tying ring data to shader data when printing waves: the --waves
command can now print out information about the shader along
with dumping the WAVE_STATUS registers.

(v2): Add documentation and the ability to specify a ring name

Signed-off-by: Tom St Denis <tom.stdenis at amd.com>
---
 doc/sphinx/source/libpm4_stream.rst | 102 +++++++++++++++
 doc/sphinx/source/libumr_api.rst    |   1 +
 doc/umr.1                           |   6 +-
 src/app/main.c                      |  12 +-
 src/app/print_waves.c               |  36 +++++-
 src/lib/CMakeLists.txt              |   1 +
 src/lib/umr_read_pm4_stream.c       | 244 ++++++++++++++++++++++++++++++++++++
 src/umr.h                           |  22 +++-
 8 files changed, 417 insertions(+), 7 deletions(-)
 create mode 100644 doc/sphinx/source/libpm4_stream.rst
 create mode 100644 src/lib/umr_read_pm4_stream.c

diff --git a/doc/sphinx/source/libpm4_stream.rst b/doc/sphinx/source/libpm4_stream.rst
new file mode 100644
index 000000000000..790cb9ca831f
--- /dev/null
+++ b/doc/sphinx/source/libpm4_stream.rst
@@ -0,0 +1,102 @@
+===================
+PM4 Stream Decoding
+===================
+
+The UMR library has the ability to read rings into a linked list
+of PM4 packets with pointers to indirect buffers (IBs) and shaders.
+
+-----------------
+PM4 Decode a Ring
+-----------------
+
+To decode a ring into a stream the following function can be used:
+
+::
+
+	struct umr_pm4_stream *umr_pm4_decode_ring(struct umr_asic *asic, char *ringname);
+
+Which will decode the ring named by ringname and return a pointer to
+the following structure if successful:
+
+::
+
+	/* IB/ring decoding/dumping/etc */
+	struct umr_pm4_stream {
+		uint32_t
+			pkttype,	// packet type (0==simple write, 3 == packet)
+			pkt0off,	// base address for PKT0 writes
+			opcode,
+			n_words,	// number of words ignoring header
+			*words;		// words following header word
+
+		struct umr_pm4_stream
+				*next,	// adjacent PM4 packet if any
+				*ib;	// IB this packet might point to
+
+		struct umr_shaders_pgm *shader; // shader program if any
+	};
+
+Adjacent PM4 packets are pointed to by 'next' (NULL terminated) and
+any IBs or shaders that are found are pointed to by 'ib' and 'shader'
+respectively.
+
+-------------------
+PM4 Decode a Buffer
+-------------------
+
+To decode a PM4 stream inside a user buffer the following function
+can be used:
+
+::
+
+	struct umr_pm4_stream *umr_pm4_decode_stream(struct umr_asic *asic, int vmid, uint32_t *stream, uint32_t nwords);
+
+This will return a structure pointer if successful.
+
+--------------------
+Freeing a PM4 Stream
+--------------------
+
+A PM4 stream can be freed with the following function:
+
+::
+
+	void umr_free_pm4_stream(struct umr_pm4_stream *stream);
+
+------------------------------
+Finding Shaders in PM4 Streams
+------------------------------
+
+The WAVE_STATUS registers can indicate active waves and where in
+shaders they are but not information about the shaders themselves.
+The following functions can find shaders in PM4 streams:
+
+::
+
+	struct umr_shaders_pgm *umr_find_shader_in_stream(struct umr_pm4_stream *stream, unsigned vmid, uint64_t addr);
+	struct umr_shaders_pgm *umr_find_shader_in_ring(struct umr_asic *asic, char *ringname, unsigned vmid, uint64_t addr, int no_halt);
+
+If a shader is found, both functions return a pointer to a shader
+structure which indicates the base address, VMID, and size of the shader
+(NULL is returned otherwise).  The returned structure is a copy of the one
+held in the PM4 stream, so the caller must free it independently; calling
+umr_free_pm4_stream() will not free these copies.
+
+::
+
+	struct umr_shaders_pgm {
+		// VMID and length in bytes
+		uint32_t
+			vmid,
+			size;
+
+		// address in VM space for this shader
+		uint64_t addr;
+
+		struct umr_shaders_pgm *next;
+
+		struct {
+			uint64_t ib_base, ib_offset;
+		} src;
+	};
+
diff --git a/doc/sphinx/source/libumr_api.rst b/doc/sphinx/source/libumr_api.rst
index 66877f87a3f9..f2a569f3042c 100644
--- a/doc/sphinx/source/libumr_api.rst
+++ b/doc/sphinx/source/libumr_api.rst
@@ -13,3 +13,4 @@ libumrcore.a: API Documentation
    libvm_access
    libhalt_waves
    libwave_status
+   libpm4_stream
diff --git a/doc/umr.1 b/doc/umr.1
index a777d9312054..c163a7bcb3f0 100644
--- a/doc/umr.1
+++ b/doc/umr.1
@@ -90,13 +90,15 @@ options that apply are:
 and
 .B use_pci
 .
-.IP "--waves, -wa"
+.IP "--waves, -wa <ring_name>"
 Print out information about any active CU waves.  Note that if GFX power gating
 is enabled this command may result in a GPU hang.  It's unlikely unless you're
 invoking it very rapidly.  Unlike the wave count reading in --top this command
 will operate regardless of whether GFX PG is enabled or not.  Can use
 .B bits
-to decode the wave bitfields.
+to decode the wave bitfields.  An optional ring name can be specified
+(default: gfx) to search for pointers to active shaders to find extra debugging
+information.
 
 .IP "--vm-decode, -vm vmid@<address> <num_of_pages>"
 Decode page mappings at a specified address (in hex) from the VMID specified.
diff --git a/src/app/main.c b/src/app/main.c
index d6571e77b74d..1710b2cdf4a9 100644
--- a/src/app/main.c
+++ b/src/app/main.c
@@ -274,6 +274,12 @@ int main(int argc, char **argv)
 		} else if (!strcmp(argv[i], "--waves") || !strcmp(argv[i], "-wa")) {
 			if (!asic)
 				asic = get_asic();
+			if (i + 1 < argc) {
+				if (argv[i+1][0] != '-') {
+					strcpy(asic->options.ring_name, argv[i+1]);
+					++i;
+				}
+			}
 			umr_print_waves(asic);
 		} else if (!strcmp(argv[i], "--scan") || !strcmp(argv[i], "-s")) {
 			if (i + 1 < argc) {
@@ -573,9 +579,11 @@ int main(int argc, char **argv)
 	"\n\t\t'-O bits,follow,empty_log' to continually dump the trace log.)\n"
 "\n\t--top, -t\n\t\tSummarize GPU utilization.  Can select a SE block with --bank.  Can use"
 	"\n\t\toptions 'use_colour' to colourize output and 'use_pci' to improve efficiency.\n"
-"\n\t--waves, -wa\n\t\tPrint out information about any active CU waves.  Can use '-O bits'"
+"\n\t--waves, -wa <ring_name>\n\t\tPrint out information about any active CU waves.  Can use '-O bits'"
 	"\n\t\tto see decoding of various wave fields.  Can use the '-O halt_waves' option"
-	"\n\t\tto halt the SQ while reading registers.\n"
+	"\n\t\tto halt the SQ while reading registers.  An optional ring name can be specified"
+	"\n\t\twhich will then search a given ring for pointers to active shaders.  It will"
+	"\n\t\tdefault to the 'gfx' ring if nothing is specified.\n"
 "\n\t--vm-decode, -vm vmid@<address> <num_of_pages>"
 	"\n\t\tDecode page mappings at a specified address (in hex) from the VMID specified."
 	"\n\t\tThe VMID can be specified in hexadecimal (with leading '0x') or in decimal."
diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index 6965f7f31854..3847d69a3b8d 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -41,10 +41,19 @@ void umr_print_waves(struct umr_asic *asic)
 	uint64_t pgm_addr;
 	struct umr_wave_data *wd, *owd;
 	int first = 1, col = 0;
+	struct umr_shaders_pgm *shader = NULL;
+	struct umr_pm4_stream *stream;
 
 	if (asic->options.halt_waves)
 		umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT);
 
+	// scan a ring but don't trigger the halt/resume
+	// since it would have already been done
+	x = asic->options.halt_waves;
+	asic->options.halt_waves = 0;
+	stream = umr_pm4_decode_ring(asic, asic->options.ring_name[0] ? asic->options.ring_name : "gfx");
+	asic->options.halt_waves = x;
+
 	if (asic->family <= FAMILY_CIK)
 		shift = 3;  // on SI..CIK allocations were done in 8-dword blocks
 	else
@@ -220,8 +229,28 @@ void umr_print_waves(struct umr_asic *asic)
 				}
 			}
 
-			printf("\n\nPGM_MEM:\n");
-			pgm_addr = (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
+			printf("\n\nPGM_MEM:");
+			pgm_addr = (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo);
+			if (stream)
+				shader = umr_find_shader_in_stream(stream, wd->ws.hw_id.vm_id, pgm_addr);
+			if (shader) {
+				printf(" (found shader at: %s%u%s at 0x%s%llx%s of %s%u%s bytes)\n",
+					BLUE, shader->vmid, RST,
+					YELLOW, (unsigned long long)shader->addr, RST,
+					BLUE, shader->size, RST);
+
+				// start decoding a bit before PC if possible
+				if (shader->addr + ((NUM_OPCODE_WORDS*4)/2) < pgm_addr)
+					pgm_addr -= (NUM_OPCODE_WORDS*4)/2;
+				else
+					pgm_addr = shader->addr;
+
+				free(shader);
+			} else {
+				pgm_addr -= (NUM_OPCODE_WORDS*4)/2;
+				printf("\n");
+			}
+
 			umr_vm_disasm(asic, wd->ws.hw_id.vm_id, pgm_addr, (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo), NUM_OPCODE_WORDS*4, NULL);
 
 			Hv("LDS_ALLOC", wd->ws.lds_alloc.value);
@@ -253,6 +282,9 @@ void umr_print_waves(struct umr_asic *asic)
 		wd = owd;
 	}
 
+	if (stream)
+		umr_free_pm4_stream(stream);
+
 	if (asic->options.halt_waves)
 		umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME);
 }
diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
index 7a04540b58c8..3d4e376b1495 100644
--- a/src/lib/CMakeLists.txt
+++ b/src/lib/CMakeLists.txt
@@ -28,6 +28,7 @@ add_library(umrcore STATIC
   umr_apply_bank_address.c
   umr_llvm_disasm.c
   umr_read_ring_data.c
+  umr_read_pm4_stream.c
   update.c
   version.c
   $<TARGET_OBJECTS:asic> $<TARGET_OBJECTS:ip>
diff --git a/src/lib/umr_read_pm4_stream.c b/src/lib/umr_read_pm4_stream.c
new file mode 100644
index 000000000000..2dce03f5f01b
--- /dev/null
+++ b/src/lib/umr_read_pm4_stream.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis <tom.stdenis at amd.com>
+ *
+ */
+#include "umr.h"
+
+struct umr_pm4_stream *umr_pm4_decode_stream(struct umr_asic *asic, int vmid, uint32_t *stream, uint32_t nwords);
+
+// TODO: make this a library function
+//
+// Look up the register at 'addr' and format it as "<ip>.<reg>" wrapped in
+// the RED/RST colour strings.
+//
+// Returns a pointer to a static buffer that is overwritten by the next
+// call, or the literal "<unknown>" when no register/IP block is found.
+static char *umr_reg_name(struct umr_asic *asic, uint64_t addr)
+{
+	struct umr_reg *reg;
+	// start from a known value in case the lookup leaves 'ip' untouched
+	// when nothing matches
+	struct umr_ip_block *ip = NULL;
+	static char name[512];
+
+	reg = umr_find_reg_by_addr(asic, addr, &ip);
+	if (!ip || !reg)
+		return "<unknown>";
+
+	// bounded write: the name lengths are not checked here, so never let
+	// them run past the static buffer
+	snprintf(name, sizeof(name), "%s%s.%s%s", RED, ip->ipname, reg->regname, RST);
+	return name;
+}
+
+// process a packet for IB pointers or shader writes
+//
+//   asic - device handle used for register lookups and VM reads
+//   vmid - VMID the packet stream was read from; inherited by an IB
+//          packet whose own VMID field is zero
+//   ps   - decoded packet; ps->shader is set when a SET_SH_REG packet
+//          writes both halves of a shader program address, and ps->ib
+//          is set to the decoded contents of an indirect buffer
+//
+// Packets that are too short for the fields read below are ignored.
+static void parse_pm4(struct umr_asic *asic, int vmid, struct umr_pm4_stream *ps)
+{
+	uint64_t addr;
+	uint32_t size, tvmid;
+	void *buf;
+
+	// nothing to inspect without a packet body
+	if (!ps->words)
+		return;
+
+	switch (ps->opcode) {
+		case 0x76: // SET_SH_REG (looking for writes to shader registers);
+		{
+			unsigned n, na;
+			uint32_t reg_addr;
+			uint64_t shader_addr = 0;
+			char *tmp;
+
+			// words[0] carries the register offset, so it must exist
+			if (ps->n_words < 1)
+				break;
+			reg_addr = ps->words[0] + 0x2C00;
+
+			// 'na' collects one bit per address half seen (1 == LO, 2 == HI)
+			for (na = 0, n = 1; n < ps->n_words; n++) {
+				tmp = umr_reg_name(asic, reg_addr + n - 1);
+				if (strstr(tmp, "SPI_SHADER_PGM_LO_") || strstr(tmp, "COMPUTE_PGM_LO")) {
+					// LO half fills address bits 39:8
+					shader_addr = (shader_addr & ~0xFFFFFFFFFFULL) | ((uint64_t)ps->words[n] << 8);
+					na |= 1;
+				} else if (strstr(tmp, "SPI_SHADER_PGM_HI_") || strstr(tmp, "COMPUTE_PGM_HI")) {
+					// HI half fills the bits above 39: both masks split the
+					// address at bit 40, so shifting by less would collide
+					// with the bits written by the LO half
+					shader_addr = (shader_addr & 0xFFFFFFFFFFULL) | ((uint64_t)ps->words[n] << 40);
+					na |= 2;
+				}
+			}
+
+			if (na == 3) {
+				// we have a shader address
+				ps->shader = calloc(1, sizeof(ps->shader[0]));
+				if (!ps->shader) {
+					fprintf(stderr, "[ERROR]: Out of memory\n");
+					break;
+				}
+				ps->shader->vmid = vmid;
+				ps->shader->addr = shader_addr;
+				ps->shader->size = umr_compute_shader_size(asic, ps->shader);
+			}
+			break;
+		}
+		case 0x3f: // INDIRECT_BUFFER_CIK
+		case 0x33: // INDIRECT_BUFFER_CONST
+			// address lo, address hi and the size/VMID word are all read
+			if (ps->n_words < 3)
+				break;
+
+			addr = (ps->words[0] & ~3ULL) | ((uint64_t)(ps->words[1] & 0xFFFF) << 32);
+			size = (ps->words[2] & ((1UL << 20) - 1)) * 4;
+			tvmid = ps->words[2] >> 24;
+			if (!tvmid)
+				tvmid = vmid;
+
+			// an empty IB has nothing to decode
+			if (!size)
+				break;
+
+			buf = calloc(1, size);
+			if (!buf) {
+				fprintf(stderr, "[ERROR]: Out of memory\n");
+				break;
+			}
+
+			// NOTE(review): the result of umr_read_vram() is not checked;
+			// if the read fails the zero-filled buffer is what gets decoded
+			umr_read_vram(asic, tvmid, addr, size, buf);
+			ps->ib = umr_pm4_decode_stream(asic, tvmid, buf, size / 4);
+			free(buf);
+			break;
+		default:
+			// every other opcode carries nothing of interest here
+			break;
+	}
+}
+
+// search for a shader in a ring
+//
+// Decodes the ring called 'ringname' and looks for a shader in VMID
+// 'vmid' whose address range contains 'addr'.  A non-zero 'no_halt'
+// suppresses the halt_waves option for the duration of the decode, for
+// callers that have already halted the waves themselves.
+//
+// Returns whatever umr_find_shader_in_stream() returns; the decoded
+// stream itself is released before returning.
+struct umr_shaders_pgm *umr_find_shader_in_ring(struct umr_asic *asic, char *ringname, unsigned vmid, uint64_t addr, int no_halt)
+{
+	const int saved_halt = asic->options.halt_waves;
+	struct umr_pm4_stream *ring_stream;
+	struct umr_shaders_pgm *match;
+
+	// temporarily mute the option if asked to, then put it back
+	asic->options.halt_waves = no_halt ? 0 : saved_halt;
+	ring_stream = umr_pm4_decode_ring(asic, ringname);
+	asic->options.halt_waves = saved_halt;
+
+	match = umr_find_shader_in_stream(ring_stream, vmid, addr);
+	umr_free_pm4_stream(ring_stream);
+
+	return match;
+}
+
+
+// return a copy of a shader object found in a stream
+//
+// Walks 'stream' (following 'next', and descending into 'ib') looking for
+// a shader whose VMID equals 'vmid' and whose [addr, addr + size) range
+// contains 'addr'.
+//
+// Returns a heap-allocated copy of the first match, which the caller must
+// free(), or NULL if nothing matches or the copy cannot be allocated.
+struct umr_shaders_pgm *umr_find_shader_in_stream(
+	struct umr_pm4_stream *stream, unsigned vmid, uint64_t addr)
+{
+	struct umr_shaders_pgm *copy;
+
+	for (; stream; stream = stream->next) {
+		// compare shader if any
+		if (stream->shader &&
+		    stream->shader->vmid == vmid &&
+		    addr >= stream->shader->addr &&
+		    addr < (stream->shader->addr + stream->shader->size)) {
+			copy = calloc(1, sizeof(*copy));
+			if (!copy) {
+				fprintf(stderr, "[ERROR]: Out of memory\n");
+				return NULL;
+			}
+			*copy = *stream->shader;
+			return copy;
+		}
+
+		// recurse into IBs if any
+		if (stream->ib) {
+			copy = umr_find_shader_in_stream(stream->ib, vmid, addr);
+			// the recursive call already produced a heap copy; hand it
+			// straight back instead of duplicating (and leaking) it
+			if (copy)
+				return copy;
+		}
+	}
+
+	return NULL;
+}
+
+
+// Release every packet reachable from 'stream': each packet's IB stream
+// (recursively), its shader record, its word array and the packet itself.
+// Passing NULL is a no-op.
+void umr_free_pm4_stream(struct umr_pm4_stream *stream)
+{
+	struct umr_pm4_stream *cur, *following;
+
+	for (cur = stream; cur != NULL; cur = following) {
+		// remember the successor before the node goes away
+		following = cur->next;
+
+		umr_free_pm4_stream(cur->ib);
+		free(cur->shader);
+		free(cur->words);
+		free(cur);
+	}
+}
+
+// decode a stream of packets into a linked list of packets
+//
+//   asic   - device handle, passed on to the per-packet parser
+//   vmid   - VMID the words were read from
+//   stream - 'nwords' 32-bit words of PM4 data
+//
+// Each header is split into the packet type (bits 31:30), the body length
+// (bits 29:16, plus one) and either the PKT0 base offset (bits 15:0) or
+// the opcode (bits 15:8).  Type 3 packets are additionally scanned for
+// IB pointers and shader addresses.
+//
+// The returned list always ends with one zero-filled node.  Decoding stops
+// early at a packet whose body would run past the end of 'stream'.
+// Returns NULL if memory runs out; free the result with
+// umr_free_pm4_stream().
+struct umr_pm4_stream *umr_pm4_decode_stream(struct umr_asic *asic, int vmid, uint32_t *stream, uint32_t nwords)
+{
+	struct umr_pm4_stream *ops, *ps;
+
+	ps = ops = calloc(1, sizeof *ops);
+	if (!ps)
+		goto oom;
+
+	while (nwords) {
+		uint32_t header = *stream;
+		uint32_t body = ((header >> 16) + 1) & 0x3FFF;
+
+		// a body longer than what is left means the packet is truncated;
+		// copying it would read past the buffer and wrap 'nwords'
+		if (body > nwords - 1)
+			break;
+
+		// fetch basics out of header
+		ps->pkttype = header >> 30;
+		ps->n_words = body;
+
+		// grab type specific header data
+		if (ps->pkttype == 0)
+			ps->pkt0off = header & 0xFFFF;
+		else
+			ps->opcode = (header >> 8) & 0xFF;
+
+		// grab rest of words (at least one slot so 'words' is never NULL)
+		ps->words = calloc(body ? body : 1, sizeof(ps->words[0]));
+		if (!ps->words)
+			goto oom;
+		memcpy(ps->words, &stream[1], body * sizeof(stream[0]));
+
+		// decode specific packets
+		if (ps->pkttype == 3)
+			parse_pm4(asic, vmid, ps);
+
+		// advance stream
+		nwords -= 1 + body;
+		stream += 1 + body;
+		ps->next = calloc(1, sizeof(*ps));
+		if (!ps->next)
+			goto oom;
+		ps = ps->next;
+	}
+
+	return ops;
+
+oom:
+	fprintf(stderr, "[ERROR]: Out of memory\n");
+	// drop whatever was decoded so far rather than return a partial list
+	umr_free_pm4_stream(ops);
+	return NULL;
+}
+
+// decode a stream of PM4 packets starting with ring
+//
+// Reads the ring called 'ringname', copies the words between the read and
+// write pointers into a linear buffer and decodes them with VMID 0.  If
+// the halt_waves option is set, the waves are halted for the duration of
+// the read and resumed before returning.
+//
+// Returns the decoded stream (free with umr_free_pm4_stream()), or NULL if
+// the ring could not be read, is empty (rptr == wptr), or memory ran out.
+struct umr_pm4_stream *umr_pm4_decode_ring(struct umr_asic *asic, char *ringname)
+{
+	struct umr_pm4_stream *ps = NULL;
+	uint32_t *ringdata, ringsize = 0;
+
+	if (asic->options.halt_waves)
+		umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT);
+
+	// presumably NULL is returned when the ring cannot be read (e.g. an
+	// unknown ring name); a zero size would make the modulo below trap
+	// NOTE(review): 'ringsize' is used as a count of 32-bit words here --
+	// confirm umr_read_ring_data() does not report it in bytes
+	ringdata = umr_read_ring_data(asic, ringname, &ringsize);
+	if (ringdata && ringsize) {
+		ringdata[0] %= ringsize;
+		ringdata[1] %= ringsize;
+		if (ringdata[0] != ringdata[1]) { // rptr != wptr
+			uint32_t *lineardata, linearsize;
+
+			// copy ring data into linear array
+			lineardata = calloc(ringsize, sizeof(*lineardata));
+			if (lineardata) {
+				linearsize = 0;
+				while (ringdata[0] != ringdata[1]) {
+					lineardata[linearsize++] = ringdata[3 + ringdata[0]];  // first 3 words are rptr/wptr/dwptr
+					ringdata[0] = (ringdata[0] + 1) % ringsize;
+				}
+
+				ps = umr_pm4_decode_stream(asic, 0, lineardata, linearsize);
+				free(lineardata);
+			} else {
+				fprintf(stderr, "[ERROR]: Out of memory\n");
+			}
+		}
+	}
+
+	// released on every path, including the empty-ring case
+	free(ringdata);
+
+	if (asic->options.halt_waves)
+		umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME);
+
+	return ps;
+}
+
diff --git a/src/umr.h b/src/umr.h
index 4c9f95e1b87b..326c13af694f 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -213,7 +213,8 @@ struct umr_options {
 	char
 		*scanblock,
 		dev_name[32],
-		hub_name[32];
+		hub_name[32],
+		ring_name[32];
 	struct {
 		int domain,
 		    bus,
@@ -621,7 +622,26 @@ int umr_grbm_select_index(struct umr_asic *asic, uint32_t se, uint32_t sh, uint3
 int umr_sq_cmd_halt_waves(struct umr_asic *asic, enum umr_sq_cmd_halt_resume mode);
 
 /* IB/ring decoding/dumping/etc */
+struct umr_pm4_stream {
+	// fields taken from the packet header
+	uint32_t pkttype;		// packet type (0==simple write, 3 == packet)
+	uint32_t pkt0off;		// base address for PKT0 writes
+	uint32_t opcode;
+	uint32_t n_words;		// number of words ignoring header
+
+	// packet body
+	uint32_t *words;		// words following header word
+
+	// links to related packets
+	struct umr_pm4_stream *next;	// adjacent PM4 packet if any
+	struct umr_pm4_stream *ib;	// IB this packet might point to
+
+	struct umr_shaders_pgm *shader;	// shader program if any
+};
+
 void *umr_read_ring_data(struct umr_asic *asic, char *ringname, uint32_t *ringsize);
+struct umr_pm4_stream *umr_pm4_decode_ring(struct umr_asic *asic, char *ringname);
+struct umr_pm4_stream *umr_pm4_decode_stream(struct umr_asic *asic, int vmid, uint32_t *stream, uint32_t nwords);
+void umr_free_pm4_stream(struct umr_pm4_stream *stream);
+struct umr_shaders_pgm *umr_find_shader_in_stream(struct umr_pm4_stream *stream, unsigned vmid, uint64_t addr);
+struct umr_shaders_pgm *umr_find_shader_in_ring(struct umr_asic *asic, char *ringname, unsigned vmid, uint64_t addr, int no_halt);
+
 void umr_print_decode(struct umr_asic *asic, struct umr_ring_decoder *decoder, uint32_t ib);
 void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder);
 void umr_dump_shaders(struct umr_asic *asic, struct umr_ring_decoder *decoder, struct umr_wave_data *wd);
-- 
2.14.3



More information about the amd-gfx mailing list