[PATCH umr 4/4] Add ability to halt waves and better VM decoding

Tom St Denis tom.stdenis at amd.com
Mon Jul 24 15:25:51 UTC 2017


This patch involves a few things I was working on at once, so the
changes are a bit intertwined.  It adds:

1.  The ability to halt SQ waves when reading waves on CIK and later
ASICs.

2.  The ability to enable verbose decoding when reading VRAM.

3.  The ability to decode virtual addresses in the mmhub (e.g. for VCN).

Signed-off-by: Tom St Denis <tom.stdenis at amd.com>
---
 doc/umr.1                   |  6 +++
 src/app/main.c              | 15 +++++--
 src/app/print_waves.c       |  6 +++
 src/lib/CMakeLists.txt      |  1 +
 src/lib/dump_ib.c           |  6 ++-
 src/lib/find_reg.c          | 11 ++++-
 src/lib/mmio.c              | 36 +++++++++++++----
 src/lib/read_vram.c         | 98 ++++++++++++++++++++++++++++++++++++++-------
 src/lib/ring_decode.c       |  4 +-
 src/lib/sq_cmd_halt_waves.c | 57 ++++++++++++++++++++++++++
 src/umr.h                   | 26 ++++++++++++
 11 files changed, 233 insertions(+), 33 deletions(-)
 create mode 100644 src/lib/sq_cmd_halt_waves.c

diff --git a/doc/umr.1 b/doc/umr.1
index 4c720ba48840..da432a13b0d1 100644
--- a/doc/umr.1
+++ b/doc/umr.1
@@ -148,6 +148,12 @@ separated strings.  Options should be specified before --update or --force comma
      be used only if the KMD is hung or otherwise not working correctly.  Using it on live systems
      may result in race conditions.
 
+.B verbose
+     Enable verbose diagnostics (used in --vram).
+
+.B halt_waves
+     Halt/resume all waves while reading wave status.
+
 .SH "Notes"
 
 - The "Waves" field in the DRM section of --top only works if GFX PG has been disabled.  Otherwise,
diff --git a/src/app/main.c b/src/app/main.c
index 6e0bc57200b0..7e3914155a22 100644
--- a/src/app/main.c
+++ b/src/app/main.c
@@ -107,6 +107,10 @@ static void parse_options(char *str)
 			options.quiet = 1;
 		} else if (!strcmp(option, "follow_ib")) {
 			options.follow_ib = 1;
+		} else if (!strcmp(option, "verbose")) {
+			options.verbose = 1;
+		} else if (!strcmp(option, "halt_waves")) {
+			options.halt_waves = 1;
 		} else if (!strcmp(option, "no_kernel")) {
 			options.no_kernel = 1;
 			options.use_pci = 1;
@@ -422,12 +426,15 @@ int main(int argc, char **argv)
 "\n\t--top, -t\n\t\tSummarize GPU utilization.  Can select a SE block with --bank.  Can use"
 	"\n\t\toptions 'use_colour' to colourize output and 'use_pci' to improve efficiency.\n"
 "\n\t--waves, -wa\n\t\tPrint out information about any active CU waves.  Can use '-O bits'"
-	"\n\t\tto see decoding of various wave fields.\n"
+	"\n\t\tto see decoding of various wave fields.  Can use the '-O halt_waves' option"
+	"\n\t\tto halt the SQ while reading registers.\n"
 "\n\t--vram, -v [<vmid>@]<address> <size>"
 	"\n\t\tRead 'size' bytes (in hex) from a given address (in hex) to stdout. Optionally"
-	"\n\t\tspecify the VMID (in decimal) treating the address as a virtual address instead.\n"
-"\n\t--option -O <string>[,<string>,...]\n\t\tEnable various flags: risky, bits, bitsfull, empty_log, follow, named, many,"
-	"\n\t\tuse_pci, use_colour, read_smc, quiet, no_kernel.\n"
+	"\n\t\tspecify the VMID (in decimal or in hex with a '0x' prefix) treating the address"
+	"\n\t\tas a virtual address instead.  Can use 'verbose' option to print out PDE/PTE"
+	"\n\t\tdecodings.\n"
+"\n\t--option -O <string>[,<string>,...]\n\t\tEnable various flags: bits, bitsfull, empty_log, follow, named, many,"
+	"\n\t\tuse_pci, use_colour, read_smc, quiet, no_kernel, verbose, halt_waves.\n"
 "\n\n", UMR_BUILD_VER, UMR_BUILD_REV);
 			exit(EXIT_SUCCESS);
 		} else {
diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index e157db9f9386..1efd8a13bd28 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -40,6 +40,9 @@ void umr_print_waves(struct umr_asic *asic)
 	struct umr_wave_status ws;
 	int first = 1, col = 0;
 
+	if (asic->options.halt_waves)
+		umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT);
+
 	if (asic->family <= FAMILY_CIK)
 		shift = 3;  // on SI..CIK allocations were done in 8-dword blocks
 	else
@@ -206,4 +209,7 @@ void umr_print_waves(struct umr_asic *asic)
 	}
 	if (first)
 		printf("No active waves!\n");
+
+	if (asic->options.halt_waves)
+		umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME);
 }
diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
index 217ae80cdfd7..fcb4f9c9dc80 100644
--- a/src/lib/CMakeLists.txt
+++ b/src/lib/CMakeLists.txt
@@ -21,6 +21,7 @@ add_library(umrcore STATIC
   read_vram.c
   ring_decode.c
   scan_config.c
+  sq_cmd_halt_waves.c
   transfer_soc15.c
   wave_status.c
   update.c
diff --git a/src/lib/dump_ib.c b/src/lib/dump_ib.c
index 4e81dbe3eb09..cba497373fe2 100644
--- a/src/lib/dump_ib.c
+++ b/src/lib/dump_ib.c
@@ -28,9 +28,11 @@
 void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder)
 {
 	uint32_t *data = NULL, x;
+	static const char *hubs[] = { "gfxhub", "mmhub" };
 
-	printf("Dumping IB at VMID:%u 0x%llx of %u words\n",
-		(unsigned)decoder->next_ib_info.vmid,
+	printf("Dumping IB at (%s) VMID:%u 0x%llx of %u words\n",
+		hubs[decoder->next_ib_info.vmid >> 8],
+		(unsigned)decoder->next_ib_info.vmid & 0xFF,
 		(unsigned long long)decoder->next_ib_info.ib_addr,
 		(unsigned)decoder->next_ib_info.size/4);
 
diff --git a/src/lib/find_reg.c b/src/lib/find_reg.c
index d4647163ea63..ecd7f132c9c9 100644
--- a/src/lib/find_reg.c
+++ b/src/lib/find_reg.c
@@ -36,14 +36,21 @@ uint32_t umr_find_reg(struct umr_asic *asic, char *regname)
 	return 0xFFFFFFFF;
 }
 
-struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname)
+struct umr_reg *umr_find_reg_data_by_ip(struct umr_asic *asic, char *ip, char *regname)
 {
 	int i, j;
 
-	for (i = 0; i < asic->no_blocks; i++)
+	for (i = 0; i < asic->no_blocks; i++) {
+		if (ip && memcmp(asic->blocks[i]->ipname, ip, strlen(ip))) continue;
 		for (j = 0; j < asic->blocks[i]->no_regs; j++)
 			if (!strcmp(asic->blocks[i]->regs[j].regname, regname))
 				return &asic->blocks[i]->regs[j];
+	}
 	fprintf(stderr, "[BUG]: reg [%s] not found on asic [%s]\n", regname, asic->asicname);
 	return NULL;
 }
+
+struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname)
+{
+	return umr_find_reg_data_by_ip(asic, NULL, regname);
+}
diff --git a/src/lib/mmio.c b/src/lib/mmio.c
index 47e5150d3201..eb91e289404f 100644
--- a/src/lib/mmio.c
+++ b/src/lib/mmio.c
@@ -145,26 +145,36 @@ int umr_write_reg(struct umr_asic *asic, uint64_t addr, uint32_t value, enum reg
 	return 0;
 }
 
-uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name)
+uint32_t umr_read_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name)
 {
 	struct umr_reg *reg;
-	reg = umr_find_reg_data(asic, name);
+	reg = umr_find_reg_data_by_ip(asic, ip, name);
 	if (reg)
 		return umr_read_reg(asic, reg->addr * (reg->type == REG_MMIO ? 4 : 1), reg->type);
 	else
 		return 0;
 }
 
-int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value)
+uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name)
+{
+	return umr_read_reg_by_name_by_ip(asic, NULL, name);
+}
+
+int umr_write_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name, uint32_t value)
 {
 	struct umr_reg *reg;
-	reg = umr_find_reg_data(asic, name);
+	reg = umr_find_reg_data_by_ip(asic, ip, name);
 	if (reg)
 		return umr_write_reg(asic, reg->addr * (reg->type == REG_MMIO ? 4 : 1), value, reg->type);
 	else
 		return -1;
 }
 
+int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value)
+{
+	return umr_write_reg_by_name_by_ip(asic, NULL, name, value);
+}
+
 uint32_t umr_bitslice_reg(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue)
 {
 	int i;
@@ -193,26 +203,36 @@ uint32_t umr_bitslice_compose_value(struct umr_asic *asic, struct umr_reg *reg,
 	return 0;
 }
 
-uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
+uint32_t umr_bitslice_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue)
 {
 	struct umr_reg *reg;
-	reg = umr_find_reg_data(asic, regname);
+	reg = umr_find_reg_data_by_ip(asic, ip, regname);
 	if (reg)
 		return umr_bitslice_reg(asic, reg, bitname, regvalue);
 	else
 		return 0;
 }
 
-uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
+uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
+{
+	return umr_bitslice_reg_by_name_by_ip(asic, NULL, regname, bitname, regvalue);
+}
+
+uint32_t umr_bitslice_compose_value_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue)
 {
 	struct umr_reg *reg;
-	reg = umr_find_reg_data(asic, regname);
+	reg = umr_find_reg_data_by_ip(asic, ip, regname);
 	if (reg)
 		return umr_bitslice_compose_value(asic, reg, bitname, regvalue);
 	else
 		return 0;
 }
 
+uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
+{
+	return umr_bitslice_compose_value_by_name_by_ip(asic, NULL, regname, bitname, regvalue);
+}
+
 int umr_grbm_select_index(struct umr_asic *asic, uint32_t se, uint32_t sh, uint32_t instance)
 {
 	struct umr_reg *grbm_idx;
diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index 3d458db8fa11..b8034372b280 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -135,7 +135,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR", (int)vmid);
 		page_table_base_addr  = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
 
-	vm_fb_base  = ((uint64_t)umr_read_reg_by_name(asic, "mmMC_VM_FB_LOCATION") >> 16) << 24;
+	vm_fb_base  = ((uint64_t)umr_read_reg_by_name(asic, "mmMC_VM_FB_LOCATION") & 0xFFFF) << 24;
 
 	DEBUG("mmVM_CONTEXTx_PAGE_TABLE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr);
 	DEBUG("mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR = 0x%08llx\n", (unsigned long long)page_table_base_addr);
@@ -143,6 +143,21 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 	DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_DEPTH = %d\n", page_table_depth);
 	DEBUG("mmMC_VM_FB_LOCATION == %llx\n", (unsigned long long)vm_fb_base);
 
+	if (asic->options.verbose)
+		fprintf(stderr, "[VERBOSE]: Decoding %u at 0x%llx\n"
+				"[VERBOSE]: PAGE_TABLE_START_ADDR=0x%llx\n"
+				"[VERBOSE]: PAGE_TABLE_BASE_ADDR=0x%llx\n"
+				"[VERBOSE]: PAGE_TABLE_BLOCK_SIZE=%u\n"
+				"[VERBOSE]: PAGE_TABLE_DEPTH=%u\n"
+				"[VERBOSE]: MC_VM_FB_LOCATION=0x%llx\n",
+			(unsigned)vmid,
+			(unsigned long long)address,
+			(unsigned long long)page_table_start_addr,
+			(unsigned long long)page_table_base_addr,
+			(unsigned)page_table_size,
+			(unsigned)page_table_depth,
+			(unsigned long long)vm_fb_base);
+
 	address -= page_table_start_addr;
 
 	while (size) {
@@ -152,17 +167,21 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			pte_idx = (address >> 12) & ((1ULL << (9 + page_table_size)) - 1);
 
 			// read PDE entry
-			umr_read_vram(asic, 0xFFFF, page_table_base_addr + pde_idx * 8, 8, &pde_entry);
+			umr_read_vram(asic, UMR_LINEAR_HUB, page_table_base_addr + pde_idx * 8 - vm_fb_base, 8, &pde_entry);
 
 			// decode PDE values
 			pde_fields.frag_size     = (pde_entry >> 59) & 0x1F;
 			pde_fields.pte_base_addr = pde_entry & 0xFFFFFFF000ULL;
 			pde_fields.valid         = pde_entry & 1;
+			if (asic->options.verbose)
+				fprintf(stderr, "[VERBOSE]: PDE.pte_base_addr==0x%llx, PDE.valid=%d\n",
+						(unsigned long long)pde_fields.pte_base_addr,
+						(int)pde_fields.valid);
 			DEBUG("PDE==%llx, pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n",
 				(unsigned long long)pde_entry, (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid);
 
 			// now read PTE entry for this page
-			if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0)
+			if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx*8 - vm_fb_base, 8, &pte_entry) < 0)
 				return -1;
 
 			// decode PTE values
@@ -170,6 +189,11 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
 			pte_fields.system         = (pte_entry >> 1) & 1;
 			pte_fields.valid          = pte_entry & 1;
+			if (asic->options.verbose)
+				fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n",
+					(unsigned long long)pte_fields.page_base_addr,
+					(int)pte_fields.system,
+					(int)pte_fields.valid);
 			DEBUG("PTE=%llx, pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n",
 				(unsigned long long)pte_entry, (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
 
@@ -179,7 +203,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			// depth == 0 == PTE only
 			pte_idx = (address >> 12);
 
-			if (umr_read_vram(asic, 0xFFFF, page_table_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
+			if (umr_read_vram(asic, UMR_LINEAR_HUB, page_table_base_addr + pte_idx * 8 - vm_fb_base, 8, &pte_entry) < 0)
 				return -1;
 
 			// decode PTE values
@@ -187,6 +211,11 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
 			pte_fields.system         = (pte_entry >> 1) & 1;
 			pte_fields.valid          = pte_entry & 1;
+			if (asic->options.verbose)
+				fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n",
+					(unsigned long long)pte_fields.page_base_addr,
+					(int)pte_fields.system,
+					(int)pte_fields.valid);
 			DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
 
 			// compute starting address
@@ -207,7 +236,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 				return -1;
 			}
 		} else {
-			if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) {
+			if (umr_read_vram(asic, UMR_LINEAR_HUB, start_addr, chunk_size, pdst) < 0) {
 				fprintf(stderr, "[ERROR]: Cannot read from VRAM\n");
 				return -1;
 			}
@@ -244,6 +273,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 	} pte_fields;
 	char buf[64];
 	unsigned char *pdst = dst;
+	char *hub;
 
 	/*
 	 * PTE format on AI:
@@ -266,21 +296,28 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 	 * 0 valid
 	 */
 
+	if ((vmid & 0xFF00) == UMR_MM_HUB)
+		hub = "mmhub";
+	else
+		hub = "gfx";
+
+	vmid &= 0xFF;
+
 	// read vm registers
 	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid);
-		page_table_start_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
+		page_table_start_addr = (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 12;
 	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid);
-		page_table_start_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 44;
+		page_table_start_addr |= (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 44;
 
 	sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid);
-		tmp = umr_read_reg_by_name(asic, buf);
+		tmp = umr_read_reg_by_name_by_ip(asic, hub, buf);
 		page_table_depth      = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_DEPTH", tmp);
 		page_table_size       = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_BLOCK_SIZE", tmp);
 
 	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid);
-		page_table_base_addr  = (uint64_t)umr_read_reg_by_name(asic, buf) << 0;
+		page_table_base_addr  = (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 0;
 	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid);
-		page_table_base_addr  |= (uint64_t)umr_read_reg_by_name(asic, buf) << 32;
+		page_table_base_addr  |= (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 32;
 
 	DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address);
 	DEBUG("mmVM_CONTEXTx_PAGE_TABLE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr);
@@ -288,6 +325,15 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 	DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_BLOCK_SIZE = %lu\n", page_table_size);
 	DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_DEPTH = %d\n", page_table_depth);
 
+	if (asic->options.verbose)
+		fprintf(stderr, "[VERBOSE]: Decoding %u at 0x%llx\nPAGE_TABLE_START_ADDR=0x%llx\nPAGE_TABLE_BASE_ADDR=0x%llx\nPAGE_TABLE_BLOCK_SIZE=%u\nPAGE_TABLE_DEPTH=%u\n",
+			(unsigned)vmid,
+			(unsigned long long)address,
+			(unsigned long long)page_table_start_addr,
+			(unsigned long long)page_table_base_addr,
+			(unsigned)page_table_size,
+			(unsigned)page_table_depth);
+
 	address -= page_table_start_addr;
 
 	// update addresses for APUs
@@ -336,7 +382,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 				DEBUG("selector mask == %llx\n", ((unsigned long long)511 << ((page_table_depth-1)*9 + (12 + 9 + page_table_size))));
 
 				// read PDE entry
-				if (umr_read_vram(asic, 0xFFFF, pde_address + pde_idx * 8, 8, &pde_entry) < 0)
+				if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_address + pde_idx * 8, 8, &pde_entry) < 0)
 					return -1;
 
 				// decode PDE values
@@ -349,6 +395,13 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 				DEBUG("PDE==%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d, system=%d, cache=%d, pte=%d\n",
 					(unsigned long long)pde_entry, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr,
 					(int)pde_fields.valid, (int)pde_fields.system, (int)pde_fields.cache, (int)pde_fields.pte);
+				if (asic->options.verbose)
+					fprintf(stderr, "[VERBOSE]: PDE.pte_base_addr==0x%llx, PDE.valid=%d, PDE.system=%d, PDE.cache=%d, PDE.pte=%d\n",
+							(unsigned long long)pde_fields.pte_base_addr,
+							(int)pde_fields.valid,
+							(int)pde_fields.system,
+							(int)pde_fields.cache,
+							(int)pde_fields.pte);
 
 				if (!pde_fields.system)
 					pde_fields.pte_base_addr -= vm_fb_offset;
@@ -360,7 +413,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			}
 
 			// now read PTE entry for this page
-			if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0)
+			if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0)
 				return -1;
 
 			// decode PTE values
@@ -371,6 +424,11 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			DEBUG("PTE=%llx, pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n",
 				(unsigned long long)pte_entry, (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment,
 				(int)pte_fields.system, (int)pte_fields.valid);
+			if (asic->options.verbose)
+				fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n",
+					(unsigned long long)pte_fields.page_base_addr,
+					(int)pte_fields.system,
+					(int)pte_fields.valid);
 
 			if (!pte_fields.system)
 				pte_fields.page_base_addr -= vm_fb_offset;
@@ -390,11 +448,16 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, system=%d, valid=%d\n",
 				(unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr,
 				(int)pde_fields.system, (int)pde_fields.valid);
+				if (asic->options.verbose)
+					fprintf(stderr, "[VERBOSE]: PDE.pte_base_addr==0x%llx, PDE.valid=%d, PDE.system=%d\n",
+							(unsigned long long)pde_fields.pte_base_addr,
+							(int)pde_fields.valid,
+							(int)pde_fields.system);
 
 			// PTE addr = baseaddr[47:6] + (logical - start) >> fragsize)
 			pte_idx = (address >> (12 + pde_fields.frag_size));
 
-			if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
+			if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
 				return -1;
 
 			// decode PTE values
@@ -405,6 +468,11 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n",
 				(unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment,
 				(int)pte_fields.system, (int)pte_fields.valid);
+			if (asic->options.verbose)
+				fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n",
+					(unsigned long long)pte_fields.page_base_addr,
+					(int)pte_fields.system,
+					(int)pte_fields.valid);
 
 
 			// compute starting address
@@ -427,7 +495,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 				return -1;
 			}
 		} else {
-			if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) {
+			if (umr_read_vram(asic, UMR_LINEAR_HUB, start_addr, chunk_size, pdst) < 0) {
 				fprintf(stderr, "[ERROR]: Cannot read from VRAM\n");
 				return -1;
 			}
@@ -451,7 +519,7 @@ int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32
 		return -1;
 	}
 
-	if (vmid == 0xFFFF) {
+	if ((vmid & 0xFF00) == UMR_LINEAR_HUB) {
 		DEBUG("Reading physical VRAM addr: 0x%llx\n", (unsigned long long)address);
 		// addressing is physical
 		if (asic->options.use_pci == 0) {
diff --git a/src/lib/ring_decode.c b/src/lib/ring_decode.c
index 35e72ed58e4f..772ea49dda6f 100644
--- a/src/lib/ring_decode.c
+++ b/src/lib/ring_decode.c
@@ -589,7 +589,7 @@ static void print_decode_pm4(struct umr_asic *asic, struct umr_ring_decoder *dec
 			// detect VCN/UVD IBs and chain them once all
 			// 4 pieces of information are found
 			if (!strcmp(name, "mmUVD_LMI_RBC_IB_VMID")) {
-				decoder->pm4.next_ib_state.ib_vmid = ib;
+				decoder->pm4.next_ib_state.ib_vmid = ib | ((asic->family <= FAMILY_VI) ? 0 : UMR_MM_HUB);
 				decoder->pm4.next_ib_state.tally |= 1;
 			} else if (!strcmp(name, "mmUVD_LMI_RBC_IB_64BIT_BAR_LOW")) {
 				decoder->pm4.next_ib_state.ib_addr_lo = ib;
@@ -598,7 +598,7 @@ static void print_decode_pm4(struct umr_asic *asic, struct umr_ring_decoder *dec
 				decoder->pm4.next_ib_state.ib_addr_hi = ib;
 				decoder->pm4.next_ib_state.tally |= 4;
 			} else if (!strcmp(name, "mmUVD_RBC_IB_SIZE")) {
-				decoder->pm4.next_ib_state.ib_size = ib;
+				decoder->pm4.next_ib_state.ib_size = ib * 4;
 				decoder->pm4.next_ib_state.tally |= 8;
 			}
 
diff --git a/src/lib/sq_cmd_halt_waves.c b/src/lib/sq_cmd_halt_waves.c
new file mode 100644
index 000000000000..83aa52d2cfb2
--- /dev/null
+++ b/src/lib/sq_cmd_halt_waves.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis <tom.stdenis at amd.com>
+ *
+ */
+#include "umr.h"
+
+int umr_sq_cmd_halt_waves(struct umr_asic *asic, enum umr_sq_cmd_halt_resume mode)
+{
+	struct umr_reg *reg;
+	uint32_t value;
+	uint64_t addr;
+
+	reg = umr_find_reg_data(asic, "SQ_CMD");
+	if (!reg) {
+		fprintf(stderr, "[BUG]: Cannot find SQ_CMD register in umr_sq_cmd_halt_waves()\n");
+		return -1;
+	}
+
+	// compose value
+	if (asic->family == FAMILY_CIK) {
+		value = umr_bitslice_compose_value(asic, reg, "CMD", mode == UMR_SQ_CMD_HALT ? 1 : 2); // SETHALT
+	} else {
+		value = umr_bitslice_compose_value(asic, reg, "CMD", 1); // SETHALT
+		value |= umr_bitslice_compose_value(asic, reg, "DATA", mode == UMR_SQ_CMD_HALT ? 1 : 0);
+	}
+	value |= umr_bitslice_compose_value(asic, reg, "MODE", 1); // BROADCAST
+
+	// compose address
+	addr = reg->addr * 4;
+	addr |= (1ULL << 62) |      // we need to take the lock so we can ensure a broadcast write
+			(0x3FFULL << 24) |
+			(0x3FFULL << 34) |
+			(0x3FFULL << 44);
+	umr_write_reg(asic, addr, value, reg->type);
+
+	return 0;
+}
diff --git a/src/umr.h b/src/umr.h
index a0e94a7e4db9..dd7f80c38f0c 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -33,6 +33,20 @@
 #include <pciaccess.h>
 #include <pthread.h>
 
+/* SQ_CMD halt/resume */
+enum umr_sq_cmd_halt_resume {
+	UMR_SQ_CMD_HALT=0,
+	UMR_SQ_CMD_RESUME,
+};
+
+/* memory space hubs */
+enum umr_hub_space {
+	UMR_GFX_HUB = 0 << 8,        // default on everything before AI
+	UMR_MM_HUB = 1 << 8,         // available on AI and later
+
+	UMR_LINEAR_HUB = 0xFF << 8,  // this is for linear access to vram
+};
+
 /* sourced from amd_powerplay.h from the kernel */
 enum amd_pp_sensors {
 	AMDGPU_PP_SENSOR_GFX_SCLK = 0,
@@ -174,6 +188,8 @@ struct umr_options {
 	    read_smc,
 	    quiet,
 	    follow_ib,
+	    verbose,
+	    halt_waves,
 	    no_kernel;
 	unsigned
 	    instance_bank,
@@ -477,6 +493,7 @@ int umr_create_mmio_accel(struct umr_asic *asic);
 uint32_t umr_find_reg(struct umr_asic *asic, char *regname);
 
 // find the register data for a register
+struct umr_reg *umr_find_reg_data_by_ip(struct umr_asic *asic, char *ip, char *regname);
 struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname);
 
 // read/write a 32-bit register given a BYTE address
@@ -487,17 +504,26 @@ int umr_write_reg(struct umr_asic *asic, uint64_t addr, uint32_t value, enum reg
 uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name);
 int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value);
 
+// read/write a register by ip/name
+uint32_t umr_read_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name);
+int umr_write_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name, uint32_t value);
+
 // slice a full register into bits (shifted into LSB)
 uint32_t umr_bitslice_reg(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue);
 uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue);
+uint32_t umr_bitslice_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue);
 
 // compose a 32-bit register with a value and a bitfield
 uint32_t umr_bitslice_compose_value(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue);
 uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *reg, char *bitname, uint32_t regvalue);
+uint32_t umr_bitslice_compose_value_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue);
 
 // select a GRBM_GFX_IDX
 int umr_grbm_select_index(struct umr_asic *asic, uint32_t se, uint32_t sh, uint32_t instance);
 
+// halt/resume SQ waves
+int umr_sq_cmd_halt_waves(struct umr_asic *asic, enum umr_sq_cmd_halt_resume mode);
+
 /* IB/ring decoding/dumping/etc */
 void umr_print_decode(struct umr_asic *asic, struct umr_ring_decoder *decoder, uint32_t ib);
 void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder);
-- 
2.12.0



More information about the amd-gfx mailing list