[PATCH umr 4/4] Add ability to halt waves and better VM decoding (v3)

Tom St Denis tom.stdenis at amd.com
Tue Jul 25 17:23:24 UTC 2017


This patch combines a few things I was working on at once, so the
changes are a bit intertwined.  It adds:

1.  The ability to halt SQ waves when reading waves on CIK and later
ASICs.

2.  The ability to enable verbose PDE/PTE decoding when reading VRAM.

3.  The ability to decode virtual addresses in the mmhub (for, say, VCN).

Signed-off-by: Tom St Denis <tom.stdenis at amd.com>
Acked-by: Alex Deucher <alexander.deucher at amd.com>

(v2): Tidy up AI verbose VM decoding output
(v3): More details in verbose output and removed some duplicate DEBUG output
---
 doc/umr.1                   |  13 +++
 src/app/main.c              |  56 ++++++++-
 src/app/print_waves.c       |   6 +
 src/lib/CMakeLists.txt      |   1 +
 src/lib/dump_ib.c           |   6 +-
 src/lib/find_reg.c          |  11 +-
 src/lib/mmio.c              |  36 ++++--
 src/lib/read_vram.c         | 270 +++++++++++++++++++++++++++++++++-----------
 src/lib/ring_decode.c       |   4 +-
 src/lib/sq_cmd_halt_waves.c |  57 ++++++++++
 src/umr.h                   |  26 +++++
 11 files changed, 403 insertions(+), 83 deletions(-)
 create mode 100644 src/lib/sq_cmd_halt_waves.c

diff --git a/doc/umr.1 b/doc/umr.1
index 0a991f9923a4..06950925b7b6 100644
--- a/doc/umr.1
+++ b/doc/umr.1
@@ -87,6 +87,13 @@ invoking it very rapidly.  Unlike the wave count reading in --top this command
 will operate regardless of whether GFX PG is enabled or not.  Can use
 .B bits
 to decode the wave bitfields.
+
+.IP "--vm-decode, -vm vmid@<address> <num_of_pages>"
+Decode page mappings at a specified address (in hex) from the VMID specified.
+The VMID can be specified in hexadecimal (with leading '0x') or in decimal.
+Implies '-O verbose' for the duration of the command so does not require it
+to be manually specified.
+
 .IP "--vram, -v [vmid@]<address> <size>"
 Read 'size' bytes (in hex) from the address specified (in hexadecimal) from VRAM
 to stdout.  Optionally specify the VMID (in decimal or in hex with a 0x prefix)
@@ -145,6 +152,12 @@ separated strings.  Options should be specified before --update or --force comma
      be used only if the KMD is hung or otherwise not working correctly.  Using it on live systems
      may result in race conditions.
 
+.B verbose
+     Enable verbose diagnostics (used in --vram).
+
+.B halt_waves
+     Halt/resume all waves while reading wave status.
+
 .SH "Notes"
 
 - The "Waves" field in the DRM section of --top only works if GFX PG has been disabled.  Otherwise,
diff --git a/src/app/main.c b/src/app/main.c
index 6e0bc57200b0..0b46643400ba 100644
--- a/src/app/main.c
+++ b/src/app/main.c
@@ -107,6 +107,10 @@ static void parse_options(char *str)
 			options.quiet = 1;
 		} else if (!strcmp(option, "follow_ib")) {
 			options.follow_ib = 1;
+		} else if (!strcmp(option, "verbose")) {
+			options.verbose = 1;
+		} else if (!strcmp(option, "halt_waves")) {
+			options.halt_waves = 1;
 		} else if (!strcmp(option, "no_kernel")) {
 			options.no_kernel = 1;
 			options.use_pci = 1;
@@ -337,6 +341,38 @@ int main(int argc, char **argv)
 		} else if (!strcmp(argv[i], "--enumerate") || !strcmp(argv[i], "-e")) {
 			umr_enumerate_devices();
 			return 0;
+		} else if (!strcmp(argv[i], "--vm-decode") || !strcmp(argv[i], "-vm")) {
+			if (i + 2 < argc) {
+				uint64_t address;
+				uint32_t size, n, vmid;
+				int overbose;
+
+				if (!asic)
+					asic = get_asic();
+
+				overbose = asic->options.verbose;
+				asic->options.verbose = 1;
+
+				// allow specifying the vmid in hex as well so
+				// people can add the HUB flags more easily
+				if ((n = sscanf(argv[i+1], "0x%"SCNx32"@%"SCNx64, &vmid, &address)) != 2)
+					if ((n = sscanf(argv[i+1], "%"SCNu32"@%"SCNx64, &vmid, &address)) != 2) {
+						fprintf(stderr, "[ERROR]: Must specify a VMID for the --vm-decode command\n");
+						exit(EXIT_FAILURE);
+					}
+				sscanf(argv[i+2], "%"SCNx32, &size);
+				while (size--) {
+					if (umr_read_vram(asic, vmid, address, 0, NULL))
+						break;
+					address += 0x1000;
+				}
+				i += 2;
+
+				asic->options.verbose = overbose;
+			} else {
+				printf("--vm-decode requires two parameters\n");
+				return EXIT_FAILURE;
+			}
 		} else if (!strcmp(argv[i], "--vram") || !strcmp(argv[i], "-v")) {
 			if (i + 2 < argc) {
 				unsigned char buf[256];
@@ -354,13 +390,13 @@ int main(int argc, char **argv)
 						vmid = UMR_LINEAR_HUB;
 					}
 				sscanf(argv[i+2], "%"SCNx32, &size);
-				while (size) {
+				do {
 					n = size > sizeof(buf) ? sizeof(buf) : size;
 					umr_read_vram(asic, vmid, address, n, buf);
 					fwrite(buf, 1, n, stdout);
 					size -= n;
 					address += n;
-				}
+				} while (size);
 				i += 2;
 			} else {
 				printf("--vram requires two parameters\n");
@@ -422,12 +458,20 @@ int main(int argc, char **argv)
 "\n\t--top, -t\n\t\tSummarize GPU utilization.  Can select a SE block with --bank.  Can use"
 	"\n\t\toptions 'use_colour' to colourize output and 'use_pci' to improve efficiency.\n"
 "\n\t--waves, -wa\n\t\tPrint out information about any active CU waves.  Can use '-O bits'"
-	"\n\t\tto see decoding of various wave fields.\n"
+	"\n\t\tto see decoding of various wave fields.  Can use the '-O halt_waves' option"
+	"\n\t\tto halt the SQ while reading registers.\n"
+"\n\t--vm-decode, -vm vmid@<address> <num_of_pages>"
+	"\n\t\tDecode page mappings at a specified address (in hex) from the VMID specified."
+	"\n\t\tThe VMID can be specified in hexadecimal (with leading '0x') or in decimal."
+	"\n\t\tImplies '-O verbose' for the duration of the command so does not require it"
+	"\n\t\tto be manually specified.\n"
 "\n\t--vram, -v [<vmid>@]<address> <size>"
 	"\n\t\tRead 'size' bytes (in hex) from a given address (in hex) to stdout. Optionally"
-	"\n\t\tspecify the VMID (in decimal) treating the address as a virtual address instead.\n"
-"\n\t--option -O <string>[,<string>,...]\n\t\tEnable various flags: risky, bits, bitsfull, empty_log, follow, named, many,"
-	"\n\t\tuse_pci, use_colour, read_smc, quiet, no_kernel.\n"
+	"\n\t\tspecify the VMID (in decimal or in hex with a '0x' prefix) treating the address"
+	"\n\t\tas a virtual address instead.  Can use 'verbose' option to print out PDE/PTE"
+	"\n\t\tdecodings.\n"
+"\n\t--option -O <string>[,<string>,...]\n\t\tEnable various flags: bits, bitsfull, empty_log, follow, named, many,"
+	"\n\t\tuse_pci, use_colour, read_smc, quiet, no_kernel, verbose, halt_waves.\n"
 "\n\n", UMR_BUILD_VER, UMR_BUILD_REV);
 			exit(EXIT_SUCCESS);
 		} else {
diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index e157db9f9386..1efd8a13bd28 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -40,6 +40,9 @@ void umr_print_waves(struct umr_asic *asic)
 	struct umr_wave_status ws;
 	int first = 1, col = 0;
 
+	if (asic->options.halt_waves)
+		umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT);
+
 	if (asic->family <= FAMILY_CIK)
 		shift = 3;  // on SI..CIK allocations were done in 8-dword blocks
 	else
@@ -206,4 +209,7 @@ void umr_print_waves(struct umr_asic *asic)
 	}
 	if (first)
 		printf("No active waves!\n");
+
+	if (asic->options.halt_waves)
+		umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME);
 }
diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
index 217ae80cdfd7..fcb4f9c9dc80 100644
--- a/src/lib/CMakeLists.txt
+++ b/src/lib/CMakeLists.txt
@@ -21,6 +21,7 @@ add_library(umrcore STATIC
   read_vram.c
   ring_decode.c
   scan_config.c
+  sq_cmd_halt_waves.c
   transfer_soc15.c
   wave_status.c
   update.c
diff --git a/src/lib/dump_ib.c b/src/lib/dump_ib.c
index 4e81dbe3eb09..cba497373fe2 100644
--- a/src/lib/dump_ib.c
+++ b/src/lib/dump_ib.c
@@ -28,9 +28,11 @@
 void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder)
 {
 	uint32_t *data = NULL, x;
+	static const char *hubs[] = { "gfxhub", "mmhub" };
 
-	printf("Dumping IB at VMID:%u 0x%llx of %u words\n",
-		(unsigned)decoder->next_ib_info.vmid,
+	printf("Dumping IB at (%s) VMID:%u 0x%llx of %u words\n",
+		hubs[decoder->next_ib_info.vmid >> 8],
+		(unsigned)decoder->next_ib_info.vmid & 0xFF,
 		(unsigned long long)decoder->next_ib_info.ib_addr,
 		(unsigned)decoder->next_ib_info.size/4);
 
diff --git a/src/lib/find_reg.c b/src/lib/find_reg.c
index d4647163ea63..ecd7f132c9c9 100644
--- a/src/lib/find_reg.c
+++ b/src/lib/find_reg.c
@@ -36,14 +36,21 @@ uint32_t umr_find_reg(struct umr_asic *asic, char *regname)
 	return 0xFFFFFFFF;
 }
 
-struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname)
+struct umr_reg *umr_find_reg_data_by_ip(struct umr_asic *asic, char *ip, char *regname)
 {
 	int i, j;
 
-	for (i = 0; i < asic->no_blocks; i++)
+	for (i = 0; i < asic->no_blocks; i++) {
+		if (ip && memcmp(asic->blocks[i]->ipname, ip, strlen(ip))) continue;
 		for (j = 0; j < asic->blocks[i]->no_regs; j++)
 			if (!strcmp(asic->blocks[i]->regs[j].regname, regname))
 				return &asic->blocks[i]->regs[j];
+	}
 	fprintf(stderr, "[BUG]: reg [%s] not found on asic [%s]\n", regname, asic->asicname);
 	return NULL;
 }
+
+struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname)
+{
+	return umr_find_reg_data_by_ip(asic, NULL, regname);
+}
diff --git a/src/lib/mmio.c b/src/lib/mmio.c
index 47e5150d3201..eb91e289404f 100644
--- a/src/lib/mmio.c
+++ b/src/lib/mmio.c
@@ -145,26 +145,36 @@ int umr_write_reg(struct umr_asic *asic, uint64_t addr, uint32_t value, enum reg
 	return 0;
 }
 
-uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name)
+uint32_t umr_read_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name)
 {
 	struct umr_reg *reg;
-	reg = umr_find_reg_data(asic, name);
+	reg = umr_find_reg_data_by_ip(asic, ip, name);
 	if (reg)
 		return umr_read_reg(asic, reg->addr * (reg->type == REG_MMIO ? 4 : 1), reg->type);
 	else
 		return 0;
 }
 
-int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value)
+uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name)
+{
+	return umr_read_reg_by_name_by_ip(asic, NULL, name);
+}
+
+int umr_write_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name, uint32_t value)
 {
 	struct umr_reg *reg;
-	reg = umr_find_reg_data(asic, name);
+	reg = umr_find_reg_data_by_ip(asic, ip, name);
 	if (reg)
 		return umr_write_reg(asic, reg->addr * (reg->type == REG_MMIO ? 4 : 1), value, reg->type);
 	else
 		return -1;
 }
 
+int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value)
+{
+	return umr_write_reg_by_name_by_ip(asic, NULL, name, value);
+}
+
 uint32_t umr_bitslice_reg(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue)
 {
 	int i;
@@ -193,26 +203,36 @@ uint32_t umr_bitslice_compose_value(struct umr_asic *asic, struct umr_reg *reg,
 	return 0;
 }
 
-uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
+uint32_t umr_bitslice_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue)
 {
 	struct umr_reg *reg;
-	reg = umr_find_reg_data(asic, regname);
+	reg = umr_find_reg_data_by_ip(asic, ip, regname);
 	if (reg)
 		return umr_bitslice_reg(asic, reg, bitname, regvalue);
 	else
 		return 0;
 }
 
-uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
+uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
+{
+	return umr_bitslice_reg_by_name_by_ip(asic, NULL, regname, bitname, regvalue);
+}
+
+uint32_t umr_bitslice_compose_value_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue)
 {
 	struct umr_reg *reg;
-	reg = umr_find_reg_data(asic, regname);
+	reg = umr_find_reg_data_by_ip(asic, ip, regname);
 	if (reg)
 		return umr_bitslice_compose_value(asic, reg, bitname, regvalue);
 	else
 		return 0;
 }
 
+uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
+{
+	return umr_bitslice_compose_value_by_name_by_ip(asic, NULL, regname, bitname, regvalue);
+}
+
 int umr_grbm_select_index(struct umr_asic *asic, uint32_t se, uint32_t sh, uint32_t instance)
 {
 	struct umr_reg *grbm_idx;
diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index 3d458db8fa11..b58400f1f4ca 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -83,7 +83,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 {
 	uint64_t start_addr, page_table_start_addr, page_table_base_addr,
 		 page_table_size, pte_idx, pde_idx, pte_entry, pde_entry,
-		 vm_fb_base;
+		 vm_fb_base, pde_mask, pte_mask;
 	uint32_t chunk_size, tmp;
 	int page_table_depth;
 	struct {
@@ -99,9 +99,18 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			system,
 			valid;
 	} pte_fields;
+	struct {
+		uint32_t
+			mmVM_CONTEXTx_PAGE_TABLE_START_ADDR,
+			mmVM_CONTEXTx_CNTL,
+			mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR,
+			mmMC_VM_FB_LOCATION;
+	} registers;
 	char buf[64];
 	unsigned char *pdst = dst;
 
+	memset(&registers, 0, sizeof registers);
+
 	/*
 	 * PTE format on VI:
 	 * 63:40 reserved
@@ -125,44 +134,70 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 
 	// read vm registers
 	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR", (int)vmid ? 1 : 0);
-		page_table_start_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
+		registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR = umr_read_reg_by_name(asic, buf);
+		page_table_start_addr = (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR << 12;
 
 	sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid ? 1 : 0);
-		tmp = umr_read_reg_by_name(asic, buf);
+		tmp = registers.mmVM_CONTEXTx_CNTL = umr_read_reg_by_name(asic, buf);
 		page_table_depth      = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_DEPTH", tmp);
 		page_table_size       = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_BLOCK_SIZE", tmp);
 
 	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR", (int)vmid);
-		page_table_base_addr  = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
-
-	vm_fb_base  = ((uint64_t)umr_read_reg_by_name(asic, "mmMC_VM_FB_LOCATION") >> 16) << 24;
-
-	DEBUG("mmVM_CONTEXTx_PAGE_TABLE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr);
-	DEBUG("mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR = 0x%08llx\n", (unsigned long long)page_table_base_addr);
-	DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_BLOCK_SIZE = %lu\n", page_table_size);
-	DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_DEPTH = %d\n", page_table_depth);
-	DEBUG("mmMC_VM_FB_LOCATION == %llx\n", (unsigned long long)vm_fb_base);
+		registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR = umr_read_reg_by_name(asic, buf);
+		page_table_base_addr  = (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR << 12;
+
+	registers.mmMC_VM_FB_LOCATION = umr_read_reg_by_name(asic, "mmMC_VM_FB_LOCATION");
+	vm_fb_base  = ((uint64_t)registers.mmMC_VM_FB_LOCATION & 0xFFFF) << 24;
+
+
+	if (asic->options.verbose)
+		fprintf(stderr,
+				"[VERBOSE]: mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR=0x%llx\n"
+				"[VERBOSE]: mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR=0x%llx\n"
+				"[VERBOSE]: mmVM_CONTEXT%d_CNTL=0x%llx\n"
+				"[VERBOSE]: mmMC_VM_FB_LOCATION=0x%llx\n",
+			(int)vmid ? 1 : 0,
+			(unsigned long long)registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR,
+			(int)vmid ? 1 : 0,
+			(unsigned long long)registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR,
+			(int)vmid ? 1 : 0,
+			(unsigned long long)registers.mmVM_CONTEXTx_CNTL,
+			(unsigned long long)registers.mmMC_VM_FB_LOCATION);
 
 	address -= page_table_start_addr;
 
-	while (size) {
+	do {
 		if (page_table_depth == 1) {
 			// decode addr into pte and pde selectors...
-			pde_idx = (address >> (12 + 9 + page_table_size)) & ((1ULL << (40 - 12 - 9 - page_table_size)) - 1);
-			pte_idx = (address >> 12) & ((1ULL << (9 + page_table_size)) - 1);
+			pde_mask = ((1ULL << (40 - 12 - 9 - page_table_size)) - 1);
+			pte_mask = ((1ULL << (9 + page_table_size)) - 1);
+
+			pde_idx = (address >> (12 + 9 + page_table_size)) & pde_mask;
+			pte_idx = (address >> 12) & pte_mask;
+
+			// shift masks so we can use them later
+			pte_mask <<= 12;
+			pde_mask <<= (12 + 9 + page_table_size);
 
 			// read PDE entry
-			umr_read_vram(asic, 0xFFFF, page_table_base_addr + pde_idx * 8, 8, &pde_entry);
+			umr_read_vram(asic, UMR_LINEAR_HUB, page_table_base_addr + pde_idx * 8 - vm_fb_base, 8, &pde_entry);
 
 			// decode PDE values
 			pde_fields.frag_size     = (pde_entry >> 59) & 0x1F;
 			pde_fields.pte_base_addr = pde_entry & 0xFFFFFFF000ULL;
 			pde_fields.valid         = pde_entry & 1;
-			DEBUG("PDE==%llx, pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n",
-				(unsigned long long)pde_entry, (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid);
+			if (asic->options.verbose)
+				fprintf(stderr, "[VERBOSE]: PDE=0x%016llx, VA=0x%010llx, PBA==0x%010llx, V=%d\n",
+						(unsigned long long)pde_entry,
+						(unsigned long long)address & pde_mask,
+						(unsigned long long)pde_fields.pte_base_addr,
+						(int)pde_fields.valid);
+
+			if (!pde_fields.valid)
+				return -1;
 
 			// now read PTE entry for this page
-			if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0)
+			if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx*8 - vm_fb_base, 8, &pte_entry) < 0)
 				return -1;
 
 			// decode PTE values
@@ -170,8 +205,16 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
 			pte_fields.system         = (pte_entry >> 1) & 1;
 			pte_fields.valid          = pte_entry & 1;
-			DEBUG("PTE=%llx, pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n",
-				(unsigned long long)pte_entry, (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
+			if (asic->options.verbose)
+				fprintf(stderr, "[VERBOSE]: PTE=0x%016llx, VA=0x%010llx, PBA==0x%010llx, V=%d, S=%d\n",
+					(unsigned long long)pte_entry,
+					(unsigned long long)address & pte_mask,
+					(unsigned long long)pte_fields.page_base_addr,
+					(int)pte_fields.valid,
+					(int)pte_fields.system);
+
+			if (!pte_fields.valid)
+				return -1;
 
 			// compute starting address
 			start_addr = pte_fields.page_base_addr + (address & 0xFFF);
@@ -179,7 +222,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			// depth == 0 == PTE only
 			pte_idx = (address >> 12);
 
-			if (umr_read_vram(asic, 0xFFFF, page_table_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
+			if (umr_read_vram(asic, UMR_LINEAR_HUB, page_table_base_addr + pte_idx * 8 - vm_fb_base, 8, &pte_entry) < 0)
 				return -1;
 
 			// decode PTE values
@@ -187,7 +230,16 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
 			pte_fields.system         = (pte_entry >> 1) & 1;
 			pte_fields.valid          = pte_entry & 1;
-			DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
+			if (asic->options.verbose)
+				fprintf(stderr, "[VERBOSE]: PTE=0x%016llx, VA=0x%010llx, PBA==0x%010llx, PTE.system=%d, PTE.valid=%d\n",
+					(unsigned long long)pte_entry,
+					(unsigned long long)address & ~0xFFFULL,
+					(unsigned long long)pte_fields.page_base_addr,
+					(int)pte_fields.system,
+					(int)pte_fields.valid);
+
+			if (!pte_fields.valid)
+				return -1;
 
 			// compute starting address
 			start_addr = pte_fields.page_base_addr + (address & 0xFFF);
@@ -207,7 +259,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 				return -1;
 			}
 		} else {
-			if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) {
+			if (umr_read_vram(asic, UMR_LINEAR_HUB, start_addr, chunk_size, pdst) < 0) {
 				fprintf(stderr, "[ERROR]: Cannot read from VRAM\n");
 				return -1;
 			}
@@ -215,7 +267,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 		pdst += chunk_size;
 		size -= chunk_size;
 		address += chunk_size;
-	}
+	} while (size);
 	return 0;
 }
 
@@ -223,9 +275,21 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 {
 	uint64_t start_addr, page_table_start_addr, page_table_base_addr,
 		 page_table_size, pte_idx, pde_idx, pte_entry, pde_entry,
-		 pde_address, vga_base_address, vm_fb_offset, vm_fb_base;
+		 pde_address, vga_base_address, vm_fb_offset, vm_fb_base,
+		 va_mask;
 	uint32_t chunk_size, tmp;
-	int page_table_depth, first;
+	int page_table_depth, first, depth, indent;
+	struct {
+		uint32_t
+			mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_LO32,
+			mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32,
+			mmVM_CONTEXTx_CNTL,
+			mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_LO32,
+			mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_HI32,
+			mmVGA_MEMORY_BASE_ADDRESS,
+			mmVGA_MEMORY_BASE_ADDRESS_HIGH,
+			mmMC_VM_FB_OFFSET;
+	} registers;
 	struct {
 		uint64_t
 			frag_size,
@@ -244,6 +308,9 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 	} pte_fields;
 	char buf[64];
 	unsigned char *pdst = dst;
+	char *hub;
+
+	memset(&registers, 0, sizeof registers);
 
 	/*
 	 * PTE format on AI:
@@ -266,61 +333,111 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 	 * 0 valid
 	 */
 
+	if ((vmid & 0xFF00) == UMR_MM_HUB)
+		hub = "mmhub";
+	else
+		hub = "gfx";
+
+	vmid &= 0xFF;
+
 	// read vm registers
 	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid);
-		page_table_start_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
+		registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_LO32 = umr_read_reg_by_name_by_ip(asic, hub, buf);
+		page_table_start_addr = (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_LO32 << 12;
 	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid);
-		page_table_start_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 44;
+		registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32 = umr_read_reg_by_name_by_ip(asic, hub, buf);
+		page_table_start_addr |= (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32 << 44;
 
 	sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid);
-		tmp = umr_read_reg_by_name(asic, buf);
+		tmp = registers.mmVM_CONTEXTx_CNTL = umr_read_reg_by_name_by_ip(asic, hub, buf);
 		page_table_depth      = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_DEPTH", tmp);
 		page_table_size       = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_BLOCK_SIZE", tmp);
 
 	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid);
-		page_table_base_addr  = (uint64_t)umr_read_reg_by_name(asic, buf) << 0;
+		registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_LO32 = umr_read_reg_by_name_by_ip(asic, hub, buf);
+		page_table_base_addr  = (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_LO32 << 0;
 	sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid);
-		page_table_base_addr  |= (uint64_t)umr_read_reg_by_name(asic, buf) << 32;
-
-	DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address);
-	DEBUG("mmVM_CONTEXTx_PAGE_TABLE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr);
-	DEBUG("mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR = 0x%08llx\n", (unsigned long long)page_table_base_addr);
-	DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_BLOCK_SIZE = %lu\n", page_table_size);
-	DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_DEPTH = %d\n", page_table_depth);
-
-	address -= page_table_start_addr;
+		registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_HI32 = umr_read_reg_by_name_by_ip(asic, hub, buf);
+		page_table_base_addr  |= (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_HI32 << 32;
 
 	// update addresses for APUs
 	if (asic->config.gfx.family == 142) {
 		DEBUG("Reading vram config...\n");
-		vga_base_address  = (uint64_t)umr_read_reg_by_name(asic, "mmVGA_MEMORY_BASE_ADDRESS") << 0;
-		vga_base_address |= (uint64_t)umr_read_reg_by_name(asic, "mmVGA_MEMORY_BASE_ADDRESS_HIGH") << 32;
-		vm_fb_offset      = (uint64_t)umr_read_reg_by_name(asic, "mmMC_VM_FB_OFFSET") << 24;
+		registers.mmVGA_MEMORY_BASE_ADDRESS = umr_read_reg_by_name(asic, "mmVGA_MEMORY_BASE_ADDRESS");
+		registers.mmVGA_MEMORY_BASE_ADDRESS_HIGH = umr_read_reg_by_name(asic, "mmVGA_MEMORY_BASE_ADDRESS_HIGH");
+		registers.mmMC_VM_FB_OFFSET = umr_read_reg_by_name(asic, "mmMC_VM_FB_OFFSET");
+		vga_base_address  = (uint64_t)registers.mmVGA_MEMORY_BASE_ADDRESS << 0;
+		vga_base_address |= (uint64_t)registers.mmVGA_MEMORY_BASE_ADDRESS_HIGH << 32;
+		vm_fb_offset      = (uint64_t)registers.mmMC_VM_FB_OFFSET << 24;
 	} else {
 		vga_base_address = 0;
 		vm_fb_offset = 0;
 	}
 	vm_fb_base = (uint64_t)umr_read_reg_by_name(asic, "mmMC_VM_FB_LOCATION_BASE") << 24;
 
+	if (asic->options.verbose)
+		fprintf(stderr,
+				"[VERBOSE]: mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32=0x%llx\n"
+				"[VERBOSE]: mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32=0x%llx\n"
+				"[VERBOSE]: mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32=0x%llx\n"
+				"[VERBOSE]: mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32=0x%llx\n"
+				"[VERBOSE]: mmVM_CONTEXT%d_CNTL=0x%llx\n"
+				"[VERBOSE]: mmVGA_MEMORY_BASE_ADDRESS=0x%llx\n"
+				"[VERBOSE]: mmVGA_MEMORY_BASE_ADDRESS_HIGH=0x%llx\n"
+				"[VERBOSE]: mmMC_VM_FB_OFFSET=0x%llx\n",
+			(int)vmid, (unsigned long long)registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_LO32,
+			(int)vmid, (unsigned long long)registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32,
+			(int)vmid, (unsigned long long)registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_LO32,
+			(int)vmid, (unsigned long long)registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_HI32,
+			(int)vmid, (unsigned long long)registers.mmVM_CONTEXTx_CNTL,
+			(unsigned long long)registers.mmVGA_MEMORY_BASE_ADDRESS,
+			(unsigned long long)registers.mmVGA_MEMORY_BASE_ADDRESS_HIGH,
+			(unsigned long long)registers.mmMC_VM_FB_OFFSET);
+
+
 	DEBUG("mmMC_VM_FB_LOCATION_BASE == %llx\n", (unsigned long long)vm_fb_base);
 	DEBUG("mmMC_VM_FB_OFFSET = 0x%08llx\n", (unsigned long long)vm_fb_offset);
 	DEBUG("mmVGA_MEMORY_BASE_ADDRESS = 0x%08llx\n", (unsigned long long)vga_base_address);
 	DEBUG("\n");
 
 	// transform page_table_base
+	pde_entry = page_table_base_addr;
 	page_table_base_addr -= vm_fb_offset;
+	address -= page_table_start_addr;
 
-	while (size) {
+	do {
 		first = 1;
 		if (page_table_depth >= 1) {
+			// decode PDE values
+			pde_fields.frag_size     = (pde_entry >> 59) & 0x1F;
+			pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFFF000ULL;
+			pde_fields.valid         = pde_entry & 1;
+			pde_fields.system        = (pde_entry >> 1) & 1;
+			pde_fields.cache         = (pde_entry >> 2) & 1;
+			pde_fields.pte           = (pde_entry >> 54) & 1;
+
 			// mask off valid bit
 			page_table_base_addr &= ~1ULL;
 
+			// read PTE selector
 			pte_idx = (address >> 12) & ((1ULL << (9 + page_table_size)) - 1);
 
 			// AI+ supports more than 1 level of PDEs so we iterate for all of the depths
 			pde_address = page_table_base_addr;
 			pde_fields.system = 0;
+			depth = 0;
+			va_mask = ((unsigned long long)511 << ((page_table_depth)*9 + (12 + 9 + page_table_size)));
+			if (asic->options.verbose)
+				fprintf(stderr, "[VERBOSE]: PDE%d=0x%016llx, VA=0x%012llx, PBA==0x%012llx, V=%d, S=%d, C=%d, P=%d\n",
+						depth++,
+						(unsigned long long)pde_entry,
+						(unsigned long long)address & va_mask,
+						(unsigned long long)pde_fields.pte_base_addr,
+						(int)pde_fields.valid,
+						(int)pde_fields.system,
+						(int)pde_fields.cache,
+						(int)pde_fields.pte);
+			indent = page_table_depth * 3;
 			while (page_table_depth) {
 				DEBUG("Decoding depth %u...(0x%llx)\n", (unsigned)page_table_depth, (unsigned long long)address);
 				// decode addr into pte and pde selectors...
@@ -333,10 +450,11 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 				first = 0;
 
 				DEBUG("pde_idx == %llx\n", (unsigned long long)pde_idx);
-				DEBUG("selector mask == %llx\n", ((unsigned long long)511 << ((page_table_depth-1)*9 + (12 + 9 + page_table_size))));
+				va_mask = ((unsigned long long)511 << ((page_table_depth-2)*9 + (12 + 9 + page_table_size)));
+				DEBUG("selector mask == %llx\n", va_mask);
 
 				// read PDE entry
-				if (umr_read_vram(asic, 0xFFFF, pde_address + pde_idx * 8, 8, &pde_entry) < 0)
+				if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_address + pde_idx * 8, 8, &pde_entry) < 0)
 					return -1;
 
 				// decode PDE values
@@ -346,21 +464,33 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 				pde_fields.system        = (pde_entry >> 1) & 1;
 				pde_fields.cache         = (pde_entry >> 2) & 1;
 				pde_fields.pte           = (pde_entry >> 54) & 1;
-				DEBUG("PDE==%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d, system=%d, cache=%d, pte=%d\n",
-					(unsigned long long)pde_entry, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr,
-					(int)pde_fields.valid, (int)pde_fields.system, (int)pde_fields.cache, (int)pde_fields.pte);
+				if (asic->options.verbose)
+					fprintf(stderr, "[VERBOSE]: PDE%d=0x%016llx, VA=0x%012llx, PBA==0x%012llx, V=%d, S=%d, C=%d, P=%d\n",
+							depth,
+							(unsigned long long)pde_entry,
+							(unsigned long long)address & va_mask,
+							(unsigned long long)pde_fields.pte_base_addr,
+							(int)pde_fields.valid,
+							(int)pde_fields.system,
+							(int)pde_fields.cache,
+							(int)pde_fields.pte);
 
 				if (!pde_fields.system)
 					pde_fields.pte_base_addr -= vm_fb_offset;
 
+				if (!pde_fields.valid)
+					return -1;
+
 				// for the next round the address we're decoding is the phys address in the currently decoded PDE
 				--page_table_depth;
+				++depth;
+				indent -= 3;
 				pde_address = pde_fields.pte_base_addr;
 				DEBUG("...done\n\n");
 			}
 
 			// now read PTE entry for this page
-			if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0)
+			if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0)
 				return -1;
 
 			// decode PTE values
@@ -368,13 +498,20 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
 			pte_fields.system         = (pte_entry >> 1) & 1;
 			pte_fields.valid          = pte_entry & 1;
-			DEBUG("PTE=%llx, pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n",
-				(unsigned long long)pte_entry, (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment,
-				(int)pte_fields.system, (int)pte_fields.valid);
+			if (asic->options.verbose)
+				fprintf(stderr, "[VERBOSE]: PTE==0x%016llx, VA=0x%012llx, PBA==0x%012llx, V=%d, S=%d\n",
+					(unsigned long long)pte_entry,
+					(unsigned long long)address & ((1ULL << (9 + page_table_size)) - 1),
+					(unsigned long long)pte_fields.page_base_addr,
+					(int)pte_fields.valid,
+					(int)pte_fields.system);
 
 			if (!pte_fields.system)
 				pte_fields.page_base_addr -= vm_fb_offset;
 
+			if (!pte_fields.valid)
+				return -1;
+
 			// compute starting address
 			start_addr = pte_fields.page_base_addr + (address & 0xFFF);
 			DEBUG("phys address to read from: %llx\n\n\n", (unsigned long long)start_addr);
@@ -387,14 +524,17 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			pde_fields.pte_base_addr = page_table_base_addr & 0xFFFFFFFFFF000ULL;
 			pde_fields.system        = (page_table_base_addr >> 1) & 1;
 			pde_fields.valid         = page_table_base_addr & 1;
-			DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, system=%d, valid=%d\n",
-				(unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr,
-				(int)pde_fields.system, (int)pde_fields.valid);
+			if (asic->options.verbose)
+				fprintf(stderr, "[VERBOSE]: PDE=0x%016llx, PBA==0x%012llx, V=%d, S=%d\n",
+						(unsigned long long)page_table_base_addr,
+						(unsigned long long)pde_fields.pte_base_addr,
+						(int)pde_fields.valid,
+						(int)pde_fields.system);
 
 			// PTE addr = baseaddr[47:6] + (logical - start) >> fragsize)
 			pte_idx = (address >> (12 + pde_fields.frag_size));
 
-			if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
+			if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
 				return -1;
 
 			// decode PTE values
@@ -402,9 +542,13 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 			pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
 			pte_fields.system         = (pte_entry >> 1) & 1;
 			pte_fields.valid          = pte_entry & 1;
-			DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n",
-				(unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment,
-				(int)pte_fields.system, (int)pte_fields.valid);
+			if (asic->options.verbose)
+				fprintf(stderr, "[VERBOSE]: PTE=0x%016llx, PBA==0x%012llx, F=%u, V=%d, S=%d\n",
+					(unsigned long long)pte_entry,
+					(unsigned long long)pte_fields.page_base_addr,
+					(unsigned)pte_fields.fragment,
+					(int)pte_fields.valid,
+					(int)pte_fields.system);
 
 
 			// compute starting address
@@ -427,7 +571,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 				return -1;
 			}
 		} else {
-			if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) {
+			if (umr_read_vram(asic, UMR_LINEAR_HUB, start_addr, chunk_size, pdst) < 0) {
 				fprintf(stderr, "[ERROR]: Cannot read from VRAM\n");
 				return -1;
 			}
@@ -435,7 +579,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
 		pdst += chunk_size;
 		size -= chunk_size;
 		address += chunk_size;
-	}
+	} while (size);
 	return 0;
 }
 
@@ -451,7 +595,7 @@ int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32
 		return -1;
 	}
 
-	if (vmid == 0xFFFF) {
+	if ((vmid & 0xFF00) == UMR_LINEAR_HUB) {
 		DEBUG("Reading physical VRAM addr: 0x%llx\n", (unsigned long long)address);
 		// addressing is physical
 		if (asic->options.use_pci == 0) {
diff --git a/src/lib/ring_decode.c b/src/lib/ring_decode.c
index 35e72ed58e4f..772ea49dda6f 100644
--- a/src/lib/ring_decode.c
+++ b/src/lib/ring_decode.c
@@ -589,7 +589,7 @@ static void print_decode_pm4(struct umr_asic *asic, struct umr_ring_decoder *dec
 			// detect VCN/UVD IBs and chain them once all
 			// 4 pieces of information are found
 			if (!strcmp(name, "mmUVD_LMI_RBC_IB_VMID")) {
-				decoder->pm4.next_ib_state.ib_vmid = ib;
+				decoder->pm4.next_ib_state.ib_vmid = ib | ((asic->family <= FAMILY_VI) ? 0 : UMR_MM_HUB);
 				decoder->pm4.next_ib_state.tally |= 1;
 			} else if (!strcmp(name, "mmUVD_LMI_RBC_IB_64BIT_BAR_LOW")) {
 				decoder->pm4.next_ib_state.ib_addr_lo = ib;
@@ -598,7 +598,7 @@ static void print_decode_pm4(struct umr_asic *asic, struct umr_ring_decoder *dec
 				decoder->pm4.next_ib_state.ib_addr_hi = ib;
 				decoder->pm4.next_ib_state.tally |= 4;
 			} else if (!strcmp(name, "mmUVD_RBC_IB_SIZE")) {
-				decoder->pm4.next_ib_state.ib_size = ib;
+				decoder->pm4.next_ib_state.ib_size = ib * 4;
 				decoder->pm4.next_ib_state.tally |= 8;
 			}
 
diff --git a/src/lib/sq_cmd_halt_waves.c b/src/lib/sq_cmd_halt_waves.c
new file mode 100644
index 000000000000..83aa52d2cfb2
--- /dev/null
+++ b/src/lib/sq_cmd_halt_waves.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis <tom.stdenis at amd.com>
+ *
+ */
+#include "umr.h"
+
+int umr_sq_cmd_halt_waves(struct umr_asic *asic, enum umr_sq_cmd_halt_resume mode)
+{	// halt (UMR_SQ_CMD_HALT) or resume (any other mode) SQ waves with a single SQ_CMD write; returns -1 only when SQ_CMD cannot be found, otherwise 0
+	struct umr_reg *reg;
+	uint32_t value;   // SQ_CMD value assembled from its named bitfields
+	uint64_t addr;    // address handed to umr_write_reg(): scaled register address plus the flag bits OR'd in below
+
+	reg = umr_find_reg_data(asic, "SQ_CMD");
+	if (!reg) {
+		fprintf(stderr, "[BUG]: Cannot find SQ_CMD register in umr_sq_cmd_halt_waves()\n");
+		return -1;
+	}
+
+	// compose value: on CIK the CMD field itself selects halt vs. resume; every other family always sends CMD=1 and selects via DATA
+	if (asic->family == FAMILY_CIK) {
+		value = umr_bitslice_compose_value(asic, reg, "CMD", mode == UMR_SQ_CMD_HALT ? 1 : 2); // CMD=1 halts, CMD=2 resumes
+	} else {
+		value = umr_bitslice_compose_value(asic, reg, "CMD", 1); // SETHALT
+		value |= umr_bitslice_compose_value(asic, reg, "DATA", mode == UMR_SQ_CMD_HALT ? 1 : 0); // DATA=1 halts, DATA=0 resumes
+	}
+	value |= umr_bitslice_compose_value(asic, reg, "MODE", 1); // BROADCAST
+
+	// compose address: umr_write_reg() takes a byte address, so reg->addr is scaled by 4 (presumably a dword offset -- TODO confirm)
+	addr = reg->addr * 4;
+	addr |= (1ULL << 62) |      // we need to take the lock so we can ensure a broadcast write
+			(0x3FFULL << 24) |  // NOTE(review): the three 0x3FF fields are presumably all-ones "broadcast" selectors -- confirm against the register write backend
+			(0x3FFULL << 34) |
+			(0x3FFULL << 44);
+	umr_write_reg(asic, addr, value, reg->type); // NOTE(review): return value is not checked, so a failed write is still reported as success
+
+	return 0;
+}
diff --git a/src/umr.h b/src/umr.h
index a0e94a7e4db9..dd7f80c38f0c 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -33,6 +33,20 @@
 #include <pciaccess.h>
 #include <pthread.h>
 
+/* SQ_CMD halt/resume: the mode argument accepted by umr_sq_cmd_halt_waves() */
+enum umr_sq_cmd_halt_resume {
+	UMR_SQ_CMD_HALT=0,     // ask for SQ waves to be halted
+	UMR_SQ_CMD_RESUME,     // ask for halted SQ waves to be resumed
+};
+
+/* memory space hubs: encoded in bits 8..15 of a vmid value (umr_read_vram() tests vmid & 0xFF00) so they can be OR'd with a VMID in the low byte */
+enum umr_hub_space {
+	UMR_GFX_HUB = 0 << 8,        // default on everything before AI; value 0, so a plain VMID selects this hub
+	UMR_MM_HUB = 1 << 8,         // available on AI and later
+
+	UMR_LINEAR_HUB = 0xFF << 8,  // this is for linear access to vram; the former 0xFFFF marker still matches since 0xFFFF & 0xFF00 == 0xFF00
+};
+
 /* sourced from amd_powerplay.h from the kernel */
 enum amd_pp_sensors {
 	AMDGPU_PP_SENSOR_GFX_SCLK = 0,
@@ -174,6 +188,8 @@ struct umr_options {
 	    read_smc,
 	    quiet,
 	    follow_ib,
+	    verbose,
+	    halt_waves,
 	    no_kernel;
 	unsigned
 	    instance_bank,
@@ -477,6 +493,7 @@ int umr_create_mmio_accel(struct umr_asic *asic);
 uint32_t umr_find_reg(struct umr_asic *asic, char *regname);
 
 // find the register data for a register
+struct umr_reg *umr_find_reg_data_by_ip(struct umr_asic *asic, char *ip, char *regname);
 struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname);
 
 // read/write a 32-bit register given a BYTE address
@@ -487,17 +504,26 @@ int umr_write_reg(struct umr_asic *asic, uint64_t addr, uint32_t value, enum reg
 uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name);
 int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value);
 
+// read/write a register by ip/name
+uint32_t umr_read_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name);
+int umr_write_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name, uint32_t value);
+
 // slice a full register into bits (shifted into LSB)
 uint32_t umr_bitslice_reg(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue);
 uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue);
+uint32_t umr_bitslice_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue);
 
 // compose a 32-bit register with a value and a bitfield
 uint32_t umr_bitslice_compose_value(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue);
 uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *reg, char *bitname, uint32_t regvalue);
+uint32_t umr_bitslice_compose_value_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue);
 
 // select a GRBM_GFX_IDX
 int umr_grbm_select_index(struct umr_asic *asic, uint32_t se, uint32_t sh, uint32_t instance);
 
+// halt/resume SQ waves
+int umr_sq_cmd_halt_waves(struct umr_asic *asic, enum umr_sq_cmd_halt_resume mode);
+
 /* IB/ring decoding/dumping/etc */
 void umr_print_decode(struct umr_asic *asic, struct umr_ring_decoder *decoder, uint32_t ib);
 void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder);
-- 
2.12.0



More information about the amd-gfx mailing list