[PATCH umr 4/4] Add ability to halt waves and better VM decoding
Alex Deucher
alexdeucher at gmail.com
Mon Jul 24 15:49:41 UTC 2017
On Mon, Jul 24, 2017 at 11:25 AM, Tom St Denis <tom.stdenis at amd.com> wrote:
> This patch involves two things I was working on at once, so the
> patches are a bit intertwined. It adds:
>
> 1. The ability to halt SQ waves when reading waves on CIK and later
> ASICs.
>
> 2. The ability to enable verbose decoding when reading vram.
>
> 3. The ability to decode virtual addresses in the mmhub (for, say, VCN).
>
> Signed-off-by: Tom St Denis <tom.stdenis at amd.com>
Series is:
Acked-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> doc/umr.1 | 6 +++
> src/app/main.c | 15 +++++--
> src/app/print_waves.c | 6 +++
> src/lib/CMakeLists.txt | 1 +
> src/lib/dump_ib.c | 6 ++-
> src/lib/find_reg.c | 11 ++++-
> src/lib/mmio.c | 36 +++++++++++++----
> src/lib/read_vram.c | 98 ++++++++++++++++++++++++++++++++++++++-------
> src/lib/ring_decode.c | 4 +-
> src/lib/sq_cmd_halt_waves.c | 57 ++++++++++++++++++++++++++
> src/umr.h | 26 ++++++++++++
> 11 files changed, 233 insertions(+), 33 deletions(-)
> create mode 100644 src/lib/sq_cmd_halt_waves.c
>
> diff --git a/doc/umr.1 b/doc/umr.1
> index 4c720ba48840..da432a13b0d1 100644
> --- a/doc/umr.1
> +++ b/doc/umr.1
> @@ -148,6 +148,12 @@ separated strings. Options should be specified before --update or --force comma
> be used only if the KMD is hung or otherwise not working correctly. Using it on live systems
> may result in race conditions.
>
> +.B verbose
> + Enable verbose diagnostics (used in --vram).
> +
> +.B halt_waves
> + Halt/resume all waves while reading wave status.
> +
> .SH "Notes"
>
> - The "Waves" field in the DRM section of --top only works if GFX PG has been disabled. Otherwise,
> diff --git a/src/app/main.c b/src/app/main.c
> index 6e0bc57200b0..7e3914155a22 100644
> --- a/src/app/main.c
> +++ b/src/app/main.c
> @@ -107,6 +107,10 @@ static void parse_options(char *str)
> options.quiet = 1;
> } else if (!strcmp(option, "follow_ib")) {
> options.follow_ib = 1;
> + } else if (!strcmp(option, "verbose")) {
> + options.verbose = 1;
> + } else if (!strcmp(option, "halt_waves")) {
> + options.halt_waves = 1;
> } else if (!strcmp(option, "no_kernel")) {
> options.no_kernel = 1;
> options.use_pci = 1;
> @@ -422,12 +426,15 @@ int main(int argc, char **argv)
> "\n\t--top, -t\n\t\tSummarize GPU utilization. Can select a SE block with --bank. Can use"
> "\n\t\toptions 'use_colour' to colourize output and 'use_pci' to improve efficiency.\n"
> "\n\t--waves, -wa\n\t\tPrint out information about any active CU waves. Can use '-O bits'"
> - "\n\t\tto see decoding of various wave fields.\n"
> + "\n\t\tto see decoding of various wave fields. Can use the '-O halt_waves' option"
> + "\n\t\tto halt the SQ while reading registers.\n"
> "\n\t--vram, -v [<vmid>@]<address> <size>"
> "\n\t\tRead 'size' bytes (in hex) from a given address (in hex) to stdout. Optionally"
> - "\n\t\tspecify the VMID (in decimal) treating the address as a virtual address instead.\n"
> -"\n\t--option -O <string>[,<string>,...]\n\t\tEnable various flags: risky, bits, bitsfull, empty_log, follow, named, many,"
> - "\n\t\tuse_pci, use_colour, read_smc, quiet, no_kernel.\n"
> + "\n\t\tspecify the VMID (in decimal or in hex with a '0x' prefix) treating the address"
> + "\n\t\tas a virtual address instead. Can use 'verbose' option to print out PDE/PTE"
> + "\n\t\tdecodings.\n"
> +"\n\t--option -O <string>[,<string>,...]\n\t\tEnable various flags: bits, bitsfull, empty_log, follow, named, many,"
> + "\n\t\tuse_pci, use_colour, read_smc, quiet, no_kernel, verbose, halt_waves.\n"
> "\n\n", UMR_BUILD_VER, UMR_BUILD_REV);
> exit(EXIT_SUCCESS);
> } else {
> diff --git a/src/app/print_waves.c b/src/app/print_waves.c
> index e157db9f9386..1efd8a13bd28 100644
> --- a/src/app/print_waves.c
> +++ b/src/app/print_waves.c
> @@ -40,6 +40,9 @@ void umr_print_waves(struct umr_asic *asic)
> struct umr_wave_status ws;
> int first = 1, col = 0;
>
> + if (asic->options.halt_waves)
> + umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT);
> +
> if (asic->family <= FAMILY_CIK)
> shift = 3; // on SI..CIK allocations were done in 8-dword blocks
> else
> @@ -206,4 +209,7 @@ void umr_print_waves(struct umr_asic *asic)
> }
> if (first)
> printf("No active waves!\n");
> +
> + if (asic->options.halt_waves)
> + umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME);
> }
> diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
> index 217ae80cdfd7..fcb4f9c9dc80 100644
> --- a/src/lib/CMakeLists.txt
> +++ b/src/lib/CMakeLists.txt
> @@ -21,6 +21,7 @@ add_library(umrcore STATIC
> read_vram.c
> ring_decode.c
> scan_config.c
> + sq_cmd_halt_waves.c
> transfer_soc15.c
> wave_status.c
> update.c
> diff --git a/src/lib/dump_ib.c b/src/lib/dump_ib.c
> index 4e81dbe3eb09..cba497373fe2 100644
> --- a/src/lib/dump_ib.c
> +++ b/src/lib/dump_ib.c
> @@ -28,9 +28,11 @@
> void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder)
> {
> uint32_t *data = NULL, x;
> + static const char *hubs[] = { "gfxhub", "mmhub" };
>
> - printf("Dumping IB at VMID:%u 0x%llx of %u words\n",
> - (unsigned)decoder->next_ib_info.vmid,
> + printf("Dumping IB at (%s) VMID:%u 0x%llx of %u words\n",
> + hubs[decoder->next_ib_info.vmid >> 8],
> + (unsigned)decoder->next_ib_info.vmid & 0xFF,
> (unsigned long long)decoder->next_ib_info.ib_addr,
> (unsigned)decoder->next_ib_info.size/4);
>
> diff --git a/src/lib/find_reg.c b/src/lib/find_reg.c
> index d4647163ea63..ecd7f132c9c9 100644
> --- a/src/lib/find_reg.c
> +++ b/src/lib/find_reg.c
> @@ -36,14 +36,21 @@ uint32_t umr_find_reg(struct umr_asic *asic, char *regname)
> return 0xFFFFFFFF;
> }
>
> -struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname)
> +struct umr_reg *umr_find_reg_data_by_ip(struct umr_asic *asic, char *ip, char *regname)
> {
> int i, j;
>
> - for (i = 0; i < asic->no_blocks; i++)
> + for (i = 0; i < asic->no_blocks; i++) {
> + if (ip && memcmp(asic->blocks[i]->ipname, ip, strlen(ip))) continue;
> for (j = 0; j < asic->blocks[i]->no_regs; j++)
> if (!strcmp(asic->blocks[i]->regs[j].regname, regname))
> return &asic->blocks[i]->regs[j];
> + }
> fprintf(stderr, "[BUG]: reg [%s] not found on asic [%s]\n", regname, asic->asicname);
> return NULL;
> }
> +
> +struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname)
> +{
> + return umr_find_reg_data_by_ip(asic, NULL, regname);
> +}
> diff --git a/src/lib/mmio.c b/src/lib/mmio.c
> index 47e5150d3201..eb91e289404f 100644
> --- a/src/lib/mmio.c
> +++ b/src/lib/mmio.c
> @@ -145,26 +145,36 @@ int umr_write_reg(struct umr_asic *asic, uint64_t addr, uint32_t value, enum reg
> return 0;
> }
>
> -uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name)
> +uint32_t umr_read_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name)
> {
> struct umr_reg *reg;
> - reg = umr_find_reg_data(asic, name);
> + reg = umr_find_reg_data_by_ip(asic, ip, name);
> if (reg)
> return umr_read_reg(asic, reg->addr * (reg->type == REG_MMIO ? 4 : 1), reg->type);
> else
> return 0;
> }
>
> -int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value)
> +uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name)
> +{
> + return umr_read_reg_by_name_by_ip(asic, NULL, name);
> +}
> +
> +int umr_write_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name, uint32_t value)
> {
> struct umr_reg *reg;
> - reg = umr_find_reg_data(asic, name);
> + reg = umr_find_reg_data_by_ip(asic, ip, name);
> if (reg)
> return umr_write_reg(asic, reg->addr * (reg->type == REG_MMIO ? 4 : 1), value, reg->type);
> else
> return -1;
> }
>
> +int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value)
> +{
> + return umr_write_reg_by_name_by_ip(asic, NULL, name, value);
> +}
> +
> uint32_t umr_bitslice_reg(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue)
> {
> int i;
> @@ -193,26 +203,36 @@ uint32_t umr_bitslice_compose_value(struct umr_asic *asic, struct umr_reg *reg,
> return 0;
> }
>
> -uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
> +uint32_t umr_bitslice_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue)
> {
> struct umr_reg *reg;
> - reg = umr_find_reg_data(asic, regname);
> + reg = umr_find_reg_data_by_ip(asic, ip, regname);
> if (reg)
> return umr_bitslice_reg(asic, reg, bitname, regvalue);
> else
> return 0;
> }
>
> -uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
> +uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
> +{
> + return umr_bitslice_reg_by_name_by_ip(asic, NULL, regname, bitname, regvalue);
> +}
> +
> +uint32_t umr_bitslice_compose_value_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue)
> {
> struct umr_reg *reg;
> - reg = umr_find_reg_data(asic, regname);
> + reg = umr_find_reg_data_by_ip(asic, ip, regname);
> if (reg)
> return umr_bitslice_compose_value(asic, reg, bitname, regvalue);
> else
> return 0;
> }
>
> +uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue)
> +{
> + return umr_bitslice_compose_value_by_name_by_ip(asic, NULL, regname, bitname, regvalue);
> +}
> +
> int umr_grbm_select_index(struct umr_asic *asic, uint32_t se, uint32_t sh, uint32_t instance)
> {
> struct umr_reg *grbm_idx;
> diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
> index 3d458db8fa11..b8034372b280 100644
> --- a/src/lib/read_vram.c
> +++ b/src/lib/read_vram.c
> @@ -135,7 +135,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR", (int)vmid);
> page_table_base_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
>
> - vm_fb_base = ((uint64_t)umr_read_reg_by_name(asic, "mmMC_VM_FB_LOCATION") >> 16) << 24;
> + vm_fb_base = ((uint64_t)umr_read_reg_by_name(asic, "mmMC_VM_FB_LOCATION") & 0xFFFF) << 24;
>
> DEBUG("mmVM_CONTEXTx_PAGE_TABLE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr);
> DEBUG("mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR = 0x%08llx\n", (unsigned long long)page_table_base_addr);
> @@ -143,6 +143,21 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_DEPTH = %d\n", page_table_depth);
> DEBUG("mmMC_VM_FB_LOCATION == %llx\n", (unsigned long long)vm_fb_base);
>
> + if (asic->options.verbose)
> + fprintf(stderr, "[VERBOSE]: Decoding %u at 0x%llx\n"
> + "[VERBOSE]: PAGE_TABLE_START_ADDR=0x%llx\n"
> + "[VERBOSE]: PAGE_TABLE_BASE_ADDR=0x%llx\n"
> + "[VERBOSE]: PAGE_TABLE_BLOCK_SIZE=%u\n"
> + "[VERBOSE]: PAGE_TABLE_DEPTH=%u\n"
> + "[VERBOSE]: MC_VM_FB_LOCATION=0x%llx\n",
> + (unsigned)vmid,
> + (unsigned long long)address,
> + (unsigned long long)page_table_start_addr,
> + (unsigned long long)page_table_base_addr,
> + (unsigned)page_table_size,
> + (unsigned)page_table_depth,
> + (unsigned long long)vm_fb_base);
> +
> address -= page_table_start_addr;
>
> while (size) {
> @@ -152,17 +167,21 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> pte_idx = (address >> 12) & ((1ULL << (9 + page_table_size)) - 1);
>
> // read PDE entry
> - umr_read_vram(asic, 0xFFFF, page_table_base_addr + pde_idx * 8, 8, &pde_entry);
> + umr_read_vram(asic, UMR_LINEAR_HUB, page_table_base_addr + pde_idx * 8 - vm_fb_base, 8, &pde_entry);
>
> // decode PDE values
> pde_fields.frag_size = (pde_entry >> 59) & 0x1F;
> pde_fields.pte_base_addr = pde_entry & 0xFFFFFFF000ULL;
> pde_fields.valid = pde_entry & 1;
> + if (asic->options.verbose)
> + fprintf(stderr, "[VERBOSE]: PDE.pte_base_addr==0x%llx, PDE.valid=%d\n",
> + (unsigned long long)pde_fields.pte_base_addr,
> + (int)pde_fields.valid);
> DEBUG("PDE==%llx, pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n",
> (unsigned long long)pde_entry, (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid);
>
> // now read PTE entry for this page
> - if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0)
> + if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx*8 - vm_fb_base, 8, &pte_entry) < 0)
> return -1;
>
> // decode PTE values
> @@ -170,6 +189,11 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> pte_fields.fragment = (pte_entry >> 7) & 0x1F;
> pte_fields.system = (pte_entry >> 1) & 1;
> pte_fields.valid = pte_entry & 1;
> + if (asic->options.verbose)
> + fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n",
> + (unsigned long long)pte_fields.page_base_addr,
> + (int)pte_fields.system,
> + (int)pte_fields.valid);
> DEBUG("PTE=%llx, pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n",
> (unsigned long long)pte_entry, (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
>
> @@ -179,7 +203,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> // depth == 0 == PTE only
> pte_idx = (address >> 12);
>
> - if (umr_read_vram(asic, 0xFFFF, page_table_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
> + if (umr_read_vram(asic, UMR_LINEAR_HUB, page_table_base_addr + pte_idx * 8 - vm_fb_base, 8, &pte_entry) < 0)
> return -1;
>
> // decode PTE values
> @@ -187,6 +211,11 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> pte_fields.fragment = (pte_entry >> 7) & 0x1F;
> pte_fields.system = (pte_entry >> 1) & 1;
> pte_fields.valid = pte_entry & 1;
> + if (asic->options.verbose)
> + fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n",
> + (unsigned long long)pte_fields.page_base_addr,
> + (int)pte_fields.system,
> + (int)pte_fields.valid);
> DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
>
> // compute starting address
> @@ -207,7 +236,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> return -1;
> }
> } else {
> - if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) {
> + if (umr_read_vram(asic, UMR_LINEAR_HUB, start_addr, chunk_size, pdst) < 0) {
> fprintf(stderr, "[ERROR]: Cannot read from VRAM\n");
> return -1;
> }
> @@ -244,6 +273,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> } pte_fields;
> char buf[64];
> unsigned char *pdst = dst;
> + char *hub;
>
> /*
> * PTE format on AI:
> @@ -266,21 +296,28 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> * 0 valid
> */
>
> + if ((vmid & 0xFF00) == UMR_MM_HUB)
> + hub = "mmhub";
> + else
> + hub = "gfx";
> +
> + vmid &= 0xFF;
> +
> // read vm registers
> sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid);
> - page_table_start_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
> + page_table_start_addr = (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 12;
> sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid);
> - page_table_start_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 44;
> + page_table_start_addr |= (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 44;
>
> sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid);
> - tmp = umr_read_reg_by_name(asic, buf);
> + tmp = umr_read_reg_by_name_by_ip(asic, hub, buf);
> page_table_depth = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_DEPTH", tmp);
> page_table_size = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_BLOCK_SIZE", tmp);
>
> sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid);
> - page_table_base_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 0;
> + page_table_base_addr = (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 0;
> sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid);
> - page_table_base_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 32;
> + page_table_base_addr |= (uint64_t)umr_read_reg_by_name_by_ip(asic, hub, buf) << 32;
>
> DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address);
> DEBUG("mmVM_CONTEXTx_PAGE_TABLE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr);
> @@ -288,6 +325,15 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_BLOCK_SIZE = %lu\n", page_table_size);
> DEBUG("mmVM_CONTEXTx_CNTL.PAGE_TABLE_DEPTH = %d\n", page_table_depth);
>
> + if (asic->options.verbose)
> + fprintf(stderr, "[VERBOSE]: Decoding %u at 0x%llx\nPAGE_TABLE_START_ADDR=0x%llx\nPAGE_TABLE_BASE_ADDR=0x%llx\nPAGE_TABLE_BLOCK_SIZE=%u\nPAGE_TABLE_DEPTH=%u\n",
> + (unsigned)vmid,
> + (unsigned long long)address,
> + (unsigned long long)page_table_start_addr,
> + (unsigned long long)page_table_base_addr,
> + (unsigned)page_table_size,
> + (unsigned)page_table_depth);
> +
> address -= page_table_start_addr;
>
> // update addresses for APUs
> @@ -336,7 +382,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> DEBUG("selector mask == %llx\n", ((unsigned long long)511 << ((page_table_depth-1)*9 + (12 + 9 + page_table_size))));
>
> // read PDE entry
> - if (umr_read_vram(asic, 0xFFFF, pde_address + pde_idx * 8, 8, &pde_entry) < 0)
> + if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_address + pde_idx * 8, 8, &pde_entry) < 0)
> return -1;
>
> // decode PDE values
> @@ -349,6 +395,13 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> DEBUG("PDE==%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d, system=%d, cache=%d, pte=%d\n",
> (unsigned long long)pde_entry, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr,
> (int)pde_fields.valid, (int)pde_fields.system, (int)pde_fields.cache, (int)pde_fields.pte);
> + if (asic->options.verbose)
> + fprintf(stderr, "[VERBOSE]: PDE.pte_base_addr==0x%llx, PDE.valid=%d, PDE.system=%d, PDE.cache=%d, PDE.pte=%d\n",
> + (unsigned long long)pde_fields.pte_base_addr,
> + (int)pde_fields.valid,
> + (int)pde_fields.system,
> + (int)pde_fields.cache,
> + (int)pde_fields.pte);
>
> if (!pde_fields.system)
> pde_fields.pte_base_addr -= vm_fb_offset;
> @@ -360,7 +413,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> }
>
> // now read PTE entry for this page
> - if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0)
> + if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry) < 0)
> return -1;
>
> // decode PTE values
> @@ -371,6 +424,11 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> DEBUG("PTE=%llx, pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n",
> (unsigned long long)pte_entry, (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment,
> (int)pte_fields.system, (int)pte_fields.valid);
> + if (asic->options.verbose)
> + fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n",
> + (unsigned long long)pte_fields.page_base_addr,
> + (int)pte_fields.system,
> + (int)pte_fields.valid);
>
> if (!pte_fields.system)
> pte_fields.page_base_addr -= vm_fb_offset;
> @@ -390,11 +448,16 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, system=%d, valid=%d\n",
> (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr,
> (int)pde_fields.system, (int)pde_fields.valid);
> + if (asic->options.verbose)
> + fprintf(stderr, "[VERBOSE]: PDE.pte_base_addr==0x%llx, PDE.valid=%d, PDE.system=%d\n",
> + (unsigned long long)pde_fields.pte_base_addr,
> + (int)pde_fields.valid,
> + (int)pde_fields.system);
>
> // PTE addr = baseaddr[47:6] + (logical - start) >> fragsize)
> pte_idx = (address >> (12 + pde_fields.frag_size));
>
> - if (umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
> + if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
> return -1;
>
> // decode PTE values
> @@ -405,6 +468,11 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n",
> (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment,
> (int)pte_fields.system, (int)pte_fields.valid);
> + if (asic->options.verbose)
> + fprintf(stderr, "[VERBOSE]: PTE.page_base_addr==0x%08llx, PTE.system=%d, PTE.valid=%d\n",
> + (unsigned long long)pte_fields.page_base_addr,
> + (int)pte_fields.system,
> + (int)pte_fields.valid);
>
>
> // compute starting address
> @@ -427,7 +495,7 @@ static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t addre
> return -1;
> }
> } else {
> - if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) {
> + if (umr_read_vram(asic, UMR_LINEAR_HUB, start_addr, chunk_size, pdst) < 0) {
> fprintf(stderr, "[ERROR]: Cannot read from VRAM\n");
> return -1;
> }
> @@ -451,7 +519,7 @@ int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32
> return -1;
> }
>
> - if (vmid == 0xFFFF) {
> + if ((vmid & 0xFF00) == UMR_LINEAR_HUB) {
> DEBUG("Reading physical VRAM addr: 0x%llx\n", (unsigned long long)address);
> // addressing is physical
> if (asic->options.use_pci == 0) {
> diff --git a/src/lib/ring_decode.c b/src/lib/ring_decode.c
> index 35e72ed58e4f..772ea49dda6f 100644
> --- a/src/lib/ring_decode.c
> +++ b/src/lib/ring_decode.c
> @@ -589,7 +589,7 @@ static void print_decode_pm4(struct umr_asic *asic, struct umr_ring_decoder *dec
> // detect VCN/UVD IBs and chain them once all
> // 4 pieces of information are found
> if (!strcmp(name, "mmUVD_LMI_RBC_IB_VMID")) {
> - decoder->pm4.next_ib_state.ib_vmid = ib;
> + decoder->pm4.next_ib_state.ib_vmid = ib | ((asic->family <= FAMILY_VI) ? 0 : UMR_MM_HUB);
> decoder->pm4.next_ib_state.tally |= 1;
> } else if (!strcmp(name, "mmUVD_LMI_RBC_IB_64BIT_BAR_LOW")) {
> decoder->pm4.next_ib_state.ib_addr_lo = ib;
> @@ -598,7 +598,7 @@ static void print_decode_pm4(struct umr_asic *asic, struct umr_ring_decoder *dec
> decoder->pm4.next_ib_state.ib_addr_hi = ib;
> decoder->pm4.next_ib_state.tally |= 4;
> } else if (!strcmp(name, "mmUVD_RBC_IB_SIZE")) {
> - decoder->pm4.next_ib_state.ib_size = ib;
> + decoder->pm4.next_ib_state.ib_size = ib * 4;
> decoder->pm4.next_ib_state.tally |= 8;
> }
>
> diff --git a/src/lib/sq_cmd_halt_waves.c b/src/lib/sq_cmd_halt_waves.c
> new file mode 100644
> index 000000000000..83aa52d2cfb2
> --- /dev/null
> +++ b/src/lib/sq_cmd_halt_waves.c
> @@ -0,0 +1,57 @@
> +/*
> + * Copyright 2017 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: Tom St Denis <tom.stdenis at amd.com>
> + *
> + */
> +#include "umr.h"
> +
> +int umr_sq_cmd_halt_waves(struct umr_asic *asic, enum umr_sq_cmd_halt_resume mode)
> +{
> + struct umr_reg *reg;
> + uint32_t value;
> + uint64_t addr;
> +
> + reg = umr_find_reg_data(asic, "SQ_CMD");
> + if (!reg) {
> + fprintf(stderr, "[BUG]: Cannot find SQ_CMD register in umr_sq_cmd_halt_waves()\n");
> + return -1;
> + }
> +
> + // compose value
> + if (asic->family == FAMILY_CIK) {
> + value = umr_bitslice_compose_value(asic, reg, "CMD", mode == UMR_SQ_CMD_HALT ? 1 : 2); // SETHALT
> + } else {
> + value = umr_bitslice_compose_value(asic, reg, "CMD", 1); // SETHALT
> + value |= umr_bitslice_compose_value(asic, reg, "DATA", mode == UMR_SQ_CMD_HALT ? 1 : 0);
> + }
> + value |= umr_bitslice_compose_value(asic, reg, "MODE", 1); // BROADCAST
> +
> + // compose address
> + addr = reg->addr * 4;
> + addr |= (1ULL << 62) | // we need to take the lock so we can ensure a broadcast write
> + (0x3FFULL << 24) |
> + (0x3FFULL << 34) |
> + (0x3FFULL << 44);
> + umr_write_reg(asic, addr, value, reg->type);
> +
> + return 0;
> +}
> diff --git a/src/umr.h b/src/umr.h
> index a0e94a7e4db9..dd7f80c38f0c 100644
> --- a/src/umr.h
> +++ b/src/umr.h
> @@ -33,6 +33,20 @@
> #include <pciaccess.h>
> #include <pthread.h>
>
> +/* SQ_CMD halt/resume */
> +enum umr_sq_cmd_halt_resume {
> + UMR_SQ_CMD_HALT=0,
> + UMR_SQ_CMD_RESUME,
> +};
> +
> +/* memory space hubs */
> +enum umr_hub_space {
> + UMR_GFX_HUB = 0 << 8, // default on everything before AI
> + UMR_MM_HUB = 1 << 8, // available on AI and later
> +
> + UMR_LINEAR_HUB = 0xFF << 8, // this is for linear access to vram
> +};
> +
> /* sourced from amd_powerplay.h from the kernel */
> enum amd_pp_sensors {
> AMDGPU_PP_SENSOR_GFX_SCLK = 0,
> @@ -174,6 +188,8 @@ struct umr_options {
> read_smc,
> quiet,
> follow_ib,
> + verbose,
> + halt_waves,
> no_kernel;
> unsigned
> instance_bank,
> @@ -477,6 +493,7 @@ int umr_create_mmio_accel(struct umr_asic *asic);
> uint32_t umr_find_reg(struct umr_asic *asic, char *regname);
>
> // find the register data for a register
> +struct umr_reg *umr_find_reg_data_by_ip(struct umr_asic *asic, char *ip, char *regname);
> struct umr_reg *umr_find_reg_data(struct umr_asic *asic, char *regname);
>
> // read/write a 32-bit register given a BYTE address
> @@ -487,17 +504,26 @@ int umr_write_reg(struct umr_asic *asic, uint64_t addr, uint32_t value, enum reg
> uint32_t umr_read_reg_by_name(struct umr_asic *asic, char *name);
> int umr_write_reg_by_name(struct umr_asic *asic, char *name, uint32_t value);
>
> +// read/write a register by ip/name
> +uint32_t umr_read_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name);
> +int umr_write_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *name, uint32_t value);
> +
> // slice a full register into bits (shifted into LSB)
> uint32_t umr_bitslice_reg(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue);
> uint32_t umr_bitslice_reg_by_name(struct umr_asic *asic, char *regname, char *bitname, uint32_t regvalue);
> +uint32_t umr_bitslice_reg_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue);
>
> // compose a 32-bit register with a value and a bitfield
> uint32_t umr_bitslice_compose_value(struct umr_asic *asic, struct umr_reg *reg, char *bitname, uint32_t regvalue);
> uint32_t umr_bitslice_compose_value_by_name(struct umr_asic *asic, char *reg, char *bitname, uint32_t regvalue);
> +uint32_t umr_bitslice_compose_value_by_name_by_ip(struct umr_asic *asic, char *ip, char *regname, char *bitname, uint32_t regvalue);
>
> // select a GRBM_GFX_IDX
> int umr_grbm_select_index(struct umr_asic *asic, uint32_t se, uint32_t sh, uint32_t instance);
>
> +// halt/resume SQ waves
> +int umr_sq_cmd_halt_waves(struct umr_asic *asic, enum umr_sq_cmd_halt_resume mode);
> +
> /* IB/ring decoding/dumping/etc */
> void umr_print_decode(struct umr_asic *asic, struct umr_ring_decoder *decoder, uint32_t ib);
> void umr_dump_ib(struct umr_asic *asic, struct umr_ring_decoder *decoder);
> --
> 2.12.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx mailing list