[Mesa-dev] [PATCH 10/10] radeonsi/gfx9: add VM fault dmesg parser support
Nicolai Hähnle
nhaehnle at gmail.com
Sun Jul 16 09:58:00 UTC 2017
Patches 2-10:
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
On 10.07.2017 23:21, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/gallium/drivers/radeonsi/si_debug.c | 29 +++++++++++++++++++++++------
> 1 file changed, 23 insertions(+), 6 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
> index 0d26ce5..06dea61 100644
> --- a/src/gallium/drivers/radeonsi/si_debug.c
> +++ b/src/gallium/drivers/radeonsi/si_debug.c
> @@ -859,21 +859,21 @@ static void si_dump_dma(struct si_context *sctx,
> for (i = 0; i < saved->num_dw; ++i) {
> fprintf(f, " %08x\n", saved->ib[i]);
> }
>
> fprintf(f, "------------------- %s end -------------------\n", ib_name);
> fprintf(f, "\n");
>
> fprintf(f, "SDMA Dump Done.\n");
> }
>
> -static bool si_vm_fault_occured(struct si_context *sctx, uint32_t *out_addr)
> +static bool si_vm_fault_occured(struct si_context *sctx, uint64_t *out_addr)
> {
> char line[2000];
> unsigned sec, usec;
> int progress = 0;
> uint64_t timestamp = 0;
> bool fault = false;
>
> FILE *p = popen("dmesg", "r");
> if (!p)
> return false;
> @@ -914,32 +914,49 @@ static bool si_vm_fault_occured(struct si_context *sctx, uint32_t *out_addr)
> line[len-1] = 0;
>
> /* Get the message part. */
> msg = strchr(line, ']');
> if (!msg) {
> assert(0);
> continue;
> }
> msg++;
>
> + const char *header_line, *addr_line_prefix, *addr_line_format;
> +
> + if (sctx->b.chip_class >= GFX9) {
> + /* Match this:
> + * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
> + * ..: at page 0x0000000219f8f000 from 27
> + * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
> + */
> + header_line = "VMC page fault";
> + addr_line_prefix = " at page";
> + addr_line_format = "%"PRIx64;
> + } else {
> + header_line = "GPU fault detected:";
> + addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
> + addr_line_format = "%"PRIX64;
> + }
> +
> switch (progress) {
> case 0:
> - if (strstr(msg, "GPU fault detected:"))
> + if (strstr(msg, header_line))
> progress = 1;
> break;
> case 1:
> - msg = strstr(msg, "VM_CONTEXT1_PROTECTION_FAULT_ADDR");
> + msg = strstr(msg, addr_line_prefix);
> if (msg) {
> msg = strstr(msg, "0x");
> if (msg) {
> msg += 2;
> - if (sscanf(msg, "%X", out_addr) == 1)
> + if (sscanf(msg, addr_line_format, out_addr) == 1)
> fault = true;
> }
> }
> progress = 0;
> break;
> default:
> progress = 0;
> }
> }
> pclose(p);
> @@ -948,37 +965,37 @@ static bool si_vm_fault_occured(struct si_context *sctx, uint32_t *out_addr)
> sctx->dmesg_timestamp = timestamp;
> return fault;
> }
>
> void si_check_vm_faults(struct r600_common_context *ctx,
> struct radeon_saved_cs *saved, enum ring_type ring)
> {
> struct si_context *sctx = (struct si_context *)ctx;
> struct pipe_screen *screen = sctx->b.b.screen;
> FILE *f;
> - uint32_t addr;
> + uint64_t addr;
> char cmd_line[4096];
>
> if (!si_vm_fault_occured(sctx, &addr))
> return;
>
> f = dd_get_debug_file(false);
> if (!f)
> return;
>
> fprintf(f, "VM fault report.\n\n");
> if (os_get_command_line(cmd_line, sizeof(cmd_line)))
> fprintf(f, "Command: %s\n", cmd_line);
> fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
> fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
> fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
> - fprintf(f, "Failing VM page: 0x%08x\n\n", addr);
> + fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
>
> if (sctx->apitrace_call_number)
> fprintf(f, "Last apitrace call: %u\n\n",
> sctx->apitrace_call_number);
>
> switch (ring) {
> case RING_GFX:
> si_dump_debug_state(&sctx->b.b, f,
> PIPE_DUMP_CURRENT_STATES |
> PIPE_DUMP_CURRENT_SHADERS |
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev mailing list