[Mesa-dev] [PATCH 2/3] radeonsi: move si_vm_fault_occured() to AMD common code

Marek Olšák maraeo at gmail.com
Thu Aug 31 11:25:46 UTC 2017


Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Thu, Aug 31, 2017 at 11:43 AM, Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
> Move the function to AMD common code so that radv can also report VM faults when they are detected.
>
> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
> ---
>  src/amd/common/ac_debug.c               | 109 ++++++++++++++++++++++++++++++++
>  src/amd/common/ac_debug.h               |   4 ++
>  src/gallium/drivers/radeonsi/si_debug.c | 106 ++-----------------------------
>  3 files changed, 117 insertions(+), 102 deletions(-)
>
> diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c
> index d46fc27a9e..0de00e27e7 100644
> --- a/src/amd/common/ac_debug.c
> +++ b/src/amd/common/ac_debug.c
> @@ -34,6 +34,8 @@
>  #define VG(x)
>  #endif
>
> +#include <inttypes.h>
> +
>  #include "sid.h"
>  #include "gfx9d.h"
>  #include "sid_tables.h"
> @@ -597,3 +599,110 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
>
>         fprintf(f, "------------------- %s end -------------------\n\n", name);
>  }
> +
> +/**
> + * Parse dmesg and return TRUE if a VM fault has been detected.
> + *
> + * \param chip_class           chip class
> + * \param old_dmesg_timestamp  previous dmesg timestamp parsed at init time
> + * \param out_addr             detected VM fault addr (NULL to only update the timestamp)
> + */
> +bool ac_vm_fault_occured(enum chip_class chip_class,
> +                        uint64_t *old_dmesg_timestamp, uint64_t *out_addr)
> +{
> +       char line[2000];
> +       unsigned sec, usec;
> +       int progress = 0;
> +       uint64_t dmesg_timestamp = 0;
> +       bool fault = false;
> +
> +       FILE *p = popen("dmesg", "r");
> +       if (!p)
> +               return false;
> +
> +       while (fgets(line, sizeof(line), p)) {
> +               char *msg, len;
> +
> +               if (!line[0] || line[0] == '\n')
> +                       continue;
> +
> +               /* Get the timestamp. */
> +               if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
> +                       static bool hit = false;
> +                       if (!hit) {
> +                               fprintf(stderr, "%s: failed to parse line '%s'\n",
> +                                       __func__, line);
> +                               hit = true;
> +                       }
> +                       continue;
> +               }
> +               dmesg_timestamp = sec * 1000000ull + usec;
> +
> +               /* If just updating the timestamp. */
> +               if (!out_addr)
> +                       continue;
> +
> +               /* Process messages only if the timestamp is newer. */
> +               if (dmesg_timestamp <= *old_dmesg_timestamp)
> +                       continue;
> +
> +               /* Only process the first VM fault. */
> +               if (fault)
> +                       continue;
> +
> +               /* Remove trailing \n */
> +               len = strlen(line);
> +               if (len && line[len-1] == '\n')
> +                       line[len-1] = 0;
> +
> +               /* Get the message part. */
> +               msg = strchr(line, ']');
> +               if (!msg)
> +                       continue;
> +               msg++;
> +
> +               const char *header_line, *addr_line_prefix, *addr_line_format;
> +
> +               if (chip_class >= GFX9) {
> +                       /* Match this:
> +                        * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
> +                        * ..:   at page 0x0000000219f8f000 from 27
> +                        * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
> +                        */
> +                       header_line = "VMC page fault";
> +                       addr_line_prefix = "   at page";
> +                       addr_line_format = "%"PRIx64;
> +               } else {
> +                       header_line = "GPU fault detected:";
> +                       addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
> +                       addr_line_format = "%"PRIX64;
> +               }
> +
> +               switch (progress) {
> +               case 0:
> +                       if (strstr(msg, header_line))
> +                               progress = 1;
> +                       break;
> +               case 1:
> +                       msg = strstr(msg, addr_line_prefix);
> +                       if (msg) {
> +                               msg = strstr(msg, "0x");
> +                               if (msg) {
> +                                       msg += 2;
> +                                       if (sscanf(msg, addr_line_format, out_addr) == 1)
> +                                               fault = true;
> +                               }
> +                       }
> +                       progress = 0;
> +                       break;
> +               default:
> +                       progress = 0;
> +               }
> +       }
> +       pclose(p);
> +
> +       if (dmesg_timestamp > *old_dmesg_timestamp)
> +               *old_dmesg_timestamp = dmesg_timestamp;
> +
> +       return fault;
> +}
> diff --git a/src/amd/common/ac_debug.h b/src/amd/common/ac_debug.h
> index a37acd2029..277025d8b6 100644
> --- a/src/amd/common/ac_debug.h
> +++ b/src/amd/common/ac_debug.h
> @@ -28,6 +28,7 @@
>
>  #include <stdint.h>
>  #include <stdio.h>
> +#include <stdbool.h>
>
>  #include "amd_family.h"
>
> @@ -46,4 +47,7 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
>                  unsigned trace_id_count, const char *name, enum chip_class chip_class,
>                  ac_debug_addr_callback addr_callback, void *addr_callback_data);
>
> +bool ac_vm_fault_occured(enum chip_class chip_class,
> +                        uint64_t *old_dmesg_timestamp, uint64_t *out_addr);
> +
>  #endif
> diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
> index 1b97ea4097..4e8d861019 100644
> --- a/src/gallium/drivers/radeonsi/si_debug.c
> +++ b/src/gallium/drivers/radeonsi/si_debug.c
> @@ -1092,106 +1092,6 @@ static void si_dump_dma(struct si_context *sctx,
>         fprintf(f, "SDMA Dump Done.\n");
>  }
>
> -static bool si_vm_fault_occured(struct si_context *sctx, uint64_t *out_addr)
> -{
> -       char line[2000];
> -       unsigned sec, usec;
> -       int progress = 0;
> -       uint64_t timestamp = 0;
> -       bool fault = false;
> -
> -       FILE *p = popen("dmesg", "r");
> -       if (!p)
> -               return false;
> -
> -       while (fgets(line, sizeof(line), p)) {
> -               char *msg, len;
> -
> -               if (!line[0] || line[0] == '\n')
> -                       continue;
> -
> -               /* Get the timestamp. */
> -               if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
> -                       static bool hit = false;
> -                       if (!hit) {
> -                               fprintf(stderr, "%s: failed to parse line '%s'\n",
> -                                       __func__, line);
> -                               hit = true;
> -                       }
> -                       continue;
> -               }
> -               timestamp = sec * 1000000ull + usec;
> -
> -               /* If just updating the timestamp. */
> -               if (!out_addr)
> -                       continue;
> -
> -               /* Process messages only if the timestamp is newer. */
> -               if (timestamp <= sctx->dmesg_timestamp)
> -                       continue;
> -
> -               /* Only process the first VM fault. */
> -               if (fault)
> -                       continue;
> -
> -               /* Remove trailing \n */
> -               len = strlen(line);
> -               if (len && line[len-1] == '\n')
> -                       line[len-1] = 0;
> -
> -               /* Get the message part. */
> -               msg = strchr(line, ']');
> -               if (!msg) {
> -                       assert(0);
> -                       continue;
> -               }
> -               msg++;
> -
> -               const char *header_line, *addr_line_prefix, *addr_line_format;
> -
> -               if (sctx->b.chip_class >= GFX9) {
> -                       /* Match this:
> -                        * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
> -                        * ..:   at page 0x0000000219f8f000 from 27
> -                        * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
> -                        */
> -                       header_line = "VMC page fault";
> -                       addr_line_prefix = "   at page";
> -                       addr_line_format = "%"PRIx64;
> -               } else {
> -                       header_line = "GPU fault detected:";
> -                       addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
> -                       addr_line_format = "%"PRIX64;
> -               }
> -
> -               switch (progress) {
> -               case 0:
> -                       if (strstr(msg, header_line))
> -                               progress = 1;
> -                       break;
> -               case 1:
> -                       msg = strstr(msg, addr_line_prefix);
> -                       if (msg) {
> -                               msg = strstr(msg, "0x");
> -                               if (msg) {
> -                                       msg += 2;
> -                                       if (sscanf(msg, addr_line_format, out_addr) == 1)
> -                                               fault = true;
> -                               }
> -                       }
> -                       progress = 0;
> -                       break;
> -               default:
> -                       progress = 0;
> -               }
> -       }
> -       pclose(p);
> -
> -       if (timestamp > sctx->dmesg_timestamp)
> -               sctx->dmesg_timestamp = timestamp;
> -       return fault;
> -}
> -
>  void si_check_vm_faults(struct r600_common_context *ctx,
>                         struct radeon_saved_cs *saved, enum ring_type ring)
>  {
> @@ -1201,7 +1101,8 @@ void si_check_vm_faults(struct r600_common_context *ctx,
>         uint64_t addr;
>         char cmd_line[4096];
>
> -       if (!si_vm_fault_occured(sctx, &addr))
> +       if (!ac_vm_fault_occured(sctx->b.chip_class,
> +                                &sctx->dmesg_timestamp, &addr))
>                 return;
>
>         f = dd_get_debug_file(false);
> @@ -1255,5 +1156,6 @@ void si_init_debug_functions(struct si_context *sctx)
>          * only new messages will be checked for VM faults.
>          */
>         if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
> -               si_vm_fault_occured(sctx, NULL);
> +               ac_vm_fault_occured(sctx->b.chip_class,
> +                                   &sctx->dmesg_timestamp, NULL);
>  }
> --
> 2.14.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list