[Mesa-dev] [PATCH 2/3] radeonsi: move si_vm_fault_occured() to AMD common code

Samuel Pitoiset samuel.pitoiset at gmail.com
Thu Aug 31 09:43:59 UTC 2017


This lets radv reuse the function in order to report VM faults when they are detected.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
---
 src/amd/common/ac_debug.c               | 109 ++++++++++++++++++++++++++++++++
 src/amd/common/ac_debug.h               |   4 ++
 src/gallium/drivers/radeonsi/si_debug.c | 106 ++-----------------------------
 3 files changed, 117 insertions(+), 102 deletions(-)

diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c
index d46fc27a9e..0de00e27e7 100644
--- a/src/amd/common/ac_debug.c
+++ b/src/amd/common/ac_debug.c
@@ -34,6 +34,8 @@
 #define VG(x)
 #endif
 
+#include <inttypes.h>
+
 #include "sid.h"
 #include "gfx9d.h"
 #include "sid_tables.h"
@@ -597,3 +599,110 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
 
 	fprintf(f, "------------------- %s end -------------------\n\n", name);
 }
+
+/**
+ * Parse dmesg and return true if a new VM fault has been detected.
+ *
+ * Only messages newer than *old_dmesg_timestamp are examined, and
+ * *old_dmesg_timestamp is advanced to the newest timestamp seen.
+ *
+ * \param chip_class		chip class, selects the message format to match
+ * \param old_dmesg_timestamp	in/out: newest dmesg timestamp already seen
+ * \param out_addr		receives the first new VM fault address; pass
+ *				NULL to only update the timestamp
+ * \return true if a fault address has been written to *out_addr
+ */
+bool ac_vm_fault_occured(enum chip_class chip_class,
+			 uint64_t *old_dmesg_timestamp, uint64_t *out_addr)
+{
+	char line[2000];
+	unsigned sec, usec;
+	int progress = 0;
+	uint64_t dmesg_timestamp = 0;
+	bool fault = false;
+
+	FILE *p = popen("dmesg", "r");
+	if (!p)
+		return false;
+
+	const char *header_line, *addr_line_prefix;
+
+	if (chip_class >= GFX9) {
+		/* Match this:
+		 * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
+		 * ..:   at page 0x0000000219f8f000 from 27
+		 * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
+		 */
+		header_line = "VMC page fault";
+		addr_line_prefix = "   at page";
+	} else {
+		header_line = "GPU fault detected:";
+		addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
+	}
+
+	while (fgets(line, sizeof(line), p)) {
+		char *msg;
+
+		if (!line[0] || line[0] == '\n')
+			continue;
+
+		/* Get the timestamp. */
+		if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
+			static bool hit = false;
+			if (!hit) {
+				fprintf(stderr, "%s: failed to parse line '%s'\n",
+					__func__, line);
+				hit = true;
+			}
+			continue;
+		}
+		dmesg_timestamp = sec * 1000000ull + usec;
+
+		/* If just updating the timestamp. */
+		if (!out_addr)
+			continue;
+
+		/* Process messages only if the timestamp is newer. */
+		if (dmesg_timestamp <= *old_dmesg_timestamp)
+			continue;
+
+		/* Only process the first VM fault. */
+		if (fault)
+			continue;
+
+		/* Remove trailing \n; len must hold any length up to sizeof(line). */
+		size_t len = strlen(line);
+		if (len && line[len - 1] == '\n')
+			line[len - 1] = 0;
+
+		/* Get the message part. */
+		msg = strchr(line, ']');
+		if (!msg)
+			continue;
+		msg++;
+
+		switch (progress) {
+		case 0:
+			if (strstr(msg, header_line))
+				progress = 1;
+			break;
+		case 1:
+			/* The address is only looked for on the line after the header. */
+			msg = strstr(msg, addr_line_prefix);
+			if (msg)
+				msg = strstr(msg, "0x");
+			if (msg && sscanf(msg + 2, "%"SCNx64, out_addr) == 1)
+				fault = true;
+			progress = 0;
+			break;
+		default:
+			progress = 0;
+		}
+	}
+	pclose(p);
+
+	if (dmesg_timestamp > *old_dmesg_timestamp)
+		*old_dmesg_timestamp = dmesg_timestamp;
+
+	return fault;
+}
diff --git a/src/amd/common/ac_debug.h b/src/amd/common/ac_debug.h
index a37acd2029..277025d8b6 100644
--- a/src/amd/common/ac_debug.h
+++ b/src/amd/common/ac_debug.h
@@ -28,6 +28,7 @@
 
 #include <stdint.h>
 #include <stdio.h>
+#include <stdbool.h>
 
 #include "amd_family.h"
 
@@ -46,4 +47,7 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
 		 unsigned trace_id_count, const char *name, enum chip_class chip_class,
 		 ac_debug_addr_callback addr_callback, void *addr_callback_data);
 
+bool ac_vm_fault_occured(enum chip_class chip_class,
+			 uint64_t *old_dmesg_timestamp, uint64_t *out_addr);
+
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 1b97ea4097..4e8d861019 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -1092,106 +1092,6 @@ static void si_dump_dma(struct si_context *sctx,
 	fprintf(f, "SDMA Dump Done.\n");
 }
 
-static bool si_vm_fault_occured(struct si_context *sctx, uint64_t *out_addr)
-{
-	char line[2000];
-	unsigned sec, usec;
-	int progress = 0;
-	uint64_t timestamp = 0;
-	bool fault = false;
-
-	FILE *p = popen("dmesg", "r");
-	if (!p)
-		return false;
-
-	while (fgets(line, sizeof(line), p)) {
-		char *msg, len;
-
-		if (!line[0] || line[0] == '\n')
-			continue;
-
-		/* Get the timestamp. */
-		if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
-			static bool hit = false;
-			if (!hit) {
-				fprintf(stderr, "%s: failed to parse line '%s'\n",
-					__func__, line);
-				hit = true;
-			}
-			continue;
-		}
-		timestamp = sec * 1000000ull + usec;
-
-		/* If just updating the timestamp. */
-		if (!out_addr)
-			continue;
-
-		/* Process messages only if the timestamp is newer. */
-		if (timestamp <= sctx->dmesg_timestamp)
-			continue;
-
-		/* Only process the first VM fault. */
-		if (fault)
-			continue;
-
-		/* Remove trailing \n */
-		len = strlen(line);
-		if (len && line[len-1] == '\n')
-			line[len-1] = 0;
-
-		/* Get the message part. */
-		msg = strchr(line, ']');
-		if (!msg) {
-			assert(0);
-			continue;
-		}
-		msg++;
-
-		const char *header_line, *addr_line_prefix, *addr_line_format;
-
-		if (sctx->b.chip_class >= GFX9) {
-			/* Match this:
-			 * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
-			 * ..:   at page 0x0000000219f8f000 from 27
-			 * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
-			 */
-			header_line = "VMC page fault";
-			addr_line_prefix = "   at page";
-			addr_line_format = "%"PRIx64;
-		} else {
-			header_line = "GPU fault detected:";
-			addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
-			addr_line_format = "%"PRIX64;
-		}
-
-		switch (progress) {
-		case 0:
-			if (strstr(msg, header_line))
-				progress = 1;
-			break;
-		case 1:
-			msg = strstr(msg, addr_line_prefix);
-			if (msg) {
-				msg = strstr(msg, "0x");
-				if (msg) {
-					msg += 2;
-					if (sscanf(msg, addr_line_format, out_addr) == 1)
-						fault = true;
-				}
-			}
-			progress = 0;
-			break;
-		default:
-			progress = 0;
-		}
-	}
-	pclose(p);
-
-	if (timestamp > sctx->dmesg_timestamp)
-		sctx->dmesg_timestamp = timestamp;
-	return fault;
-}
-
 void si_check_vm_faults(struct r600_common_context *ctx,
 			struct radeon_saved_cs *saved, enum ring_type ring)
 {
@@ -1201,7 +1101,8 @@ void si_check_vm_faults(struct r600_common_context *ctx,
 	uint64_t addr;
 	char cmd_line[4096];
 
-	if (!si_vm_fault_occured(sctx, &addr))
+	if (!ac_vm_fault_occured(sctx->b.chip_class,
+				 &sctx->dmesg_timestamp, &addr))
 		return;
 
 	f = dd_get_debug_file(false);
@@ -1255,5 +1156,6 @@ void si_init_debug_functions(struct si_context *sctx)
 	 * only new messages will be checked for VM faults.
 	 */
 	if (sctx->screen->b.debug_flags & DBG_CHECK_VM)
-		si_vm_fault_occured(sctx, NULL);
+		ac_vm_fault_occured(sctx->b.chip_class,
+				    &sctx->dmesg_timestamp, NULL);
 }
-- 
2.14.1



More information about the mesa-dev mailing list