[PATCH 3/3] lib/amdgpu: fix the hard-coded debugfs path when scheduling rings.
vitaly.prosyak at amd.com
vitaly.prosyak at amd.com
Sun Jan 5 07:15:51 UTC 2025
From: "Jesse.zhang at amd.com" <Jesse.zhang at amd.com>
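Implementation of dynamically selected scheduling rings: the sched_mask debugfs path is now built from the device's PCI address (domain:bus:device.function) instead of the hard-coded /sys/kernel/debug/dri/0/ directory.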
Cc: Vitaly Prosyak <vitaly.prosyak at amd.com>
Cc: Christian Koenig <christian.koenig at amd.com>
Cc: Alexander Deucher <alexander.deucher at amd.com>
v2: fix formatting(Vitaly)
Suggested-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
Reviewed-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
---
lib/amdgpu/amd_deadlock_helpers.c | 43 ++++++++++++++++++-------------
lib/amdgpu/amd_deadlock_helpers.h | 8 +++---
tests/amdgpu/amd_deadlock.c | 28 +++++++++++---------
3 files changed, 46 insertions(+), 33 deletions(-)
diff --git a/lib/amdgpu/amd_deadlock_helpers.c b/lib/amdgpu/amd_deadlock_helpers.c
index dabd7ae76..8ac6abf8f 100644
--- a/lib/amdgpu/amd_deadlock_helpers.c
+++ b/lib/amdgpu/amd_deadlock_helpers.c
@@ -65,7 +65,7 @@ amdgpu_wait_memory(amdgpu_device_handle device_handle, unsigned int ip_type, uin
int job_count = 0;
struct amdgpu_cmd_base *base_cmd = get_cmd_base();
- if( priority == AMDGPU_CTX_PRIORITY_HIGH)
+ if (priority == AMDGPU_CTX_PRIORITY_HIGH)
r = amdgpu_cs_ctx_create2(device_handle, AMDGPU_CTX_PRIORITY_HIGH, &context_handle);
else
r = amdgpu_cs_ctx_create(device_handle, &context_handle);
@@ -173,7 +173,7 @@ amdgpu_wait_memory(amdgpu_device_handle device_handle, unsigned int ip_type, uin
free_cmd_base(base_cmd);
}
-void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type)
+void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type, struct pci_addr *pci)
{
int r;
FILE *fp;
@@ -190,18 +190,21 @@ void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int
igt_info("SKIP ... as there's no ring for ip %d\n", ip_type);
if (ip_type == AMD_IP_GFX)
- snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask");
+ snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_gfx_sched_mask",
+ pci->domain, pci->bus, pci->device, pci->function);
else if (ip_type == AMD_IP_COMPUTE)
- snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_compute_sched_mask");
+ snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_compute_sched_mask",
+ pci->domain, pci->bus, pci->device, pci->function);
else if (ip_type == AMD_IP_DMA)
- snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask");
+ snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_sdma_sched_mask",
+ pci->domain, pci->bus, pci->device, pci->function);
snprintf(cmd, sizeof(cmd) - 1, "sudo cat %s", sysfs);
r = access(sysfs, R_OK);
if (!r) {
fp = popen(cmd, "r");
if (fp == NULL)
- igt_skip("read the sysfs failed: %s \n",sysfs);
+ igt_skip("read the sysfs failed: %s\n", sysfs);
if (fgets(buffer, 128, fp) != NULL)
sched_mask = strtol(buffer, NULL, 16);
@@ -247,7 +250,7 @@ void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int
/* recover the sched mask */
if (sched_mask > 1) {
- snprintf(cmd, sizeof(cmd) - 1, "sudo echo 0x%lx > %s",sched_mask, sysfs);
+ snprintf(cmd, sizeof(cmd) - 1, "sudo echo 0x%lx > %s", sched_mask, sysfs);
r = system(cmd);
igt_assert_eq(r, 0);
}
@@ -269,7 +272,7 @@ bad_access_helper(amdgpu_device_handle device_handle, unsigned int cmd_error,
ring_context = calloc(1, sizeof(*ring_context));
igt_assert(ring_context);
- if( priority == AMDGPU_CTX_PRIORITY_HIGH)
+ if (priority == AMDGPU_CTX_PRIORITY_HIGH)
r = amdgpu_cs_ctx_create2(device_handle, AMDGPU_CTX_PRIORITY_HIGH, &ring_context->context_handle);
else
r = amdgpu_cs_ctx_create(device_handle, &ring_context->context_handle);
@@ -401,7 +404,7 @@ amdgpu_hang_sdma_helper(amdgpu_device_handle device_handle, uint8_t hang_type)
free_cmd_base(base_cmd);
}
-void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type)
+void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type, struct pci_addr *pci)
{
int r;
FILE *fp;
@@ -418,18 +421,21 @@ void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd
igt_info("SKIP ... as there's no ring for ip %d\n", ip_type);
if (ip_type == AMD_IP_GFX)
- snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask");
+ snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_gfx_sched_mask",
+ pci->domain, pci->bus, pci->device, pci->function);
else if (ip_type == AMD_IP_COMPUTE)
- snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_compute_sched_mask");
+ snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_compute_sched_mask",
+ pci->domain, pci->bus, pci->device, pci->function);
else if (ip_type == AMD_IP_DMA)
- snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask");
+ snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_sdma_sched_mask",
+ pci->domain, pci->bus, pci->device, pci->function);
snprintf(cmd, sizeof(cmd) - 1, "sudo cat %s", sysfs);
r = access(sysfs, R_OK);
if (!r) {
fp = popen(cmd, "r");
if (fp == NULL)
- igt_skip("read the sysfs failed: %s \n",sysfs);
+ igt_skip("read the sysfs failed: %s\n", sysfs);
if (fgets(buffer, 128, fp) != NULL)
sched_mask = strtol(buffer, NULL, 16);
@@ -475,14 +481,14 @@ void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd
/* recover the sched mask */
if (sched_mask > 1) {
- snprintf(cmd, sizeof(cmd) - 1, "sudo echo 0x%lx > %s",sched_mask, sysfs);
+ snprintf(cmd, sizeof(cmd) - 1, "sudo echo 0x%lx > %s", sched_mask, sysfs);
r = system(cmd);
igt_assert_eq(r, 0);
}
}
-void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type)
+void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type, struct pci_addr *pci)
{
int r;
FILE *fp;
@@ -498,13 +504,14 @@ void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t ha
if (!info.available_rings)
igt_info("SKIP ... as there's no ring for the sdma\n");
- snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask");
+ snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_sdma_sched_mask",
+ pci->domain, pci->bus, pci->device, pci->function);
snprintf(cmd, sizeof(cmd) - 1, "sudo cat %s", sysfs);
r = access(sysfs, R_OK);
if (!r) {
fp = popen(cmd, "r");
if (fp == NULL)
- igt_skip("read the sysfs failed: %s \n",sysfs);
+ igt_skip("read the sysfs failed: %s\n", sysfs);
if (fgets(buffer, 128, fp) != NULL)
sched_mask = strtol(buffer, NULL, 16);
@@ -530,7 +537,7 @@ void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t ha
/* recover the sched mask */
if (sched_mask > 1) {
- snprintf(cmd, sizeof(cmd) - 1, "sudo echo 0x%lx > %s",sched_mask, sysfs);
+ snprintf(cmd, sizeof(cmd) - 1, "sudo echo 0x%lx > %s", sched_mask, sysfs);
r = system(cmd);
igt_assert_eq(r, 0);
}
diff --git a/lib/amdgpu/amd_deadlock_helpers.h b/lib/amdgpu/amd_deadlock_helpers.h
index 7f8419280..1d654c490 100644
--- a/lib/amdgpu/amd_deadlock_helpers.h
+++ b/lib/amdgpu/amd_deadlock_helpers.h
@@ -24,12 +24,14 @@
#ifndef __AMD_DEADLOCK_HELPERS_H__
#define __AMD_DEADLOCK_HELPERS_H__
+#include "amd_ip_blocks.h"
+
void
-amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type);
+amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type, struct pci_addr *pci);
void
-bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type);
+bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type, struct pci_addr *pci);
void
-amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type);
+amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type, struct pci_addr *pci);
#endif
diff --git a/tests/amdgpu/amd_deadlock.c b/tests/amdgpu/amd_deadlock.c
index b8bb053ca..29b7ae509 100644
--- a/tests/amdgpu/amd_deadlock.c
+++ b/tests/amdgpu/amd_deadlock.c
@@ -40,6 +40,7 @@ igt_main
int fd = -1;
int r;
bool arr_cap[AMD_IP_MAX] = {0};
+ struct pci_addr pci;
igt_fixture {
uint32_t major, minor;
@@ -60,12 +61,15 @@ igt_main
asic_rings_readness(device, 1, arr_cap);
igt_skip_on(!is_deadlock_tests_enable(&gpu_info));
+ igt_skip_on(get_pci_addr_from_fd(fd, &pci));
+ igt_info("PCI Address: domain %04x, bus %02x, device %02x, function %02x\n",
+ pci.domain, pci.bus, pci.device, pci.function);
}
igt_describe("Test-GPU-reset-by-flooding-sdma-ring-with-jobs");
igt_subtest_with_dynamic("amdgpu-deadlock-sdma") {
if (arr_cap[AMD_IP_DMA]) {
igt_dynamic_f("amdgpu-deadlock-sdma")
- amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_DMA);
+ amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_DMA, &pci);
}
}
@@ -75,7 +79,7 @@ igt_main
is_reset_enable(AMD_IP_GFX, AMDGPU_RESET_TYPE_PER_QUEUE)) {
igt_dynamic_f("amdgpu-illegal-reg-access")
bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_REG_ADDRESS,
- AMDGPU_HW_IP_GFX);
+ AMDGPU_HW_IP_GFX, &pci);
}
}
@@ -85,7 +89,7 @@ igt_main
is_reset_enable(AMD_IP_GFX, AMDGPU_RESET_TYPE_PER_QUEUE)) {
igt_dynamic_f("amdgpu-illegal-mem-access")
bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_MEM_ADDRESS,
- AMDGPU_HW_IP_GFX);
+ AMDGPU_HW_IP_GFX, &pci);
}
}
@@ -94,7 +98,7 @@ igt_main
igt_subtest_with_dynamic("amdgpu-deadlock-gfx") {
if (arr_cap[AMD_IP_GFX]) {
igt_dynamic_f("amdgpu-deadlock-gfx")
- amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_GFX);
+ amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_GFX, &pci);
}
}
@@ -103,7 +107,7 @@ igt_main
if (arr_cap[AMD_IP_COMPUTE] &&
is_reset_enable(AMD_IP_COMPUTE, AMDGPU_RESET_TYPE_PER_QUEUE)) {
bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_MEM_ADDRESS,
- AMDGPU_HW_IP_COMPUTE);
+ AMDGPU_HW_IP_COMPUTE, &pci);
}
}
@@ -111,7 +115,7 @@ igt_main
igt_subtest_with_dynamic("amdgpu-deadlock-compute") {
if (arr_cap[AMD_IP_COMPUTE]) {
igt_dynamic_f("amdgpu-deadlock-compute")
- amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_COMPUTE);
+ amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_COMPUTE, &pci);
}
}
@@ -120,7 +124,7 @@ igt_main
if (arr_cap[AMD_IP_DMA] &&
is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
igt_dynamic_f("amdgpu-deadlock-sdma-corrupted-header-test")
- amdgpu_hang_sdma_ring_helper(device, DMA_CORRUPTED_HEADER_HANG);
+ amdgpu_hang_sdma_ring_helper(device, DMA_CORRUPTED_HEADER_HANG, &pci);
}
}
@@ -129,7 +133,7 @@ igt_main
if (arr_cap[AMD_IP_DMA] &&
is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
igt_dynamic_f("amdgpu-deadlock-sdma-slow-linear-copy")
- amdgpu_hang_sdma_ring_helper(device, DMA_SLOW_LINEARCOPY_HANG);
+ amdgpu_hang_sdma_ring_helper(device, DMA_SLOW_LINEARCOPY_HANG, &pci);
}
}
@@ -139,7 +143,7 @@ igt_main
is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
igt_dynamic_f("amdgpu-deadlock-sdma-badop-test")
bad_access_ring_helper(device, CMD_STREAM_EXEC_INVALID_OPCODE,
- AMDGPU_HW_IP_DMA);
+ AMDGPU_HW_IP_DMA, &pci);
}
}
@@ -149,7 +153,7 @@ igt_main
is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
igt_dynamic_f("amdgpu-deadlock-sdma-bad-mem-test")
bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_MEM_ADDRESS,
- AMDGPU_HW_IP_DMA);
+ AMDGPU_HW_IP_DMA, &pci);
}
}
@@ -159,7 +163,7 @@ igt_main
is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
igt_dynamic_f("amdgpu-deadlock-sdma-bad-reg-test")
bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_REG_ADDRESS,
- AMDGPU_HW_IP_DMA);
+ AMDGPU_HW_IP_DMA, &pci);
}
}
@@ -169,7 +173,7 @@ igt_main
is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
igt_dynamic_f("amdgpu-deadlock-sdma-bad-length-test")
bad_access_ring_helper(device, CMD_STREAM_EXEC_INVALID_PACKET_LENGTH,
- AMDGPU_HW_IP_DMA);
+ AMDGPU_HW_IP_DMA, &pci);
}
}
--
2.34.1
More information about the igt-dev
mailing list