[PATCH 3/3] lib/amdgpu: fix the hard-coded path when scheduling rings.
vitaly prosyak
vprosyak at amd.com
Sun Jan 5 07:20:17 UTC 2025
I applied some code formatting, and the series of three changes now looks good to me.
Reviewed-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
On 2025-01-05 02:15, vitaly.prosyak at amd.com wrote:
> From: "Jesse.zhang at amd.com" <Jesse.zhang at amd.com>
>
> Implementation of dynamically selected scheduling rings.
>
> Cc: Vitaly Prosyak <vitaly.prosyak at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Cc: Alexander Deucher <alexander.deucher at amd.com>
>
> v2: fix formatting(Vitaly)
>
> Suggested-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
> Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
> Reviewed-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
> ---
> lib/amdgpu/amd_deadlock_helpers.c | 43 ++++++++++++++++++-------------
> lib/amdgpu/amd_deadlock_helpers.h | 8 +++---
> tests/amdgpu/amd_deadlock.c | 28 +++++++++++---------
> 3 files changed, 46 insertions(+), 33 deletions(-)
>
> diff --git a/lib/amdgpu/amd_deadlock_helpers.c b/lib/amdgpu/amd_deadlock_helpers.c
> index dabd7ae76..8ac6abf8f 100644
> --- a/lib/amdgpu/amd_deadlock_helpers.c
> +++ b/lib/amdgpu/amd_deadlock_helpers.c
> @@ -65,7 +65,7 @@ amdgpu_wait_memory(amdgpu_device_handle device_handle, unsigned int ip_type, uin
> int job_count = 0;
> struct amdgpu_cmd_base *base_cmd = get_cmd_base();
>
> - if( priority == AMDGPU_CTX_PRIORITY_HIGH)
> + if (priority == AMDGPU_CTX_PRIORITY_HIGH)
> r = amdgpu_cs_ctx_create2(device_handle, AMDGPU_CTX_PRIORITY_HIGH, &context_handle);
> else
> r = amdgpu_cs_ctx_create(device_handle, &context_handle);
> @@ -173,7 +173,7 @@ amdgpu_wait_memory(amdgpu_device_handle device_handle, unsigned int ip_type, uin
> free_cmd_base(base_cmd);
> }
>
> -void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type)
> +void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type, struct pci_addr *pci)
> {
> int r;
> FILE *fp;
> @@ -190,18 +190,21 @@ void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int
> igt_info("SKIP ... as there's no ring for ip %d\n", ip_type);
>
> if (ip_type == AMD_IP_GFX)
> - snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask");
> + snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_gfx_sched_mask",
> + pci->domain, pci->bus, pci->device, pci->function);
> else if (ip_type == AMD_IP_COMPUTE)
> - snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_compute_sched_mask");
> + snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_compute_sched_mask",
> + pci->domain, pci->bus, pci->device, pci->function);
> else if (ip_type == AMD_IP_DMA)
> - snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask");
> + snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_sdma_sched_mask",
> + pci->domain, pci->bus, pci->device, pci->function);
>
> snprintf(cmd, sizeof(cmd) - 1, "sudo cat %s", sysfs);
> r = access(sysfs, R_OK);
> if (!r) {
> fp = popen(cmd, "r");
> if (fp == NULL)
> - igt_skip("read the sysfs failed: %s \n",sysfs);
> + igt_skip("read the sysfs failed: %s\n", sysfs);
>
> if (fgets(buffer, 128, fp) != NULL)
> sched_mask = strtol(buffer, NULL, 16);
> @@ -247,7 +250,7 @@ void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int
>
> /* recover the sched mask */
> if (sched_mask > 1) {
> - snprintf(cmd, sizeof(cmd) - 1, "sudo echo 0x%lx > %s",sched_mask, sysfs);
> + snprintf(cmd, sizeof(cmd) - 1, "sudo echo 0x%lx > %s", sched_mask, sysfs);
> r = system(cmd);
> igt_assert_eq(r, 0);
> }
> @@ -269,7 +272,7 @@ bad_access_helper(amdgpu_device_handle device_handle, unsigned int cmd_error,
> ring_context = calloc(1, sizeof(*ring_context));
> igt_assert(ring_context);
>
> - if( priority == AMDGPU_CTX_PRIORITY_HIGH)
> + if (priority == AMDGPU_CTX_PRIORITY_HIGH)
> r = amdgpu_cs_ctx_create2(device_handle, AMDGPU_CTX_PRIORITY_HIGH, &ring_context->context_handle);
> else
> r = amdgpu_cs_ctx_create(device_handle, &ring_context->context_handle);
> @@ -401,7 +404,7 @@ amdgpu_hang_sdma_helper(amdgpu_device_handle device_handle, uint8_t hang_type)
> free_cmd_base(base_cmd);
> }
>
> -void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type)
> +void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type, struct pci_addr *pci)
> {
> int r;
> FILE *fp;
> @@ -418,18 +421,21 @@ void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd
> igt_info("SKIP ... as there's no ring for ip %d\n", ip_type);
>
> if (ip_type == AMD_IP_GFX)
> - snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask");
> + snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_gfx_sched_mask",
> + pci->domain, pci->bus, pci->device, pci->function);
> else if (ip_type == AMD_IP_COMPUTE)
> - snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_compute_sched_mask");
> + snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_compute_sched_mask",
> + pci->domain, pci->bus, pci->device, pci->function);
> else if (ip_type == AMD_IP_DMA)
> - snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask");
> + snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_sdma_sched_mask",
> + pci->domain, pci->bus, pci->device, pci->function);
>
> snprintf(cmd, sizeof(cmd) - 1, "sudo cat %s", sysfs);
> r = access(sysfs, R_OK);
> if (!r) {
> fp = popen(cmd, "r");
> if (fp == NULL)
> - igt_skip("read the sysfs failed: %s \n",sysfs);
> + igt_skip("read the sysfs failed: %s\n", sysfs);
>
> if (fgets(buffer, 128, fp) != NULL)
> sched_mask = strtol(buffer, NULL, 16);
> @@ -475,14 +481,14 @@ void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd
>
> /* recover the sched mask */
> if (sched_mask > 1) {
> - snprintf(cmd, sizeof(cmd) - 1, "sudo echo 0x%lx > %s",sched_mask, sysfs);
> + snprintf(cmd, sizeof(cmd) - 1, "sudo echo 0x%lx > %s", sched_mask, sysfs);
> r = system(cmd);
> igt_assert_eq(r, 0);
> }
>
> }
>
> -void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type)
> +void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type, struct pci_addr *pci)
> {
> int r;
> FILE *fp;
> @@ -498,13 +504,14 @@ void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t ha
> if (!info.available_rings)
> igt_info("SKIP ... as there's no ring for the sdma\n");
>
> - snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask");
> + snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_sdma_sched_mask",
> + pci->domain, pci->bus, pci->device, pci->function);
> snprintf(cmd, sizeof(cmd) - 1, "sudo cat %s", sysfs);
> r = access(sysfs, R_OK);
> if (!r) {
> fp = popen(cmd, "r");
> if (fp == NULL)
> - igt_skip("read the sysfs failed: %s \n",sysfs);
> + igt_skip("read the sysfs failed: %s\n", sysfs);
>
> if (fgets(buffer, 128, fp) != NULL)
> sched_mask = strtol(buffer, NULL, 16);
> @@ -530,7 +537,7 @@ void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t ha
>
> /* recover the sched mask */
> if (sched_mask > 1) {
> - snprintf(cmd, sizeof(cmd) - 1, "sudo echo 0x%lx > %s",sched_mask, sysfs);
> + snprintf(cmd, sizeof(cmd) - 1, "sudo echo 0x%lx > %s", sched_mask, sysfs);
> r = system(cmd);
> igt_assert_eq(r, 0);
> }
> diff --git a/lib/amdgpu/amd_deadlock_helpers.h b/lib/amdgpu/amd_deadlock_helpers.h
> index 7f8419280..1d654c490 100644
> --- a/lib/amdgpu/amd_deadlock_helpers.h
> +++ b/lib/amdgpu/amd_deadlock_helpers.h
> @@ -24,12 +24,14 @@
> #ifndef __AMD_DEADLOCK_HELPERS_H__
> #define __AMD_DEADLOCK_HELPERS_H__
>
> +#include "amd_ip_blocks.h"
> +
> void
> -amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type);
> +amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type, struct pci_addr *pci);
> void
> -bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type);
> +bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type, struct pci_addr *pci);
>
> void
> -amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type);
> +amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type, struct pci_addr *pci);
> #endif
>
> diff --git a/tests/amdgpu/amd_deadlock.c b/tests/amdgpu/amd_deadlock.c
> index b8bb053ca..29b7ae509 100644
> --- a/tests/amdgpu/amd_deadlock.c
> +++ b/tests/amdgpu/amd_deadlock.c
> @@ -40,6 +40,7 @@ igt_main
> int fd = -1;
> int r;
> bool arr_cap[AMD_IP_MAX] = {0};
> + struct pci_addr pci;
>
> igt_fixture {
> uint32_t major, minor;
> @@ -60,12 +61,15 @@ igt_main
> asic_rings_readness(device, 1, arr_cap);
> igt_skip_on(!is_deadlock_tests_enable(&gpu_info));
>
> + igt_skip_on(get_pci_addr_from_fd(fd, &pci));
> + igt_info("PCI Address: domain %04x, bus %02x, device %02x, function %02x\n",
> + pci.domain, pci.bus, pci.device, pci.function);
> }
> igt_describe("Test-GPU-reset-by-flooding-sdma-ring-with-jobs");
> igt_subtest_with_dynamic("amdgpu-deadlock-sdma") {
> if (arr_cap[AMD_IP_DMA]) {
> igt_dynamic_f("amdgpu-deadlock-sdma")
> - amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_DMA);
> + amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_DMA, &pci);
> }
> }
>
> @@ -75,7 +79,7 @@ igt_main
> is_reset_enable(AMD_IP_GFX, AMDGPU_RESET_TYPE_PER_QUEUE)) {
> igt_dynamic_f("amdgpu-illegal-reg-access")
> bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_REG_ADDRESS,
> - AMDGPU_HW_IP_GFX);
> + AMDGPU_HW_IP_GFX, &pci);
> }
> }
>
> @@ -85,7 +89,7 @@ igt_main
> is_reset_enable(AMD_IP_GFX, AMDGPU_RESET_TYPE_PER_QUEUE)) {
> igt_dynamic_f("amdgpu-illegal-mem-access")
> bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_MEM_ADDRESS,
> - AMDGPU_HW_IP_GFX);
> + AMDGPU_HW_IP_GFX, &pci);
> }
> }
>
> @@ -94,7 +98,7 @@ igt_main
> igt_subtest_with_dynamic("amdgpu-deadlock-gfx") {
> if (arr_cap[AMD_IP_GFX]) {
> igt_dynamic_f("amdgpu-deadlock-gfx")
> - amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_GFX);
> + amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_GFX, &pci);
> }
> }
>
> @@ -103,7 +107,7 @@ igt_main
> if (arr_cap[AMD_IP_COMPUTE] &&
> is_reset_enable(AMD_IP_COMPUTE, AMDGPU_RESET_TYPE_PER_QUEUE)) {
> bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_MEM_ADDRESS,
> - AMDGPU_HW_IP_COMPUTE);
> + AMDGPU_HW_IP_COMPUTE, &pci);
> }
> }
>
> @@ -111,7 +115,7 @@ igt_main
> igt_subtest_with_dynamic("amdgpu-deadlock-compute") {
> if (arr_cap[AMD_IP_COMPUTE]) {
> igt_dynamic_f("amdgpu-deadlock-compute")
> - amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_COMPUTE);
> + amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_COMPUTE, &pci);
> }
> }
>
> @@ -120,7 +124,7 @@ igt_main
> if (arr_cap[AMD_IP_DMA] &&
> is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
> igt_dynamic_f("amdgpu-deadlock-sdma-corrupted-header-test")
> - amdgpu_hang_sdma_ring_helper(device, DMA_CORRUPTED_HEADER_HANG);
> + amdgpu_hang_sdma_ring_helper(device, DMA_CORRUPTED_HEADER_HANG, &pci);
> }
> }
>
> @@ -129,7 +133,7 @@ igt_main
> if (arr_cap[AMD_IP_DMA] &&
> is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
> igt_dynamic_f("amdgpu-deadlock-sdma-slow-linear-copy")
> - amdgpu_hang_sdma_ring_helper(device, DMA_SLOW_LINEARCOPY_HANG);
> + amdgpu_hang_sdma_ring_helper(device, DMA_SLOW_LINEARCOPY_HANG, &pci);
> }
> }
>
> @@ -139,7 +143,7 @@ igt_main
> is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
> igt_dynamic_f("amdgpu-deadlock-sdma-badop-test")
> bad_access_ring_helper(device, CMD_STREAM_EXEC_INVALID_OPCODE,
> - AMDGPU_HW_IP_DMA);
> + AMDGPU_HW_IP_DMA, &pci);
> }
> }
>
> @@ -149,7 +153,7 @@ igt_main
> is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
> igt_dynamic_f("amdgpu-deadlock-sdma-bad-mem-test")
> bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_MEM_ADDRESS,
> - AMDGPU_HW_IP_DMA);
> + AMDGPU_HW_IP_DMA, &pci);
> }
> }
>
> @@ -159,7 +163,7 @@ igt_main
> is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
> igt_dynamic_f("amdgpu-deadlock-sdma-bad-reg-test")
> bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_REG_ADDRESS,
> - AMDGPU_HW_IP_DMA);
> + AMDGPU_HW_IP_DMA, &pci);
> }
> }
>
> @@ -169,7 +173,7 @@ igt_main
> is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
> igt_dynamic_f("amdgpu-deadlock-sdma-bad-length-test")
> bad_access_ring_helper(device, CMD_STREAM_EXEC_INVALID_PACKET_LENGTH,
> - AMDGPU_HW_IP_DMA);
> + AMDGPU_HW_IP_DMA, &pci);
> }
> }
>
More information about the igt-dev
mailing list