[PATCH 3/3] lib/amdgpu: fix the hard-coded debugfs path when scheduling rings.

vitaly prosyak vprosyak at amd.com
Sun Jan 5 07:20:17 UTC 2025


I applied some code formatting, and the series of three changes now looks good to me.

Reviewed-by: Vitaly Prosyak <vitaly.prosyak at amd.com>

On 2025-01-05 02:15, vitaly.prosyak at amd.com wrote:
> From: "Jesse.zhang at amd.com" <Jesse.zhang at amd.com>
>
> Implement dynamic selection of scheduling rings: build the debugfs
> sched_mask path from the device's PCI address instead of hard-coding
> DRI index 0.
>
> Cc: Vitaly Prosyak <vitaly.prosyak at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Cc: Alexander Deucher <alexander.deucher at amd.com>
>
> v2: fix formatting (Vitaly)
>
> Suggested-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
> Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
> Reviewed-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
> ---
>  lib/amdgpu/amd_deadlock_helpers.c | 43 ++++++++++++++++++-------------
>  lib/amdgpu/amd_deadlock_helpers.h |  8 +++---
>  tests/amdgpu/amd_deadlock.c       | 28 +++++++++++---------
>  3 files changed, 46 insertions(+), 33 deletions(-)
>
> diff --git a/lib/amdgpu/amd_deadlock_helpers.c b/lib/amdgpu/amd_deadlock_helpers.c
> index dabd7ae76..8ac6abf8f 100644
> --- a/lib/amdgpu/amd_deadlock_helpers.c
> +++ b/lib/amdgpu/amd_deadlock_helpers.c
> @@ -65,7 +65,7 @@ amdgpu_wait_memory(amdgpu_device_handle device_handle, unsigned int ip_type, uin
>  	int job_count = 0;
>  	struct amdgpu_cmd_base *base_cmd = get_cmd_base();
>  
> -	if( priority == AMDGPU_CTX_PRIORITY_HIGH)
> +	if (priority == AMDGPU_CTX_PRIORITY_HIGH)
>  		r = amdgpu_cs_ctx_create2(device_handle, AMDGPU_CTX_PRIORITY_HIGH, &context_handle);
>  	else
>  		r = amdgpu_cs_ctx_create(device_handle, &context_handle);
> @@ -173,7 +173,7 @@ amdgpu_wait_memory(amdgpu_device_handle device_handle, unsigned int ip_type, uin
>  	free_cmd_base(base_cmd);
>  }
>  
> -void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type)
> +void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type, struct pci_addr *pci)
>  {
>  	int r;
>  	FILE *fp;
> @@ -190,18 +190,21 @@ void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int
>  		igt_info("SKIP ... as there's no ring for ip %d\n", ip_type);
>  
>  	if (ip_type == AMD_IP_GFX)
> -		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask");
> +		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_gfx_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  	else if (ip_type == AMD_IP_COMPUTE)
> -		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_compute_sched_mask");
> +		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_compute_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  	else if (ip_type == AMD_IP_DMA)
> -		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask");
> +		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_sdma_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  
>  	snprintf(cmd, sizeof(cmd) - 1, "sudo cat %s", sysfs);
>  	r = access(sysfs, R_OK);
>  	if (!r) {
>  		fp = popen(cmd, "r");
>  		if (fp == NULL)
> -			igt_skip("read the sysfs failed: %s \n",sysfs);
> +			igt_skip("read the sysfs failed: %s\n", sysfs);
>  
>  		if (fgets(buffer, 128, fp) != NULL)
>  			sched_mask = strtol(buffer, NULL, 16);
> @@ -247,7 +250,7 @@ void amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int
>  
>  	/* recover the sched mask */
>  	if (sched_mask > 1) {
> -		snprintf(cmd, sizeof(cmd) - 1, "sudo echo  0x%lx > %s",sched_mask, sysfs);
> +		snprintf(cmd, sizeof(cmd) - 1, "sudo echo  0x%lx > %s", sched_mask, sysfs);
>  		r = system(cmd);
>  		igt_assert_eq(r, 0);
>  	}
> @@ -269,7 +272,7 @@ bad_access_helper(amdgpu_device_handle device_handle, unsigned int cmd_error,
>  	ring_context = calloc(1, sizeof(*ring_context));
>  	igt_assert(ring_context);
>  
> -	if( priority == AMDGPU_CTX_PRIORITY_HIGH)
> +	if (priority == AMDGPU_CTX_PRIORITY_HIGH)
>  		r = amdgpu_cs_ctx_create2(device_handle, AMDGPU_CTX_PRIORITY_HIGH, &ring_context->context_handle);
>  	else
>  		r = amdgpu_cs_ctx_create(device_handle, &ring_context->context_handle);
> @@ -401,7 +404,7 @@ amdgpu_hang_sdma_helper(amdgpu_device_handle device_handle, uint8_t hang_type)
>  	free_cmd_base(base_cmd);
>  }
>  
> -void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type)
> +void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type, struct pci_addr *pci)
>  {
>  	int r;
>  	FILE *fp;
> @@ -418,18 +421,21 @@ void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd
>  		igt_info("SKIP ... as there's no ring for ip %d\n", ip_type);
>  
>  	if (ip_type == AMD_IP_GFX)
> -		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask");
> +		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_gfx_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  	else if (ip_type == AMD_IP_COMPUTE)
> -		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_compute_sched_mask");
> +		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_compute_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  	else if (ip_type == AMD_IP_DMA)
> -		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask");
> +		snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_sdma_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  
>  	snprintf(cmd, sizeof(cmd) - 1, "sudo cat %s", sysfs);
>  	r = access(sysfs, R_OK);
>  	if (!r) {
>  		fp = popen(cmd, "r");
>  		if (fp == NULL)
> -			igt_skip("read the sysfs failed: %s \n",sysfs);
> +			igt_skip("read the sysfs failed: %s\n", sysfs);
>  
>  		if (fgets(buffer, 128, fp) != NULL)
>  			sched_mask = strtol(buffer, NULL, 16);
> @@ -475,14 +481,14 @@ void bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd
>  
>  	/* recover the sched mask */
>  	if (sched_mask > 1) {
> -		snprintf(cmd, sizeof(cmd) - 1, "sudo echo  0x%lx > %s",sched_mask, sysfs);
> +		snprintf(cmd, sizeof(cmd) - 1, "sudo echo  0x%lx > %s", sched_mask, sysfs);
>  		r = system(cmd);
>  		igt_assert_eq(r, 0);
>  	}
>  
>  }
>  
> -void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type)
> +void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type, struct pci_addr *pci)
>  {
>  	int r;
>  	FILE *fp;
> @@ -498,13 +504,14 @@ void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t ha
>  	if (!info.available_rings)
>  		igt_info("SKIP ... as there's no ring for the sdma\n");
>  
> -	snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/0/amdgpu_sdma_sched_mask");
> +	snprintf(sysfs, sizeof(sysfs) - 1, "/sys/kernel/debug/dri/%04x:%02x:%02x.%01x/amdgpu_sdma_sched_mask",
> +			pci->domain, pci->bus, pci->device, pci->function);
>  	snprintf(cmd, sizeof(cmd) - 1, "sudo cat %s", sysfs);
>  	r = access(sysfs, R_OK);
>  	if (!r) {
>  		fp = popen(cmd, "r");
>  		if (fp == NULL)
> -			igt_skip("read the sysfs failed: %s \n",sysfs);
> +			igt_skip("read the sysfs failed: %s\n", sysfs);
>  
>  		if (fgets(buffer, 128, fp) != NULL)
>  			sched_mask = strtol(buffer, NULL, 16);
> @@ -530,7 +537,7 @@ void amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t ha
>  
>  	/* recover the sched mask */
>  	if (sched_mask > 1) {
> -		snprintf(cmd, sizeof(cmd) - 1, "sudo echo  0x%lx > %s",sched_mask, sysfs);
> +		snprintf(cmd, sizeof(cmd) - 1, "sudo echo  0x%lx > %s", sched_mask, sysfs);
>  		r = system(cmd);
>  		igt_assert_eq(r, 0);
>  	}
> diff --git a/lib/amdgpu/amd_deadlock_helpers.h b/lib/amdgpu/amd_deadlock_helpers.h
> index 7f8419280..1d654c490 100644
> --- a/lib/amdgpu/amd_deadlock_helpers.h
> +++ b/lib/amdgpu/amd_deadlock_helpers.h
> @@ -24,12 +24,14 @@
>  #ifndef __AMD_DEADLOCK_HELPERS_H__
>  #define __AMD_DEADLOCK_HELPERS_H__
>  
> +#include "amd_ip_blocks.h"
> +
>  void
> -amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type);
> +amdgpu_wait_memory_helper(amdgpu_device_handle device_handle, unsigned int ip_type, struct pci_addr *pci);
>  void
> -bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type);
> +bad_access_ring_helper(amdgpu_device_handle device_handle, unsigned int cmd_error, unsigned int ip_type, struct pci_addr *pci);
>  
>  void
> -amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type);
> +amdgpu_hang_sdma_ring_helper(amdgpu_device_handle device_handle, uint8_t hang_type, struct pci_addr *pci);
>  #endif
>  
> diff --git a/tests/amdgpu/amd_deadlock.c b/tests/amdgpu/amd_deadlock.c
> index b8bb053ca..29b7ae509 100644
> --- a/tests/amdgpu/amd_deadlock.c
> +++ b/tests/amdgpu/amd_deadlock.c
> @@ -40,6 +40,7 @@ igt_main
>  	int fd = -1;
>  	int r;
>  	bool arr_cap[AMD_IP_MAX] = {0};
> +	struct pci_addr pci;
>  
>  	igt_fixture {
>  		uint32_t major, minor;
> @@ -60,12 +61,15 @@ igt_main
>  		asic_rings_readness(device, 1, arr_cap);
>  		igt_skip_on(!is_deadlock_tests_enable(&gpu_info));
>  
> +		igt_skip_on(get_pci_addr_from_fd(fd, &pci));
> +		igt_info("PCI Address: domain %04x, bus %02x, device %02x, function %02x\n",
> +				pci.domain, pci.bus, pci.device, pci.function);
>  	}
>  	igt_describe("Test-GPU-reset-by-flooding-sdma-ring-with-jobs");
>  	igt_subtest_with_dynamic("amdgpu-deadlock-sdma") {
>  		if (arr_cap[AMD_IP_DMA]) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma")
> -			amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_DMA);
> +			amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_DMA, &pci);
>  		}
>  	}
>  
> @@ -75,7 +79,7 @@ igt_main
>  			is_reset_enable(AMD_IP_GFX, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-illegal-reg-access")
>  			bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_REG_ADDRESS,
> -					AMDGPU_HW_IP_GFX);
> +					AMDGPU_HW_IP_GFX, &pci);
>  		}
>  	}
>  
> @@ -85,7 +89,7 @@ igt_main
>  			is_reset_enable(AMD_IP_GFX, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-illegal-mem-access")
>  			bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_MEM_ADDRESS,
> -					AMDGPU_HW_IP_GFX);
> +					AMDGPU_HW_IP_GFX, &pci);
>  		}
>  	}
>  
> @@ -94,7 +98,7 @@ igt_main
>  	igt_subtest_with_dynamic("amdgpu-deadlock-gfx") {
>  		if (arr_cap[AMD_IP_GFX]) {
>  			igt_dynamic_f("amdgpu-deadlock-gfx")
> -			amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_GFX);
> +			amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_GFX, &pci);
>  		}
>  	}
>  
> @@ -103,7 +107,7 @@ igt_main
>  		if (arr_cap[AMD_IP_COMPUTE] &&
>  			 is_reset_enable(AMD_IP_COMPUTE, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  		bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_MEM_ADDRESS,
> -				AMDGPU_HW_IP_COMPUTE);
> +				AMDGPU_HW_IP_COMPUTE, &pci);
>  		}
>  	}
>  
> @@ -111,7 +115,7 @@ igt_main
>  	igt_subtest_with_dynamic("amdgpu-deadlock-compute") {
>  		if (arr_cap[AMD_IP_COMPUTE]) {
>  			igt_dynamic_f("amdgpu-deadlock-compute")
> -			amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_COMPUTE);
> +			amdgpu_wait_memory_helper(device, AMDGPU_HW_IP_COMPUTE, &pci);
>  		}
>  	}
>  
> @@ -120,7 +124,7 @@ igt_main
>  		if (arr_cap[AMD_IP_DMA] &&
>  			is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma-corrupted-header-test")
> -			amdgpu_hang_sdma_ring_helper(device, DMA_CORRUPTED_HEADER_HANG);
> +			amdgpu_hang_sdma_ring_helper(device, DMA_CORRUPTED_HEADER_HANG, &pci);
>  		}
>  	}
>  
> @@ -129,7 +133,7 @@ igt_main
>  		if (arr_cap[AMD_IP_DMA] &&
>  			is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma-slow-linear-copy")
> -			amdgpu_hang_sdma_ring_helper(device, DMA_SLOW_LINEARCOPY_HANG);
> +			amdgpu_hang_sdma_ring_helper(device, DMA_SLOW_LINEARCOPY_HANG, &pci);
>  		}
>  	}
>  
> @@ -139,7 +143,7 @@ igt_main
>  			is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma-badop-test")
>  			bad_access_ring_helper(device, CMD_STREAM_EXEC_INVALID_OPCODE,
> -					AMDGPU_HW_IP_DMA);
> +					AMDGPU_HW_IP_DMA, &pci);
>  		}
>  	}
>  
> @@ -149,7 +153,7 @@ igt_main
>  			is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma-bad-mem-test")
>  			bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_MEM_ADDRESS,
> -					AMDGPU_HW_IP_DMA);
> +					AMDGPU_HW_IP_DMA, &pci);
>  		}
>  	}
>  
> @@ -159,7 +163,7 @@ igt_main
>  			is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma-bad-reg-test")
>  			bad_access_ring_helper(device, CMD_STREAM_TRANS_BAD_REG_ADDRESS,
> -					AMDGPU_HW_IP_DMA);
> +					AMDGPU_HW_IP_DMA, &pci);
>  		}
>  	}
>  
> @@ -169,7 +173,7 @@ igt_main
>  			is_reset_enable(AMD_IP_DMA, AMDGPU_RESET_TYPE_PER_QUEUE)) {
>  			igt_dynamic_f("amdgpu-deadlock-sdma-bad-length-test")
>  			bad_access_ring_helper(device, CMD_STREAM_EXEC_INVALID_PACKET_LENGTH,
> -					AMDGPU_HW_IP_DMA);
> +					AMDGPU_HW_IP_DMA, &pci);
>  		}
>  	}
>  


More information about the igt-dev mailing list