[PATCH] drm/amdgpu: Increase IH soft ring size

Felix Kuehling felix.kuehling at amd.com
Fri Jul 7 15:06:36 UTC 2023


Am 2023-07-07 um 10:14 schrieb Philip Yang:
> Retry faults are delegated to IH soft ring and then processed by
> deferred worker. Current IH soft ring size PAGE_SIZE can store 128
> entries, which may overflow and drop retry faults, causes HW stucks
> because the retry fault is not recovered.
>
> Increase IH soft ring size to the same size as IH ring, define macro
> IH_RING_SIZE to remove duplicate constant.

As discussed offline, dropping retry fault interrupts is only a problem 
when the CAM is enabled. You only need as many entries in the soft IH 
ring as there are entries in the CAM.

Regards,
   Felix


>
> Show warning message if IH soft ring overflows because this should not
> happen any more.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c  | 8 ++++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h  | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +-
>   drivers/gpu/drm/amd/amdgpu/ih_v6_0.c    | 5 +++--
>   drivers/gpu/drm/amd/amdgpu/navi10_ih.c  | 5 +++--
>   drivers/gpu/drm/amd/amdgpu/vega10_ih.c  | 5 +++--
>   drivers/gpu/drm/amd/amdgpu/vega20_ih.c  | 5 +++--
>   7 files changed, 21 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> index fceb3b384955..51a0dbd2358a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> @@ -138,6 +138,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
>   /**
>    * amdgpu_ih_ring_write - write IV to the ring buffer
>    *
> + * @adev: amdgpu_device pointer
>    * @ih: ih ring to write to
>    * @iv: the iv to write
>    * @num_dw: size of the iv in dw
> @@ -145,8 +146,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
>    * Writes an IV to the ring buffer using the CPU and increment the wptr.
>    * Used for testing and delegating IVs to a software ring.
>    */
> -void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
> -			  unsigned int num_dw)
> +void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
> +			  const uint32_t *iv, unsigned int num_dw)
>   {
>   	uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
>   	unsigned int i;
> @@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
>   	if (wptr != READ_ONCE(ih->rptr)) {
>   		wmb();
>   		WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
> +	} else {
> +		dev_warn(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
> +			 wptr, ih->rptr);
>   	}
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> index dd1c2eded6b9..a8cf67f1f011 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> @@ -97,8 +97,8 @@ struct amdgpu_ih_funcs {
>   int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
>   			unsigned ring_size, bool use_bus_addr);
>   void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
> -void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
> -			  unsigned int num_dw);
> +void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
> +			  const uint32_t *iv, unsigned int num_dw);
>   int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
>   					    struct amdgpu_ih_ring *ih);
>   int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> index 5273decc5753..fa6d0adcec20 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> @@ -493,7 +493,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
>   			 struct amdgpu_iv_entry *entry,
>   			 unsigned int num_dw)
>   {
> -	amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw);
> +	amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw);
>   	schedule_work(&adev->irq.ih_soft_work);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> index b02e1cef78a7..21d2e57cffe7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> @@ -32,6 +32,7 @@
>   #include "soc15_common.h"
>   #include "ih_v6_0.h"
>   
> +#define IH_RING_SIZE	(256 * 1024)
>   #define MAX_REARM_RETRY 10
>   
>   static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev);
> @@ -535,7 +536,7 @@ static int ih_v6_0_sw_init(void *handle)
>   	 * use bus address for ih ring by psp bl */
>   	use_bus_addr =
>   		(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
> -	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
> +	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
>   	if (r)
>   		return r;
>   
> @@ -548,7 +549,7 @@ static int ih_v6_0_sw_init(void *handle)
>   	/* initialize ih control register offset */
>   	ih_v6_0_init_register_offset(adev);
>   
> -	r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
> +	r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE, true);
>   	if (r)
>   		return r;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> index eec13cb5bf75..df33db6fd07b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> @@ -32,6 +32,7 @@
>   #include "soc15_common.h"
>   #include "navi10_ih.h"
>   
> +#define IH_RING_SIZE	(256 * 1024)
>   #define MAX_REARM_RETRY 10
>   
>   #define mmIH_CHICKEN_Sienna_Cichlid                 0x018d
> @@ -565,7 +566,7 @@ static int navi10_ih_sw_init(void *handle)
>   		use_bus_addr = false;
>   	else
>   		use_bus_addr = true;
> -	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
> +	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
>   	if (r)
>   		return r;
>   
> @@ -578,7 +579,7 @@ static int navi10_ih_sw_init(void *handle)
>   	/* initialize ih control registers offset */
>   	navi10_ih_init_register_offset(adev);
>   
> -	r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
> +	r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE, true);
>   	if (r)
>   		return r;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> index 1e83db0c5438..c9b37983a18d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> @@ -33,6 +33,7 @@
>   #include "soc15_common.h"
>   #include "vega10_ih.h"
>   
> +#define IH_RING_SIZE	(256 * 1024)
>   #define MAX_REARM_RETRY 10
>   
>   static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev);
> @@ -485,7 +486,7 @@ static int vega10_ih_sw_init(void *handle)
>   	if (r)
>   		return r;
>   
> -	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true);
> +	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, true);
>   	if (r)
>   		return r;
>   
> @@ -510,7 +511,7 @@ static int vega10_ih_sw_init(void *handle)
>   	/* initialize ih control registers offset */
>   	vega10_ih_init_register_offset(adev);
>   
> -	r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
> +	r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE, true);
>   	if (r)
>   		return r;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> index 4d719df376a7..06d4176e4c68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> @@ -33,6 +33,7 @@
>   #include "soc15_common.h"
>   #include "vega20_ih.h"
>   
> +#define IH_RING_SIZE	(256 * 1024)
>   #define MAX_REARM_RETRY 10
>   
>   #define mmIH_CHICKEN_ALDEBARAN			0x18d
> @@ -539,7 +540,7 @@ static int vega20_ih_sw_init(void *handle)
>   	    (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2)))
>   		use_bus_addr = false;
>   
> -	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
> +	r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
>   	if (r)
>   		return r;
>   
> @@ -565,7 +566,7 @@ static int vega20_ih_sw_init(void *handle)
>   	/* initialize ih control registers offset */
>   	vega20_ih_init_register_offset(adev);
>   
> -	r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, use_bus_addr);
> +	r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE, use_bus_addr);
>   	if (r)
>   		return r;
>   


More information about the amd-gfx mailing list