[PATCH] drm/amdgpu: Increase IH soft ring size
Felix Kuehling
felix.kuehling at amd.com
Fri Jul 7 15:06:36 UTC 2023
Am 2023-07-07 um 10:14 schrieb Philip Yang:
> Retry faults are delegated to IH soft ring and then processed by
> deferred worker. Current IH soft ring size PAGE_SIZE can store 128
> entries, which may overflow and drop retry faults, causes HW stucks
> because the retry fault is not recovered.
>
> Increase IH soft ring size to the same size as IH ring, define macro
> IH_RING_SIZE to remove duplicate constant.
As discussed offline, dropping retry fault interrupts is only a problem
when the CAM is enabled. You only need as many entries in the soft IH
ring as there are entries in the CAM.
Regards,
Felix
>
> Show warning message if IH soft ring overflows because this should not
> happen any more.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 8 ++++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 4 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 5 +++--
> drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 5 +++--
> drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 5 +++--
> drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 5 +++--
> 7 files changed, 21 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> index fceb3b384955..51a0dbd2358a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> @@ -138,6 +138,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
> /**
> * amdgpu_ih_ring_write - write IV to the ring buffer
> *
> + * @adev: amdgpu_device pointer
> * @ih: ih ring to write to
> * @iv: the iv to write
> * @num_dw: size of the iv in dw
> @@ -145,8 +146,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
> * Writes an IV to the ring buffer using the CPU and increment the wptr.
> * Used for testing and delegating IVs to a software ring.
> */
> -void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
> - unsigned int num_dw)
> +void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
> + const uint32_t *iv, unsigned int num_dw)
> {
> uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
> unsigned int i;
> @@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
> if (wptr != READ_ONCE(ih->rptr)) {
> wmb();
> WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
> + } else {
> + dev_warn(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
> + wptr, ih->rptr);
> }
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> index dd1c2eded6b9..a8cf67f1f011 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> @@ -97,8 +97,8 @@ struct amdgpu_ih_funcs {
> int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
> unsigned ring_size, bool use_bus_addr);
> void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
> -void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
> - unsigned int num_dw);
> +void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
> + const uint32_t *iv, unsigned int num_dw);
> int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
> struct amdgpu_ih_ring *ih);
> int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> index 5273decc5753..fa6d0adcec20 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> @@ -493,7 +493,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev,
> struct amdgpu_iv_entry *entry,
> unsigned int num_dw)
> {
> - amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw);
> + amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw);
> schedule_work(&adev->irq.ih_soft_work);
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> index b02e1cef78a7..21d2e57cffe7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> @@ -32,6 +32,7 @@
> #include "soc15_common.h"
> #include "ih_v6_0.h"
>
> +#define IH_RING_SIZE (256 * 1024)
> #define MAX_REARM_RETRY 10
>
> static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev);
> @@ -535,7 +536,7 @@ static int ih_v6_0_sw_init(void *handle)
> * use bus address for ih ring by psp bl */
> use_bus_addr =
> (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true;
> - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
> + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
> if (r)
> return r;
>
> @@ -548,7 +549,7 @@ static int ih_v6_0_sw_init(void *handle)
> /* initialize ih control register offset */
> ih_v6_0_init_register_offset(adev);
>
> - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
> + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE, true);
> if (r)
> return r;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> index eec13cb5bf75..df33db6fd07b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> @@ -32,6 +32,7 @@
> #include "soc15_common.h"
> #include "navi10_ih.h"
>
> +#define IH_RING_SIZE (256 * 1024)
> #define MAX_REARM_RETRY 10
>
> #define mmIH_CHICKEN_Sienna_Cichlid 0x018d
> @@ -565,7 +566,7 @@ static int navi10_ih_sw_init(void *handle)
> use_bus_addr = false;
> else
> use_bus_addr = true;
> - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
> + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
> if (r)
> return r;
>
> @@ -578,7 +579,7 @@ static int navi10_ih_sw_init(void *handle)
> /* initialize ih control registers offset */
> navi10_ih_init_register_offset(adev);
>
> - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
> + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE, true);
> if (r)
> return r;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> index 1e83db0c5438..c9b37983a18d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> @@ -33,6 +33,7 @@
> #include "soc15_common.h"
> #include "vega10_ih.h"
>
> +#define IH_RING_SIZE (256 * 1024)
> #define MAX_REARM_RETRY 10
>
> static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev);
> @@ -485,7 +486,7 @@ static int vega10_ih_sw_init(void *handle)
> if (r)
> return r;
>
> - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true);
> + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, true);
> if (r)
> return r;
>
> @@ -510,7 +511,7 @@ static int vega10_ih_sw_init(void *handle)
> /* initialize ih control registers offset */
> vega10_ih_init_register_offset(adev);
>
> - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
> + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE, true);
> if (r)
> return r;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> index 4d719df376a7..06d4176e4c68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> @@ -33,6 +33,7 @@
> #include "soc15_common.h"
> #include "vega20_ih.h"
>
> +#define IH_RING_SIZE (256 * 1024)
> #define MAX_REARM_RETRY 10
>
> #define mmIH_CHICKEN_ALDEBARAN 0x18d
> @@ -539,7 +540,7 @@ static int vega20_ih_sw_init(void *handle)
> (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2)))
> use_bus_addr = false;
>
> - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr);
> + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr);
> if (r)
> return r;
>
> @@ -565,7 +566,7 @@ static int vega20_ih_sw_init(void *handle)
> /* initialize ih control registers offset */
> vega20_ih_init_register_offset(adev);
>
> - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, use_bus_addr);
> + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE, use_bus_addr);
> if (r)
> return r;
>
More information about the amd-gfx
mailing list