[PATCH] drm/amdgpu: Increase IH soft ring size

Joshi, Mukul Mukul.Joshi at amd.com
Fri Jul 7 14:49:13 UTC 2023


[AMD Official Use Only - General]

> -----Original Message-----
> From: Yang, Philip <Philip.Yang at amd.com>
> Sent: Friday, July 7, 2023 10:15 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Joshi, Mukul
> <Mukul.Joshi at amd.com>; Yang, Philip <Philip.Yang at amd.com>
> Subject: [PATCH] drm/amdgpu: Increase IH soft ring size
>
> Retry faults are delegated to IH soft ring and then processed by deferred
> worker. Current IH soft ring size PAGE_SIZE can store 128 entries, which may
> overflow and drop retry faults, causes HW stucks because the retry fault is not
> recovered.
>
> Increase IH soft ring size to the same size as IH ring, define macro
> IH_RING_SIZE to remove duplicate constant.
>
> Show warning message if IH soft ring overflows because this should not
> happen any more.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c  | 8 ++++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h  | 4 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +-
>  drivers/gpu/drm/amd/amdgpu/ih_v6_0.c    | 5 +++--
>  drivers/gpu/drm/amd/amdgpu/navi10_ih.c  | 5 +++--
> drivers/gpu/drm/amd/amdgpu/vega10_ih.c  | 5 +++--
> drivers/gpu/drm/amd/amdgpu/vega20_ih.c  | 5 +++--
>  7 files changed, 21 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> index fceb3b384955..51a0dbd2358a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> @@ -138,6 +138,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device
> *adev, struct amdgpu_ih_ring *ih)
>  /**
>   * amdgpu_ih_ring_write - write IV to the ring buffer
>   *
> + * @adev: amdgpu_device pointer
>   * @ih: ih ring to write to
>   * @iv: the iv to write
>   * @num_dw: size of the iv in dw
> @@ -145,8 +146,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device
> *adev, struct amdgpu_ih_ring *ih)
>   * Writes an IV to the ring buffer using the CPU and increment the wptr.
>   * Used for testing and delegating IVs to a software ring.
>   */
> -void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
> -                       unsigned int num_dw)
> +void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct
> amdgpu_ih_ring *ih,
> +                       const uint32_t *iv, unsigned int num_dw)
>  {
>       uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
>       unsigned int i;
> @@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring
> *ih, const uint32_t *iv,
>       if (wptr != READ_ONCE(ih->rptr)) {
>               wmb();
>               WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
> +     } else {
> +             dev_warn(adev->dev, "IH soft ring buffer overflow 0x%X,
> 0x%X\n",
> +                      wptr, ih->rptr);
>       }
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> index dd1c2eded6b9..a8cf67f1f011 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> @@ -97,8 +97,8 @@ struct amdgpu_ih_funcs {  int
> amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
>                       unsigned ring_size, bool use_bus_addr);  void
> amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring
> *ih); -void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t
> *iv,
> -                       unsigned int num_dw);
> +void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct
> amdgpu_ih_ring *ih,
> +                       const uint32_t *iv, unsigned int num_dw);
>  int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
>                                           struct amdgpu_ih_ring *ih);
>  int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring
> *ih); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> index 5273decc5753..fa6d0adcec20 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> @@ -493,7 +493,7 @@ void amdgpu_irq_delegate(struct amdgpu_device
> *adev,
>                        struct amdgpu_iv_entry *entry,
>                        unsigned int num_dw)
>  {
> -     amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw);
> +     amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry,
> +num_dw);
>       schedule_work(&adev->irq.ih_soft_work);
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> index b02e1cef78a7..21d2e57cffe7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
> @@ -32,6 +32,7 @@
>  #include "soc15_common.h"
>  #include "ih_v6_0.h"
>
> +#define IH_RING_SIZE (256 * 1024)

I would recommend moving IH_RING_SIZE to amdgpu_ih.h instead of duplicating in the .c files.
The rest looks good to me.

Regards,
Mukul

>  #define MAX_REARM_RETRY 10
>
>  static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev); @@ -
> 535,7 +536,7 @@ static int ih_v6_0_sw_init(void *handle)
>        * use bus address for ih ring by psp bl */
>       use_bus_addr =
>               (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ?
> false : true;
> -     r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024,
> use_bus_addr);
> +     r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE,
> +use_bus_addr);
>       if (r)
>               return r;
>
> @@ -548,7 +549,7 @@ static int ih_v6_0_sw_init(void *handle)
>       /* initialize ih control register offset */
>       ih_v6_0_init_register_offset(adev);
>
> -     r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
> +     r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE,
> true);
>       if (r)
>               return r;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> index eec13cb5bf75..df33db6fd07b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
> @@ -32,6 +32,7 @@
>  #include "soc15_common.h"
>  #include "navi10_ih.h"
>
> +#define IH_RING_SIZE (256 * 1024)
>  #define MAX_REARM_RETRY 10
>
>  #define mmIH_CHICKEN_Sienna_Cichlid                 0x018d
> @@ -565,7 +566,7 @@ static int navi10_ih_sw_init(void *handle)
>               use_bus_addr = false;
>       else
>               use_bus_addr = true;
> -     r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024,
> use_bus_addr);
> +     r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE,
> +use_bus_addr);
>       if (r)
>               return r;
>
> @@ -578,7 +579,7 @@ static int navi10_ih_sw_init(void *handle)
>       /* initialize ih control registers offset */
>       navi10_ih_init_register_offset(adev);
>
> -     r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
> +     r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE,
> true);
>       if (r)
>               return r;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> index 1e83db0c5438..c9b37983a18d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
> @@ -33,6 +33,7 @@
>  #include "soc15_common.h"
>  #include "vega10_ih.h"
>
> +#define IH_RING_SIZE (256 * 1024)
>  #define MAX_REARM_RETRY 10
>
>  static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev); @@
> -485,7 +486,7 @@ static int vega10_ih_sw_init(void *handle)
>       if (r)
>               return r;
>
> -     r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true);
> +     r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, true);
>       if (r)
>               return r;
>
> @@ -510,7 +511,7 @@ static int vega10_ih_sw_init(void *handle)
>       /* initialize ih control registers offset */
>       vega10_ih_init_register_offset(adev);
>
> -     r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true);
> +     r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE,
> true);
>       if (r)
>               return r;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> index 4d719df376a7..06d4176e4c68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> @@ -33,6 +33,7 @@
>  #include "soc15_common.h"
>  #include "vega20_ih.h"
>
> +#define IH_RING_SIZE (256 * 1024)
>  #define MAX_REARM_RETRY 10
>
>  #define mmIH_CHICKEN_ALDEBARAN                       0x18d
> @@ -539,7 +540,7 @@ static int vega20_ih_sw_init(void *handle)
>           (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2)))
>               use_bus_addr = false;
>
> -     r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024,
> use_bus_addr);
> +     r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE,
> +use_bus_addr);
>       if (r)
>               return r;
>
> @@ -565,7 +566,7 @@ static int vega20_ih_sw_init(void *handle)
>       /* initialize ih control registers offset */
>       vega20_ih_init_register_offset(adev);
>
> -     r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE,
> use_bus_addr);
> +     r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_RING_SIZE,
> +use_bus_addr);
>       if (r)
>               return r;
>
> --
> 2.35.1



More information about the amd-gfx mailing list