[PATCHv2] drm/amdgpu: Enable IH retry CAM on GFX9
Felix Kuehling
felix.kuehling at amd.com
Fri Jan 20 21:11:53 UTC 2023
Am 2023-01-19 um 16:59 schrieb Mukul Joshi:
> This patch enables the IH retry CAM on GFX9 series cards. This
> retry filter is used to prevent sending lots of retry interrupts
> in a short span of time and overflowing the IH ring buffer. This
> will also help reduce CPU interrupt workload.
>
> Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> v1:
> - Reviewed by Felix.
>
> v1->v2:
> - Update the CAM enable register offset for Aldebaran.
> - Add new register defines for Aldebaran in vega20_ih.c.
> - Also drain the IH0 ring along with the soft ring.
> - Setup CAM doorbell register before enabling CAM.
>
> drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 2 +
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 51 +++++++++++------
> drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 55 +++++++++----------
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 +++-
> .../asic_reg/oss/osssys_4_2_0_offset.h | 6 ++
> .../asic_reg/oss/osssys_4_2_0_sh_mask.h | 11 ++++
> 7 files changed, 88 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
> index e9f2c11ea416..be243adf3e65 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
> @@ -98,6 +98,8 @@ struct amdgpu_irq {
> struct irq_domain *domain; /* GPU irq controller domain */
> unsigned virq[AMDGPU_MAX_IRQ_SRC_ID];
> uint32_t srbm_soft_reset;
> + u32 retry_cam_doorbell_index;
> + bool retry_cam_enabled;
> };
>
> void amdgpu_irq_disable_all(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index d65c6cea3445..4847117d67a1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -553,32 +553,49 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
> const char *mmhub_cid;
> const char *hub_name;
> u64 addr;
> + uint32_t cam_index = 0;
> + int ret;
>
> addr = (u64)entry->src_data[0] << 12;
> addr |= ((u64)entry->src_data[1] & 0xf) << 44;
>
> if (retry_fault) {
> - /* Returning 1 here also prevents sending the IV to the KFD */
> + if (adev->irq.retry_cam_enabled) {
> + /* Delegate it to a different ring if the hardware hasn't
> + * already done it.
> + */
> + if (entry->ih == &adev->irq.ih) {
> + amdgpu_irq_delegate(adev, entry, 8);
> + return 1;
> + }
> +
> + cam_index = entry->src_data[2] & 0x3ff;
>
> - /* Process it onyl if it's the first fault for this address */
> - if (entry->ih != &adev->irq.ih_soft &&
> - amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
> + ret = amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault);
> + WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
> + if (ret)
> + return 1;
> + } else {
> + /* Process it only if it's the first fault for this address */
> + if (entry->ih != &adev->irq.ih_soft &&
> + amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
> entry->timestamp))
> - return 1;
> + return 1;
>
> - /* Delegate it to a different ring if the hardware hasn't
> - * already done it.
> - */
> - if (entry->ih == &adev->irq.ih) {
> - amdgpu_irq_delegate(adev, entry, 8);
> - return 1;
> - }
> + /* Delegate it to a different ring if the hardware hasn't
> + * already done it.
> + */
> + if (entry->ih == &adev->irq.ih) {
> + amdgpu_irq_delegate(adev, entry, 8);
> + return 1;
> + }
>
> - /* Try to handle the recoverable page faults by filling page
> - * tables
> - */
> - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault))
> - return 1;
> + /* Try to handle the recoverable page faults by filling page
> + * tables
> + */
> + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault))
> + return 1;
> + }
> }
>
> if (!printk_ratelimit())
> diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> index 19455a725939..685abf57ffdd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> @@ -238,7 +238,7 @@ static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev,
>
> if (use_doorbell) {
> ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index);
> - ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 4);
> + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 8);
> } else
> ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> index 1706081d054d..6a8fb1fb48a3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
> @@ -38,6 +38,11 @@
> #define mmIH_CHICKEN_ALDEBARAN 0x18d
> #define mmIH_CHICKEN_ALDEBARAN_BASE_IDX 0
>
> +#define mmIH_RETRY_INT_CAM_CNTL_ALDEBARAN 0x00ea
> +#define mmIH_RETRY_INT_CAM_CNTL_ALDEBARAN_BASE_IDX 0
> +#define IH_RETRY_INT_CAM_CNTL_ALDEBARAN__ENABLE__SHIFT 0x10
> +#define IH_RETRY_INT_CAM_CNTL_ALDEBARAN__ENABLE_MASK 0x00010000L
> +
> static void vega20_ih_set_interrupt_funcs(struct amdgpu_device *adev);
>
> /**
> @@ -251,36 +256,14 @@ static int vega20_ih_enable_ring(struct amdgpu_device *adev,
> return 0;
> }
>
> -/**
> - * vega20_ih_reroute_ih - reroute VMC/UTCL2 ih to an ih ring
> - *
> - * @adev: amdgpu_device pointer
> - *
> - * Reroute VMC and UMC interrupts on primary ih ring to
> - * ih ring 1 so they won't lose when bunches of page faults
> - * interrupts overwhelms the interrupt handler(VEGA20)
> - */
> -static void vega20_ih_reroute_ih(struct amdgpu_device *adev)
> +static uint32_t vega20_setup_retry_doorbell(u32 doorbell_index)
> {
> - uint32_t tmp;
> + u32 val = 0;
>
> - /* vega20 ih reroute will go through psp this
> - * function is used for newer asics starting arcturus
> - */
> - if (adev->ip_versions[OSSSYS_HWIP][0] >= IP_VERSION(4, 2, 1)) {
> - /* Reroute to IH ring 1 for VMC */
> - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x12);
> - tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA);
> - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1);
> - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
> - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp);
> -
> - /* Reroute IH ring 1 for UTCL2 */
> - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x1B);
> - tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA);
> - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1);
> - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp);
> - }
> + val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, OFFSET, doorbell_index);
> + val = REG_SET_FIELD(val, IH_DOORBELL_RPTR, ENABLE, 1);
> +
> + return val;
> }
>
> /**
> @@ -332,8 +315,6 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
>
> for (i = 0; i < ARRAY_SIZE(ih); i++) {
> if (ih[i]->ring_size) {
> - if (i == 1)
> - vega20_ih_reroute_ih(adev);
> ret = vega20_ih_enable_ring(adev, ih[i]);
> if (ret)
> return ret;
> @@ -346,6 +327,20 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
>
> pci_set_master(adev->pdev);
>
> + /* Allocate the doorbell for IH Retry CAM */
> + adev->irq.retry_cam_doorbell_index = (adev->doorbell_index.ih + 3) << 1;
> + WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RETRY_CAM,
> + vega20_setup_retry_doorbell(adev->irq.retry_cam_doorbell_index));
> +
> + /* Enable IH Retry CAM */
> + if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0))
> + WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN,
> + ENABLE, 1);
> + else
> + WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL, ENABLE, 1);
> +
> + adev->irq.retry_cam_enabled = true;
> +
> /* enable interrupts */
> ret = vega20_ih_toggle_interrupts(adev, true);
> if (ret)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 859159093ffd..5c67f177dc6c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -2171,7 +2171,15 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
> pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
>
> amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
> - &pdd->dev->adev->irq.ih1);
> + pdd->dev->adev->irq.retry_cam_enabled ?
> + &pdd->dev->adev->irq.ih :
> + &pdd->dev->adev->irq.ih1);
> +
> + if (pdd->dev->adev->irq.retry_cam_enabled)
> + amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
> + &pdd->dev->adev->irq.ih_soft);
> +
> +
> pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
> }
> if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
> diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h
> index bd129266ebfd..a84a7cfaf71e 100644
> --- a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h
> +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h
> @@ -135,6 +135,8 @@
> #define mmIH_RB_WPTR_ADDR_LO_BASE_IDX 0
> #define mmIH_DOORBELL_RPTR 0x0087
> #define mmIH_DOORBELL_RPTR_BASE_IDX 0
> +#define mmIH_DOORBELL_RETRY_CAM 0x0088
> +#define mmIH_DOORBELL_RETRY_CAM_BASE_IDX 0
> #define mmIH_RB_CNTL_RING1 0x008c
> #define mmIH_RB_CNTL_RING1_BASE_IDX 0
> #define mmIH_RB_BASE_RING1 0x008d
> @@ -159,6 +161,8 @@
> #define mmIH_RB_WPTR_RING2_BASE_IDX 0
> #define mmIH_DOORBELL_RPTR_RING2 0x009f
> #define mmIH_DOORBELL_RPTR_RING2_BASE_IDX 0
> +#define mmIH_RETRY_CAM_ACK 0x00a4
> +#define mmIH_RETRY_CAM_ACK_BASE_IDX 0
> #define mmIH_VERSION 0x00a5
> #define mmIH_VERSION_BASE_IDX 0
> #define mmIH_CNTL 0x00c0
> @@ -235,6 +239,8 @@
> #define mmIH_MMHUB_ERROR_BASE_IDX 0
> #define mmIH_MEM_POWER_CTRL 0x00e8
> #define mmIH_MEM_POWER_CTRL_BASE_IDX 0
> +#define mmIH_RETRY_INT_CAM_CNTL 0x00e9
> +#define mmIH_RETRY_INT_CAM_CNTL_BASE_IDX 0
> #define mmIH_REGISTER_LAST_PART2 0x00ff
> #define mmIH_REGISTER_LAST_PART2_BASE_IDX 0
> #define mmSEM_CLK_CTRL 0x0100
> diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h
> index 3ea83ea9ce3a..75c04fc275a0 100644
> --- a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h
> +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h
> @@ -349,6 +349,17 @@
> #define IH_DOORBELL_RPTR_RING2__ENABLE__SHIFT 0x1c
> #define IH_DOORBELL_RPTR_RING2__OFFSET_MASK 0x03FFFFFFL
> #define IH_DOORBELL_RPTR_RING2__ENABLE_MASK 0x10000000L
> +//IH_RETRY_INT_CAM_CNTL
> +#define IH_RETRY_INT_CAM_CNTL__CAM_SIZE__SHIFT 0x0
> +#define IH_RETRY_INT_CAM_CNTL__BACK_PRESSURE_SKID_VALUE__SHIFT 0x8
> +#define IH_RETRY_INT_CAM_CNTL__ENABLE__SHIFT 0x10
> +#define IH_RETRY_INT_CAM_CNTL__BACK_PRESSURE_ENABLE__SHIFT 0x11
> +#define IH_RETRY_INT_CAM_CNTL__PER_VF_ENTRY_SIZE__SHIFT 0x14
> +#define IH_RETRY_INT_CAM_CNTL__CAM_SIZE_MASK 0x0000001FL
> +#define IH_RETRY_INT_CAM_CNTL__BACK_PRESSURE_SKID_VALUE_MASK 0x00003F00L
> +#define IH_RETRY_INT_CAM_CNTL__ENABLE_MASK 0x00010000L
> +#define IH_RETRY_INT_CAM_CNTL__BACK_PRESSURE_ENABLE_MASK 0x00020000L
> +#define IH_RETRY_INT_CAM_CNTL__PER_VF_ENTRY_SIZE_MASK 0x00300000L
> //IH_VERSION
> #define IH_VERSION__MINVER__SHIFT 0x0
> #define IH_VERSION__MAJVER__SHIFT 0x8
More information about the amd-gfx mailing list