[PATCH 06/14] accel/ivpu: Dump only first MMU fault from single context
Jacek Lawrynowicz
jacek.lawrynowicz at linux.intel.com
Thu Jan 9 08:26:57 UTC 2025
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz at linux.intel.com>
On 1/7/2025 6:32 PM, Maciej Falkowski wrote:
> From: Karol Wachowski <karol.wachowski at intel.com>
>
> Immediately stop dumping consecutive faults from an already faulty context,
> instead of waiting for the context abort thread handler (IRQ handler bottom
> half) to abort currently executing jobs.
>
> Clear the 'R' (record events) bit in the context descriptor of a faulty
> context to prevent further fault generation.
>
> This change speeds up the IRQ handler by eliminating the need to print the
> fault content repeatedly. Additionally, it prevents flooding dmesg with
> errors, which was occurring due to the delay in the bottom half of the
> handler stopping fault-generating jobs.
>
> Signed-off-by: Karol Wachowski <karol.wachowski at intel.com>
> Signed-off-by: Maciej Falkowski <maciej.falkowski at linux.intel.com>
> ---
> drivers/accel/ivpu/ivpu_mmu.c | 51 ++++++++++++++++++++++++---
> drivers/accel/ivpu/ivpu_mmu_context.c | 13 -------
> drivers/accel/ivpu/ivpu_mmu_context.h | 2 --
> 3 files changed, 46 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
> index 21f820dd0c65..5ee4df892b3e 100644
> --- a/drivers/accel/ivpu/ivpu_mmu.c
> +++ b/drivers/accel/ivpu/ivpu_mmu.c
> @@ -870,23 +870,64 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
> return evt;
> }
>
> +static int ivpu_mmu_disable_events(struct ivpu_device *vdev, u32 ssid)
> +{
> + struct ivpu_mmu_info *mmu = vdev->mmu;
> + struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab;
> + u64 *entry;
> + u64 val;
> +
> + if (ssid > IVPU_MMU_CDTAB_ENT_COUNT)
> + return -EINVAL;
> +
> + entry = cdtab->base + (ssid * IVPU_MMU_CDTAB_ENT_SIZE);
> +
> + val = READ_ONCE(entry[0]);
> + val &= ~IVPU_MMU_CD_0_R;
> + WRITE_ONCE(entry[0], val);
> +
> + if (!ivpu_is_force_snoop_enabled(vdev))
> + clflush_cache_range(entry, IVPU_MMU_CDTAB_ENT_SIZE);
> +
> + ivpu_mmu_cmdq_write_cfgi_all(vdev);
> +
> + return 0;
> +}
> +
> void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
> {
> + struct ivpu_file_priv *file_priv;
> + u32 last_ssid = -1;
> u32 *event;
> u32 ssid;
>
> ivpu_dbg(vdev, IRQ, "MMU event queue\n");
>
> - while ((event = ivpu_mmu_get_event(vdev)) != NULL) {
> - ivpu_mmu_dump_event(vdev, event);
> -
> + while ((event = ivpu_mmu_get_event(vdev))) {
> ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]);
> +
> + if (ssid == last_ssid)
> + continue;
> +
> + xa_lock(&vdev->context_xa);
> + file_priv = xa_load(&vdev->context_xa, ssid);
> + if (file_priv) {
> + if (file_priv->has_mmu_faults) {
> + event = NULL;
> + } else {
> + ivpu_mmu_disable_events(vdev, ssid);
> + file_priv->has_mmu_faults = true;
> + }
> + }
> + xa_unlock(&vdev->context_xa);
> +
> + if (event)
> + ivpu_mmu_dump_event(vdev, event);
> +
> if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) {
> ivpu_pm_trigger_recovery(vdev, "MMU event");
> return;
> }
> -
> - ivpu_mmu_user_context_mark_invalid(vdev, ssid);
> REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
> }
>
> diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
> index 891967a95bc3..d373443bbc83 100644
> --- a/drivers/accel/ivpu/ivpu_mmu_context.c
> +++ b/drivers/accel/ivpu/ivpu_mmu_context.c
> @@ -631,16 +631,3 @@ void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev)
> ivpu_mmu_cd_clear(vdev, vdev->rctx.id);
> ivpu_mmu_context_fini(vdev, &vdev->rctx);
> }
> -
> -void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
> -{
> - struct ivpu_file_priv *file_priv;
> -
> - xa_lock(&vdev->context_xa);
> -
> - file_priv = xa_load(&vdev->context_xa, ssid);
> - if (file_priv)
> - file_priv->has_mmu_faults = true;
> -
> - xa_unlock(&vdev->context_xa);
> -}
> diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h
> index 8042fc067062..f255310968cf 100644
> --- a/drivers/accel/ivpu/ivpu_mmu_context.h
> +++ b/drivers/accel/ivpu/ivpu_mmu_context.h
> @@ -37,8 +37,6 @@ void ivpu_mmu_global_context_fini(struct ivpu_device *vdev);
> int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev);
> void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev);
>
> -void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid);
> -
> int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
> u64 size, struct drm_mm_node *node);
> void ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node);
More information about the dri-devel
mailing list