[PATCH 05/10] accel/ivpu: Fix potential infinite loops in IRQ handlers
Jeffrey Hugo
quic_jhugo at quicinc.com
Fri Jan 5 16:35:11 UTC 2024
On 1/5/2024 4:22 AM, Jacek Lawrynowicz wrote:
> Limit number of iterations in ivpu_mmu_irq_evtq_handler() and
> ivpu_ipc_irq_handler().
"potential infinite loops" sounds like something that has not been
observed. Has a problem actually occurred?
Are you concerned that the FW is broken and spamming Linux with events?
Why a limit of 100 events? Seems arbitrary.
I suspect threaded irqs might be useful here (rough sketch below), but
it is hard to tell without knowing the actual failure mode.
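
For reference, the kind of thing I mean (untested, generic sketch of
the IRQF_ONESHOT pattern; ivpu_hw_irq(), ivpu_irq_thread() and the
drain helper are made-up names, not the driver's actual API):

#include <linux/interrupt.h>

static irqreturn_t ivpu_hw_irq(int irq, void *ptr)
{
	/* Hard-IRQ context: ack/mask the device only, no draining. */
	return IRQ_WAKE_THREAD;
}

static irqreturn_t ivpu_irq_thread(int irq, void *ptr)
{
	struct ivpu_device *vdev = ptr;

	/*
	 * Preemptible kthread context: a FW spamming the IPC FIFO or
	 * the MMU event queue can no longer starve hard-IRQ context,
	 * so the drain loop doesn't need a magic iteration cap.
	 */
	ivpu_drain_irq_sources(vdev);	/* placeholder for the drain */
	return IRQ_HANDLED;
}

/* In the probe path. IRQF_ONESHOT keeps the line masked until the
 * thread returns. */
ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, ivpu_hw_irq,
				ivpu_irq_thread, IRQF_ONESHOT,
				DRIVER_NAME, vdev);

That keeps the hard handler bounded by construction instead of by a
counter.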
>
> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz at linux.intel.com>
> ---
> drivers/accel/ivpu/ivpu_ipc.c | 6 ++++++
> drivers/accel/ivpu/ivpu_mmu.c | 21 +++++++++++++--------
> 2 files changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
> index e86621f16f85..f69780248803 100644
> --- a/drivers/accel/ivpu/ivpu_ipc.c
> +++ b/drivers/accel/ivpu/ivpu_ipc.c
> @@ -389,12 +389,18 @@ void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread)
> unsigned long flags;
> bool dispatched;
> u32 vpu_addr;
> + int msg_count = 0;
>
> /*
> * The driver needs to purge all messages from the IPC FIFO to clear the IPC interrupt.
> * Unless the FIFO is drained to 0, no further IPC interrupts will be generated.
> */
> while (ivpu_hw_reg_ipc_rx_count_get(vdev)) {
> + if (++msg_count > IPC_MAX_RX_MSG) {
> + ivpu_pm_schedule_recovery(vdev);
> + return;
> + }
> +
> vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev);
> if (vpu_addr == REG_IO_ERROR) {
> ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n");
> diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
> index 1f813625aab3..c82929b0ae9d 100644
> --- a/drivers/accel/ivpu/ivpu_mmu.c
> +++ b/drivers/accel/ivpu/ivpu_mmu.c
> @@ -236,6 +236,8 @@
> #define IVPU_MMU_CERROR_ABT 0x2
> #define IVPU_MMU_CERROR_ATC_INV_SYNC 0x3
>
> +#define IVPU_MMU_MAX_EVENT_COUNT 100
> +
> static const char *ivpu_mmu_event_to_str(u32 cmd)
> {
> switch (cmd) {
> @@ -887,7 +889,7 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
>
> void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
> {
> - bool schedule_recovery = false;
> + int event_count = 0;
> u32 *event;
> u32 ssid;
>
> @@ -895,16 +897,19 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
>
> while ((event = ivpu_mmu_get_event(vdev)) != NULL) {
> ivpu_mmu_dump_event(vdev, event);
> + if (++event_count > IVPU_MMU_MAX_EVENT_COUNT) {
> + ivpu_pm_schedule_recovery(vdev);
> + return;
> + }
>
> ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]);
> - if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
> - schedule_recovery = true;
> - else
> - ivpu_mmu_user_context_mark_invalid(vdev, ssid);
> - }
> + if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) {
> + ivpu_pm_schedule_recovery(vdev);
> + return;
> + }
>
> - if (schedule_recovery)
> - ivpu_pm_schedule_recovery(vdev);
> + ivpu_mmu_user_context_mark_invalid(vdev, ssid);
> + }
> }
>
> void ivpu_mmu_evtq_dump(struct ivpu_device *vdev)
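
One more thought: if a hard cap stays, deriving it from the queue
geometry would read less arbitrary than a bare 100, e.g. (sketch;
IVPU_MMU_EVTQ_DEPTH is a made-up name for whatever constant sizes the
event queue):

/*
 * More than two full queues' worth of events in a single IRQ means
 * the FW refills the queue faster than we can drain it - stop and
 * let recovery handle it.
 */
#define IVPU_MMU_MAX_EVENT_COUNT (2 * IVPU_MMU_EVTQ_DEPTH)

Same idea for IPC_MAX_RX_MSG on the IPC side.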