[PATCH 2/2] drm/amdgpu:cleanup deprecated gpu reset logic

Christian König ckoenig.leichtzumerken at gmail.com
Tue Oct 31 15:01:10 UTC 2017


Am 31.10.2017 um 12:55 schrieb Monk Liu:
> trigger gpu reset/recovery from illegle instruction IRQ
> is deprecated long time ago, we already switch to recover
> gpu by TDR triggering.
>
> now please set lockup_timeout to non-zero value
> in driver loading to enable TDR.
The patch is ok, but NAK to the general approach. We should have illegal 
instruction/access alongside the timeout.

But instead of trying to trigger the reset directly inform the scheduler 
that the we detected a problem on the engine. The scheduler can then 
cancel the timeout and do the appropriate things.

This patch would be ok if you install this new functionality directly 
after removing the old (and broken) one.

Regards,
Christian.

>
> Change-Id: I45a576a97fd9859e1098e785ce857c2cf5adfba5
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 21 ---------------------
>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c   |  1 -
>   drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c   |  2 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   |  3 ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   |  2 --
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  2 --
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c  |  1 -
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c  |  1 -
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  |  1 -
>   drivers/gpu/drm/amd/amdgpu/si_dma.c     |  1 -
>   11 files changed, 36 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 6e89be5..b3453e3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1459,7 +1459,6 @@ struct amdgpu_device {
>   	bool				shutdown;
>   	bool				need_dma32;
>   	bool				accel_working;
> -	struct work_struct		reset_work;
>   	struct notifier_block		acpi_nb;
>   	struct amdgpu_i2c_chan		*i2c_bus[AMDGPU_MAX_I2C_BUS];
>   	struct amdgpu_debugfs		debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> index c340774..989b530 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
> @@ -73,23 +73,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
>   	drm_helper_hpd_irq_event(dev);
>   }
>   
> -/**
> - * amdgpu_irq_reset_work_func - execute gpu reset
> - *
> - * @work: work struct
> - *
> - * Execute scheduled gpu reset (cayman+).
> - * This function is called when the irq handler
> - * thinks we need a gpu reset.
> - */
> -static void amdgpu_irq_reset_work_func(struct work_struct *work)
> -{
> -	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
> -						  reset_work);
> -
> -	if (!amdgpu_sriov_vf(adev))
> -		amdgpu_gpu_recover(adev, NULL);
> -}
>   
>   /* Disable *all* interrupts */
>   static void amdgpu_irq_disable_all(struct amdgpu_device *adev)
> @@ -251,14 +234,11 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
>   				amdgpu_hotplug_work_func);
>   	}
>   
> -	INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func);
> -
>   	adev->irq.installed = true;
>   	r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
>   	if (r) {
>   		adev->irq.installed = false;
>   		flush_work(&adev->hotplug_work);
> -		cancel_work_sync(&adev->reset_work);
>   		return r;
>   	}
>   
> @@ -283,7 +263,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
>   		if (adev->irq.msi_enabled)
>   			pci_disable_msi(adev->pdev);
>   		flush_work(&adev->hotplug_work);
> -		cancel_work_sync(&adev->reset_work);
>   	}
>   
>   	for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index ed26dcb..c8d9bc1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -1227,7 +1227,6 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev,
>   					     struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal instruction in SDMA command stream\n");
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
> index 9430d48..25b32b7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
> @@ -3443,7 +3443,6 @@ static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev,
>   				 struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal register access in command stream\n");
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   
> @@ -3452,7 +3451,6 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev,
>   				  struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal instruction in command stream\n");
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 3c2b15a..b0e127d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -5020,7 +5020,6 @@ static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev,
>   				 struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal register access in command stream\n");
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   
> @@ -5029,8 +5028,6 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev,
>   				  struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal instruction in command stream\n");
> -	// XXX soft reset the gfx block only
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index a74515a..5fd4996 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6795,7 +6795,6 @@ static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
>   				 struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal register access in command stream\n");
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   
> @@ -6804,7 +6803,6 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
>   				  struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal instruction in command stream\n");
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 9855dc0..dce1960 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -4136,7 +4136,6 @@ static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
>   				 struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal register access in command stream\n");
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   
> @@ -4145,7 +4144,6 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
>   				  struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal instruction in command stream\n");
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index 92f8c44..7e2580c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -1161,7 +1161,6 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
>   					      struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal instruction in SDMA command stream\n");
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 52e6bf2..c12f994 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -1480,7 +1480,6 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev,
>   					      struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal instruction in SDMA command stream\n");
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index fe78c00..09b7586 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -1416,7 +1416,6 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
>   					      struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal instruction in SDMA command stream\n");
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> index ee469a9..408e145 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> @@ -683,7 +683,6 @@ static int si_dma_process_illegal_inst_irq(struct amdgpu_device *adev,
>   					      struct amdgpu_iv_entry *entry)
>   {
>   	DRM_ERROR("Illegal instruction in SDMA command stream\n");
> -	schedule_work(&adev->reset_work);
>   	return 0;
>   }
>   




More information about the amd-gfx mailing list