[PATCH] drm/amdgpu: defer test IBs on the rings at boot

Christian König christian.koenig at amd.com
Fri Apr 13 06:23:24 UTC 2018


Am 13.04.2018 um 06:07 schrieb Shirish S:
> amdgpu_ib_ring_tests() runs test IB's on rings at boot
> contributes to ~500 ms of amdgpu driver's boot time.
>
> This patch defers it and adds a check to report
> in amdgpu_info_ioctl() if it was scheduled or not.

That is rather suboptimal, but see below.

>
> Signed-off-by: Shirish S <shirish.s at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 20 +++++++++++++++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    |  3 +++
>   3 files changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 5734871..ae8f722 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1611,6 +1611,8 @@ struct amdgpu_device {
>   
>   	/* delayed work_func for deferring clockgating during resume */
>   	struct delayed_work     late_init_work;
> +	/* delayed work_func to defer testing IB's on rings during boot */
> +	struct delayed_work     late_init_test_ib_work;

You must put the IB test into the late_init_work as well, otherwise the 
two delayed workers can race with each other.

>   
>   	struct amdgpu_virt	virt;
>   	/* firmware VRAM reservation */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 1762eb4..e65a5e6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -63,6 +63,7 @@ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>   
>   #define AMDGPU_RESUME_MS		2000
> +#define AMDGPU_IB_TEST_SCHED_MS		2000
>   
>   static const char *amdgpu_asic_name[] = {
>   	"TAHITI",
> @@ -2105,6 +2106,16 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
>   	}
>   }
>   
> +static void amdgpu_device_late_init_test_ib_func_handler(struct work_struct *work)
> +{
> +	struct amdgpu_device *adev =
> +		container_of(work, struct amdgpu_device, late_init_test_ib_work.work);
> +	int r = amdgpu_ib_ring_tests(adev);
> +
> +	if (r)
> +		DRM_ERROR("ib ring test failed (%d).\n", r);
> +}
> +
>   /**
>    * amdgpu_device_has_dc_support - check if dc is supported
>    *
> @@ -2212,6 +2223,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   	INIT_LIST_HEAD(&adev->ring_lru_list);
>   	spin_lock_init(&adev->ring_lru_list_lock);
>   
> +	INIT_DELAYED_WORK(&adev->late_init_test_ib_work,
> +			  amdgpu_device_late_init_test_ib_func_handler);
>   	INIT_DELAYED_WORK(&adev->late_init_work,
>   			  amdgpu_device_ip_late_init_func_handler);
>   
> @@ -2374,9 +2387,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   		goto failed;
>   	}
>   
> -	r = amdgpu_ib_ring_tests(adev);
> -	if (r)
> -		DRM_ERROR("ib ring test failed (%d).\n", r);
> +	/* Schedule amdgpu_ib_ring_tests() */
> +	mod_delayed_work(system_wq, &adev->late_init_test_ib_work,
> +			msecs_to_jiffies(AMDGPU_IB_TEST_SCHED_MS));

That doesn't work like you intended. mod_delayed_work() overrides the 
existing handler.

What you wanted to use is queue_delayed_work(), but as I said we should 
only have one delayed worker.

>   
>   	if (amdgpu_sriov_vf(adev))
>   		amdgpu_virt_init_data_exchange(adev);
> @@ -2469,6 +2482,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
>   	}
>   	adev->accel_working = false;
>   	cancel_delayed_work_sync(&adev->late_init_work);
> +	cancel_delayed_work_sync(&adev->late_init_test_ib_work);
>   	/* free i2c buses */
>   	if (!amdgpu_device_has_dc_support(adev))
>   		amdgpu_i2c_fini(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 487d39e..057bd9a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -279,6 +279,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
>   	if (!info->return_size || !info->return_pointer)
>   		return -EINVAL;
>   
> +	if (delayed_work_pending(&adev->late_init_test_ib_work))
> +		DRM_ERROR("IB test on ring not executed\n");
> +

Please use flush_delayed_work() instead of issuing and error here.

Regards,
Christian.

>   	switch (info->query) {
>   	case AMDGPU_INFO_ACCEL_WORKING:
>   		ui32 = adev->accel_working;



More information about the amd-gfx mailing list