[PATCH] drm/amdgpu: Add early fini callback
Andrey Grodzovsky
andrey.grodzovsky at amd.com
Thu May 20 03:58:17 UTC 2021
On 2021-05-19 11:29 p.m., Felix Kuehling wrote:
> Am 2021-05-19 um 11:20 p.m. schrieb Andrey Grodzovsky:
>> Use it to call display code dependent on device->drv_data
>> before it's set to NULL on device unplug
>>
>> v5:
>> Move HW finalization into this callback to prevent MMIO accesses
>> post PCI remove.
>>
>> v7:
>> Split kfd suspend from device exit to expedite HW related
>> stuff to amdgpu_pci_remove
>>
>> v8:
>> Squash previous KFD commit into this commit to avoid compile break.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>> Acked-by: Christian König <christian.koenig at amd.com>
> See one cosmetic comment inline. With that fixed the patch is
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
Thanks for the quick response, updated.
Since this was the last commit to review, I also pushed the series to
drm-misc-next.
Andrey
>
>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 59 +++++++++++++------
>> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +-
>> .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++-
>> drivers/gpu/drm/amd/include/amd_shared.h | 2 +
>> 6 files changed, 56 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> index 5f6696a3c778..2b06dee9a0ce 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> @@ -170,7 +170,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>> }
>> }
>>
>> -void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
>> +void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev)
>> {
>> if (adev->kfd.dev) {
>> kgd2kfd_device_exit(adev->kfd.dev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 5ffb07b02810..d8a537e8aea5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -127,7 +127,7 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
>> const void *ih_ring_entry);
>> void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
>> void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
>> -void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
>> +void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev);
>> int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
>> uint32_t vmid, uint64_t gpu_addr,
>> uint32_t *ib_cmd, uint32_t ib_len);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 8bee95ad32d9..bc75e35dd8d8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -2558,34 +2558,26 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
>> return 0;
>> }
>>
>> -/**
>> - * amdgpu_device_ip_fini - run fini for hardware IPs
>> - *
>> - * @adev: amdgpu_device pointer
>> - *
>> - * Main teardown pass for hardware IPs. The list of all the hardware
>> - * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
>> - * are run. hw_fini tears down the hardware associated with each IP
>> - * and sw_fini tears down any software state associated with each IP.
>> - * Returns 0 on success, negative error code on failure.
>> - */
>> -static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
>> +static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
>> {
>> int i, r;
>>
>> - if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
>> - amdgpu_virt_release_ras_err_handler_data(adev);
>> + for (i = 0; i < adev->num_ip_blocks; i++) {
>> + if (!adev->ip_blocks[i].version->funcs->early_fini)
>> + continue;
>>
>> - amdgpu_ras_pre_fini(adev);
>> + r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
>> + if (r) {
>> + DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
>> + adev->ip_blocks[i].version->funcs->name, r);
>> + }
>> + }
>>
>> - if (adev->gmc.xgmi.num_physical_nodes > 1)
>> - amdgpu_xgmi_remove_device(adev);
>> + amdgpu_amdkfd_suspend(adev, false);
>>
>> amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
>> amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
>>
>> - amdgpu_amdkfd_device_fini(adev);
>> -
>> /* need to disable SMC first */
>> for (i = 0; i < adev->num_ip_blocks; i++) {
>> if (!adev->ip_blocks[i].status.hw)
>> @@ -2616,6 +2608,33 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
>> adev->ip_blocks[i].status.hw = false;
>> }
>>
>> + return 0;
>> +}
>> +
>> +/**
>> + * amdgpu_device_ip_fini - run fini for hardware IPs
>> + *
>> + * @adev: amdgpu_device pointer
>> + *
>> + * Main teardown pass for hardware IPs. The list of all the hardware
>> + * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
>> + * are run. hw_fini tears down the hardware associated with each IP
>> + * and sw_fini tears down any software state associated with each IP.
>> + * Returns 0 on success, negative error code on failure.
>> + */
>> +static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
>> +{
>> + int i, r;
>> +
>> + if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
>> + amdgpu_virt_release_ras_err_handler_data(adev);
>> +
>> + amdgpu_ras_pre_fini(adev);
>> +
>> + if (adev->gmc.xgmi.num_physical_nodes > 1)
>> + amdgpu_xgmi_remove_device(adev);
>> +
>> + amdgpu_amdkfd_device_fini_sw(adev);
>>
>> for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
>> if (!adev->ip_blocks[i].status.sw)
>> @@ -3681,6 +3700,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
>> amdgpu_fbdev_fini(adev);
>>
>> amdgpu_irq_fini_hw(adev);
>> +
>> + amdgpu_device_ip_fini_early(adev);
>> }
>>
>> void amdgpu_device_fini_sw(struct amdgpu_device *adev)
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index 357b9bf62a1c..ab6d2a43c9a3 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -858,10 +858,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>> return kfd->init_complete;
>> }
>>
>> +
>> +
>> void kgd2kfd_device_exit(struct kfd_dev *kfd)
> Unnecessary whitespace change.
>
> Regards,
> Felix
>
>
>> {
>> if (kfd->init_complete) {
>> - kgd2kfd_suspend(kfd, false);
>> device_queue_manager_uninit(kfd->dqm);
>> kfd_interrupt_exit(kfd);
>> kfd_topology_remove_device(kfd);
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> index 9ca517b65854..f7112865269a 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> @@ -1251,6 +1251,15 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
>> return -EINVAL;
>> }
>>
>> +static int amdgpu_dm_early_fini(void *handle)
>> +{
>> + struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> +
>> + amdgpu_dm_audio_fini(adev);
>> +
>> + return 0;
>> +}
>> +
>> static void amdgpu_dm_fini(struct amdgpu_device *adev)
>> {
>> int i;
>> @@ -1259,8 +1268,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
>> drm_encoder_cleanup(&adev->dm.mst_encoders[i].base);
>> }
>>
>> - amdgpu_dm_audio_fini(adev);
>> -
>> amdgpu_dm_destroy_drm_device(&adev->dm);
>>
>> #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
>> @@ -2298,6 +2305,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = {
>> .late_init = dm_late_init,
>> .sw_init = dm_sw_init,
>> .sw_fini = dm_sw_fini,
>> + .early_fini = amdgpu_dm_early_fini,
>> .hw_init = dm_hw_init,
>> .hw_fini = dm_hw_fini,
>> .suspend = dm_suspend,
>> diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
>> index 43ed6291b2b8..1ad56da486e4 100644
>> --- a/drivers/gpu/drm/amd/include/amd_shared.h
>> +++ b/drivers/gpu/drm/amd/include/amd_shared.h
>> @@ -240,6 +240,7 @@ enum amd_dpm_forced_level;
>> * @late_init: sets up late driver/hw state (post hw_init) - Optional
>> * @sw_init: sets up driver state, does not configure hw
>> * @sw_fini: tears down driver state, does not configure hw
>> + * @early_fini: tears down stuff before dev detached from driver
>> * @hw_init: sets up the hw state
>> * @hw_fini: tears down the hw state
>> * @late_fini: final cleanup
>> @@ -268,6 +269,7 @@ struct amd_ip_funcs {
>> int (*late_init)(void *handle);
>> int (*sw_init)(void *handle);
>> int (*sw_fini)(void *handle);
>> + int (*early_fini)(void *handle);
>> int (*hw_init)(void *handle);
>> int (*hw_fini)(void *handle);
>> void (*late_fini)(void *handle);
More information about the amd-gfx
mailing list