[PATCH 3/3] drm/amdgpu: Avoid HW reset if guilty job already signaled.
Christian König
ckoenig.leichtzumerken at gmail.com
Wed Apr 10 14:06:04 UTC 2019
On 09.04.19 18:42, Grodzovsky, Andrey wrote:
> On 4/9/19 10:50 AM, Christian König wrote:
>> On 08.04.19 18:08, Andrey Grodzovsky wrote:
>>> Also reject TDRs if another one already running.
>>>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 94 +++++++++++++++++++++---------
>>> 1 file changed, 67 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index aabd043..4446892 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -3327,10 +3327,12 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
>>> static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>>> struct amdgpu_job *job,
>>> - bool *need_full_reset_arg)
>>> + bool *need_full_reset_arg,
>>> + bool *job_signaled)
>>> {
>>> int i, r = 0;
>>> bool need_full_reset = *need_full_reset_arg;
>>> + struct amdgpu_ring *job_ring = job ? to_amdgpu_ring(job->base.sched) : NULL;
>>> /* block all schedulers and reset given job's ring */
>>> for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>> @@ -3341,6 +3343,17 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>>> drm_sched_stop(&ring->sched, &job->base);
>>> + /*
>>> + * Must check guilty signal here since after this point all old
>>> + * HW fences are force signaled.
>>> + *
>>> + * job->base holds a reference to parent fence
>>> + */
>>> + if (job_signaled && job && ring == job_ring &&
>>> + job->base.s_fence->parent &&
>>> + dma_fence_is_signaled(job->base.s_fence->parent))
>>> + *job_signaled = true;
>>> +
>> That won't work correctly. See, when the guilty job is not on the first
>> scheduler, you would already have force completed some fences before
>> getting here.
>>
>> Better to stop all schedulers first and then do the check.
>>
>> Christian.
>
> What do you mean by first scheduler? There is one scheduler object per
> ring, so I am not clear what 'first' means here.
Well, for example, if the guilty job is from a compute ring, then we have
already force signaled the gfx ring by the time we get here.
The same is true for the other devices in the same hive, so it would
probably be a good idea to move the force signaling and the IP reset
somewhere else and to do this check one layer up.
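
Roughly what I have in mind, just as a completely untested sketch
(amdgpu_device_guilty_job_signaled is a made up name and the XGMI
handling is left out):

static bool amdgpu_device_guilty_job_signaled(struct amdgpu_device *adev,
					      struct amdgpu_job *job)
{
	int i;

	/*
	 * Stop all schedulers first so nothing completes fences behind
	 * our back, but don't force complete anything yet.
	 */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		drm_sched_stop(&ring->sched, job ? &job->base : NULL);
	}

	/*
	 * Only now look at the guilty job's parent HW fence. Since nothing
	 * has been force completed yet, a signaled fence really means the
	 * job finished on its own and the HW reset can be skipped.
	 */
	return job && job->base.s_fence->parent &&
	       dma_fence_is_signaled(job->base.s_fence->parent);
}

Force completion and the IP suspend/reset would then only run when this
returns false, and for an XGMI hive you would run the same loop over
every adev in the hive before doing the check.
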
Christian.
>
> Andrey
>
>
>>> /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
>>> amdgpu_fence_driver_force_completion(ring);
>>> }
>>> @@ -3358,7 +3371,8 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>>> - if (!amdgpu_sriov_vf(adev)) {
>>> + /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
>>> + if (!amdgpu_sriov_vf(adev) && !(*job_signaled)) {
>>> if (!need_full_reset)
>>> need_full_reset = amdgpu_device_ip_need_full_reset(adev);
>>> @@ -3495,7 +3509,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
>>> return r;
>>> }
>>> -static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>> +static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev, bool job_signaled)
>>> {
>>> int i;
>>> @@ -3505,7 +3519,8 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>> if (!ring || !ring->sched.thread)
>>> continue;
>>> - if (!adev->asic_reset_res)
>>> + /* No point to resubmit jobs if we didn't HW reset*/
>>> + if (!adev->asic_reset_res && !job_signaled)
>>> drm_sched_resubmit_jobs(&ring->sched);
>>> drm_sched_start(&ring->sched, !adev->asic_reset_res);
>>> @@ -3518,14 +3533,21 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>> adev->asic_reset_res = 0;
>>> }
>>> -static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
>>> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
>>> {
>>> - mutex_lock(&adev->lock_reset);
>>> + if (trylock) {
>>> + if (!mutex_trylock(&adev->lock_reset))
>>> + return false;
>>> + } else
>>> + mutex_lock(&adev->lock_reset);
>>> +
>>> atomic_inc(&adev->gpu_reset_counter);
>>> adev->in_gpu_reset = 1;
>>> /* Block kfd: SRIOV would do it separately */
>>> if (!amdgpu_sriov_vf(adev))
>>> amdgpu_amdkfd_pre_reset(adev);
>>> +
>>> + return true;
>>> }
>>> static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
>>> @@ -3555,29 +3577,44 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>> {
>>> int r;
>>> struct amdgpu_hive_info *hive = NULL;
>>> - bool need_full_reset = false;
>>> struct amdgpu_device *tmp_adev = NULL;
>>> struct list_head device_list, *device_list_handle = NULL;
>>> + bool xgmi_topology_present, need_full_reset, job_signaled;
>>> + need_full_reset = job_signaled = false;
>>> INIT_LIST_HEAD(&device_list);
>>> dev_info(adev->dev, "GPU reset begin!\n");
>>> + hive = amdgpu_get_xgmi_hive(adev, 0);
>>> + xgmi_topology_present = hive && adev->gmc.xgmi.num_physical_nodes > 1;
>>> +
>>> /*
>>> - * In case of XGMI hive disallow concurrent resets to be triggered
>>> - * by different nodes. No point also since the one node already executing
>>> - * reset will also reset all the other nodes in the hive.
>>> + * Here we trylock to avoid chain of resets executing from
>>> + * either trigger by jobs on different adevs in XGMI hive or jobs on
>>> + * different schedulers for same device while this tdr is running.
>>> + * We always reset all schedulers for device and all devices for XGMI
>>> + * hive so that should take care of them too.
>>> */
>>> - hive = amdgpu_get_xgmi_hive(adev, 0);
>>> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
>>> - !mutex_trylock(&hive->reset_lock))
>>> +
>>> + if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) {
>>> + DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as
>>> another already in progress",
>>> + job->base.id, hive->hive_id);
>>> return 0;
>>> + }
>>> /* Start with adev pre asic reset first for soft reset check.*/
>>> - amdgpu_device_lock_adev(adev);
>>> + if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) {
>>> + DRM_INFO("Bailing on TDR for s_job:%llx, as another already
>>> in progress",
>>> + job->base.id);
>>> + return 0;
>>> + }
>>> +
>>> + /* Guilty job will be freed after this*/
>>> r = amdgpu_device_pre_asic_reset(adev,
>>> job,
>>> - &need_full_reset);
>>> + &need_full_reset,
>>> + &job_signaled);
>>> if (r) {
>>> /*TODO Should we stop ?*/
>>> DRM_ERROR("GPU pre asic reset failed with err, %d for drm
>>> dev, %s ",
>>> @@ -3609,10 +3646,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>> if (tmp_adev == adev)
>>> continue;
>>> - amdgpu_device_lock_adev(tmp_adev);
>>> + amdgpu_device_lock_adev(tmp_adev, false);
>>> r = amdgpu_device_pre_asic_reset(tmp_adev,
>>> NULL,
>>> - &need_full_reset);
>>> + &need_full_reset,
>>> + &job_signaled);
>>> /*TODO Should we stop ?*/
>>> if (r) {
>>> DRM_ERROR("GPU pre asic reset failed with err, %d for
>>> drm dev, %s ",
>>> @@ -3623,19 +3661,21 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>> /* Actual ASIC resets if needed.*/
>>> /* TODO Implement XGMI hive reset logic for SRIOV */
>>> - if (amdgpu_sriov_vf(adev)) {
>>> - r = amdgpu_device_reset_sriov(adev, job ? false : true);
>>> - if (r)
>>> - adev->asic_reset_res = r;
>>> - } else {
>>> - r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
>>> - if (r && r == -EAGAIN)
>>> - goto retry;
>>> + if (!job || !job_signaled) {
>>> + if (amdgpu_sriov_vf(adev)) {
>>> + r = amdgpu_device_reset_sriov(adev, job ? false : true);
>>> + if (r)
>>> + adev->asic_reset_res = r;
>>> + } else {
>>> + r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
>>> + if (r && r == -EAGAIN)
>>> + goto retry;
>>> + }
>>> }
>>> /* Post ASIC reset for all devs .*/
>>> list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>>> - amdgpu_device_post_asic_reset(tmp_adev);
>>> + amdgpu_device_post_asic_reset(tmp_adev, job_signaled);
>>> if (r) {
>>> /* bad news, how to tell it to userspace ? */
>>> @@ -3648,7 +3688,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>> amdgpu_device_unlock_adev(tmp_adev);
>>> }
>>> - if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
>>> + if (xgmi_topology_present)
>>> mutex_unlock(&hive->reset_lock);
>>> if (r)