[PATCH] drm/amdgpu:fix world switch hang
zhoucm1
david1.zhou at amd.com
Wed Jun 7 01:52:25 UTC 2017
On 2017年06月06日 20:01, Christian König wrote:
> Am 06.06.2017 um 11:27 schrieb Monk Liu:
>> for SR-IOV, we must keep the pipeline-sync in the protection
>> of COND_EXEC, otherwise the command consumed by CPG is not
>> consistent when world switch triggerd, e.g.:
>>
>> world switch hit and the IB frame is skipped so the fence
>> won't signal, thus CP will jump to the next DMAframe's pipeline-sync
>> command, and it will make CP hang foever.
>>
>> after pipelin-sync moved into COND_EXEC the consistency can be
>> guaranteed
>>
>> Change-Id: I3059535745e2e1215343299b17f27fe081fc01bc
>> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
>
> Reviewed-by: Christian König <christian.koenig at amd.com>
>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 5 +++--
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +++++--
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +-
>> 3 files changed, 9 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index 025e03b..947bd34 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -130,6 +130,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring,
>> unsigned num_ibs,
>> unsigned i;
>> int r = 0;
>> + bool need_pipe_sync = false;
>> if (num_ibs == 0)
>> return -EINVAL;
>> @@ -165,7 +166,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring,
>> unsigned num_ibs,
>> if (ring->funcs->emit_pipeline_sync && job &&
>> ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
>> amdgpu_vm_need_pipeline_sync(ring, job))) {
>> - amdgpu_ring_emit_pipeline_sync(ring);
>> + need_pipe_sync = true;
>> fence_put(tmp);
>> }
>> @@ -173,7 +174,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring
>> *ring, unsigned num_ibs,
>> ring->funcs->insert_start(ring);
>> if (vm) {
>> - r = amdgpu_vm_flush(ring, job);
>> + r = amdgpu_vm_flush(ring, job, need_pipe_sync);
There is a potential case lost, if no-vm job needs pipeline_sync, then
which will be lost.
Regards,
David Zhou
>> if (r) {
>> amdgpu_ring_undo(ring);
>> return r;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index b065f8b..6d62e34 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -747,7 +747,7 @@ static bool amdgpu_vm_is_large_bar(struct
>> amdgpu_device *adev)
>> *
>> * Emit a VM flush when it is necessary.
>> */
>> -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
>> +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job
>> *job, bool need_pipe_sync)
>> {
>> struct amdgpu_device *adev = ring->adev;
>> unsigned vmhub = ring->funcs->vmhub;
>> @@ -770,12 +770,15 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
>> struct amdgpu_job *job)
>> vm_flush_needed = true;
>> }
>> - if (!vm_flush_needed && !gds_switch_needed)
>> + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
>> return 0;
>> if (ring->funcs->init_cond_exec)
>> patch_offset = amdgpu_ring_init_cond_exec(ring);
>> + if (need_pipe_sync)
>> + amdgpu_ring_emit_pipeline_sync(ring);
>> +
>> if (ring->funcs->emit_vm_flush && vm_flush_needed) {
>> struct fence *fence;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 9aa00d9..cc0009f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -222,7 +222,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>> int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
>> struct amdgpu_sync *sync, struct fence *fence,
>> struct amdgpu_job *job);
>> -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
>> +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job
>> *job, bool need_pipe_sync);
>> void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
>> unsigned vmid);
>> void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list