[PATCH 7/9] drm/amdkfd: Fix and simplify sync object handling for KFD
Kuehling, Felix
Felix.Kuehling at amd.com
Fri Nov 16 20:19:55 UTC 2018
Hi Christian,
Would you review this patch? Just looking at the code, calling
amdgpu_sync_fence with adev=NULL should be OK for us. It's just a bit
unusual compared to amdgpu's usage of this function. We've had this
patch in kfd-staging for a while without problems. If you're OK with
this I'll go ahead and push this upstream as well.
Thanks,
Felix
On 2018-11-05 8:40 p.m., Kuehling, Felix wrote:
> The adev parameter in amdgpu_sync_fence and amdgpu_sync_resv is only
> needed for updating sync->last_vm_update. This breaks if different
> adevs are passed to calls for the same sync object.
>
> Always pass NULL for calls from KFD because sync objects used for
> KFD don't belong to any particular device, and KFD doesn't need the
> sync->last_vm_update fence.
>
> This fixes kernel log warnings on multi-GPU systems after recent
> changes in amdgpu_amdkfd_gpuvm_restore_process_bos.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 28 +++++-------------------
> 1 file changed, 5 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index d005371..572ac5f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -395,23 +395,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
> return 0;
> }
>
> -static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> - struct dma_fence *f)
> -{
> - int ret = amdgpu_sync_fence(adev, sync, f, false);
> -
> - /* Sync objects can't handle multiple GPUs (contexts) updating
> - * sync->last_vm_update. Fortunately we don't need it for
> - * KFD's purposes, so we can just drop that fence.
> - */
> - if (sync->last_vm_update) {
> - dma_fence_put(sync->last_vm_update);
> - sync->last_vm_update = NULL;
> - }
> -
> - return ret;
> -}
> -
> static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
> {
> struct amdgpu_bo *pd = vm->root.base.bo;
> @@ -422,7 +405,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
> if (ret)
> return ret;
>
> - return sync_vm_fence(adev, sync, vm->last_update);
> + return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
> }
>
> /* add_bo_to_vm - Add a BO to a VM
> @@ -826,7 +809,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
> /* Add the eviction fence back */
> amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
>
> - sync_vm_fence(adev, sync, bo_va->last_pt_update);
> + amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
>
> return 0;
> }
> @@ -851,7 +834,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
> return ret;
> }
>
> - return sync_vm_fence(adev, sync, bo_va->last_pt_update);
> + return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
> }
>
> static int map_bo_to_gpuvm(struct amdgpu_device *adev,
> @@ -911,7 +894,7 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
> vm_list_node) {
> struct amdgpu_bo *pd = peer_vm->root.base.bo;
>
> - ret = amdgpu_sync_resv(amdgpu_ttm_adev(pd->tbo.bdev),
> + ret = amdgpu_sync_resv(NULL,
> sync, pd->tbo.resv,
> AMDGPU_FENCE_OWNER_UNDEFINED, false);
> if (ret)
> @@ -2084,8 +2067,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
> pr_debug("Memory eviction: Validate BOs failed. Try again\n");
> goto validate_map_fail;
> }
> - ret = amdgpu_sync_fence(amdgpu_ttm_adev(bo->tbo.bdev),
> - &sync_obj, bo->tbo.moving, false);
> + ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
> if (ret) {
> pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
> goto validate_map_fail;
More information about the amd-gfx
mailing list