[PATCH 7/9] drm/amdkfd: Fix and simplify sync object handling for KFD

Koenig, Christian Christian.Koenig at amd.com
Sat Nov 17 16:01:38 UTC 2018


Hi Felix,

looks perfectly ok to me, patch is Reviewed-by: Christian König 
<christian.koenig at amd.com>.

But IIRC we once had a patch to remove the adev parameter from 
amdgpu_sync_fence(). I wonder why we have dropped that one.

Regards,
Christian.

Am 16.11.18 um 21:19 schrieb Kuehling, Felix:
> Hi Christian,
>
> Would you review this patch? Just looking at the code, calling
> amdgpu_sync_fence with adev=NULL should be OK for us. It's just a bit
> unusual compared to amdgpu's usage of this function. We've had this
> patch in kfd-staging for a while without problems. If you're OK with
> this I'll go ahead and push this upstream as well.
>
> Thanks,
>    Felix
>
> On 2018-11-05 8:40 p.m., Kuehling, Felix wrote:
>> The adev parameter in amdgpu_sync_fence and amdgpu_sync_resv is only
>> needed for updating sync->last_vm_update. This breaks if different
>> adevs are passed to calls for the same sync object.
>>
>> Always pass NULL for calls from KFD because sync objects used for
>> KFD don't belong to any particular device, and KFD doesn't need the
>> sync->last_vm_update fence.
>>
>> This fixes kernel log warnings on multi-GPU systems after recent
>> changes in amdgpu_amdkfd_gpuvm_restore_process_bos.
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 28 +++++-------------------
>>   1 file changed, 5 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index d005371..572ac5f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -395,23 +395,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
>>   	return 0;
>>   }
>>   
>> -static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
>> -			 struct dma_fence *f)
>> -{
>> -	int ret = amdgpu_sync_fence(adev, sync, f, false);
>> -
>> -	/* Sync objects can't handle multiple GPUs (contexts) updating
>> -	 * sync->last_vm_update. Fortunately we don't need it for
>> -	 * KFD's purposes, so we can just drop that fence.
>> -	 */
>> -	if (sync->last_vm_update) {
>> -		dma_fence_put(sync->last_vm_update);
>> -		sync->last_vm_update = NULL;
>> -	}
>> -
>> -	return ret;
>> -}
>> -
>>   static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
>>   {
>>   	struct amdgpu_bo *pd = vm->root.base.bo;
>> @@ -422,7 +405,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
>>   	if (ret)
>>   		return ret;
>>   
>> -	return sync_vm_fence(adev, sync, vm->last_update);
>> +	return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
>>   }
>>   
>>   /* add_bo_to_vm - Add a BO to a VM
>> @@ -826,7 +809,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
>>   	/* Add the eviction fence back */
>>   	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
>>   
>> -	sync_vm_fence(adev, sync, bo_va->last_pt_update);
>> +	amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
>>   
>>   	return 0;
>>   }
>> @@ -851,7 +834,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
>>   		return ret;
>>   	}
>>   
>> -	return sync_vm_fence(adev, sync, bo_va->last_pt_update);
>> +	return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
>>   }
>>   
>>   static int map_bo_to_gpuvm(struct amdgpu_device *adev,
>> @@ -911,7 +894,7 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
>>   			    vm_list_node) {
>>   		struct amdgpu_bo *pd = peer_vm->root.base.bo;
>>   
>> -		ret = amdgpu_sync_resv(amdgpu_ttm_adev(pd->tbo.bdev),
>> +		ret = amdgpu_sync_resv(NULL,
>>   					sync, pd->tbo.resv,
>>   					AMDGPU_FENCE_OWNER_UNDEFINED, false);
>>   		if (ret)
>> @@ -2084,8 +2067,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
>>   			pr_debug("Memory eviction: Validate BOs failed. Try again\n");
>>   			goto validate_map_fail;
>>   		}
>> -		ret = amdgpu_sync_fence(amdgpu_ttm_adev(bo->tbo.bdev),
>> -					&sync_obj, bo->tbo.moving, false);
>> +		ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
>>   		if (ret) {
>>   			pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
>>   			goto validate_map_fail;



More information about the amd-gfx mailing list