[PATCH] drm/amdkfd: Add memory sync before TLB flush on unmap

Felix Kuehling felix.kuehling at amd.com
Thu Jun 10 19:54:52 UTC 2021


On 2021-06-10 3:00 p.m., Eric Huang wrote:
> This fixes a failure when SDMA updates PTEs: wait for the page table updates to complete before flushing the TLBs on unmap.
>
> Signed-off-by: Eric Huang <jinhuieric.huang at amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 26 +++++++++++++++++++++---
>   1 file changed, 23 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 98f1d2b586c5..17d0a318b708 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1766,14 +1766,33 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>   			amdgpu_read_unlock(peer->ddev);
>   			goto unmap_memory_from_gpu_failed;
>   		}
> -		kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
>   		amdgpu_read_unlock(peer->ddev);
>   		args->n_success = i+1;
>   	}
> -	kfree(devices_arr);
> -
>   	mutex_unlock(&p->mutex);
>   
> +	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
> +	if (err) {
> +		pr_debug("Sync memory failed, wait interrupted by user signal\n");
> +		goto sync_memory_failed;
> +	}
> +
> +	/* Flush TLBs after waiting for the page table updates to complete */
> +	for (i = 0; i < args->n_devices; i++) {
> +		peer = kfd_device_by_id(devices_arr[i]);
> +		if (WARN_ON_ONCE(!peer))
> +			continue;
> +		peer_pdd = kfd_get_process_device_data(peer, p);
> +		if (WARN_ON_ONCE(!peer_pdd))
> +			continue;
> +		if (!amdgpu_read_lock(peer->ddev, true)) {
> +			kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
> +			amdgpu_read_unlock(peer->ddev);
> +		}
> +	}
> +
> +	kfree(devices_arr);
> +
>   	return 0;
>   
>   bind_process_to_device_failed:
> @@ -1781,6 +1800,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>   unmap_memory_from_gpu_failed:
>   	mutex_unlock(&p->mutex);
>   copy_from_user_failed:
> +sync_memory_failed:
>   	kfree(devices_arr);
>   	return err;
>   }


More information about the amd-gfx mailing list