[PATCH] drm/amdkfd: Fix GTT BO CPU mapping

Felix Kuehling felix.kuehling at amd.com
Tue May 31 15:24:50 UTC 2022


Please ignore this patch. Lang's patch already handles this and also 
covers a corner case I missed (not adding eviction fences to pinned BOs 
after restore). I will update my patch later to only remove the unused 
parameter and add kernel-doc comments.

Regards,
   Felix


Am 2022-05-31 um 11:14 schrieb Felix Kuehling:
> Leave CPU-mapped BOs on the validate list to allow restoring their GPU
> mappings after page tables were evicted.
>
> Also removed an unused parameter and added kernel-doc comments.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> CC: Christian Koenig <Christian.Koenig at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  7 ++---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 29 +++++++++++++++----
>   drivers/gpu/drm/amd/amdkfd/kfd_events.c       |  5 ++--
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  6 ++--
>   4 files changed, 32 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index f8b9f27adcf5..38c1a685cb24 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -279,10 +279,9 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>   		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
>   int amdgpu_amdkfd_gpuvm_sync_memory(
>   		struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
> -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
> -		struct kgd_mem *mem, void **kptr, uint64_t *size);
> -void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
> -		struct kgd_mem *mem);
> +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
> +					     void **kptr, uint64_t *size);
> +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
>   
>   int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
>   					    struct dma_fence **ef);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 34ba9e776521..839321839ee9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1942,8 +1942,21 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
>   	return ret;
>   }
>   
> -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
> -		struct kgd_mem *mem, void **kptr, uint64_t *size)
> +/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access
> + *
> + * @mem: Buffer object to be mapped for CPU access
> + * @kptr[out]: pointer in kernel CPU address space
> + * @size[out]: size of the buffer
> + *
> + * Pins the BO and maps it for kernel CPU access. The eviction fence is removed
> + * from the BO, since pinned BOs cannot be evicted. The bo must remain on the
> + * validate_list, so the GPU mapping can be restored after a page table was
> + * evicted.
> + *
> + * Return: 0 on success, error code on failure
> + */
> +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
> +					     void **kptr, uint64_t *size)
>   {
>   	int ret;
>   	struct amdgpu_bo *bo = mem->bo;
> @@ -1978,7 +1991,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
>   
>   	amdgpu_amdkfd_remove_eviction_fence(
>   		bo, mem->process_info->eviction_fence);
> -	list_del_init(&mem->validate_list.head);
>   
>   	if (size)
>   		*size = amdgpu_bo_size(bo);
> @@ -1998,8 +2010,15 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
>   	return ret;
>   }
>   
> -void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
> -						  struct kgd_mem *mem)
> +/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Unmap a GTT BO for kernel CPU access
> + *
> + * @mem: Buffer object to be unmapped for CPU access
> + *
> + * Removes the kernel CPU mapping and unpins the BO. It does not restore the
> + * eviction fence, so this function should only be used for cleanup before the
> + * BO is destroyed.
> + */
> +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
>   {
>   	struct amdgpu_bo *bo = mem->bo;
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index 4df9c36146ba..3942a56c28bb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -377,8 +377,7 @@ int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
>   		return -EINVAL;
>   	}
>   
> -	err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev,
> -					mem, &kern_addr, &size);
> +	err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(mem, &kern_addr, &size);
>   	if (err) {
>   		pr_err("Failed to map event page to kernel\n");
>   		return err;
> @@ -387,7 +386,7 @@ int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
>   	err = kfd_event_page_set(p, kern_addr, size, event_page_offset);
>   	if (err) {
>   		pr_err("Failed to set event page\n");
> -		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem);
> +		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
>   		return err;
>   	}
>   	return err;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index e3d64ec8c353..a13e60d48b73 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -693,7 +693,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
>   	struct kfd_dev *dev = pdd->dev;
>   
>   	if (kptr) {
> -		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->adev, mem);
> +		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
>   		kptr = NULL;
>   	}
>   
> @@ -733,7 +733,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
>   	}
>   
>   	if (kptr) {
> -		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->adev,
> +		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(
>   				(struct kgd_mem *)*mem, kptr, NULL);
>   		if (err) {
>   			pr_debug("Map GTT BO to kernel failed\n");
> @@ -999,7 +999,7 @@ static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
>   	if (!mem)
>   		goto out;
>   
> -	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->adev, mem);
> +	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
>   
>   out:
>   	mutex_unlock(&p->mutex);


More information about the amd-gfx mailing list