[PATCH 4/6] drm/amdgpu: Attach eviction fence on alloc

Felix Kuehling felix.kuehling at amd.com
Mon Jan 16 22:51:17 UTC 2023


On 2023-01-16 17:11, Errabolu, Ramesh wrote:
> [AMD Official Use Only - General]
>
> Comment inline.
>
> Regards,
> Ramesh
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Felix Kuehling
> Sent: Thursday, January 12, 2023 7:02 AM
> To: amd-gfx at lists.freedesktop.org; dri-devel at lists.freedesktop.org
> Cc: Chen, Xiaogang <Xiaogang.Chen at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>
> Subject: [PATCH 4/6] drm/amdgpu: Attach eviction fence on alloc
>
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
>
>
> Instead of attaching the eviction fence when a KFD BO is first mapped, attach it when it is allocated or imported. This is in preparation to allow KFD BOs to be mapped using the render node API.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 63 ++++++++++---------
>   1 file changed, 32 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 5645103beed0..79213f476493 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -360,6 +360,24 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
>          return ret;
>   }
>
> +static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
> +                                              uint32_t domain,
> +                                              struct dma_fence *fence)
> +{
> +       int ret = amdgpu_bo_reserve(bo, false);
> +
> +       if (ret)
> +               return ret;
> +
> +       ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
> +       if (!ret)
> Ramesh: Should space for the fence be reserved before adding it?

Yes, I probably should. I think I fixed this in the later patch because 
I didn't see the problem until I fixed those other issues.


>
> +               dma_resv_add_fence(bo->tbo.base.resv, fence,
> +                                  DMA_RESV_USAGE_BOOKKEEP);
> +       amdgpu_bo_unreserve(bo);
> +
> +       return ret;
> +}
> +
>   static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
>   {
>          return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
> @@ -1720,6 +1738,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>                  }
>                  bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
>                  bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
> +       } else {
> +               ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain,
> +                               &avm->process_info->eviction_fence->base);
> +               if (ret)
> +                       goto err_validate_bo;
>          }
>
>          if (offset)
> @@ -1729,6 +1752,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>
>   allocate_init_user_pages_failed:
>   err_pin_bo:
> +err_validate_bo:
>          remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
>          drm_vma_node_revoke(&gobj->vma_node, drm_priv);
>   err_node_allow:
> @@ -1804,10 +1828,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>          if (unlikely(ret))
>                  return ret;
>
> -       /* The eviction fence should be removed by the last unmap.
> -        * TODO: Log an error condition if the bo still has the eviction fence
> -        * attached
> -        */
>          amdgpu_amdkfd_remove_eviction_fence(mem->bo,
>                                          process_info->eviction_fence);
> Ramesh: Is it correct to call remove_eviction() unconditionally? This procedure applies to GTT and VRAM BOs only. Furthermore, the fence on these BOs has already been removed in the unmap_memory() call.

This patch removes the amdgpu_amdkfd_remove_eviction_fence call from
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu. So we definitely need to do
this here.

amdgpu_amdkfd_remove_eviction_fence uses dma_resv_replace_fences. If the 
specified fence is not found in the BO's reservation object, it is a 
no-op. So calling this unconditionally is safe.

Regards,
   Felix


>
>          pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
> @@ -1931,19 +1951,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>          if (unlikely(ret))
>                  goto out_unreserve;
>
> -       if (mem->mapped_to_gpu_memory == 0 &&
> -           !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
> -               /* Validate BO only once. The eviction fence gets added to BO
> -                * the first time it is mapped. Validate will wait for all
> -                * background evictions to complete.
> -                */
> -               ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
> -               if (ret) {
> -                       pr_debug("Validate failed\n");
> -                       goto out_unreserve;
> -               }
> -       }
> -
>          list_for_each_entry(entry, &mem->attachments, list) {
>                  if (entry->bo_va->base.vm != avm || entry->is_mapped)
>                          continue;
> @@ -1970,10 +1977,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>                           mem->mapped_to_gpu_memory);
>          }
>
> -       if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
> -               dma_resv_add_fence(bo->tbo.base.resv,
> -                                  &avm->process_info->eviction_fence->base,
> -                                  DMA_RESV_USAGE_BOOKKEEP);
>          ret = unreserve_bo_and_vms(&ctx, false, false);
>
>          goto out;
> @@ -1990,7 +1993,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>                  struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
>   {
>          struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
> -       struct amdkfd_process_info *process_info = avm->process_info;
>          unsigned long bo_size = mem->bo->tbo.base.size;
>          struct kfd_mem_attachment *entry;
>          struct bo_vm_reservation_context ctx;
> @@ -2031,15 +2033,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>                           mem->mapped_to_gpu_memory);
>          }
>
> -       /* If BO is unmapped from all VMs, unfence it. It can be evicted if
> -        * required.
> -        */
> -       if (mem->mapped_to_gpu_memory == 0 &&
> -           !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
> -           !mem->bo->tbo.pin_count)
> -               amdgpu_amdkfd_remove_eviction_fence(mem->bo,
> -                                               process_info->eviction_fence);
> -
>   unreserve_out:
>          unreserve_bo_and_vms(&ctx, false, false);
>   out:
> @@ -2266,8 +2259,16 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
>          amdgpu_sync_create(&(*mem)->sync);
>          (*mem)->is_imported = true;
>
> +       ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain,
> +                               &avm->process_info->eviction_fence->base);
> +       if (ret)
> +               goto err_remove_mem;
> +
>          return 0;
>
> +err_remove_mem:
> +       remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
> +       drm_vma_node_revoke(&obj->vma_node, drm_priv);
>   err_free_mem:
>          kfree(*mem);
>   err_put_obj:
> --
> 2.34.1


More information about the dri-devel mailing list