[PATCH 1/7] drm/radeon: switch over to drm_exec v2

Alex Deucher alexdeucher at gmail.com
Thu Nov 14 20:50:18 UTC 2024


On Thu, Nov 14, 2024 at 10:30 AM Christian König
<ckoenig.leichtzumerken at gmail.com> wrote:
>
> Just a straightforward conversion without any optimization.
>
> Smoke tested on actual hardware.
>
> v2: rebase
>
> Signed-off-by: Christian König <christian.koenig at amd.com>

Acked-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  drivers/gpu/drm/radeon/Kconfig         |  1 +
>  drivers/gpu/drm/radeon/radeon.h        |  7 ++--
>  drivers/gpu/drm/radeon/radeon_cs.c     | 45 +++++++++++++-------------
>  drivers/gpu/drm/radeon/radeon_gem.c    | 39 ++++++++++++----------
>  drivers/gpu/drm/radeon/radeon_object.c | 25 +++++++-------
>  drivers/gpu/drm/radeon/radeon_object.h |  2 +-
>  drivers/gpu/drm/radeon/radeon_vm.c     | 10 +++---
>  7 files changed, 66 insertions(+), 63 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig
> index 9c6c74a75778..f51bace9555d 100644
> --- a/drivers/gpu/drm/radeon/Kconfig
> +++ b/drivers/gpu/drm/radeon/Kconfig
> @@ -13,6 +13,7 @@ config DRM_RADEON
>          select DRM_TTM
>         select DRM_TTM_HELPER
>         select FB_IOMEM_HELPERS if DRM_FBDEV_EMULATION
> +       select DRM_EXEC
>         select SND_HDA_COMPONENT if SND_HDA_CORE
>         select POWER_SUPPLY
>         select HWMON
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index fd8a4513025f..8605c074d9f7 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -75,8 +75,8 @@
>
>  #include <drm/ttm/ttm_bo.h>
>  #include <drm/ttm/ttm_placement.h>
> -#include <drm/ttm/ttm_execbuf_util.h>
>
> +#include <drm/drm_exec.h>
>  #include <drm/drm_gem.h>
>  #include <drm/drm_audio_component.h>
>  #include <drm/drm_suballoc.h>
> @@ -457,7 +457,8 @@ struct radeon_mman {
>
>  struct radeon_bo_list {
>         struct radeon_bo                *robj;
> -       struct ttm_validate_buffer      tv;
> +       struct list_head                list;
> +       bool                            shared;
>         uint64_t                        gpu_offset;
>         unsigned                        preferred_domains;
>         unsigned                        allowed_domains;
> @@ -1030,6 +1031,7 @@ struct radeon_cs_parser {
>         struct radeon_bo_list   *vm_bos;
>         struct list_head        validated;
>         unsigned                dma_reloc_idx;
> +       struct drm_exec         exec;
>         /* indices of various chunks */
>         struct radeon_cs_chunk  *chunk_ib;
>         struct radeon_cs_chunk  *chunk_relocs;
> @@ -1043,7 +1045,6 @@ struct radeon_cs_parser {
>         u32                     cs_flags;
>         u32                     ring;
>         s32                     priority;
> -       struct ww_acquire_ctx   ticket;
>  };
>
>  static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index a6700d7278bf..64b26bfeafc9 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -182,11 +182,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
>                         }
>                 }
>
> -               p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
> -               p->relocs[i].tv.num_shared = !r->write_domain;
> -
> -               radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
> -                                     priority);
> +               p->relocs[i].shared = !r->write_domain;
> +               radeon_cs_buckets_add(&buckets, &p->relocs[i].list, priority);
>         }
>
>         radeon_cs_buckets_get_list(&buckets, &p->validated);
> @@ -197,7 +194,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
>         if (need_mmap_lock)
>                 mmap_read_lock(current->mm);
>
> -       r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
> +       r = radeon_bo_list_validate(p->rdev, &p->exec, &p->validated, p->ring);
>
>         if (need_mmap_lock)
>                 mmap_read_unlock(current->mm);
> @@ -253,12 +250,11 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
>         struct radeon_bo_list *reloc;
>         int r;
>
> -       list_for_each_entry(reloc, &p->validated, tv.head) {
> +       list_for_each_entry(reloc, &p->validated, list) {
>                 struct dma_resv *resv;
>
>                 resv = reloc->robj->tbo.base.resv;
> -               r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
> -                                    reloc->tv.num_shared);
> +               r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, reloc->shared);
>                 if (r)
>                         return r;
>         }
> @@ -276,6 +272,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
>         s32 priority = 0;
>
>         INIT_LIST_HEAD(&p->validated);
> +       drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
>
>         if (!cs->num_chunks) {
>                 return 0;
> @@ -397,8 +394,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
>  static int cmp_size_smaller_first(void *priv, const struct list_head *a,
>                                   const struct list_head *b)
>  {
> -       struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
> -       struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);
> +       struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, list);
> +       struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, list);
>
>         /* Sort A before B if A is smaller. */
>         if (la->robj->tbo.base.size > lb->robj->tbo.base.size)
> @@ -417,11 +414,13 @@ static int cmp_size_smaller_first(void *priv, const struct list_head *a,
>   * If error is set than unvalidate buffer, otherwise just free memory
>   * used by parsing context.
>   **/
> -static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
> +static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
>  {
>         unsigned i;
>
>         if (!error) {
> +               struct radeon_bo_list *reloc;
> +
>                 /* Sort the buffer list from the smallest to largest buffer,
>                  * which affects the order of buffers in the LRU list.
>                  * This assures that the smallest buffers are added first
> @@ -433,15 +432,17 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
>                  * per frame under memory pressure.
>                  */
>                 list_sort(NULL, &parser->validated, cmp_size_smaller_first);
> -
> -               ttm_eu_fence_buffer_objects(&parser->ticket,
> -                                           &parser->validated,
> -                                           &parser->ib.fence->base);
> -       } else if (backoff) {
> -               ttm_eu_backoff_reservation(&parser->ticket,
> -                                          &parser->validated);
> +               list_for_each_entry(reloc, &parser->validated, list) {
> +                       dma_resv_add_fence(reloc->robj->tbo.base.resv,
> +                                          &parser->ib.fence->base,
> +                                          reloc->shared ?
> +                                          DMA_RESV_USAGE_READ :
> +                                          DMA_RESV_USAGE_WRITE);
> +               }
>         }
>
> +       drm_exec_fini(&parser->exec);
> +
>         if (parser->relocs != NULL) {
>                 for (i = 0; i < parser->nrelocs; i++) {
>                         struct radeon_bo *bo = parser->relocs[i].robj;
> @@ -693,7 +694,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>         r = radeon_cs_parser_init(&parser, data);
>         if (r) {
>                 DRM_ERROR("Failed to initialize parser !\n");
> -               radeon_cs_parser_fini(&parser, r, false);
> +               radeon_cs_parser_fini(&parser, r);
>                 up_read(&rdev->exclusive_lock);
>                 r = radeon_cs_handle_lockup(rdev, r);
>                 return r;
> @@ -707,7 +708,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>         }
>
>         if (r) {
> -               radeon_cs_parser_fini(&parser, r, false);
> +               radeon_cs_parser_fini(&parser, r);
>                 up_read(&rdev->exclusive_lock);
>                 r = radeon_cs_handle_lockup(rdev, r);
>                 return r;
> @@ -724,7 +725,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>                 goto out;
>         }
>  out:
> -       radeon_cs_parser_fini(&parser, r, true);
> +       radeon_cs_parser_fini(&parser, r);
>         up_read(&rdev->exclusive_lock);
>         r = radeon_cs_handle_lockup(rdev, r);
>         return r;
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
> index bf2d4b16dc2a..f86773f3db20 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -605,33 +605,40 @@ int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
>  static void radeon_gem_va_update_vm(struct radeon_device *rdev,
>                                     struct radeon_bo_va *bo_va)
>  {
> -       struct ttm_validate_buffer tv, *entry;
> -       struct radeon_bo_list *vm_bos;
> -       struct ww_acquire_ctx ticket;
> +       struct radeon_bo_list *vm_bos, *entry;
>         struct list_head list;
> +       struct drm_exec exec;
>         unsigned domain;
>         int r;
>
>         INIT_LIST_HEAD(&list);
>
> -       tv.bo = &bo_va->bo->tbo;
> -       tv.num_shared = 1;
> -       list_add(&tv.head, &list);
> -
>         vm_bos = radeon_vm_get_bos(rdev, bo_va->vm, &list);
>         if (!vm_bos)
>                 return;
>
> -       r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
> -       if (r)
> -               goto error_free;
> +       drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> +       drm_exec_until_all_locked(&exec) {
> +               list_for_each_entry(entry, &list, list) {
> +                       r = drm_exec_prepare_obj(&exec, &entry->robj->tbo.base,
> +                                                1);
> +                       drm_exec_retry_on_contention(&exec);
> +                       if (unlikely(r))
> +                               goto error_cleanup;
> +               }
>
> -       list_for_each_entry(entry, &list, head) {
> -               domain = radeon_mem_type_to_domain(entry->bo->resource->mem_type);
> +               r = drm_exec_prepare_obj(&exec, &bo_va->bo->tbo.base, 1);
> +               drm_exec_retry_on_contention(&exec);
> +               if (unlikely(r))
> +                       goto error_cleanup;
> +       }
> +
> +       list_for_each_entry(entry, &list, list) {
> +               domain = radeon_mem_type_to_domain(entry->robj->tbo.resource->mem_type);
>                 /* if anything is swapped out don't swap it in here,
>                    just abort and wait for the next CS */
>                 if (domain == RADEON_GEM_DOMAIN_CPU)
> -                       goto error_unreserve;
> +                       goto error_cleanup;
>         }
>
>         mutex_lock(&bo_va->vm->mutex);
> @@ -645,10 +652,8 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev,
>  error_unlock:
>         mutex_unlock(&bo_va->vm->mutex);
>
> -error_unreserve:
> -       ttm_eu_backoff_reservation(&ticket, &list);
> -
> -error_free:
> +error_cleanup:
> +       drm_exec_fini(&exec);
>         kvfree(vm_bos);
>
>         if (r && r != -ERESTARTSYS)
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index 7672404fdb29..a0fc0801abb0 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -464,23 +464,26 @@ static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
>  }
>
>  int radeon_bo_list_validate(struct radeon_device *rdev,
> -                           struct ww_acquire_ctx *ticket,
> +                           struct drm_exec *exec,
>                             struct list_head *head, int ring)
>  {
>         struct ttm_operation_ctx ctx = { true, false };
>         struct radeon_bo_list *lobj;
> -       struct list_head duplicates;
> -       int r;
>         u64 bytes_moved = 0, initial_bytes_moved;
>         u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
> +       int r;
>
> -       INIT_LIST_HEAD(&duplicates);
> -       r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
> -       if (unlikely(r != 0)) {
> -               return r;
> +       drm_exec_until_all_locked(exec) {
> +               list_for_each_entry(lobj, head, list) {
> +                       r = drm_exec_prepare_obj(exec, &lobj->robj->tbo.base,
> +                                                1);
> +                       drm_exec_retry_on_contention(exec);
> +                       if (unlikely(r && r != -EALREADY))
> +                               return r;
> +               }
>         }
>
> -       list_for_each_entry(lobj, head, tv.head) {
> +       list_for_each_entry(lobj, head, list) {
>                 struct radeon_bo *bo = lobj->robj;
>                 if (!bo->tbo.pin_count) {
>                         u32 domain = lobj->preferred_domains;
> @@ -519,7 +522,6 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
>                                         domain = lobj->allowed_domains;
>                                         goto retry;
>                                 }
> -                               ttm_eu_backoff_reservation(ticket, head);
>                                 return r;
>                         }
>                 }
> @@ -527,11 +529,6 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
>                 lobj->tiling_flags = bo->tiling_flags;
>         }
>
> -       list_for_each_entry(lobj, &duplicates, tv.head) {
> -               lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
> -               lobj->tiling_flags = lobj->robj->tiling_flags;
> -       }
> -
>         return 0;
>  }
>
> diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
> index 39cc87a59a9a..d7bbb52db546 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.h
> +++ b/drivers/gpu/drm/radeon/radeon_object.h
> @@ -152,7 +152,7 @@ extern void radeon_bo_force_delete(struct radeon_device *rdev);
>  extern int radeon_bo_init(struct radeon_device *rdev);
>  extern void radeon_bo_fini(struct radeon_device *rdev);
>  extern int radeon_bo_list_validate(struct radeon_device *rdev,
> -                                  struct ww_acquire_ctx *ticket,
> +                                  struct drm_exec *exec,
>                                    struct list_head *head, int ring);
>  extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
>                                 u32 tiling_flags, u32 pitch);
> diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
> index c38b4d5d6a14..21a5340aefdf 100644
> --- a/drivers/gpu/drm/radeon/radeon_vm.c
> +++ b/drivers/gpu/drm/radeon/radeon_vm.c
> @@ -142,10 +142,9 @@ struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
>         list[0].robj = vm->page_directory;
>         list[0].preferred_domains = RADEON_GEM_DOMAIN_VRAM;
>         list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
> -       list[0].tv.bo = &vm->page_directory->tbo;
> -       list[0].tv.num_shared = 1;
> +       list[0].shared = true;
>         list[0].tiling_flags = 0;
> -       list_add(&list[0].tv.head, head);
> +       list_add(&list[0].list, head);
>
>         for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
>                 if (!vm->page_tables[i].bo)
> @@ -154,10 +153,9 @@ struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
>                 list[idx].robj = vm->page_tables[i].bo;
>                 list[idx].preferred_domains = RADEON_GEM_DOMAIN_VRAM;
>                 list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
> -               list[idx].tv.bo = &list[idx].robj->tbo;
> -               list[idx].tv.num_shared = 1;
> +               list[idx].shared = true;
>                 list[idx].tiling_flags = 0;
> -               list_add(&list[idx++].tv.head, head);
> +               list_add(&list[idx++].list, head);
>         }
>
>         return list;
> --
> 2.34.1
>


More information about the amd-gfx mailing list