[Mesa-dev] [PATCH 2/3] winsys/amdgpu: implement sync_file import/export

Wed Sep 13 15:40:22 UTC 2017

This will need a bump of the libdrm version requirements, right?

Apart from that and a minor comment on patch 3, the series is

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

On 12.09.2017 22:50, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> syncobj is used internally for interactions with command submission.
> ---
>   src/gallium/drivers/radeon/radeon_winsys.h |  12 +++
>   src/gallium/winsys/amdgpu/drm/amdgpu_cs.c  | 115 +++++++++++++++++++++++++++--
>   src/gallium/winsys/amdgpu/drm/amdgpu_cs.h  |  18 ++++-
>   3 files changed, 138 insertions(+), 7 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
> index 99e22e0..2438ec2 100644
> --- a/src/gallium/drivers/radeon/radeon_winsys.h
> +++ b/src/gallium/drivers/radeon/radeon_winsys.h
> @@ -590,20 +590,32 @@ struct radeon_winsys {
>                          struct pipe_fence_handle *fence,
>                          uint64_t timeout);
>   
>       /**
>        * Reference counting for fences.
>        */
>       void (*fence_reference)(struct pipe_fence_handle **dst,
>                               struct pipe_fence_handle *src);
>   
>       /**
> +     * Create a new fence object corresponding to the given sync_file.
> +     */
> +    struct pipe_fence_handle *(*fence_import_sync_file)(struct radeon_winsys *ws,
> +							int fd);
> +
> +    /**
> +     * Return a sync_file FD corresponding to the given fence object.
> +     */
> +    int (*fence_export_sync_file)(struct radeon_winsys *ws,
> +				  struct pipe_fence_handle *fence);
> +
> +    /**
>        * Initialize surface
>        *
>        * \param ws        The winsys this function is called from.
>        * \param tex       Input texture description
>        * \param flags     Bitmask of RADEON_SURF_* flags
>        * \param bpe       Bytes per pixel, it can be different for Z buffers.
>        * \param mode      Preferred tile mode. (linear, 1D, or 2D)
>        * \param surf      Output structure
>        */
>       int (*surface_init)(struct radeon_winsys *ws,
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> index 768a164..d9d2a8b 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> @@ -40,30 +40,86 @@ DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
>   
>   /* FENCES */
>   
>   static struct pipe_fence_handle *
>   amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
>                       unsigned ip_instance, unsigned ring)
>   {
>      struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
>   
>      fence->reference.count = 1;
> +   fence->ws = ctx->ws;
>      fence->ctx = ctx;
>      fence->fence.context = ctx->ctx;
>      fence->fence.ip_type = ip_type;
>      fence->fence.ip_instance = ip_instance;
>      fence->fence.ring = ring;
>      fence->submission_in_progress = true;
>      p_atomic_inc(&ctx->refcount);
>      return (struct pipe_fence_handle *)fence;
>   }
>   
> +static struct pipe_fence_handle *
> +amdgpu_fence_import_sync_file(struct radeon_winsys *rws, int fd)
> +{
> +   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
> +   struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
> +
> +   if (!fence)
> +      return NULL;
> +
> +   pipe_reference_init(&fence->reference, 1);
> +   fence->ws = ws;
> +   /* fence->ctx == NULL means that the fence is syncobj-based. */
> +
> +   /* Convert sync_file into syncobj. */
> +   int r = amdgpu_cs_create_syncobj(ws->dev, &fence->syncobj);
> +   if (r) {
> +      FREE(fence);
> +      return NULL;
> +   }
> +
> +   r = amdgpu_cs_syncobj_import_sync_file(ws->dev, fence->syncobj, fd);
> +   if (r) {
> +      amdgpu_cs_destroy_syncobj(ws->dev, fence->syncobj);
> +      FREE(fence);
> +      return NULL;
> +   }
> +   return (struct pipe_fence_handle*)fence;
> +}
> +
> +static int amdgpu_fence_export_sync_file(struct radeon_winsys *rws,
> +					 struct pipe_fence_handle *pfence)
> +{
> +   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
> +   struct amdgpu_fence *fence = (struct amdgpu_fence*)pfence;
> +
> +   if (amdgpu_fence_is_syncobj(fence)) {
> +      int fd, r;
> +
> +      /* Convert syncobj into sync_file. */
> +      r = amdgpu_cs_syncobj_export_sync_file(ws->dev, fence->syncobj, &fd);
> +      return r ? -1 : fd;
> +   }
> +
> +   os_wait_until_zero(&fence->submission_in_progress, PIPE_TIMEOUT_INFINITE);
> +
> +   /* Convert the amdgpu fence into a fence FD. */
> +   int fd;
> +   if (amdgpu_cs_fence_to_handle(ws->dev, &fence->fence,
> +                                 AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD,
> +                                 (uint32_t*)&fd))
> +      return -1;
> +
> +   return fd;
> +}
> +
>   static void amdgpu_fence_submitted(struct pipe_fence_handle *fence,
>                                      uint64_t seq_no,
>                                      uint64_t *user_fence_cpu_address)
>   {
>      struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
>   
>      rfence->fence.fence = seq_no;
>      rfence->user_fence_cpu_address = user_fence_cpu_address;
>      rfence->submission_in_progress = false;
>   }
> @@ -81,20 +137,35 @@ bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
>   {
>      struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
>      uint32_t expired;
>      int64_t abs_timeout;
>      uint64_t *user_fence_cpu;
>      int r;
>   
>      if (rfence->signalled)
>         return true;
>   
> +   /* Handle syncobjs. */
> +   if (amdgpu_fence_is_syncobj(rfence)) {
> +      /* Absolute timeouts are only used by BO fences, which aren't
> +       * backed by syncobjs.
> +       */
> +      assert(!absolute);
> +
> +      if (amdgpu_cs_syncobj_wait(rfence->ws->dev, &rfence->syncobj, 1,
> +                                 timeout, 0, NULL))
> +         return false;
> +
> +      rfence->signalled = true;
> +      return true;
> +   }
> +
>      if (absolute)
>         abs_timeout = timeout;
>      else
>         abs_timeout = os_time_get_absolute_timeout(timeout);
>   
>      /* The fence might not have a number assigned if its IB is being
>       * submitted in the other thread right now. Wait until the submission
>       * is done. */
>      if (!os_wait_until_zero_abs_timeout(&rfence->submission_in_progress,
>                                          abs_timeout))
> @@ -921,21 +992,22 @@ static unsigned add_fence_dependency_entry(struct amdgpu_cs_context *cs)
>                increment * sizeof(cs->fence_dependencies[0]));
>      }
>      return idx;
>   }
>   
>   static bool is_noop_fence_dependency(struct amdgpu_cs *acs,
>                                        struct amdgpu_fence *fence)
>   {
>      struct amdgpu_cs_context *cs = acs->csc;
>   
> -   if (fence->ctx == acs->ctx &&
> +   if (!amdgpu_fence_is_syncobj(fence) &&
> +       fence->ctx == acs->ctx &&
>          fence->fence.ip_type == cs->ib[IB_MAIN].ip_type &&
>          fence->fence.ip_instance == cs->ib[IB_MAIN].ip_instance &&
>          fence->fence.ring == cs->ib[IB_MAIN].ring)
>         return true;
>   
>      return amdgpu_fence_wait((void *)fence, 0, false);
>   }
>   
>   static void amdgpu_cs_add_fence_dependency(struct radeon_winsys_cs *rws,
>                                              struct pipe_fence_handle *pfence)
> @@ -1174,21 +1246,21 @@ bo_list_error:
>      if (r) {
>         fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r);
>         amdgpu_fence_signalled(cs->fence);
>         cs->error_code = r;
>         goto cleanup;
>      }
>   
>      if (acs->ctx->num_rejected_cs) {
>         r = -ECANCELED;
>      } else {
> -      struct drm_amdgpu_cs_chunk chunks[3];
> +      struct drm_amdgpu_cs_chunk chunks[4];
>         unsigned num_chunks = 0;
>   
>         /* Convert from dwords to bytes. */
>         cs->ib[IB_MAIN].ib_bytes *= 4;
>   
>         /* IB */
>         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_IB;
>         chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
>         chunks[num_chunks].chunk_data = (uintptr_t)&cs->ib[IB_MAIN];
>         num_chunks++;
> @@ -1196,38 +1268,69 @@ bo_list_error:
>         /* Fence */
>         if (has_user_fence) {
>            chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE;
>            chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
>            chunks[num_chunks].chunk_data = (uintptr_t)&acs->fence_chunk;
>            num_chunks++;
>         }
>   
>         /* Dependencies */
>         unsigned num_dependencies = cs->num_fence_dependencies;
> +      unsigned num_syncobj_dependencies = 0;
> +
>         if (num_dependencies) {
>            struct drm_amdgpu_cs_chunk_dep *dep_chunk =
>               alloca(num_dependencies * sizeof(*dep_chunk));
> +         unsigned num = 0;
>   
>            for (unsigned i = 0; i < num_dependencies; i++) {
>               struct amdgpu_fence *fence =
>                  (struct amdgpu_fence*)cs->fence_dependencies[i];
>   
> +            if (amdgpu_fence_is_syncobj(fence)) {
> +               num_syncobj_dependencies++;
> +               continue;
> +            }
> +
>               assert(!fence->submission_in_progress);
> -            amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[i]);
> +            amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[num++]);
>            }
>   
>            chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
> -         chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 *
> -                                        num_dependencies;
> +         chunks[num_chunks].length_dw = sizeof(dep_chunk[0]) / 4 * num;
>            chunks[num_chunks].chunk_data = (uintptr_t)dep_chunk;
>            num_chunks++;
>         }
> +
> +      /* Syncobj dependencies. */
> +      if (num_syncobj_dependencies) {
> +         struct drm_amdgpu_cs_chunk_sem *sem_chunk =
> +            alloca(num_syncobj_dependencies * sizeof(sem_chunk[0]));
> +         unsigned num = 0;
> +
> +         for (unsigned i = 0; i < num_dependencies; i++) {
> +            struct amdgpu_fence *fence =
> +               (struct amdgpu_fence*)cs->fence_dependencies[i];
> +
> +            if (!amdgpu_fence_is_syncobj(fence))
> +               continue;
> +
> +            assert(!fence->submission_in_progress);
> +            sem_chunk[num++].handle = fence->syncobj;
> +         }
> +
> +         chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_IN;
> +         chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 * num;
> +         chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk;
> +         num_chunks++;
> +      }
> +
>         assert(num_chunks <= ARRAY_SIZE(chunks));
>   
>         r = amdgpu_cs_submit_raw(ws->dev, acs->ctx->ctx, bo_list,
>                                  num_chunks, chunks, &seq_no);
>      }
>   
>      cs->error_code = r;
>      if (r) {
>         if (r == -ENOMEM)
>            fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
> @@ -1423,11 +1526,13 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
>      ws->base.cs_validate = amdgpu_cs_validate;
>      ws->base.cs_check_space = amdgpu_cs_check_space;
>      ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
>      ws->base.cs_flush = amdgpu_cs_flush;
>      ws->base.cs_get_next_fence = amdgpu_cs_get_next_fence;
>      ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
>      ws->base.cs_sync_flush = amdgpu_cs_sync_flush;
>      ws->base.cs_add_fence_dependency = amdgpu_cs_add_fence_dependency;
>      ws->base.fence_wait = amdgpu_fence_wait_rel_timeout;
>      ws->base.fence_reference = amdgpu_fence_reference;
> +   ws->base.fence_import_sync_file = amdgpu_fence_import_sync_file;
> +   ws->base.fence_export_sync_file = amdgpu_fence_export_sync_file;
>   }
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
> index de00912..21e1354 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
> @@ -132,49 +132,63 @@ struct amdgpu_cs {
>      /* Flush CS. */
>      void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
>      void *flush_data;
>   
>      struct util_queue_fence flush_completed;
>      struct pipe_fence_handle *next_fence;
>   };
>   
>   struct amdgpu_fence {
>      struct pipe_reference reference;
> +   /* If ctx == NULL, this fence is syncobj-based. */
> +   uint32_t syncobj;
>   
> +   struct amdgpu_winsys *ws;
>      struct amdgpu_ctx *ctx;  /* submission context */
>      struct amdgpu_cs_fence fence;
>      uint64_t *user_fence_cpu_address;
>   
>      /* If the fence is unknown due to an IB still being submitted
>       * in the other thread. */
>      volatile int submission_in_progress; /* bool (int for atomicity) */
>      volatile int signalled;              /* bool (int for atomicity) */
>   };
>   
> +static inline bool amdgpu_fence_is_syncobj(struct amdgpu_fence *fence)
> +{
> +   return fence->ctx == NULL;
> +}
> +
>   static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx)
>   {
>      if (p_atomic_dec_zero(&ctx->refcount)) {
>         amdgpu_cs_ctx_free(ctx->ctx);
>         amdgpu_bo_free(ctx->user_fence_bo);
>         FREE(ctx);
>      }
>   }
>   
>   static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
>                                             struct pipe_fence_handle *src)
>   {
>      struct amdgpu_fence **rdst = (struct amdgpu_fence **)dst;
>      struct amdgpu_fence *rsrc = (struct amdgpu_fence *)src;
>   
>      if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) {
> -      amdgpu_ctx_unref((*rdst)->ctx);
> -      FREE(*rdst);
> +      struct amdgpu_fence *fence = *rdst;
> +
> +      if (amdgpu_fence_is_syncobj(fence))
> +         amdgpu_cs_destroy_syncobj(fence->ws->dev, fence->syncobj);
> +      else
> +         amdgpu_ctx_unref(fence->ctx);
> +
> +      FREE(fence);
>      }
>      *rdst = rsrc;
>   }
>   
>   int amdgpu_lookup_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo);
>   
>   static inline struct amdgpu_ib *
>   amdgpu_ib(struct radeon_winsys_cs *base)
>   {
>      return (struct amdgpu_ib *)base;
> 

-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.