[PATCH 5/6] drm/amdgpu: add timeline support in amdgpu CS
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Sep 24 10:25:02 UTC 2018
Am 20.09.2018 um 13:03 schrieb Chunming Zhou:
> syncobj wait/signal operation is appending in command submission.
>
> Signed-off-by: Chunming Zhou <david1.zhou at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 114 +++++++++++++++++++------
> include/uapi/drm/amdgpu_drm.h | 10 +++
> 3 files changed, 104 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 447c4c7a36d6..6e4a3db56833 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -975,6 +975,11 @@ struct amdgpu_cs_chunk {
> void *kdata;
> };
>
> +struct amdgpu_cs_syncobj_post_dep {
> + struct drm_syncobj *post_dep_syncobj;
> + u64 point;
> +};
> +
> struct amdgpu_cs_parser {
> struct amdgpu_device *adev;
> struct drm_file *filp;
> @@ -1003,9 +1008,8 @@ struct amdgpu_cs_parser {
>
> /* user fence */
> struct amdgpu_bo_list_entry uf_entry;
> -
> + struct amdgpu_cs_syncobj_post_dep *post_dep_syncobjs;
> unsigned num_post_dep_syncobjs;
> - struct drm_syncobj **post_dep_syncobjs;
> };
>
> static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 412fac238575..0efe75bf2f03 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -204,6 +204,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
> case AMDGPU_CHUNK_ID_DEPENDENCIES:
> case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
> case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
> + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
> + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
> break;
>
> default:
> @@ -783,7 +785,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> &parser->validated);
>
> for (i = 0; i < parser->num_post_dep_syncobjs; i++)
> - drm_syncobj_put(parser->post_dep_syncobjs[i]);
> + drm_syncobj_put(parser->post_dep_syncobjs[i].post_dep_syncobj);
> kfree(parser->post_dep_syncobjs);
>
> dma_fence_put(parser->fence);
> @@ -1098,11 +1100,13 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
> }
>
> static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
> - uint32_t handle)
> + uint32_t handle, u64 point,
> + u64 flags)
> {
> int r;
> struct dma_fence *fence;
> - r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence);
> +
> + r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
> if (r)
> return r;
>
> @@ -1113,48 +1117,91 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
> }
>
> static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
> - struct amdgpu_cs_chunk *chunk)
> + struct amdgpu_cs_chunk *chunk,
> + bool timeline)
You should really just duplicate the function. The only thing you have
in common here is allocating the post_dep_sync objects.
That would also avoid the need to make the two chunk types mutually
exclusive.
Christian.
> {
> unsigned num_deps;
> int i, r;
> - struct drm_amdgpu_cs_chunk_sem *deps;
>
> - deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> - num_deps = chunk->length_dw * 4 /
> - sizeof(struct drm_amdgpu_cs_chunk_sem);
> + if (!timeline) {
> + struct drm_amdgpu_cs_chunk_sem *deps;
>
> - for (i = 0; i < num_deps; ++i) {
> - r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
> + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> + num_deps = chunk->length_dw * 4 /
> + sizeof(struct drm_amdgpu_cs_chunk_sem);
> + for (i = 0; i < num_deps; ++i) {
> + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
> + 0, 0);
> if (r)
> return r;
> + }
> + } else {
> + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
> +
> + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
> + num_deps = chunk->length_dw * 4 /
> + sizeof(struct drm_amdgpu_cs_chunk_syncobj);
> + for (i = 0; i < num_deps; ++i) {
> + r = amdgpu_syncobj_lookup_and_add_to_sync(p, syncobj_deps[i].handle,
> + syncobj_deps[i].point,
> + syncobj_deps[i].flags);
> + if (r)
> + return r;
> + }
> }
> +
> return 0;
> }
>
> static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
> - struct amdgpu_cs_chunk *chunk)
> + struct amdgpu_cs_chunk *chunk,
> + bool timeline)
> {
> unsigned num_deps;
> int i;
> struct drm_amdgpu_cs_chunk_sem *deps;
> - deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> - num_deps = chunk->length_dw * 4 /
> - sizeof(struct drm_amdgpu_cs_chunk_sem);
> + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
> +
> + if (!timeline) {
> + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> + num_deps = chunk->length_dw * 4 /
> + sizeof(struct drm_amdgpu_cs_chunk_sem);
> + } else {
> + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
> + num_deps = chunk->length_dw * 4 /
> + sizeof(struct drm_amdgpu_cs_chunk_syncobj);
> + }
> +
>
> p->post_dep_syncobjs = kmalloc_array(num_deps,
> - sizeof(struct drm_syncobj *),
> + sizeof(struct amdgpu_cs_syncobj_post_dep),
> GFP_KERNEL);
> p->num_post_dep_syncobjs = 0;
>
> if (!p->post_dep_syncobjs)
> return -ENOMEM;
>
> - for (i = 0; i < num_deps; ++i) {
> - p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
> - if (!p->post_dep_syncobjs[i])
> - return -EINVAL;
> - p->num_post_dep_syncobjs++;
> +
> + if (!timeline) {
> + for (i = 0; i < num_deps; ++i) {
> + p->post_dep_syncobjs[i].post_dep_syncobj =
> + drm_syncobj_find(p->filp, deps[i].handle);
> + if (!p->post_dep_syncobjs[i].post_dep_syncobj)
> + return -EINVAL;
> + p->post_dep_syncobjs[i].point = 0;
> + p->num_post_dep_syncobjs++;
> + }
> + } else {
> + for (i = 0; i < num_deps; ++i) {
> + p->post_dep_syncobjs[i].post_dep_syncobj =
> + drm_syncobj_find(p->filp, syncobj_deps[i].handle);
> + if (!p->post_dep_syncobjs[i].post_dep_syncobj)
> + return -EINVAL;
> + p->post_dep_syncobjs[i].point = syncobj_deps[i].point;
> + p->num_post_dep_syncobjs++;
> + }
> }
> +
> return 0;
> }
>
> @@ -1168,18 +1215,32 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>
> chunk = &p->chunks[i];
>
> - if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
> + switch (chunk->chunk_id) {
> + case AMDGPU_CHUNK_ID_DEPENDENCIES:
> r = amdgpu_cs_process_fence_dep(p, chunk);
> if (r)
> return r;
> - } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
> - r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
> + break;
> + case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
> + r = amdgpu_cs_process_syncobj_in_dep(p, chunk, false);
> + if (r)
> + return r;
> + break;
> + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
> + r = amdgpu_cs_process_syncobj_out_dep(p, chunk, false);
> if (r)
> return r;
> - } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
> - r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
> + break;
> + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
> + r = amdgpu_cs_process_syncobj_in_dep(p, chunk, true);
> if (r)
> return r;
> + break;
> + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
> + r = amdgpu_cs_process_syncobj_out_dep(p, chunk, true);
> + if (r)
> + return r;
> + break;
> }
> }
>
> @@ -1191,7 +1252,8 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
> int i;
>
> for (i = 0; i < p->num_post_dep_syncobjs; ++i)
> - drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence);
> + drm_syncobj_replace_fence(p->post_dep_syncobjs[i].post_dep_syncobj,
> + p->post_dep_syncobjs[i].point, p->fence);
> }
>
> static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 1ceec56de015..412359b446f1 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -517,6 +517,8 @@ struct drm_amdgpu_gem_va {
> #define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04
> #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05
> #define AMDGPU_CHUNK_ID_BO_HANDLES 0x06
> +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x07
> +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x08
>
> struct drm_amdgpu_cs_chunk {
> __u32 chunk_id;
> @@ -592,6 +594,14 @@ struct drm_amdgpu_cs_chunk_sem {
> __u32 handle;
> };
>
> +struct drm_amdgpu_cs_chunk_syncobj {
> + __u32 handle;
> + __u32 pad;
> + __u64 point;
> + __u64 flags;
> +};
> +
> +
> #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0
> #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1
> #define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2
More information about the amd-gfx
mailing list