[PATCH 1/5] drm/amdgpu:keep ctx alive till all job finished
Liu, Monk
Monk.Liu at amd.com
Wed May 3 03:30:33 UTC 2017
1, This is necessary — otherwise, how can I access the entity pointer after a job has timed out? And why is it dangerous?
2, Which status field in the fences were you referring to? I need to judge whether it can satisfy my requirement.
-----Original Message-----
From: Christian König [mailto:deathsimple at vodafone.de]
Sent: Monday, May 01, 2017 10:48 PM
To: Liu, Monk <Monk.Liu at amd.com>; amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH 1/5] drm/amdgpu:keep ctx alive till all job finished
Am 01.05.2017 um 09:22 schrieb Monk Liu:
> for TDR guilty context feature, we need access ctx/s_entity field
> member through sched job pointer,so ctx must keep alive till all job
> from it signaled.
NAK, that is unnecessary and quite dangerous.
Instead we have the designed status field in the fences which should be checked for that.
Regards,
Christian.
>
> Change-Id: Ib87e9502f7a5c8c054c7e56956d7f7ad75998e43
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 9 +++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 9 +++++++--
> drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 6 ------
> drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 1 +
> 6 files changed, 23 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e330009..8e031d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -760,10 +760,12 @@ struct amdgpu_ib {
> uint32_t flags;
> };
>
> +struct amdgpu_ctx;
> +
> extern const struct amd_sched_backend_ops amdgpu_sched_ops;
>
> int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
> - struct amdgpu_job **job, struct amdgpu_vm *vm);
> + struct amdgpu_job **job, struct amdgpu_vm *vm, struct
> +amdgpu_ctx *ctx);
> int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
> struct amdgpu_job **job);
>
> @@ -802,6 +804,7 @@ struct amdgpu_ctx_mgr {
>
> struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
> int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
> +struct amdgpu_ctx *amdgpu_ctx_kref_get(struct amdgpu_ctx *ctx);
>
> uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
> struct fence *fence);
> @@ -1129,6 +1132,7 @@ struct amdgpu_job {
> struct amdgpu_sync sync;
> struct amdgpu_ib *ibs;
> struct fence *fence; /* the hw fence */
> + struct amdgpu_ctx *ctx;
> uint32_t preamble_status;
> uint32_t num_ibs;
> void *owner;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 699f5fe..267fb65 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -234,7 +234,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
> }
> }
>
> - ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
> + ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm, p->ctx);
> if (ret)
> goto free_all_kdata;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index b4bbbb3..81438af 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -25,6 +25,13 @@
> #include <drm/drmP.h>
> #include "amdgpu.h"
>
> +struct amdgpu_ctx *amdgpu_ctx_kref_get(struct amdgpu_ctx *ctx) {
> + if (ctx)
> + kref_get(&ctx->refcount);
> + return ctx;
> +}
> +
> static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
> {
> unsigned i, j;
> @@ -56,6 +63,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
> rq, amdgpu_sched_jobs);
> if (r)
> goto failed;
> +
> + ctx->rings[i].entity.ptr_guilty = &ctx->guilty; /* kernel entity
> +doesn't have ptr_guilty */
> }
>
> return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 690ef3d..208da11 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -40,7 +40,7 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
> }
>
> int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
> - struct amdgpu_job **job, struct amdgpu_vm *vm)
> + struct amdgpu_job **job, struct amdgpu_vm *vm, struct
> +amdgpu_ctx *ctx)
> {
> size_t size = sizeof(struct amdgpu_job);
>
> @@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
> (*job)->vm = vm;
> (*job)->ibs = (void *)&(*job)[1];
> (*job)->num_ibs = num_ibs;
> + (*job)->ctx = amdgpu_ctx_kref_get(ctx);
>
> amdgpu_sync_create(&(*job)->sync);
>
> @@ -68,7 +69,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
> {
> int r;
>
> - r = amdgpu_job_alloc(adev, 1, job, NULL);
> + r = amdgpu_job_alloc(adev, 1, job, NULL, NULL);
> if (r)
> return r;
>
> @@ -94,6 +95,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
> static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
> {
> struct amdgpu_job *job = container_of(s_job, struct amdgpu_job,
> base);
> + struct amdgpu_ctx *ctx = job->ctx;
> +
> + if (ctx)
> + amdgpu_ctx_put(ctx);
>
> fence_put(job->fence);
> amdgpu_sync_free(&job->sync);
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> index 6f4e31f..9100ca8 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> @@ -208,12 +208,6 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
> if (!amd_sched_entity_is_initialized(sched, entity))
> return;
>
> - /**
> - * The client will not queue more IBs during this fini, consume existing
> - * queued IBs
> - */
> - wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity));
> -
> amd_sched_rq_remove_entity(rq, entity);
> kfifo_free(&entity->job_queue);
> }
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> index 8cb41d3..ccbbcb0 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> @@ -49,6 +49,7 @@ struct amd_sched_entity {
>
> struct fence *dependency;
> struct fence_cb cb;
> + bool *ptr_guilty;
> };
>
> /**
More information about the amd-gfx
mailing list