[Intel-gfx] [PATCH 2/2] drm/i915: Immediately execute the fenced work
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Mar 25 12:58:10 UTC 2020
On 25/03/2020 12:02, Chris Wilson wrote:
> If the caller allows and we do not have to wait for any signals,
> immediately execute the work within the caller's process. By doing so we
> avoid the overhead of scheduling a new task, and the latency in
> executing it, at the cost of pulling that work back into the immediate
> context. (Sometimes we still prefer to offload the task to another cpu,
> especially if we plan on executing many such tasks in parallel for this
> client.)
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +-
> drivers/gpu/drm/i915/i915_sw_fence_work.c | 5 +++-
> drivers/gpu/drm/i915/i915_sw_fence_work.h | 23 +++++++++++++++++++
> drivers/gpu/drm/i915/i915_vma.c | 2 +-
> 4 files changed, 29 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 6b3013d20851..c643eec4dca0 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -1822,7 +1822,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
> dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
> dma_resv_unlock(shadow->resv);
>
> - dma_fence_work_commit(&pw->base);
> + dma_fence_work_commit_imm(&pw->base);
> return 0;
>
> err_batch_unlock:
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c
> index 997b2998f1f2..a3a81bb8f2c3 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence_work.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c
> @@ -38,7 +38,10 @@ fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
>
> if (!f->dma.error) {
> dma_fence_get(&f->dma);
> - queue_work(system_unbound_wq, &f->work);
> + if (test_bit(DMA_FENCE_WORK_IMM, &f->dma.flags))
> + fence_work(&f->work);
> + else
> + queue_work(system_unbound_wq, &f->work);
> } else {
> fence_complete(f);
> }
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.h b/drivers/gpu/drm/i915/i915_sw_fence_work.h
> index 3a22b287e201..2c409f11c5c5 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence_work.h
> +++ b/drivers/gpu/drm/i915/i915_sw_fence_work.h
> @@ -32,6 +32,10 @@ struct dma_fence_work {
> const struct dma_fence_work_ops *ops;
> };
>
> +enum {
> + DMA_FENCE_WORK_IMM = DMA_FENCE_FLAG_USER_BITS,
> +};
> +
> void dma_fence_work_init(struct dma_fence_work *f,
> const struct dma_fence_work_ops *ops);
> int dma_fence_work_chain(struct dma_fence_work *f, struct dma_fence *signal);
> @@ -41,4 +45,23 @@ static inline void dma_fence_work_commit(struct dma_fence_work *f)
> i915_sw_fence_commit(&f->chain);
> }
>
> +/**
> + * dma_fence_work_commit_imm - Commit the fence, and if possible execute locally.
> + * @f: the fenced worker
> + *
> + * Instead of always scheduling a worker to execute the callback (see
> + * dma_fence_work_commit()), we try to execute the callback immediately in
> + * the local context. It is required that the fence be committed before it
> + * is published, and that no other threads try to tamper with the number
> + * of asynchronous waits on the fence (or else the callback will be
> + * executed in the wrong context, i.e. not the caller's).
> + */
> +static inline void dma_fence_work_commit_imm(struct dma_fence_work *f)
> +{
> + if (atomic_read(&f->chain.pending) <= 1)
> + __set_bit(DMA_FENCE_WORK_IMM, &f->dma.flags);
> +
> + dma_fence_work_commit(f);
> +}
> +
> #endif /* I915_SW_FENCE_WORK_H */
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 08699fa069aa..191577a98390 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -980,7 +980,7 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> mutex_unlock(&vma->vm->mutex);
> err_fence:
> if (work)
> - dma_fence_work_commit(&work->base);
> + dma_fence_work_commit_imm(&work->base);
> if (wakeref)
> intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
> err_pages:
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list