[Intel-gfx] [PATCH 05/12] drm/i915/scheduler: Record all dependencies upon request construction
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Nov 3 11:03:47 UTC 2016
On 02/11/2016 17:50, Chris Wilson wrote:
> The scheduler needs to know the dependencies of each request for the
> lifetime of the request, as it may choose to reschedule the requests at
> any time and must ensure the dependency tree is not broken. This is in
> addition to using the fence to only allow execution after all
> dependencies have been completed.
>
> One option was to extend the fence to support the bidirectional
> dependency tracking required by the scheduler. However, the mismatch in
> lifetimes between the submit fence and the request essentially meant
> that we had to build a completely separate struct (and we could not
> simply reuse the existing waitqueue in the fence for one half of the
> dependency tracking). The extra dependency tracking simply did not mesh
> well with the fence, and keeping it separate both keeps the fence
> implementation simpler and allows us to extend the dependency tracking
> into a priority tree (whilst maintaining support for reordering the
> tree).
>
> To avoid the additional allocations and list manipulations, the use of
> the priotree is disabled when there are no schedulers to use it.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem_request.c | 72 ++++++++++++++++++++++++++++++++-
> drivers/gpu/drm/i915/i915_gem_request.h | 23 +++++++++++
> 2 files changed, 94 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 9c8605c834f9..13090f226203 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -113,6 +113,59 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
> 	spin_unlock(&file_priv->mm.lock);
> }
>
> +static int
> +i915_priotree_add_dependency(struct i915_priotree *pt,
> +			     struct i915_priotree *signal,
> +			     struct i915_dependency *dep)
> +{
> +	unsigned long flags = 0;
> +
> +	if (!dep) {
> +		dep = kmalloc(sizeof(*dep), GFP_KERNEL);
I will mention a dedicated slab cache again, since this could easily be our
hottest allocation path. With a dedicated slab I've seen it grow to 5-7k
objects in some benchmarks, while the request slab stayed at around 1k over
the same period.
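Roughly what I have in mind, as an untested sketch only - the cache pointer
name and where it lives (it would have to be reachable from here, e.g. via
dev_priv) are made up purely for illustration:

	/* hypothetical cache pointer, e.g. in struct drm_i915_private */
	struct kmem_cache *dependencies;

	/* at driver init */
	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
					    SLAB_HWCACHE_ALIGN);
	if (!dev_priv->dependencies)
		return -ENOMEM;

	/* in i915_priotree_add_dependency(), instead of kmalloc() */
	dep = kmem_cache_alloc(dev_priv->dependencies, GFP_KERNEL);

	/* in i915_priotree_fini(), instead of kfree() */
	kmem_cache_free(dev_priv->dependencies, dep);

	/* at driver fini */
	kmem_cache_destroy(dev_priv->dependencies);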
> +		if (!dep)
> +			return -ENOMEM;
> +
> +		flags |= I915_DEPENDENCY_ALLOC;
> +	}
Not sure if it would be any nicer to just set the flags to
I915_DEPENDENCY_ALLOC after allocating and add an else path setting them to
zero here.
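Something like this, just to illustrate what I mean (no strong feelings
either way):

	if (!dep) {
		dep = kmalloc(sizeof(*dep), GFP_KERNEL);
		if (!dep)
			return -ENOMEM;

		dep->flags = I915_DEPENDENCY_ALLOC;
	} else {
		dep->flags = 0;
	}

which would also let you drop the local flags variable.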
> +
> +	dep->signal = signal;
> +	dep->flags = flags;
> +
> +	list_add(&dep->post_link, &signal->post_list);
> +	list_add(&dep->pre_link, &pt->pre_list);
> +	return 0;
> +}
> +
> +static void
> +i915_priotree_fini(struct i915_priotree *pt)
> +{
> +	struct i915_dependency *dep, *next;
> +
> +	/* Everyone we depended upon should retire before us and remove
> +	 * themselves from our list. However, retirement is run independently
> +	 * on each timeline and so we may be called out-of-order.
> +	 */
> +	list_for_each_entry_safe(dep, next, &pt->pre_list, pre_link) {
> +		list_del(&dep->post_link);
> +		if (dep->flags & I915_DEPENDENCY_ALLOC)
> +			kfree(dep);
> +	}
> +
> +	/* Remove ourselves from everyone who depends upon us */
> +	list_for_each_entry_safe(dep, next, &pt->post_list, post_link) {
> +		list_del(&dep->pre_link);
> +		if (dep->flags & I915_DEPENDENCY_ALLOC)
> +			kfree(dep);
> +	}
> +}
> +
> +static void
> +i915_priotree_init(struct i915_priotree *pt)
> +{
> +	INIT_LIST_HEAD(&pt->pre_list);
> +	INIT_LIST_HEAD(&pt->post_list);
> +}
> +
> void i915_gem_retire_noop(struct i915_gem_active *active,
> 			  struct drm_i915_gem_request *request)
> {
> @@ -182,6 +235,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
> 	i915_gem_context_put(request->ctx);
>
> 	dma_fence_signal(&request->fence);
> +
> +	i915_priotree_fini(&request->priotree);
> 	i915_gem_request_put(request);
> }
>
> @@ -464,6 +519,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
> 	 */
> 	i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq);
>
> +	i915_priotree_init(&req->priotree);
> +
> 	INIT_LIST_HEAD(&req->active_list);
> 	req->i915 = dev_priv;
> 	req->engine = engine;
> @@ -517,6 +574,14 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
>
> 	GEM_BUG_ON(to == from);
>
> +	if (to->engine->schedule) {
> +		ret = i915_priotree_add_dependency(&to->priotree,
> +						   &from->priotree,
> +						   NULL);
> +		if (ret < 0)
> +			return ret;
> +	}
> +
> 	if (to->timeline == from->timeline)
> 		return 0;
>
> @@ -740,9 +805,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
>
> 	prev = i915_gem_active_raw(&timeline->last_request,
> 				   &request->i915->drm.struct_mutex);
> -	if (prev)
> +	if (prev) {
> 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
> 					     &request->submitq);
> +		if (engine->schedule)
> +			i915_priotree_add_dependency(&request->priotree,
> +						     &prev->priotree,
> +						     &request->depq);
> +	}
>
> 	spin_lock_irq(&timeline->lock);
> 	list_add_tail(&request->link, &timeline->requests);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 725d04128eaf..0cb2ba546062 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -41,6 +41,18 @@ struct intel_signal_node {
> 	struct intel_wait wait;
> };
>
> +struct i915_dependency {
> +	struct i915_priotree *signal;
> +	struct list_head pre_link, post_link;
> +	unsigned long flags;
> +#define I915_DEPENDENCY_ALLOC BIT(0)
> +};
> +
> +struct i915_priotree {
> +	struct list_head pre_list; /* who is before us, we depend upon */
> +	struct list_head post_list; /* who is after us, they depend upon us */
> +};
I need a picture to imagine this data structure. :(
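If I am reading the code correctly, for a request B depending on a request A
the picture is roughly:

	request A (signaler)                    request B (waiter)
	priotree.post_list                      priotree.pre_list
	       |                                       |
	       '------> struct i915_dependency <------'
	                  .post_link (on A's post_list)
	                  .pre_link  (on B's pre_list)
	                  .signal -> &A->priotree

so each i915_dependency node sits on two lists at once, one per direction,
which is what lets i915_priotree_fini() unlink it from either end.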
> +
> /**
> * Request queue structure.
> *
> @@ -89,6 +101,17 @@ struct drm_i915_gem_request {
> 	wait_queue_t submitq;
> 	wait_queue_t execq;
>
> +	/* A list of everyone we wait upon, and everyone who waits upon us.
> +	 * Even though we will not be submitted to the hardware before the
> +	 * submit fence is signaled (it waits for all external events as well
> +	 * as our own requests), the scheduler still needs to know the
> +	 * dependency tree for the lifetime of the request (from execbuf
> +	 * to retirement), i.e. bidirectional dependency information for the
> +	 * request not tied to individual fences.
> +	 */
> +	struct i915_priotree priotree;
> +	struct i915_dependency depq;
Why depq and not dep_node or something? Makes me wonder what I am missing
about the "q" - it is not a queue, is it?
> +
> 	u32 global_seqno;
>
> 	/** GEM sequence number associated with the previous request,
>
Regards,
Tvrtko