[Intel-gfx] [PATCH 14/20] drm/i915/gt: Track timeline activeness in enter/exit
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jul 22 16:14:23 UTC 2019
On 18/07/2019 08:00, Chris Wilson wrote:
> Lift moving the timeline to/from the active_list on enter/exit in order
> to shorten the active tracking span in comparison to the existing
> pin/unpin.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_pm.c | 1 -
> drivers/gpu/drm/i915/gt/intel_context.c | 2 +
> drivers/gpu/drm/i915/gt/intel_engine_pm.c | 1 +
> drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +
> drivers/gpu/drm/i915/gt/intel_timeline.c | 98 +++++++------------
> drivers/gpu/drm/i915/gt/intel_timeline.h | 3 +-
> .../gpu/drm/i915/gt/intel_timeline_types.h | 1 +
> drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 -
> 8 files changed, 46 insertions(+), 66 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index 8faf262278ae..195ee6eedac0 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -39,7 +39,6 @@ static void i915_gem_park(struct drm_i915_private *i915)
> i915_gem_batch_pool_fini(&engine->batch_pool);
> }
>
> - intel_timelines_park(i915);
> i915_vma_parked(i915);
>
> i915_globals_park();
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 9830edda1ade..87c84cc0f658 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -242,10 +242,12 @@ int __init i915_global_context_init(void)
> void intel_context_enter_engine(struct intel_context *ce)
> {
> intel_engine_pm_get(ce->engine);
> + intel_timeline_enter(ce->ring->timeline);
> }
>
> void intel_context_exit_engine(struct intel_context *ce)
> {
> + intel_timeline_exit(ce->ring->timeline);
> intel_engine_pm_put(ce->engine);
> }
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index e74fbf04a68d..072f65e6a09e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -89,6 +89,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
>
> /* Check again on the next retirement. */
> engine->wakeref_serial = engine->serial + 1;
> + intel_timeline_enter(rq->timeline);
>
> i915_request_add_barriers(rq);
> __i915_request_commit(rq);
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 884dfc1cb033..aceb990ae3b9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3253,6 +3253,8 @@ static void virtual_context_enter(struct intel_context *ce)
>
> for (n = 0; n < ve->num_siblings; n++)
> intel_engine_pm_get(ve->siblings[n]);
> +
> + intel_timeline_enter(ce->ring->timeline);
Couldn't we enter the timelines of all sibling contexts here instead? It
would be a bit wasteful, I guess. And there must be a place where this is
already done — but it can't be when picking the engine, so where is it?
> }
>
> static void virtual_context_exit(struct intel_context *ce)
> @@ -3260,6 +3262,8 @@ static void virtual_context_exit(struct intel_context *ce)
> struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> unsigned int n;
>
> + intel_timeline_exit(ce->ring->timeline);
> +
> for (n = 0; n < ve->num_siblings; n++)
> intel_engine_pm_put(ve->siblings[n]);
> }
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index 6daa9eb59e19..4af0b9801d91 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -278,64 +278,11 @@ void intel_timelines_init(struct drm_i915_private *i915)
> timelines_init(&i915->gt);
> }
>
> -static void timeline_add_to_active(struct intel_timeline *tl)
> -{
> - struct intel_gt_timelines *gt = &tl->gt->timelines;
> -
> - mutex_lock(>->mutex);
> - list_add(&tl->link, >->active_list);
> - mutex_unlock(>->mutex);
> -}
> -
> -static void timeline_remove_from_active(struct intel_timeline *tl)
> -{
> - struct intel_gt_timelines *gt = &tl->gt->timelines;
> -
> - mutex_lock(>->mutex);
> - list_del(&tl->link);
> - mutex_unlock(>->mutex);
> -}
> -
> -static void timelines_park(struct intel_gt *gt)
> -{
> - struct intel_gt_timelines *timelines = >->timelines;
> - struct intel_timeline *timeline;
> -
> - mutex_lock(&timelines->mutex);
> - list_for_each_entry(timeline, &timelines->active_list, link) {
> - /*
> - * All known fences are completed so we can scrap
> - * the current sync point tracking and start afresh,
> - * any attempt to wait upon a previous sync point
> - * will be skipped as the fence was signaled.
> - */
> - i915_syncmap_free(&timeline->sync);
> - }
> - mutex_unlock(&timelines->mutex);
> -}
> -
> -/**
> - * intel_timelines_park - called when the driver idles
> - * @i915: the drm_i915_private device
> - *
> - * When the driver is completely idle, we know that all of our sync points
> - * have been signaled and our tracking is then entirely redundant. Any request
> - * to wait upon an older sync point will be completed instantly as we know
> - * the fence is signaled and therefore we will not even look them up in the
> - * sync point map.
> - */
> -void intel_timelines_park(struct drm_i915_private *i915)
> -{
> - timelines_park(&i915->gt);
> -}
> -
> void intel_timeline_fini(struct intel_timeline *timeline)
> {
> GEM_BUG_ON(timeline->pin_count);
> GEM_BUG_ON(!list_empty(&timeline->requests));
>
> - i915_syncmap_free(&timeline->sync);
> -
> if (timeline->hwsp_cacheline)
> cacheline_free(timeline->hwsp_cacheline);
> else
> @@ -370,6 +317,7 @@ int intel_timeline_pin(struct intel_timeline *tl)
> if (tl->pin_count++)
> return 0;
> GEM_BUG_ON(!tl->pin_count);
> + GEM_BUG_ON(tl->active_count);
>
> err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
> if (err)
> @@ -380,7 +328,6 @@ int intel_timeline_pin(struct intel_timeline *tl)
> offset_in_page(tl->hwsp_offset);
>
> cacheline_acquire(tl->hwsp_cacheline);
> - timeline_add_to_active(tl);
>
> return 0;
>
> @@ -389,6 +336,40 @@ int intel_timeline_pin(struct intel_timeline *tl)
> return err;
> }
>
> +void intel_timeline_enter(struct intel_timeline *tl)
> +{
> + struct intel_gt_timelines *timelines = &tl->gt->timelines;
> +
> + GEM_BUG_ON(!tl->pin_count);
> + if (tl->active_count++)
> + return;
> + GEM_BUG_ON(!tl->active_count); /* overflow? */
> +
> + mutex_lock(&timelines->mutex);
> + list_add(&tl->link, &timelines->active_list);
> + mutex_unlock(&timelines->mutex);
> +}
> +
> +void intel_timeline_exit(struct intel_timeline *tl)
> +{
> + struct intel_gt_timelines *timelines = &tl->gt->timelines;
> +
> + GEM_BUG_ON(!tl->active_count);
> + if (--tl->active_count)
> + return;
> +
> + mutex_lock(&timelines->mutex);
> + list_del(&tl->link);
> + mutex_unlock(&timelines->mutex);
So we end up with one lock protecting tl->active_count and a different
one protecting the list of active timelines?
> +
> + /*
> +	 * Since this timeline is idle, all barriers upon which we were waiting
> + * must also be complete and so we can discard the last used barriers
> + * without loss of information.
> + */
> + i915_syncmap_free(&tl->sync);
> +}
> +
> static u32 timeline_advance(struct intel_timeline *tl)
> {
> GEM_BUG_ON(!tl->pin_count);
> @@ -546,16 +527,9 @@ void intel_timeline_unpin(struct intel_timeline *tl)
> if (--tl->pin_count)
> return;
>
> - timeline_remove_from_active(tl);
> + GEM_BUG_ON(tl->active_count);
> cacheline_release(tl->hwsp_cacheline);
>
> - /*
> -	 * Since this timeline is idle, all barriers upon which we were waiting
> - * must also be complete and so we can discard the last used barriers
> - * without loss of information.
> - */
> - i915_syncmap_free(&tl->sync);
> -
> __i915_vma_unpin(tl->hwsp_ggtt);
> }
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
> index e08cebf64833..f583af1ba18d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.h
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
> @@ -77,9 +77,11 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
> }
>
> int intel_timeline_pin(struct intel_timeline *tl);
> +void intel_timeline_enter(struct intel_timeline *tl);
> int intel_timeline_get_seqno(struct intel_timeline *tl,
> struct i915_request *rq,
> u32 *seqno);
> +void intel_timeline_exit(struct intel_timeline *tl);
> void intel_timeline_unpin(struct intel_timeline *tl);
>
> int intel_timeline_read_hwsp(struct i915_request *from,
> @@ -87,7 +89,6 @@ int intel_timeline_read_hwsp(struct i915_request *from,
> u32 *hwsp_offset);
>
> void intel_timelines_init(struct drm_i915_private *i915);
> -void intel_timelines_park(struct drm_i915_private *i915);
> void intel_timelines_fini(struct drm_i915_private *i915);
>
> #endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> index 9a71aea7a338..b820ee76b7f5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> @@ -58,6 +58,7 @@ struct intel_timeline {
> */
> struct i915_syncmap *sync;
>
> + unsigned int active_count;
Is it becoming non-obvious what pin_count means versus active_count? I
suggest adding some comments alongside these members.
> struct list_head link;
> struct intel_gt *gt;
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> index f0a840030382..d54113697745 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> @@ -816,8 +816,6 @@ static int live_hwsp_recycle(void *arg)
>
> if (err)
> goto out;
> -
> - intel_timelines_park(i915); /* Encourage recycling! */
> } while (!__igt_timeout(end_time, NULL));
> }
>
>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list