[Intel-gfx] [PATCH 4/6] drm/i915: Move timeline from GTT to ring
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Apr 23 10:44:19 UTC 2018
On 23/04/2018 11:13, Chris Wilson wrote:
> In the future, we want to move a request between engines. To achieve
> this, we first realise that we have two timelines in effect here. The
> first runs through the GTT is required for ordering vma access, which is
> tracked currently by engine. The second is implied by sequential
> execution of commands inside the ringbuffer. This timeline is one that
> maps to userspace's expectations when submitting requests (i.e. given the
> same context, batch A is executed before batch B). As the rings's
> timelines map to userspace and the GTT timeline an implementation
> detail, move the timeline from the GTT into the ring itself (per-context
> in logical-ring-contexts/execlists, or a global per-engine timeline for
> the shared ringbuffers in legacy submission.
>
> The two timelines are still assumed to be equivalent at the moment (no
> migrating requests between engines yet) and so we can simply move from
> one to the other without adding extra ordering.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 13 +----
> drivers/gpu/drm/i915/i915_gem.c | 9 ++--
> drivers/gpu/drm/i915/i915_gem_context.c | 15 +++++-
> drivers/gpu/drm/i915/i915_gem_context.h | 2 +
> drivers/gpu/drm/i915/i915_gem_gtt.c | 3 --
> drivers/gpu/drm/i915/i915_gem_gtt.h | 1 -
> drivers/gpu/drm/i915/i915_gem_timeline.c | 54 +++++++++++++++++--
> drivers/gpu/drm/i915/i915_gem_timeline.h | 4 ++
> drivers/gpu/drm/i915/i915_request.c | 15 +++---
> drivers/gpu/drm/i915/intel_engine_cs.c | 3 +-
> drivers/gpu/drm/i915/intel_lrc.c | 2 +-
> drivers/gpu/drm/i915/intel_ringbuffer.c | 9 +++-
> drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +-
> drivers/gpu/drm/i915/selftests/mock_engine.c | 3 +-
> .../gpu/drm/i915/selftests/mock_gem_device.c | 4 +-
> drivers/gpu/drm/i915/selftests/mock_gtt.c | 1 -
> 16 files changed, 101 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a7787c2cb53c..66123cf0eda3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2058,7 +2058,8 @@ struct drm_i915_private {
> void (*resume)(struct drm_i915_private *);
> void (*cleanup_engine)(struct intel_engine_cs *engine);
>
> - struct i915_gem_timeline global_timeline;
> + struct i915_gem_timeline execution_timeline;
> + struct i915_gem_timeline legacy_timeline;
> struct list_head timelines;
> struct list_head live_rings;
> u32 active_requests;
> @@ -3232,16 +3233,6 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id)
> return ctx;
> }
>
> -static inline struct intel_timeline *
> -i915_gem_context_lookup_timeline(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine)
> -{
> - struct i915_address_space *vm;
> -
> - vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base;
> - return &vm->timeline.engine[engine->id];
> -}
> -
> int i915_perf_open_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file);
> int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0097a77fae8d..1635975dbc16 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3110,10 +3110,10 @@ static void engine_skip_context(struct i915_request *request)
> {
> struct intel_engine_cs *engine = request->engine;
> struct i915_gem_context *hung_ctx = request->ctx;
> - struct intel_timeline *timeline;
> + struct intel_timeline *timeline = request->timeline;
> unsigned long flags;
>
> - timeline = i915_gem_context_lookup_timeline(hung_ctx, engine);
> + GEM_BUG_ON(timeline == engine->timeline);
Isn't this the guilty request, so would be on the engine timeline?
>
> spin_lock_irqsave(&engine->timeline->lock, flags);
> spin_lock(&timeline->lock);
> @@ -3782,7 +3782,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
>
> ret = wait_for_engines(i915);
> } else {
> - ret = wait_for_timeline(&i915->gt.global_timeline, flags);
> + ret = wait_for_timeline(&i915->gt.execution_timeline, flags);
> }
>
> return ret;
> @@ -5651,7 +5651,8 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
> WARN_ON(dev_priv->mm.object_count);
>
> mutex_lock(&dev_priv->drm.struct_mutex);
> - i915_gem_timeline_fini(&dev_priv->gt.global_timeline);
> + i915_gem_timeline_fini(&dev_priv->gt.legacy_timeline);
> + i915_gem_timeline_fini(&dev_priv->gt.execution_timeline);
> WARN_ON(!list_empty(&dev_priv->gt.timelines));
> mutex_unlock(&dev_priv->drm.struct_mutex);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 74435affe23f..58b185abe652 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -122,6 +122,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
> lockdep_assert_held(&ctx->i915->drm.struct_mutex);
> GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
>
> + i915_gem_timeline_free(ctx->timeline);
> i915_ppgtt_put(ctx->ppgtt);
>
> for (i = 0; i < I915_NUM_ENGINES; i++) {
> @@ -376,6 +377,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
> ctx->desc_template = default_desc_template(dev_priv, ppgtt);
> }
>
> + if (HAS_EXECLISTS(dev_priv)) {
> + struct i915_gem_timeline *timeline;
> +
> + timeline = i915_gem_timeline_create(dev_priv, ctx->name);
> + if (IS_ERR(timeline)) {
> + __destroy_hw_context(ctx, file_priv);
> + return ERR_CAST(timeline);
Leaks ppgtt by the look of it.
> + }
> +
> + ctx->timeline = timeline;
> + }
> +
> trace_i915_context_create(ctx);
>
> return ctx;
> @@ -584,7 +597,7 @@ static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine)
> list_for_each_entry(timeline, &engine->i915->gt.timelines, link) {
> struct intel_timeline *tl;
>
> - if (timeline == &engine->i915->gt.global_timeline)
> + if (timeline == &engine->i915->gt.execution_timeline)
> continue;
>
> tl = &timeline->engine[engine->id];
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index b12a8a8c5af9..140edcb424df 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -58,6 +58,8 @@ struct i915_gem_context {
> /** file_priv: owning file descriptor */
> struct drm_i915_file_private *file_priv;
>
> + struct i915_gem_timeline *timeline;
> +
> /**
> * @ppgtt: unique address space (GTT)
> *
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 21d72f695adb..e9d828324f67 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2111,8 +2111,6 @@ static void i915_address_space_init(struct i915_address_space *vm,
> struct drm_i915_private *dev_priv,
> const char *name)
> {
> - i915_gem_timeline_init(dev_priv, &vm->timeline, name);
> -
> drm_mm_init(&vm->mm, 0, vm->total);
> vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
>
> @@ -2129,7 +2127,6 @@ static void i915_address_space_fini(struct i915_address_space *vm)
> if (pagevec_count(&vm->free_pages))
> vm_free_pages_release(vm, true);
>
> - i915_gem_timeline_fini(&vm->timeline);
> drm_mm_takedown(&vm->mm);
> list_del(&vm->global_link);
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 6efc017e8bb3..98107925de48 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -257,7 +257,6 @@ struct i915_pml4 {
>
> struct i915_address_space {
> struct drm_mm mm;
> - struct i915_gem_timeline timeline;
> struct drm_i915_private *i915;
> struct device *dma;
> /* Every address space belongs to a struct file - except for the global
> diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
> index e9fd87604067..24f4068cc137 100644
> --- a/drivers/gpu/drm/i915/i915_gem_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
> @@ -95,12 +95,28 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
>
> int i915_gem_timeline_init__global(struct drm_i915_private *i915)
> {
> - static struct lock_class_key class;
> + static struct lock_class_key class1, class2;
> + int err;
> +
> + err = __i915_gem_timeline_init(i915,
> + &i915->gt.execution_timeline,
> + "[execution]", &class1,
> + "i915_execution_timeline");
> + if (err)
> + return err;
> +
> + err = __i915_gem_timeline_init(i915,
> + &i915->gt.legacy_timeline,
> + "[global]", &class2,
> + "i915_global_timeline");
> + if (err)
> + goto err_exec_timeline;
> +
> + return 0;
>
> - return __i915_gem_timeline_init(i915,
> - &i915->gt.global_timeline,
> - "[execution]",
> - &class, "&global_timeline->lock");
> +err_exec_timeline:
> + i915_gem_timeline_fini(&i915->gt.execution_timeline);
> + return err;
> }
>
> /**
> @@ -148,6 +164,34 @@ void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
> kfree(timeline->name);
> }
>
> +struct i915_gem_timeline *
> +i915_gem_timeline_create(struct drm_i915_private *i915, const char *name)
> +{
> + struct i915_gem_timeline *timeline;
> + int err;
> +
> + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
> + if (!timeline)
> + return ERR_PTR(-ENOMEM);
> +
> + err = i915_gem_timeline_init(i915, timeline, name);
> + if (err) {
> + kfree(timeline);
> + return ERR_PTR(err);
> + }
> +
> + return timeline;
> +}
> +
> +void i915_gem_timeline_free(struct i915_gem_timeline *timeline)
> +{
> + if (!timeline)
> + return;
> +
> + i915_gem_timeline_fini(timeline);
> + kfree(timeline);
> +}
> +
> #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> #include "selftests/mock_timeline.c"
> #include "selftests/i915_gem_timeline.c"
> diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
> index 6e82119e2cd8..780ed465c4fc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
> @@ -90,6 +90,10 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915);
> void i915_gem_timelines_park(struct drm_i915_private *i915);
> void i915_gem_timeline_fini(struct i915_gem_timeline *tl);
>
> +struct i915_gem_timeline *
> +i915_gem_timeline_create(struct drm_i915_private *i915, const char *name);
> +void i915_gem_timeline_free(struct i915_gem_timeline *timeline);
> +
> static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
> u64 context, u32 seqno)
> {
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 534b8d684cef..35869afdb199 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -639,6 +639,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> if (IS_ERR(ring))
> return ERR_CAST(ring);
> GEM_BUG_ON(!ring);
> + GEM_BUG_ON(ring->timeline == engine->timeline);
It's debugging only but feels out of place. Put it in
intel_engine_create_ring?
>
> ret = reserve_engine(engine);
> if (ret)
> @@ -711,8 +712,12 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> }
> }
>
> - rq->timeline = i915_gem_context_lookup_timeline(ctx, engine);
> - GEM_BUG_ON(rq->timeline == engine->timeline);
> + INIT_LIST_HEAD(&rq->active_list);
> + rq->i915 = i915;
> + rq->engine = engine;
> + rq->ctx = ctx;
> + rq->ring = ring;
> + rq->timeline = ring->timeline;
>
> spin_lock_init(&rq->lock);
> dma_fence_init(&rq->fence,
> @@ -727,12 +732,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>
> i915_sched_node_init(&rq->sched);
>
> - INIT_LIST_HEAD(&rq->active_list);
> - rq->i915 = i915;
> - rq->engine = engine;
> - rq->ctx = ctx;
> - rq->ring = ring;
> -
> /* No zalloc, must clear what we need by hand */
> rq->global_seqno = 0;
> rq->signaling.wait.seqno = 0;
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index a55a849b81b6..d44a8eb83379 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -453,7 +453,8 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
>
> static void intel_engine_init_timeline(struct intel_engine_cs *engine)
> {
> - engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id];
> + engine->timeline =
> + &engine->i915->gt.execution_timeline.engine[engine->id];
> }
>
> static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 029901a8fa38..fd3539034665 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2584,7 +2584,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
> goto error_deref_obj;
> }
>
> - ring = intel_engine_create_ring(engine, ctx->ring_size);
> + ring = intel_engine_create_ring(engine, ctx->timeline, ctx->ring_size);
> if (IS_ERR(ring)) {
> ret = PTR_ERR(ring);
> goto error_deref_obj;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 3453e7426f6b..4559fe1c574e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1117,7 +1117,9 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
> }
>
> struct intel_ring *
> -intel_engine_create_ring(struct intel_engine_cs *engine, int size)
> +intel_engine_create_ring(struct intel_engine_cs *engine,
> + struct i915_gem_timeline *timeline,
> + int size)
> {
> struct intel_ring *ring;
> struct i915_vma *vma;
> @@ -1131,6 +1133,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
> return ERR_PTR(-ENOMEM);
>
> INIT_LIST_HEAD(&ring->request_list);
> + ring->timeline = &timeline->engine[engine->id];
>
> ring->size = size;
> /* Workaround an erratum on the i830 which causes a hang if
> @@ -1327,7 +1330,9 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
> if (err)
> goto err;
>
> - ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
> + ring = intel_engine_create_ring(engine,
> + &engine->i915->gt.legacy_timeline,
> + 32 * PAGE_SIZE);
> if (IS_ERR(ring)) {
> err = PTR_ERR(ring);
> goto err;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index fd5a6363ab1d..3f63499734f7 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -128,6 +128,7 @@ struct intel_ring {
> struct i915_vma *vma;
> void *vaddr;
>
> + struct intel_timeline *timeline;
> struct list_head request_list;
> struct list_head live;
>
> @@ -767,7 +768,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
> #define CNL_HWS_CSB_WRITE_INDEX 0x2f
>
> struct intel_ring *
> -intel_engine_create_ring(struct intel_engine_cs *engine, int size);
> +intel_engine_create_ring(struct intel_engine_cs *engine,
> + struct i915_gem_timeline *timeline,
> + int size);
> int intel_ring_pin(struct intel_ring *ring,
> struct drm_i915_private *i915,
> unsigned int offset_bias);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index 74a88913623f..6a10cb734c35 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -173,8 +173,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
> engine->base.emit_breadcrumb = mock_emit_breadcrumb;
> engine->base.submit_request = mock_submit_request;
>
> - engine->base.timeline =
> - &i915->gt.global_timeline.engine[engine->base.id];
> + intel_engine_init_timeline(&engine->base);
>
> intel_engine_init_breadcrumbs(&engine->base);
> engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 9335b09d8b04..ed1bf3b2e47f 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -73,7 +73,9 @@ static void mock_device_release(struct drm_device *dev)
>
> mutex_lock(&i915->drm.struct_mutex);
> mock_fini_ggtt(i915);
> - i915_gem_timeline_fini(&i915->gt.global_timeline);
> + i915_gem_timeline_fini(&i915->gt.legacy_timeline);
> + i915_gem_timeline_fini(&i915->gt.execution_timeline);
> + WARN_ON(!list_empty(&i915->gt.timelines));
> mutex_unlock(&i915->drm.struct_mutex);
>
> destroy_workqueue(i915->wq);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> index e96873f96116..36c112088940 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> @@ -76,7 +76,6 @@ mock_ppgtt(struct drm_i915_private *i915,
>
> INIT_LIST_HEAD(&ppgtt->base.global_link);
> drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total);
> - i915_gem_timeline_init(i915, &ppgtt->base.timeline, name);
>
> ppgtt->base.clear_range = nop_clear_range;
> ppgtt->base.insert_page = mock_insert_page;
>
Looks good in principle, only some details to talk about.
Regards,
Tvrtko
More information about the Intel-gfx
mailing list