[Intel-gfx] [PATCH 4/6] drm/i915: Move timeline from GTT to ring

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Apr 23 10:44:19 UTC 2018


On 23/04/2018 11:13, Chris Wilson wrote:
> In the future, we want to move a request between engines. To achieve
> this, we first realise that we have two timelines in effect here. The
> first runs through the GTT is required for ordering vma access, which is
> tracked currently by engine. The second is implied by sequential
> execution of commands inside the ringbuffer. This timeline is one that
> maps to userspace's expectations when submitting requests (i.e. given the
> same context, batch A is executed before batch B). As the rings's
> timelines map to userspace and the GTT timeline an implementation
> detail, move the timeline from the GTT into the ring itself (per-context
> in logical-ring-contexts/execlists, or a global per-engine timeline for
> the shared ringbuffers in legacy submission.
> 
> The two timelines are still assumed to be equivalent at the moment (no
> migrating requests between engines yet) and so we can simply move from
> one to the other without adding extra ordering.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h               | 13 +----
>   drivers/gpu/drm/i915/i915_gem.c               |  9 ++--
>   drivers/gpu/drm/i915/i915_gem_context.c       | 15 +++++-
>   drivers/gpu/drm/i915/i915_gem_context.h       |  2 +
>   drivers/gpu/drm/i915/i915_gem_gtt.c           |  3 --
>   drivers/gpu/drm/i915/i915_gem_gtt.h           |  1 -
>   drivers/gpu/drm/i915/i915_gem_timeline.c      | 54 +++++++++++++++++--
>   drivers/gpu/drm/i915/i915_gem_timeline.h      |  4 ++
>   drivers/gpu/drm/i915/i915_request.c           | 15 +++---
>   drivers/gpu/drm/i915/intel_engine_cs.c        |  3 +-
>   drivers/gpu/drm/i915/intel_lrc.c              |  2 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.c       |  9 +++-
>   drivers/gpu/drm/i915/intel_ringbuffer.h       |  5 +-
>   drivers/gpu/drm/i915/selftests/mock_engine.c  |  3 +-
>   .../gpu/drm/i915/selftests/mock_gem_device.c  |  4 +-
>   drivers/gpu/drm/i915/selftests/mock_gtt.c     |  1 -
>   16 files changed, 101 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a7787c2cb53c..66123cf0eda3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2058,7 +2058,8 @@ struct drm_i915_private {
>   		void (*resume)(struct drm_i915_private *);
>   		void (*cleanup_engine)(struct intel_engine_cs *engine);
>   
> -		struct i915_gem_timeline global_timeline;
> +		struct i915_gem_timeline execution_timeline;
> +		struct i915_gem_timeline legacy_timeline;
>   		struct list_head timelines;
>   		struct list_head live_rings;
>   		u32 active_requests;
> @@ -3232,16 +3233,6 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id)
>   	return ctx;
>   }
>   
> -static inline struct intel_timeline *
> -i915_gem_context_lookup_timeline(struct i915_gem_context *ctx,
> -				 struct intel_engine_cs *engine)
> -{
> -	struct i915_address_space *vm;
> -
> -	vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base;
> -	return &vm->timeline.engine[engine->id];
> -}
> -
>   int i915_perf_open_ioctl(struct drm_device *dev, void *data,
>   			 struct drm_file *file);
>   int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0097a77fae8d..1635975dbc16 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3110,10 +3110,10 @@ static void engine_skip_context(struct i915_request *request)
>   {
>   	struct intel_engine_cs *engine = request->engine;
>   	struct i915_gem_context *hung_ctx = request->ctx;
> -	struct intel_timeline *timeline;
> +	struct intel_timeline *timeline = request->timeline;
>   	unsigned long flags;
>   
> -	timeline = i915_gem_context_lookup_timeline(hung_ctx, engine);
> +	GEM_BUG_ON(timeline == engine->timeline);

Isn't this the guilty request, so would be on the engine timeline?

>   
>   	spin_lock_irqsave(&engine->timeline->lock, flags);
>   	spin_lock(&timeline->lock);
> @@ -3782,7 +3782,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
>   
>   		ret = wait_for_engines(i915);
>   	} else {
> -		ret = wait_for_timeline(&i915->gt.global_timeline, flags);
> +		ret = wait_for_timeline(&i915->gt.execution_timeline, flags);
>   	}
>   
>   	return ret;
> @@ -5651,7 +5651,8 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
>   	WARN_ON(dev_priv->mm.object_count);
>   
>   	mutex_lock(&dev_priv->drm.struct_mutex);
> -	i915_gem_timeline_fini(&dev_priv->gt.global_timeline);
> +	i915_gem_timeline_fini(&dev_priv->gt.legacy_timeline);
> +	i915_gem_timeline_fini(&dev_priv->gt.execution_timeline);
>   	WARN_ON(!list_empty(&dev_priv->gt.timelines));
>   	mutex_unlock(&dev_priv->drm.struct_mutex);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 74435affe23f..58b185abe652 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -122,6 +122,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
>   	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
>   
> +	i915_gem_timeline_free(ctx->timeline);
>   	i915_ppgtt_put(ctx->ppgtt);
>   
>   	for (i = 0; i < I915_NUM_ENGINES; i++) {
> @@ -376,6 +377,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>   		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
>   	}
>   
> +	if (HAS_EXECLISTS(dev_priv)) {
> +		struct i915_gem_timeline *timeline;
> +
> +		timeline = i915_gem_timeline_create(dev_priv, ctx->name);
> +		if (IS_ERR(timeline)) {
> +			__destroy_hw_context(ctx, file_priv);
> +			return ERR_CAST(timeline);

Leaks ppgtt by the look of it.

> +		}
> +
> +		ctx->timeline = timeline;
> +	}
> +
>   	trace_i915_context_create(ctx);
>   
>   	return ctx;
> @@ -584,7 +597,7 @@ static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine)
>   	list_for_each_entry(timeline, &engine->i915->gt.timelines, link) {
>   		struct intel_timeline *tl;
>   
> -		if (timeline == &engine->i915->gt.global_timeline)
> +		if (timeline == &engine->i915->gt.execution_timeline)
>   			continue;
>   
>   		tl = &timeline->engine[engine->id];
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index b12a8a8c5af9..140edcb424df 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -58,6 +58,8 @@ struct i915_gem_context {
>   	/** file_priv: owning file descriptor */
>   	struct drm_i915_file_private *file_priv;
>   
> +	struct i915_gem_timeline *timeline;
> +
>   	/**
>   	 * @ppgtt: unique address space (GTT)
>   	 *
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 21d72f695adb..e9d828324f67 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2111,8 +2111,6 @@ static void i915_address_space_init(struct i915_address_space *vm,
>   				    struct drm_i915_private *dev_priv,
>   				    const char *name)
>   {
> -	i915_gem_timeline_init(dev_priv, &vm->timeline, name);
> -
>   	drm_mm_init(&vm->mm, 0, vm->total);
>   	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
>   
> @@ -2129,7 +2127,6 @@ static void i915_address_space_fini(struct i915_address_space *vm)
>   	if (pagevec_count(&vm->free_pages))
>   		vm_free_pages_release(vm, true);
>   
> -	i915_gem_timeline_fini(&vm->timeline);
>   	drm_mm_takedown(&vm->mm);
>   	list_del(&vm->global_link);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 6efc017e8bb3..98107925de48 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -257,7 +257,6 @@ struct i915_pml4 {
>   
>   struct i915_address_space {
>   	struct drm_mm mm;
> -	struct i915_gem_timeline timeline;
>   	struct drm_i915_private *i915;
>   	struct device *dma;
>   	/* Every address space belongs to a struct file - except for the global
> diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
> index e9fd87604067..24f4068cc137 100644
> --- a/drivers/gpu/drm/i915/i915_gem_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
> @@ -95,12 +95,28 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
>   
>   int i915_gem_timeline_init__global(struct drm_i915_private *i915)
>   {
> -	static struct lock_class_key class;
> +	static struct lock_class_key class1, class2;
> +	int err;
> +
> +	err = __i915_gem_timeline_init(i915,
> +				       &i915->gt.execution_timeline,
> +				       "[execution]", &class1,
> +				       "i915_execution_timeline");
> +	if (err)
> +		return err;
> +
> +	err = __i915_gem_timeline_init(i915,
> +				       &i915->gt.legacy_timeline,
> +				       "[global]", &class2,
> +				       "i915_global_timeline");
> +	if (err)
> +		goto err_exec_timeline;
> +
> +	return 0;
>   
> -	return __i915_gem_timeline_init(i915,
> -					&i915->gt.global_timeline,
> -					"[execution]",
> -					&class, "&global_timeline->lock");
> +err_exec_timeline:
> +	i915_gem_timeline_fini(&i915->gt.execution_timeline);
> +	return err;
>   }
>   
>   /**
> @@ -148,6 +164,34 @@ void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
>   	kfree(timeline->name);
>   }
>   
> +struct i915_gem_timeline *
> +i915_gem_timeline_create(struct drm_i915_private *i915, const char *name)
> +{
> +	struct i915_gem_timeline *timeline;
> +	int err;
> +
> +	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
> +	if (!timeline)
> +		return ERR_PTR(-ENOMEM);
> +
> +	err = i915_gem_timeline_init(i915, timeline, name);
> +	if (err) {
> +		kfree(timeline);
> +		return ERR_PTR(err);
> +	}
> +
> +	return timeline;
> +}
> +
> +void i915_gem_timeline_free(struct i915_gem_timeline *timeline)
> +{
> +	if (!timeline)
> +		return;
> +
> +	i915_gem_timeline_fini(timeline);
> +	kfree(timeline);
> +}
> +
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>   #include "selftests/mock_timeline.c"
>   #include "selftests/i915_gem_timeline.c"
> diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
> index 6e82119e2cd8..780ed465c4fc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
> @@ -90,6 +90,10 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915);
>   void i915_gem_timelines_park(struct drm_i915_private *i915);
>   void i915_gem_timeline_fini(struct i915_gem_timeline *tl);
>   
> +struct i915_gem_timeline *
> +i915_gem_timeline_create(struct drm_i915_private *i915, const char *name);
> +void i915_gem_timeline_free(struct i915_gem_timeline *timeline);
> +
>   static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
>   					    u64 context, u32 seqno)
>   {
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 534b8d684cef..35869afdb199 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -639,6 +639,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   	if (IS_ERR(ring))
>   		return ERR_CAST(ring);
>   	GEM_BUG_ON(!ring);
> +	GEM_BUG_ON(ring->timeline == engine->timeline);

It's debugging only but feels out of place. Put it in 
intel_engine_create_ring?

>   
>   	ret = reserve_engine(engine);
>   	if (ret)
> @@ -711,8 +712,12 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   		}
>   	}
>   
> -	rq->timeline = i915_gem_context_lookup_timeline(ctx, engine);
> -	GEM_BUG_ON(rq->timeline == engine->timeline);
> +	INIT_LIST_HEAD(&rq->active_list);
> +	rq->i915 = i915;
> +	rq->engine = engine;
> +	rq->ctx = ctx;
> +	rq->ring = ring;
> +	rq->timeline = ring->timeline;
>   
>   	spin_lock_init(&rq->lock);
>   	dma_fence_init(&rq->fence,
> @@ -727,12 +732,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   
>   	i915_sched_node_init(&rq->sched);
>   
> -	INIT_LIST_HEAD(&rq->active_list);
> -	rq->i915 = i915;
> -	rq->engine = engine;
> -	rq->ctx = ctx;
> -	rq->ring = ring;
> -
>   	/* No zalloc, must clear what we need by hand */
>   	rq->global_seqno = 0;
>   	rq->signaling.wait.seqno = 0;
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index a55a849b81b6..d44a8eb83379 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -453,7 +453,8 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
>   
>   static void intel_engine_init_timeline(struct intel_engine_cs *engine)
>   {
> -	engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id];
> +	engine->timeline =
> +		&engine->i915->gt.execution_timeline.engine[engine->id];
>   }
>   
>   static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 029901a8fa38..fd3539034665 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2584,7 +2584,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>   		goto error_deref_obj;
>   	}
>   
> -	ring = intel_engine_create_ring(engine, ctx->ring_size);
> +	ring = intel_engine_create_ring(engine, ctx->timeline, ctx->ring_size);
>   	if (IS_ERR(ring)) {
>   		ret = PTR_ERR(ring);
>   		goto error_deref_obj;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 3453e7426f6b..4559fe1c574e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1117,7 +1117,9 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
>   }
>   
>   struct intel_ring *
> -intel_engine_create_ring(struct intel_engine_cs *engine, int size)
> +intel_engine_create_ring(struct intel_engine_cs *engine,
> +			 struct i915_gem_timeline *timeline,
> +			 int size)
>   {
>   	struct intel_ring *ring;
>   	struct i915_vma *vma;
> @@ -1131,6 +1133,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
>   		return ERR_PTR(-ENOMEM);
>   
>   	INIT_LIST_HEAD(&ring->request_list);
> +	ring->timeline = &timeline->engine[engine->id];
>   
>   	ring->size = size;
>   	/* Workaround an erratum on the i830 which causes a hang if
> @@ -1327,7 +1330,9 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
>   	if (err)
>   		goto err;
>   
> -	ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
> +	ring = intel_engine_create_ring(engine,
> +					&engine->i915->gt.legacy_timeline,
> +					32 * PAGE_SIZE);
>   	if (IS_ERR(ring)) {
>   		err = PTR_ERR(ring);
>   		goto err;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index fd5a6363ab1d..3f63499734f7 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -128,6 +128,7 @@ struct intel_ring {
>   	struct i915_vma *vma;
>   	void *vaddr;
>   
> +	struct intel_timeline *timeline;
>   	struct list_head request_list;
>   	struct list_head live;
>   
> @@ -767,7 +768,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
>   #define CNL_HWS_CSB_WRITE_INDEX		0x2f
>   
>   struct intel_ring *
> -intel_engine_create_ring(struct intel_engine_cs *engine, int size);
> +intel_engine_create_ring(struct intel_engine_cs *engine,
> +			 struct i915_gem_timeline *timeline,
> +			 int size);
>   int intel_ring_pin(struct intel_ring *ring,
>   		   struct drm_i915_private *i915,
>   		   unsigned int offset_bias);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index 74a88913623f..6a10cb734c35 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -173,8 +173,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
>   	engine->base.emit_breadcrumb = mock_emit_breadcrumb;
>   	engine->base.submit_request = mock_submit_request;
>   
> -	engine->base.timeline =
> -		&i915->gt.global_timeline.engine[engine->base.id];
> +	intel_engine_init_timeline(&engine->base);
>   
>   	intel_engine_init_breadcrumbs(&engine->base);
>   	engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 9335b09d8b04..ed1bf3b2e47f 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -73,7 +73,9 @@ static void mock_device_release(struct drm_device *dev)
>   
>   	mutex_lock(&i915->drm.struct_mutex);
>   	mock_fini_ggtt(i915);
> -	i915_gem_timeline_fini(&i915->gt.global_timeline);
> +	i915_gem_timeline_fini(&i915->gt.legacy_timeline);
> +	i915_gem_timeline_fini(&i915->gt.execution_timeline);
> +	WARN_ON(!list_empty(&i915->gt.timelines));
>   	mutex_unlock(&i915->drm.struct_mutex);
>   
>   	destroy_workqueue(i915->wq);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> index e96873f96116..36c112088940 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> @@ -76,7 +76,6 @@ mock_ppgtt(struct drm_i915_private *i915,
>   
>   	INIT_LIST_HEAD(&ppgtt->base.global_link);
>   	drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total);
> -	i915_gem_timeline_init(i915, &ppgtt->base.timeline, name);
>   
>   	ppgtt->base.clear_range = nop_clear_range;
>   	ppgtt->base.insert_page = mock_insert_page;
> 

Looks good in principle, only some details to talk about.

Regards,

Tvrtko


More information about the Intel-gfx mailing list