[Intel-gfx] [PATCH 13/23] drm/i915: Move list of timelines under its own lock

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Jan 17 17:54:38 UTC 2019


On 17/01/2019 14:34, Chris Wilson wrote:
> Currently, the list of timelines is serialised by the struct_mutex, but
> to alleviate difficulties with using that mutex in future, move the
> list management under its own dedicated mutex.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h               |  5 +-
>   drivers/gpu/drm/i915/i915_gem.c               | 89 +++++++++----------
>   drivers/gpu/drm/i915/i915_reset.c             |  8 +-
>   drivers/gpu/drm/i915/i915_timeline.c          | 38 ++++++--
>   drivers/gpu/drm/i915/i915_timeline.h          |  3 +
>   drivers/gpu/drm/i915/i915_vma.c               |  6 ++
>   .../gpu/drm/i915/selftests/mock_gem_device.c  |  7 +-
>   .../gpu/drm/i915/selftests/mock_timeline.c    |  3 +-
>   8 files changed, 101 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 94680b15bed0..3913900600b7 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1975,7 +1975,10 @@ struct drm_i915_private {
>   		void (*resume)(struct drm_i915_private *);
>   		void (*cleanup_engine)(struct intel_engine_cs *engine);
>   
> -		struct list_head timelines;
> +		struct i915_gt_timelines {
> +			struct mutex mutex; /* protects list, tainted by GPU */

What does "tainted by GPU" mean?

> +			struct list_head list;
> +		} timelines;
>   
>   		struct list_head active_rings;
>   		struct list_head closed_vma;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 15acd052da46..3c6091021290 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3222,33 +3222,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   	return ret;
>   }
>   
> -static long wait_for_timeline(struct i915_timeline *tl,
> -			      unsigned int flags, long timeout)
> -{
> -	struct i915_request *rq;
> -
> -	rq = i915_gem_active_get_unlocked(&tl->last_request);
> -	if (!rq)
> -		return timeout;
> -
> -	/*
> -	 * "Race-to-idle".
> -	 *
> -	 * Switching to the kernel context is often used a synchronous
> -	 * step prior to idling, e.g. in suspend for flushing all
> -	 * current operations to memory before sleeping. These we
> -	 * want to complete as quickly as possible to avoid prolonged
> -	 * stalls, so allow the gpu to boost to maximum clocks.
> -	 */
> -	if (flags & I915_WAIT_FOR_IDLE_BOOST)
> -		gen6_rps_boost(rq, NULL);
> -
> -	timeout = i915_request_wait(rq, flags, timeout);
> -	i915_request_put(rq);
> -
> -	return timeout;
> -}
> -
>   static int wait_for_engines(struct drm_i915_private *i915)
>   {
>   	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
> @@ -3265,6 +3238,8 @@ static int wait_for_engines(struct drm_i915_private *i915)
>   int i915_gem_wait_for_idle(struct drm_i915_private *i915,
>   			   unsigned int flags, long timeout)
>   {
> +	struct i915_timeline *tl;
> +
>   	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
>   		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
>   		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
> @@ -3273,17 +3248,45 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
>   	if (!READ_ONCE(i915->gt.awake))
>   		return 0;
>   
> +	mutex_lock(&i915->gt.timelines.mutex);
> +	list_for_each_entry(tl, &i915->gt.timelines.list, link) {
> +		struct i915_request *rq;
> +
> +		rq = i915_gem_active_get_unlocked(&tl->last_request);
> +		if (!rq)
> +			continue;
> +
> +		mutex_unlock(&i915->gt.timelines.mutex);
> +
> +		/*
> +		 * "Race-to-idle".
> +		 *
> +		 * Switching to the kernel context is often used a synchronous
> +		 * step prior to idling, e.g. in suspend for flushing all
> +		 * current operations to memory before sleeping. These we
> +		 * want to complete as quickly as possible to avoid prolonged
> +		 * stalls, so allow the gpu to boost to maximum clocks.
> +		 */
> +		if (flags & I915_WAIT_FOR_IDLE_BOOST)
> +			gen6_rps_boost(rq, NULL);
> +
> +		timeout = i915_request_wait(rq, flags, timeout);
> +		i915_request_put(rq);
> +		if (timeout < 0)
> +			return timeout;
> +
> +		mutex_lock(&i915->gt.timelines.mutex);
> +
> +		/* restart after dropping the lock */
> +		tl = list_entry(&i915->gt.timelines.list, typeof(*tl), link);
> +	}
> +	mutex_unlock(&i915->gt.timelines.mutex);

Hm, since the loop above bothers restarting after dropping the lock, 
that implies when we drop the lock here we may not be idle any longer. 
Or we actually still depend on struct_mutex and this is another small 
charade? I guess so, since without this patch we also have two paths with 
different levels of idleness guarantee.

> +
>   	if (flags & I915_WAIT_LOCKED) {
> -		struct i915_timeline *tl;
>   		int err;
>   
>   		lockdep_assert_held(&i915->drm.struct_mutex);
>   
> -		list_for_each_entry(tl, &i915->gt.timelines, link) {
> -			timeout = wait_for_timeline(tl, flags, timeout);
> -			if (timeout < 0)
> -				return timeout;
> -		}
>   		if (GEM_SHOW_DEBUG() && !timeout) {
>   			/* Presume that timeout was non-zero to begin with! */
>   			dev_warn(&i915->drm.pdev->dev,
> @@ -3297,17 +3300,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
>   
>   		i915_retire_requests(i915);
>   		GEM_BUG_ON(i915->gt.active_requests);
> -	} else {
> -		struct intel_engine_cs *engine;
> -		enum intel_engine_id id;
> -
> -		for_each_engine(engine, i915, id) {
> -			struct i915_timeline *tl = &engine->timeline;
> -
> -			timeout = wait_for_timeline(tl, flags, timeout);
> -			if (timeout < 0)
> -				return timeout;
> -		}
>   	}
>   
>   	return 0;
> @@ -5008,6 +5000,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
>   	}
>   
> +	i915_timelines_init(dev_priv);
> +
>   	ret = i915_gem_init_userptr(dev_priv);
>   	if (ret)
>   		return ret;
> @@ -5130,8 +5124,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   err_uc_misc:
>   	intel_uc_fini_misc(dev_priv);
>   
> -	if (ret != -EIO)
> +	if (ret != -EIO) {
>   		i915_gem_cleanup_userptr(dev_priv);
> +		i915_timelines_fini(dev_priv);
> +	}
>   
>   	if (ret == -EIO) {
>   		mutex_lock(&dev_priv->drm.struct_mutex);
> @@ -5182,6 +5178,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
>   
>   	intel_uc_fini_misc(dev_priv);
>   	i915_gem_cleanup_userptr(dev_priv);
> +	i915_timelines_fini(dev_priv);
>   
>   	i915_gem_drain_freed_objects(dev_priv);
>   
> @@ -5284,7 +5281,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
>   	if (!dev_priv->priorities)
>   		goto err_dependencies;
>   
> -	INIT_LIST_HEAD(&dev_priv->gt.timelines);
>   	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
>   	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
>   
> @@ -5328,7 +5324,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
>   	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
>   	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
>   	WARN_ON(dev_priv->mm.object_count);
> -	WARN_ON(!list_empty(&dev_priv->gt.timelines));
>   
>   	kmem_cache_destroy(dev_priv->priorities);
>   	kmem_cache_destroy(dev_priv->dependencies);
> diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
> index d44b095e2860..12e5a2bc825c 100644
> --- a/drivers/gpu/drm/i915/i915_reset.c
> +++ b/drivers/gpu/drm/i915/i915_reset.c
> @@ -850,7 +850,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>   	 *
>   	 * No more can be submitted until we reset the wedged bit.
>   	 */
> -	list_for_each_entry(tl, &i915->gt.timelines, link) {
> +	mutex_lock(&i915->gt.timelines.mutex);
> +	list_for_each_entry(tl, &i915->gt.timelines.list, link) {
>   		struct i915_request *rq;
>   		long timeout;
>   
> @@ -872,9 +873,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>   		timeout = dma_fence_default_wait(&rq->fence, true,
>   						 MAX_SCHEDULE_TIMEOUT);
>   		i915_request_put(rq);
> -		if (timeout < 0)
> +		if (timeout < 0) {
> +			mutex_unlock(&i915->gt.timelines.mutex);
>   			goto unlock;
> +		}
>   	}
> +	mutex_unlock(&i915->gt.timelines.mutex);
>   
>   	intel_engines_sanitize(i915, false);
>   
> diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> index 4667cc08c416..84550f17d3df 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_timeline.c
> @@ -13,7 +13,7 @@ void i915_timeline_init(struct drm_i915_private *i915,
>   			struct i915_timeline *timeline,
>   			const char *name)
>   {
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> +	struct i915_gt_timelines *gt = &i915->gt.timelines;
>   
>   	/*
>   	 * Ideally we want a set of engines on a single leaf as we expect
> @@ -23,9 +23,12 @@ void i915_timeline_init(struct drm_i915_private *i915,
>   	 */
>   	BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
>   
> +	timeline->i915 = i915;
>   	timeline->name = name;
>   
> -	list_add(&timeline->link, &i915->gt.timelines);
> +	mutex_lock(&gt->mutex);
> +	list_add(&timeline->link, &gt->list);
> +	mutex_unlock(&gt->mutex);
>   
>   	/* Called during early_init before we know how many engines there are */
>   
> @@ -39,6 +42,17 @@ void i915_timeline_init(struct drm_i915_private *i915,
>   	i915_syncmap_init(&timeline->sync);
>   }
>   
> +void i915_timelines_init(struct drm_i915_private *i915)
> +{
> +	struct i915_gt_timelines *gt = &i915->gt.timelines;
> +
> +	mutex_init(&gt->mutex);
> +	INIT_LIST_HEAD(&gt->list);
> +
> +	/* via i915_gem_wait_for_idle() */
> +	i915_gem_shrinker_taints_mutex(i915, &gt->mutex);
> +}
> +
>   /**
>    * i915_timelines_park - called when the driver idles
>    * @i915: the drm_i915_private device
> @@ -51,11 +65,11 @@ void i915_timeline_init(struct drm_i915_private *i915,
>    */
>   void i915_timelines_park(struct drm_i915_private *i915)
>   {
> +	struct i915_gt_timelines *gt = &i915->gt.timelines;
>   	struct i915_timeline *timeline;
>   
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> -
> -	list_for_each_entry(timeline, &i915->gt.timelines, link) {
> +	mutex_lock(&gt->mutex);
> +	list_for_each_entry(timeline, &gt->list, link) {
>   		/*
>   		 * All known fences are completed so we can scrap
>   		 * the current sync point tracking and start afresh,
> @@ -64,15 +78,20 @@ void i915_timelines_park(struct drm_i915_private *i915)
>   		 */
>   		i915_syncmap_free(&timeline->sync);
>   	}
> +	mutex_unlock(&gt->mutex);
>   }
>   
>   void i915_timeline_fini(struct i915_timeline *timeline)
>   {
> +	struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
> +
>   	GEM_BUG_ON(!list_empty(&timeline->requests));
>   
>   	i915_syncmap_free(&timeline->sync);
>   
> +	mutex_lock(&gt->mutex);
>   	list_del(&timeline->link);
> +	mutex_unlock(&gt->mutex);
>   }
>   
>   struct i915_timeline *
> @@ -99,6 +118,15 @@ void __i915_timeline_free(struct kref *kref)
>   	kfree(timeline);
>   }
>   
> +void i915_timelines_fini(struct drm_i915_private *i915)
> +{
> +	struct i915_gt_timelines *gt = &i915->gt.timelines;
> +
> +	GEM_BUG_ON(!list_empty(&gt->list));
> +
> +	mutex_destroy(&gt->mutex);
> +}
> +
>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>   #include "selftests/mock_timeline.c"
>   #include "selftests/i915_timeline.c"
> diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
> index 38c1e15e927a..87ad2dd31c20 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_timeline.h
> @@ -66,6 +66,7 @@ struct i915_timeline {
>   
>   	struct list_head link;
>   	const char *name;
> +	struct drm_i915_private *i915;
>   
>   	struct kref kref;
>   };
> @@ -134,6 +135,8 @@ static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
>   	return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);
>   }
>   
> +void i915_timelines_init(struct drm_i915_private *i915);
>   void i915_timelines_park(struct drm_i915_private *i915);
> +void i915_timelines_fini(struct drm_i915_private *i915);
>   
>   #endif
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 3a680fe2bb8b..d83b8ad5f859 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -201,6 +201,11 @@ vma_create(struct drm_i915_gem_object *obj,
>   		rb = *p;
>   		pos = rb_entry(rb, struct i915_vma, obj_node);
>   
> +		/*
> +		 * If the view already exists in the tree, another thread
> +		 * already created a matching vma, so return the older instance
> +		 * and dispose of ours.
> +		 */

Oh here is the lost comment, all alone in the strange world. :)

>   		cmp = i915_vma_compare(pos, vm, view);
>   		if (cmp == 0) {
>   			spin_unlock(&obj->vma.lock);
> @@ -294,6 +299,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>   	vma = vma_lookup(obj, vm, view);
>   	spin_unlock(&obj->vma.lock);
>   
> +	/* vma_create() will resolve the race if another creates the vma */

I assume you'll move both comments to their right place.

>   	if (unlikely(!vma))
>   		vma = vma_create(obj, vm, view);
>   
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 888c6978bc54..41ae502361d7 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -68,13 +68,14 @@ static void mock_device_release(struct drm_device *dev)
>   	i915_gem_contexts_fini(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
>   
> +	i915_timelines_fini(i915);
> +
>   	drain_workqueue(i915->wq);
>   	i915_gem_drain_freed_objects(i915);
>   
>   	mutex_lock(&i915->drm.struct_mutex);
>   	mock_fini_ggtt(i915);
>   	mutex_unlock(&i915->drm.struct_mutex);
> -	WARN_ON(!list_empty(&i915->gt.timelines));
>   
>   	destroy_workqueue(i915->wq);
>   
> @@ -226,7 +227,8 @@ struct drm_i915_private *mock_gem_device(void)
>   	if (!i915->priorities)
>   		goto err_dependencies;
>   
> -	INIT_LIST_HEAD(&i915->gt.timelines);
> +	i915_timelines_init(i915);
> +
>   	INIT_LIST_HEAD(&i915->gt.active_rings);
>   	INIT_LIST_HEAD(&i915->gt.closed_vma);
>   
> @@ -253,6 +255,7 @@ struct drm_i915_private *mock_gem_device(void)
>   	i915_gem_contexts_fini(i915);
>   err_unlock:
>   	mutex_unlock(&i915->drm.struct_mutex);
> +	i915_timelines_fini(i915);
>   	kmem_cache_destroy(i915->priorities);
>   err_dependencies:
>   	kmem_cache_destroy(i915->dependencies);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
> index dcf3b16f5a07..cf39ccd9fc05 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
> @@ -10,6 +10,7 @@
>   
>   void mock_timeline_init(struct i915_timeline *timeline, u64 context)
>   {
> +	timeline->i915 = NULL;
>   	timeline->fence_context = context;
>   
>   	spin_lock_init(&timeline->lock);
> @@ -24,5 +25,5 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
>   
>   void mock_timeline_fini(struct i915_timeline *timeline)
>   {
> -	i915_timeline_fini(timeline);
> +	i915_syncmap_free(&timeline->sync);
>   }
> 

Looks okay.

Apart from that, I am 9/10 worried about how the long game of fine-grained 
locking will untangle, or in other words, how much you managed to nail 
all the new locks and how much you'll have to re-fiddle with them. :I 
But maybe you see the end game so I won't project my inability to do so.

Regards,

Tvrtko


More information about the Intel-gfx mailing list