[Intel-gfx] [PATCH 9/9] drm/i915: Move releasing of the GEM request from free to retire/cancel

Tue Apr 19 12:16:44 UTC 2016

On 19/04/16 07:49, Chris Wilson wrote:
> If we move the release of the GEM request (i.e. decoupling it from the
> various lists used for client and context tracking) after it is complete
> (either by the GPU retiring the request, or by the caller cancelling the
> request), we can remove the requirement that the final unreference of
> the GEM request need to be under the struct_mutex.
> 
> v2,v3: Rebalance execlists by moving the context unpinning.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem.c          |  4 ++--
>   drivers/gpu/drm/i915/i915_gem_request.c  | 25 ++++++++++---------------
>   drivers/gpu/drm/i915/i915_gem_request.h  | 14 --------------
>   drivers/gpu/drm/i915/intel_breadcrumbs.c |  2 +-
>   drivers/gpu/drm/i915/intel_display.c     |  2 +-
>   drivers/gpu/drm/i915/intel_lrc.c         |  4 ----
>   drivers/gpu/drm/i915/intel_pm.c          |  2 +-
>   7 files changed, 15 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4057c0febccd..1f5434f7a294 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2542,7 +2542,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   			ret = __i915_wait_request(req[i], true,
>   						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
>   						  to_rps_client(file));
> -		i915_gem_request_unreference__unlocked(req[i]);
> +		i915_gem_request_unreference(req[i]);
>   	}
>   	return ret;
>   
> @@ -3548,7 +3548,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>   		return 0;
>   
>   	ret = __i915_wait_request(target, true, NULL, NULL);
> -	i915_gem_request_unreference__unlocked(target);
> +	i915_gem_request_unreference(target);
>   
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 8d7c415f1896..c14dca3d8b87 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -250,6 +250,7 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
>   static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>   {
>   	trace_i915_gem_request_retire(request);
> +	list_del_init(&request->list);
>   
>   	/* We know the GPU must have read the request to have
>   	 * sent us the seqno + interrupt, so use the position
> @@ -261,9 +262,15 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
>   	 */
>   	request->ringbuf->last_retired_head = request->postfix;
>   
> -	list_del_init(&request->list);
>   	i915_gem_request_remove_from_client(request);
>   
> +	if (request->pinned_context) {
> +		if (i915.enable_execlists)
> +			intel_lr_context_unpin(request->pinned_context,
> +					       request->engine);
> +	}
> +
> +	i915_gem_context_unreference(request->ctx);
>   	i915_gem_request_unreference(request);
>   }
>   
> @@ -636,19 +643,7 @@ int i915_wait_request(struct drm_i915_gem_request *req)
>   
>   void i915_gem_request_free(struct kref *req_ref)
>   {
> -	struct drm_i915_gem_request *req = container_of(req_ref,
> -						 typeof(*req), ref);
> -	struct intel_context *ctx = req->ctx;
> -
> -	if (req->file_priv)
> -		i915_gem_request_remove_from_client(req);
> -
> -	if (req->pinned_context) {
> -		if (i915.enable_execlists)
> -			intel_lr_context_unpin(req->pinned_context,
> -					       req->engine);
> -	}
> -
> -	i915_gem_context_unreference(ctx);
> +	struct drm_i915_gem_request *req =
> +		container_of(req_ref, typeof(*req), ref);
>   	kmem_cache_free(to_i915(req)->requests, req);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 389813cbc19a..48bff7dc4f90 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -170,23 +170,9 @@ i915_gem_request_reference(struct drm_i915_gem_request *req)
>   static inline void
>   i915_gem_request_unreference(struct drm_i915_gem_request *req)
>   {
> -	WARN_ON(!mutex_is_locked(&req->engine->dev->struct_mutex));
>   	kref_put(&req->ref, i915_gem_request_free);
>   }
>   
> -static inline void
> -i915_gem_request_unreference__unlocked(struct drm_i915_gem_request *req)
> -{
> -	struct drm_device *dev;
> -
> -	if (!req)
> -		return;
> -
> -	dev = req->engine->dev;
> -	if (kref_put_mutex(&req->ref, i915_gem_request_free, &dev->struct_mutex))
> -		mutex_unlock(&dev->struct_mutex);
> -}
> -
>   static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
>   					   struct drm_i915_gem_request *src)
>   {
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 79f175853548..d7d19e17318e 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c

! :)

> @@ -379,7 +379,7 @@ static int intel_breadcrumbs_signaler(void *arg)
>   			 */
>   			intel_engine_remove_wait(engine, &signal->wait);
>   
> -			i915_gem_request_unreference__unlocked(signal->request);
> +			i915_gem_request_unreference(signal->request);
>   
>   			/* Find the next oldest signal. Note that as we have
>   			 * not been holding the lock, another client may
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index caff656417c2..07dfd55fff91 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11370,7 +11370,7 @@ static void intel_mmio_flip_work_func(struct work_struct *work)
>   		WARN_ON(__i915_wait_request(mmio_flip->req,
>   					    false, NULL,
>   					    &mmio_flip->i915->rps.mmioflips));
> -		i915_gem_request_unreference__unlocked(mmio_flip->req);
> +		i915_gem_request_unreference(mmio_flip->req);
>   	}
>   
>   	/* For framebuffer backed by dmabuf, wait for fence */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 0e55f206e592..0eaa74fab87b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -587,7 +587,6 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
>   	struct drm_i915_gem_request *cursor;
>   	int num_elements = 0;
>   
> -	intel_lr_context_pin(request->ctx, request->engine);

I would prefer this step to be a separate patch from the
retire/free reorg.

Also, in my testing, another patch was required before this
step to make it work. Basically:

>From 848df37ef90dfb7c7adbf59d155c39d8e4521b8e Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Date: Fri, 15 Apr 2016 16:13:26 +0100
Subject: [PATCH 6/7] drm/i915: Store LRC hardware id in the context

This way in the following patch we can disconnect requests
from contexts.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  | 3 +++
 drivers/gpu/drm/i915/intel_lrc.c | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 79c23108cc35..d85c03425c54 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2334,6 +2334,9 @@ struct drm_i915_gem_request {
 
        /** Execlists no. of times this request has been sent to the ELSP */
        int elsp_submitted;
+
+       /** Execlists context hardware id. */
+       unsigned ctx_hw_id;
 };
 
 struct drm_i915_gem_request * __must_check
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8f8da1b01458..871f399f84f6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -475,7 +475,7 @@ execlists_check_remove_request(struct intel_engine_cs *engine, u32 request_id)
        if (!head_req)
                return 0;
 
-       if (unlikely(head_req->ctx->hw_id != request_id))
+       if (unlikely(head_req->ctx_hw_id != request_id))
                return 0;
 
        WARN(head_req->elsp_submitted == 0, "Never submitted head request\n");
@@ -614,6 +614,7 @@ static void execlists_context_queue(struct drm_i915_gem_request *request)
        }
 
        list_add_tail(&request->execlist_link, &engine->execlist_queue);
+       request->ctx_hw_id = request->ctx->hw_id;
        if (num_elements == 0)
                execlists_context_unqueue(engine);
 
-- 
1.9.1

Otherwise I was hitting requests with unpinned LRCs on the
execlists queue so check_remove was failing to spot progress.

I had a theory on why that was happening but I am not sure
about it any longer. Will try to stress it a bit with your
series and see if I can hit the problem.

But either way I think removing the extra execlist pin/unpin
removal from this patch would be better.

>   	i915_gem_request_reference(request);
>   
>   	spin_lock_bh(&engine->execlist_lock);
> @@ -1006,9 +1005,6 @@ void intel_execlists_retire_requests(struct intel_engine_cs *engine)
>   	spin_unlock_bh(&engine->execlist_lock);
>   
>   	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
> -		if (req->pinned_context)
> -			intel_lr_context_unpin(req->pinned_context, engine);
> -
>   		list_del(&req->execlist_link);
>   		i915_gem_request_unreference(req);
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b964c448bc03..565b725cd817 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7364,7 +7364,7 @@ static void __intel_rps_boost_work(struct work_struct *work)
>   	if (!i915_gem_request_completed(req))
>   		gen6_rps_boost(to_i915(req), NULL, req->emitted_jiffies);
>   
> -	i915_gem_request_unreference__unlocked(req);
> +	i915_gem_request_unreference(req);
>   	kfree(boost);
>   }
>   
> 

Regards,

Tvrtko