[Intel-gfx] [PATCH 25/55] drm/i915: Update i915_gem_object_sync() to take a request structure
Tomas Elf
tomas.elf at intel.com
Tue Jun 2 11:26:50 PDT 2015
On 29/05/2015 17:43, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> The plan is to pass requests around as the basic submission tracking structure
> rather than rings and contexts. This patch updates the i915_gem_object_sync()
> code path.
>
> v2: Much more complex patch to share a single request between the sync and the
> page flip. The _sync() function now supports lazy allocation of the request
> structure. That is, if one is passed in then that will be used. If one is not,
> then a request will be allocated and passed back out. Note that the _sync() code
> does not necessarily require a request. Thus one will only be created in
> certain situations. The reason the lazy allocation must be done within the
> _sync() code itself is because the decision to need one or not is not really
> something that code above can second guess (except in the case where one is
> definitely not required because no ring is passed in).
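(For reference, the hunk below implements exactly this. The request is
allocated inside __i915_gem_object_sync() only once it is known that
semaphore commands must be emitted:

	if (*to_req == NULL) {
		ret = i915_gem_request_alloc(to, to->default_context, to_req);
		if (ret)
			return ret;
	}

so callers that never reach the semaphore path never pay for an
allocation.)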
>
> The call chains above _sync() now support passing a request through, with most
> callers passing in NULL and assuming that no request will be required (because
> they also pass in NULL for the ring and therefore can't be generating any ring
> code).
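(For example, in the intel_fbdev hunk below a caller with no ring passes
NULL for both the pipelined ring and the request pointer, the last two
arguments, so no request can ever be allocated on that path:

	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
)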
>
> The exeception is intel_crtc_page_flip() which now supports having a request
1. "The exeception" -> "The exception"
> returned from _sync(). If one is, then that request is shared by the page flip
> (if the page flip is of a type that needs a request). If _sync() does not generate
> a request but the page flip does need one, then the page flip path will create
> its own request.
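Put differently, the caller-side contract looks like this (an illustrative
sketch distilled from the intel_crtc_page_flip() hunks below, not the
literal driver code):

	struct drm_i915_gem_request *request = NULL;
	int ret;

	/* A request is allocated into 'request' only if sync commands
	 * actually have to be emitted; otherwise it stays NULL. */
	ret = i915_gem_object_sync(obj, ring, &request);
	if (ret)
		goto cleanup;

	/* ... the flip may add its own work to 'request', or create its
	 * own request if none was returned and one is needed ... */

	/* The caller owns whatever request came back and must submit it: */
	if (request)
		i915_add_request_no_flush(request->ring);
	return 0;

cleanup:
	/* ... or cancel it on the error path: */
	if (request)
		i915_gem_request_cancel(request);
	return ret;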
>
> v3: Updated comment description to be clearer about 'to_req' parameter (Tomas
> Elf review request). Rebased onto newer tree that significantly changed the
> synchronisation code.
>
> For: VIZ-5115
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 4 ++-
> drivers/gpu/drm/i915/i915_gem.c | 48 +++++++++++++++++++++-------
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +-
> drivers/gpu/drm/i915/intel_display.c | 17 +++++++---
> drivers/gpu/drm/i915/intel_drv.h | 3 +-
> drivers/gpu/drm/i915/intel_fbdev.c | 2 +-
> drivers/gpu/drm/i915/intel_lrc.c | 2 +-
> drivers/gpu/drm/i915/intel_overlay.c | 2 +-
> 8 files changed, 58 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 64a10fa..f69e9cb 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2778,7 +2778,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>
> int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
> int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> - struct intel_engine_cs *to);
> + struct intel_engine_cs *to,
> + struct drm_i915_gem_request **to_req);
> void i915_vma_move_to_active(struct i915_vma *vma,
> struct intel_engine_cs *ring);
> int i915_gem_dumb_create(struct drm_file *file_priv,
> @@ -2889,6 +2890,7 @@ int __must_check
> i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> u32 alignment,
> struct intel_engine_cs *pipelined,
> + struct drm_i915_gem_request **pipelined_request,
> const struct i915_ggtt_view *view);
> void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
> const struct i915_ggtt_view *view);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b7d66aa..db90043 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3098,25 +3098,26 @@ out:
> static int
> __i915_gem_object_sync(struct drm_i915_gem_object *obj,
> struct intel_engine_cs *to,
> - struct drm_i915_gem_request *req)
> + struct drm_i915_gem_request *from_req,
> + struct drm_i915_gem_request **to_req)
> {
> struct intel_engine_cs *from;
> int ret;
>
> - from = i915_gem_request_get_ring(req);
> + from = i915_gem_request_get_ring(from_req);
> if (to == from)
> return 0;
>
> - if (i915_gem_request_completed(req, true))
> + if (i915_gem_request_completed(from_req, true))
> return 0;
>
> - ret = i915_gem_check_olr(req);
> + ret = i915_gem_check_olr(from_req);
> if (ret)
> return ret;
>
> if (!i915_semaphore_is_enabled(obj->base.dev)) {
> struct drm_i915_private *i915 = to_i915(obj->base.dev);
> - ret = __i915_wait_request(req,
> + ret = __i915_wait_request(from_req,
> atomic_read(&i915->gpu_error.reset_counter),
> i915->mm.interruptible,
> NULL,
> @@ -3124,15 +3125,25 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
> if (ret)
> return ret;
>
> - i915_gem_object_retire_request(obj, req);
> + i915_gem_object_retire_request(obj, from_req);
> } else {
> int idx = intel_ring_sync_index(from, to);
> - u32 seqno = i915_gem_request_get_seqno(req);
> + u32 seqno = i915_gem_request_get_seqno(from_req);
>
> + WARN_ON(!to_req);
> +
> + /* Optimization: Avoid semaphore sync when we are sure we already
> + * waited for an object with higher seqno */
2. How about using the standard multi-line comment format?
/* (empty line)
 * (first line)
 * (second line)
 */
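Applied to the comment above, that would be:

	/*
	 * Optimization: Avoid semaphore sync when we are sure we already
	 * waited for an object with higher seqno.
	 */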
> if (seqno <= from->semaphore.sync_seqno[idx])
> return 0;
>
> - trace_i915_gem_ring_sync_to(from, to, req);
> + if (*to_req == NULL) {
> + ret = i915_gem_request_alloc(to, to->default_context, to_req);
> + if (ret)
> + return ret;
> + }
> +
> + trace_i915_gem_ring_sync_to(from, to, from_req);
> ret = to->semaphore.sync_to(to, from, seqno);
> if (ret)
> return ret;
> @@ -3153,6 +3164,9 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
> *
> * @obj: object which may be in use on another ring.
> * @to: ring we wish to use the object on. May be NULL.
> + * @to_req: request we wish to use the object for. See below.
> + * This will be allocated and returned if a request is
> + * required but not passed in.
> *
> * This code is meant to abstract object synchronization with the GPU.
> * Calling with NULL implies synchronizing the object with the CPU
> @@ -3168,11 +3182,22 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
> * - If we are a write request (pending_write_domain is set), the new
> * request must wait for outstanding read requests to complete.
> *
> + * For CPU synchronisation (NULL to) no request is required. For syncing with
> + * rings to_req must be non-NULL. However, a request does not have to be
> + * pre-allocated. If *to_req is null and sync commands will be emitted then a
> + * request will be allocated automatically and returned through *to_req. Note
> + * that it is not guaranteed that commands will be emitted (because the
> + * might already be idle). Hence there is no need to create a request that
> + * might never have any work submitted. Note further that if a request is
> + * returned in *to_req, it is the responsibility of the caller to submit
> + * that request (after potentially adding more work to it).
> + *
3. "(because the might already be idle)" : The what? The engine?
4. "NULL" and "null" mixed. Please be consistent.
Overall, the explanation is better than in the last patch version.
With those minor changes:
Reviewed-by: Tomas Elf <tomas.elf at intel.com>
Thanks,
Tomas
> * Returns 0 if successful, else propagates up the lower layer error.
> */
> int
> i915_gem_object_sync(struct drm_i915_gem_object *obj,
> - struct intel_engine_cs *to)
> + struct intel_engine_cs *to,
> + struct drm_i915_gem_request **to_req)
> {
> const bool readonly = obj->base.pending_write_domain == 0;
> struct drm_i915_gem_request *req[I915_NUM_RINGS];
> @@ -3194,7 +3219,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
> req[n++] = obj->last_read_req[i];
> }
> for (i = 0; i < n; i++) {
> - ret = __i915_gem_object_sync(obj, to, req[i]);
> + ret = __i915_gem_object_sync(obj, to, req[i], to_req);
> if (ret)
> return ret;
> }
> @@ -4144,12 +4169,13 @@ int
> i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> u32 alignment,
> struct intel_engine_cs *pipelined,
> + struct drm_i915_gem_request **pipelined_request,
> const struct i915_ggtt_view *view)
> {
> u32 old_read_domains, old_write_domain;
> int ret;
>
> - ret = i915_gem_object_sync(obj, pipelined);
> + ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
> if (ret)
> return ret;
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 50b1ced..bea92ad 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -899,7 +899,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
> struct drm_i915_gem_object *obj = vma->obj;
>
> if (obj->active & other_rings) {
> - ret = i915_gem_object_sync(obj, req->ring);
> + ret = i915_gem_object_sync(obj, req->ring, &req);
> if (ret)
> return ret;
> }
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 657a333..6528ada 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2338,7 +2338,8 @@ int
> intel_pin_and_fence_fb_obj(struct drm_plane *plane,
> struct drm_framebuffer *fb,
> const struct drm_plane_state *plane_state,
> - struct intel_engine_cs *pipelined)
> + struct intel_engine_cs *pipelined,
> + struct drm_i915_gem_request **pipelined_request)
> {
> struct drm_device *dev = fb->dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -2403,7 +2404,7 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane,
>
> dev_priv->mm.interruptible = false;
> ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined,
> - &view);
> + pipelined_request, &view);
> if (ret)
> goto err_interruptible;
>
> @@ -11119,6 +11120,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> struct intel_unpin_work *work;
> struct intel_engine_cs *ring;
> bool mmio_flip;
> + struct drm_i915_gem_request *request = NULL;
> int ret;
>
> /*
> @@ -11225,7 +11227,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> */
> ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
> crtc->primary->state,
> - mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring);
> + mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring, &request);
> if (ret)
> goto cleanup_pending;
>
> @@ -11256,6 +11258,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> intel_ring_get_request(ring));
> }
>
> + if (request)
> + i915_add_request_no_flush(request->ring);
> +
> work->flip_queued_vblank = drm_crtc_vblank_count(crtc);
> work->enable_stall_check = true;
>
> @@ -11273,6 +11278,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> cleanup_unpin:
> intel_unpin_fb_obj(fb, crtc->primary->state);
> cleanup_pending:
> + if (request)
> + i915_gem_request_cancel(request);
> atomic_dec(&intel_crtc->unpin_work_count);
> mutex_unlock(&dev->struct_mutex);
> cleanup:
> @@ -13171,7 +13178,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
> if (ret)
> DRM_DEBUG_KMS("failed to attach phys object\n");
> } else {
> - ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL);
> + ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL, NULL);
> }
>
> if (ret == 0)
> @@ -15218,7 +15225,7 @@ void intel_modeset_gem_init(struct drm_device *dev)
> ret = intel_pin_and_fence_fb_obj(c->primary,
> c->primary->fb,
> c->primary->state,
> - NULL);
> + NULL, NULL);
> mutex_unlock(&dev->struct_mutex);
> if (ret) {
> DRM_ERROR("failed to pin boot fb on pipe %d\n",
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 02d8317..73650ae 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1034,7 +1034,8 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
> int intel_pin_and_fence_fb_obj(struct drm_plane *plane,
> struct drm_framebuffer *fb,
> const struct drm_plane_state *plane_state,
> - struct intel_engine_cs *pipelined);
> + struct intel_engine_cs *pipelined,
> + struct drm_i915_gem_request **pipelined_request);
> struct drm_framebuffer *
> __intel_framebuffer_create(struct drm_device *dev,
> struct drm_mode_fb_cmd2 *mode_cmd,
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index 4e7e7da..dd9f3b2 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -151,7 +151,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> }
>
> /* Flush everything out, we'll be doing GTT only from now on */
> - ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL);
> + ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
> if (ret) {
> DRM_ERROR("failed to pin obj: %d\n", ret);
> goto out_fb;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 6d005b1..f8e8fdb 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -638,7 +638,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
> struct drm_i915_gem_object *obj = vma->obj;
>
> if (obj->active & other_rings) {
> - ret = i915_gem_object_sync(obj, req->ring);
> + ret = i915_gem_object_sync(obj, req->ring, &req);
> if (ret)
> return ret;
> }
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index e7534b9..0f8187a 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -724,7 +724,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
> if (ret != 0)
> return ret;
>
> - ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL,
> + ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL,
> &i915_ggtt_view_normal);
> if (ret != 0)
> return ret;
>