[Intel-gfx] [PATCH 25/55] drm/i915: Update i915_gem_object_sync() to take a request structure
John Harrison
John.C.Harrison at Intel.com
Thu Jun 4 05:57:16 PDT 2015
On 02/06/2015 19:26, Tomas Elf wrote:
> On 29/05/2015 17:43, John.C.Harrison at Intel.com wrote:
>> From: John Harrison <John.C.Harrison at Intel.com>
>>
>> The plan is to pass requests around as the basic submission tracking
>> structure rather than rings and contexts. This patch updates the
>> i915_gem_object_sync() code path.
>>
>> v2: Much more complex patch to share a single request between the sync
>> and the page flip. The _sync() function now supports lazy allocation of
>> the request structure. That is, if one is passed in then that will be
>> used. If one is not, then a request will be allocated and passed back
>> out. Note that the _sync() code does not necessarily require a request.
>> Thus one will only be created until certain situations. The reason the
>> lazy allocation must be done within the _sync() code itself is because
>> the decision to need one or not is not really something that code above
>> can second guess (except in the case where one is definitely not
>> required because no ring is passed in).
>>
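For illustration, the lazy allocation described above boils down to the
following inside __i915_gem_object_sync() (a simplified sketch of the
semaphore path only, error handling trimmed; the real code is in the
i915_gem.c hunk further down):

    /* Semaphore path: a request on the 'to' ring is only needed here. */
    if (*to_req == NULL) {
            /* Caller did not supply a request: allocate one lazily and
             * hand it back through the out parameter. */
            ret = i915_gem_request_alloc(to, to->default_context, to_req);
            if (ret)
                    return ret;
    }

    trace_i915_gem_ring_sync_to(from, to, from_req);
    ret = to->semaphore.sync_to(to, from, seqno);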
>> The call chains above _sync() now support passing a request through
>> which most callers passing in NULL and assuming that no request will be
>> required (because they also pass in NULL for the ring and therefore
>> can't be generating any ring code).
>>
>> The exeception is intel_crtc_page_flip() which now supports having a request
>
> 1. "The exeception" -> "The exception"
>
>> returned from _sync(). If one is, then that request is shared by the
>> page flip (if the page flip is of a type to need a request). If _sync()
>> does not generate a request but the page flip does need one, then the
>> page flip path will create its own request.
>>
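To make the caller side concrete, the intel_crtc_page_flip() flow ends up
looking roughly like this (simplified sketch; the mmio_flip ring selection
and most error handling are trimmed, see the intel_display.c hunks below):

    struct drm_i915_gem_request *request = NULL;

    /* May allocate 'request' if the sync path emits semaphore commands. */
    ret = intel_pin_and_fence_fb_obj(crtc->primary, fb, crtc->primary->state,
                                     ring, &request);
    if (ret)
            goto cleanup_pending;

    /* ... queue the flip, sharing 'request' if the flip needs one ... */

    if (request)
            i915_add_request_no_flush(request->ring);

with the error path doing:

    cleanup_pending:
        if (request)
                i915_gem_request_cancel(request);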
>> v3: Updated comment description to be clearer about 'to_req' parameter
>> (Tomas Elf review request). Rebased onto newer tree that significantly
>> changed the synchronisation code.
>>
>> For: VIZ-5115
>> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 4 ++-
>> drivers/gpu/drm/i915/i915_gem.c | 48 +++++++++++++++++++++-------
>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +-
>> drivers/gpu/drm/i915/intel_display.c | 17 +++++++---
>> drivers/gpu/drm/i915/intel_drv.h | 3 +-
>> drivers/gpu/drm/i915/intel_fbdev.c | 2 +-
>> drivers/gpu/drm/i915/intel_lrc.c | 2 +-
>> drivers/gpu/drm/i915/intel_overlay.c | 2 +-
>> 8 files changed, 58 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 64a10fa..f69e9cb 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2778,7 +2778,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>>
>> int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>> int i915_gem_object_sync(struct drm_i915_gem_object *obj,
>> - struct intel_engine_cs *to);
>> + struct intel_engine_cs *to,
>> + struct drm_i915_gem_request **to_req);
>> void i915_vma_move_to_active(struct i915_vma *vma,
>> struct intel_engine_cs *ring);
>> int i915_gem_dumb_create(struct drm_file *file_priv,
>> @@ -2889,6 +2890,7 @@ int __must_check
>> i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>> u32 alignment,
>> struct intel_engine_cs *pipelined,
>> + struct drm_i915_gem_request **pipelined_request,
>> const struct i915_ggtt_view *view);
>> void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
>> const struct i915_ggtt_view *view);
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index b7d66aa..db90043 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -3098,25 +3098,26 @@ out:
>> static int
>> __i915_gem_object_sync(struct drm_i915_gem_object *obj,
>> struct intel_engine_cs *to,
>> - struct drm_i915_gem_request *req)
>> + struct drm_i915_gem_request *from_req,
>> + struct drm_i915_gem_request **to_req)
>> {
>> struct intel_engine_cs *from;
>> int ret;
>>
>> - from = i915_gem_request_get_ring(req);
>> + from = i915_gem_request_get_ring(from_req);
>> if (to == from)
>> return 0;
>>
>> - if (i915_gem_request_completed(req, true))
>> + if (i915_gem_request_completed(from_req, true))
>> return 0;
>>
>> - ret = i915_gem_check_olr(req);
>> + ret = i915_gem_check_olr(from_req);
>> if (ret)
>> return ret;
>>
>> if (!i915_semaphore_is_enabled(obj->base.dev)) {
>> struct drm_i915_private *i915 = to_i915(obj->base.dev);
>> - ret = __i915_wait_request(req,
>> + ret = __i915_wait_request(from_req,
>> atomic_read(&i915->gpu_error.reset_counter),
>> i915->mm.interruptible,
>> NULL,
>> @@ -3124,15 +3125,25 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
>> if (ret)
>> return ret;
>>
>> - i915_gem_object_retire_request(obj, req);
>> + i915_gem_object_retire_request(obj, from_req);
>> } else {
>> int idx = intel_ring_sync_index(from, to);
>> - u32 seqno = i915_gem_request_get_seqno(req);
>> + u32 seqno = i915_gem_request_get_seqno(from_req);
>>
>> + WARN_ON(!to_req);
>> +
>> + /* Optimization: Avoid semaphore sync when we are sure we already
>> + * waited for an object with higher seqno */
>
> 2. How about using the standard multi-line comment format?
>
Not my comment. It looks like Chris removed it in his re-write of the
sync code and I accidentally put it back in when resolving the merge
conflicts. I'll drop it again.
> /* (empty line)
> * (first line)
> * (second line)
> */
>
>> if (seqno <= from->semaphore.sync_seqno[idx])
>> return 0;
>>
>> - trace_i915_gem_ring_sync_to(from, to, req);
>> + if (*to_req == NULL) {
>> + ret = i915_gem_request_alloc(to, to->default_context, to_req);
>> + if (ret)
>> + return ret;
>> + }
>> +
>> + trace_i915_gem_ring_sync_to(from, to, from_req);
>> ret = to->semaphore.sync_to(to, from, seqno);
>> if (ret)
>> return ret;
>> @@ -3153,6 +3164,9 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
>> *
>> * @obj: object which may be in use on another ring.
>> * @to: ring we wish to use the object on. May be NULL.
>> + * @to_req: request we wish to use the object for. See below.
>> + * This will be allocated and returned if a request is
>> + * required but not passed in.
>> *
>> * This code is meant to abstract object synchronization with the GPU.
>> * Calling with NULL implies synchronizing the object with the CPU
>> @@ -3168,11 +3182,22 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
>> * - If we are a write request (pending_write_domain is set), the new
>> * request must wait for outstanding read requests to complete.
>> *
>> + * For CPU synchronisation (NULL to) no request is required. For syncing with
>> + * rings to_req must be non-NULL. However, a request does not have to be
>> + * pre-allocated. If *to_req is null and sync commands will be emitted then a
>> + * request will be allocated automatically and returned through *to_req. Note
>> + * that it is not guaranteed that commands will be emitted (because the
>> + * might already be idle). Hence there is no need to create a request that
>> + * might never have any work submitted. Note further that if a request is
>> + * returned in *to_req, it is the responsibility of the caller to submit
>> + * that request (after potentially adding more work to it).
>> + *
>
> 3. "(because the might already be idle)" : The what? The engine?
> 4. "NULL" and "null" mixed. Please be consistent.
>
> Overall, the explanation is better than in the last patch version
>
> With those minor changes:
>
> Reviewed-by: Tomas Elf <tomas.elf at intel.com>
>
> Thanks,
> Tomas
>
>> * Returns 0 if successful, else propagates up the lower layer error.
>> */
>> int
>> i915_gem_object_sync(struct drm_i915_gem_object *obj,
>> - struct intel_engine_cs *to)
>> + struct intel_engine_cs *to,
>> + struct drm_i915_gem_request **to_req)
>> {
>> const bool readonly = obj->base.pending_write_domain == 0;
>> struct drm_i915_gem_request *req[I915_NUM_RINGS];
>> @@ -3194,7 +3219,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>> req[n++] = obj->last_read_req[i];
>> }
>> for (i = 0; i < n; i++) {
>> - ret = __i915_gem_object_sync(obj, to, req[i]);
>> + ret = __i915_gem_object_sync(obj, to, req[i], to_req);
>> if (ret)
>> return ret;
>> }
>> @@ -4144,12 +4169,13 @@ int
>> i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>> u32 alignment,
>> struct intel_engine_cs *pipelined,
>> + struct drm_i915_gem_request **pipelined_request,
>> const struct i915_ggtt_view *view)
>> {
>> u32 old_read_domains, old_write_domain;
>> int ret;
>>
>> - ret = i915_gem_object_sync(obj, pipelined);
>> + ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
>> if (ret)
>> return ret;
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index 50b1ced..bea92ad 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -899,7 +899,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>> struct drm_i915_gem_object *obj = vma->obj;
>>
>> if (obj->active & other_rings) {
>> - ret = i915_gem_object_sync(obj, req->ring);
>> + ret = i915_gem_object_sync(obj, req->ring, &req);
>> if (ret)
>> return ret;
>> }
>> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
>> index 657a333..6528ada 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -2338,7 +2338,8 @@ int
>> intel_pin_and_fence_fb_obj(struct drm_plane *plane,
>> struct drm_framebuffer *fb,
>> const struct drm_plane_state *plane_state,
>> - struct intel_engine_cs *pipelined)
>> + struct intel_engine_cs *pipelined,
>> + struct drm_i915_gem_request **pipelined_request)
>> {
>> struct drm_device *dev = fb->dev;
>> struct drm_i915_private *dev_priv = dev->dev_private;
>> @@ -2403,7 +2404,7 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane,
>>
>> dev_priv->mm.interruptible = false;
>> ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined,
>> - &view);
>> + pipelined_request, &view);
>> if (ret)
>> goto err_interruptible;
>>
>> @@ -11119,6 +11120,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>> struct intel_unpin_work *work;
>> struct intel_engine_cs *ring;
>> bool mmio_flip;
>> + struct drm_i915_gem_request *request = NULL;
>> int ret;
>>
>> /*
>> @@ -11225,7 +11227,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>> */
>> ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
>> crtc->primary->state,
>> - mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring);
>> + mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring, &request);
>> if (ret)
>> goto cleanup_pending;
>>
>> @@ -11256,6 +11258,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>> intel_ring_get_request(ring));
>> }
>>
>> + if (request)
>> + i915_add_request_no_flush(request->ring);
>> +
>> work->flip_queued_vblank = drm_crtc_vblank_count(crtc);
>> work->enable_stall_check = true;
>>
>> @@ -11273,6 +11278,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>> cleanup_unpin:
>> intel_unpin_fb_obj(fb, crtc->primary->state);
>> cleanup_pending:
>> + if (request)
>> + i915_gem_request_cancel(request);
>> atomic_dec(&intel_crtc->unpin_work_count);
>> mutex_unlock(&dev->struct_mutex);
>> cleanup:
>> @@ -13171,7 +13178,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
>> if (ret)
>> DRM_DEBUG_KMS("failed to attach phys object\n");
>> } else {
>> - ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL);
>> + ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL, NULL);
>> }
>>
>> if (ret == 0)
>> @@ -15218,7 +15225,7 @@ void intel_modeset_gem_init(struct drm_device *dev)
>> ret = intel_pin_and_fence_fb_obj(c->primary,
>> c->primary->fb,
>> c->primary->state,
>> - NULL);
>> + NULL, NULL);
>> mutex_unlock(&dev->struct_mutex);
>> if (ret) {
>> DRM_ERROR("failed to pin boot fb on pipe %d\n",
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>> index 02d8317..73650ae 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -1034,7 +1034,8 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
>> int intel_pin_and_fence_fb_obj(struct drm_plane *plane,
>> struct drm_framebuffer *fb,
>> const struct drm_plane_state *plane_state,
>> - struct intel_engine_cs *pipelined);
>> + struct intel_engine_cs *pipelined,
>> + struct drm_i915_gem_request **pipelined_request);
>> struct drm_framebuffer *
>> __intel_framebuffer_create(struct drm_device *dev,
>> struct drm_mode_fb_cmd2 *mode_cmd,
>> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
>> index 4e7e7da..dd9f3b2 100644
>> --- a/drivers/gpu/drm/i915/intel_fbdev.c
>> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
>> @@ -151,7 +151,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>> }
>>
>> /* Flush everything out, we'll be doing GTT only from now on */
>> - ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL);
>> + ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
>> if (ret) {
>> DRM_ERROR("failed to pin obj: %d\n", ret);
>> goto out_fb;
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index 6d005b1..f8e8fdb 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -638,7 +638,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
>> struct drm_i915_gem_object *obj = vma->obj;
>>
>> if (obj->active & other_rings) {
>> - ret = i915_gem_object_sync(obj, req->ring);
>> + ret = i915_gem_object_sync(obj, req->ring, &req);
>> if (ret)
>> return ret;
>> }
>> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
>> index e7534b9..0f8187a 100644
>> --- a/drivers/gpu/drm/i915/intel_overlay.c
>> +++ b/drivers/gpu/drm/i915/intel_overlay.c
>> @@ -724,7 +724,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
>> if (ret != 0)
>> return ret;
>>
>> - ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL,
>> + ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL,
>> &i915_ggtt_view_normal);
>> if (ret != 0)
>> return ret;
>>
>