[Intel-gfx] [PATCH 18/31] drm/i915: Simplify request_alloc by returning the allocated request
Joonas Lahtinen
joonas.lahtinen at linux.intel.com
Mon Jul 25 09:18:45 UTC 2016
On Mon, 2016-07-25 at 08:44 +0100, Chris Wilson wrote:
> It is simpler and leads to more readable code throughout the callstack
> if the allocation returns the allocated struct through the return value.
>
> The importance of this is that it no longer looks like we accidentally
> allocate requests as a side-effect of calling certain functions.
>
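That reads right to me. For anyone skimming, the calling-convention
change boils down to roughly this (paraphrased from the intel_display.c
hunk below, not a compilable excerpt):

	/* Before: a request could appear as a side effect of syncing */
	struct drm_i915_gem_request *request = NULL;
	ret = i915_gem_object_sync(obj, engine, &request);
	if (!ret && !request)
		request = i915_gem_request_alloc(engine, NULL);

	/* After: allocation is explicit and up front; sync takes the request */
	request = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(request))
		return PTR_ERR(request);
	ret = i915_gem_object_sync(obj, request);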
Dave seems to have expressed a wish to review this around January, so
CC'ing him here.
From me,
Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h            |  3 +-
> drivers/gpu/drm/i915/i915_gem.c            | 75 ++++++----------------------
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++---
> drivers/gpu/drm/i915/i915_gem_request.c    | 58 ++++++++---------------
> drivers/gpu/drm/i915/i915_trace.h          | 13 +++---
> drivers/gpu/drm/i915/intel_display.c       | 36 ++++++--------
> drivers/gpu/drm/i915/intel_lrc.c           |  2 +-
> drivers/gpu/drm/i915/intel_overlay.c       | 20 ++++----
> 8 files changed, 79 insertions(+), 140 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 09e72b0510ab..1031f5a7126a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3171,8 +3171,7 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
>
> int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
> int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> - struct intel_engine_cs *to,
> - struct drm_i915_gem_request **to_req);
> + struct drm_i915_gem_request *to);
> void i915_vma_move_to_active(struct i915_vma *vma,
> struct drm_i915_gem_request *req);
> int i915_gem_dumb_create(struct drm_file *file_priv,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 59890f523c5f..b6c4ff63725f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2845,51 +2845,35 @@ out:
>
> static int
> __i915_gem_object_sync(struct drm_i915_gem_object *obj,
> - struct intel_engine_cs *to,
> - struct drm_i915_gem_request *from_req,
> - struct drm_i915_gem_request **to_req)
> + struct drm_i915_gem_request *to,
> + struct drm_i915_gem_request *from)
> {
> - struct intel_engine_cs *from;
> int ret;
>
> - from = i915_gem_request_get_engine(from_req);
> - if (to == from)
> + if (to->engine == from->engine)
> return 0;
>
> - if (i915_gem_request_completed(from_req))
> + if (i915_gem_request_completed(from))
> return 0;
>
> if (!i915.semaphores) {
> - struct drm_i915_private *i915 = to_i915(obj->base.dev);
> - ret = __i915_wait_request(from_req,
> - i915->mm.interruptible,
> + ret = __i915_wait_request(from,
> + from->i915->mm.interruptible,
> NULL,
> NO_WAITBOOST);
> if (ret)
> return ret;
>
> - i915_gem_object_retire_request(obj, from_req);
> + i915_gem_object_retire_request(obj, from);
> } else {
> - int idx = intel_engine_sync_index(from, to);
> - u32 seqno = i915_gem_request_get_seqno(from_req);
> + int idx = intel_engine_sync_index(from->engine, to->engine);
> + u32 seqno = i915_gem_request_get_seqno(from);
>
> - WARN_ON(!to_req);
> -
> - if (seqno <= from->semaphore.sync_seqno[idx])
> + if (seqno <= from->engine->semaphore.sync_seqno[idx])
> return 0;
>
> - if (*to_req == NULL) {
> - struct drm_i915_gem_request *req;
> -
> - req = i915_gem_request_alloc(to, NULL);
> - if (IS_ERR(req))
> - return PTR_ERR(req);
> -
> - *to_req = req;
> - }
> -
> - trace_i915_gem_ring_sync_to(*to_req, from, from_req);
> - ret = to->semaphore.sync_to(*to_req, from, seqno);
> + trace_i915_gem_ring_sync_to(to, from);
> + ret = to->engine->semaphore.sync_to(to, from->engine, seqno);
> if (ret)
> return ret;
>
> @@ -2897,8 +2881,8 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
> * might have just caused seqno wrap under
> * the radar.
> */
> - from->semaphore.sync_seqno[idx] =
> - i915_gem_request_get_seqno(obj->last_read_req[from->id]);
> + from->engine->semaphore.sync_seqno[idx] =
> + i915_gem_request_get_seqno(obj->last_read_req[from->engine->id]);
> }
>
> return 0;
> @@ -2908,17 +2892,12 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
> * i915_gem_object_sync - sync an object to a ring.
> *
> * @obj: object which may be in use on another ring.
> - * @to: ring we wish to use the object on. May be NULL.
> - * @to_req: request we wish to use the object for. See below.
> - * This will be allocated and returned if a request is
> - * required but not passed in.
> + * @to: request we are wishing to use
> *
> * This code is meant to abstract object synchronization with the GPU.
> - * Calling with NULL implies synchronizing the object with the CPU
> - * rather than a particular GPU ring. Conceptually we serialise writes
> - * between engines inside the GPU. We only allow one engine to write
> - * into a buffer at any time, but multiple readers. To ensure each has
> - * a coherent view of memory, we must:
> + * Conceptually we serialise writes between engines inside the GPU.
> + * We only allow one engine to write into a buffer at any time, but
> + * multiple readers. To ensure each has a coherent view of memory, we must:
> *
> * - If there is an outstanding write request to the object, the new
> * request must wait for it to complete (either CPU or in hw, requests
> @@ -2927,22 +2906,11 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
> * - If we are a write request (pending_write_domain is set), the new
> * request must wait for outstanding read requests to complete.
> *
> - * For CPU synchronisation (NULL to) no request is required. For syncing with
> - * rings to_req must be non-NULL. However, a request does not have to be
> - * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
> - * request will be allocated automatically and returned through *to_req. Note
> - * that it is not guaranteed that commands will be emitted (because the system
> - * might already be idle). Hence there is no need to create a request that
> - * might never have any work submitted. Note further that if a request is
> - * returned in *to_req, it is the responsibility of the caller to submit
> - * that request (after potentially adding more work to it).
> - *
> * Returns 0 if successful, else propagates up the lower layer error.
> */
> int
> i915_gem_object_sync(struct drm_i915_gem_object *obj,
> - struct intel_engine_cs *to,
> - struct drm_i915_gem_request **to_req)
> + struct drm_i915_gem_request *to)
> {
> const bool readonly = obj->base.pending_write_domain == 0;
> struct drm_i915_gem_request *req[I915_NUM_ENGINES];
> @@ -2951,9 +2919,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
> if (!obj->active)
> return 0;
>
> - if (to == NULL)
> - return i915_gem_object_wait_rendering(obj, readonly);
> -
> n = 0;
> if (readonly) {
> if (obj->last_write_req)
> @@ -2964,7 +2929,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
> req[n++] = obj->last_read_req[i];
> }
> for (i = 0; i < n; i++) {
> - ret = __i915_gem_object_sync(obj, to, req[i], to_req);
> + ret = __i915_gem_object_sync(obj, to, req[i]);
> if (ret)
> return ret;
> }
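One behavioural note on the hunk above: the old to == NULL path, which
fell back to CPU synchronisation, is gone along with the kerneldoc for
it. If I'm reading the series right, any caller that used to rely on

	ret = i915_gem_object_sync(obj, NULL, NULL);

would now call the wait directly instead, e.g.

	ret = i915_gem_object_wait_rendering(obj, readonly);

which is exactly what the removed branch did internally.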
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 048050176ff9..75d19932847d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -981,7 +981,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
> struct drm_i915_gem_object *obj = vma->obj;
>
> if (obj->active & other_rings) {
> - ret = i915_gem_object_sync(obj, req->engine, &req);
> + ret = i915_gem_object_sync(obj, req);
> if (ret)
> return ret;
> }
> @@ -1427,7 +1427,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> {
> struct drm_i915_private *dev_priv = to_i915(dev);
> struct i915_ggtt *ggtt = &dev_priv->ggtt;
> - struct drm_i915_gem_request *req = NULL;
> struct eb_vmas *eb;
> struct drm_i915_gem_object *batch_obj;
> struct drm_i915_gem_exec_object2 shadow_exec_entry;
> @@ -1615,13 +1614,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
>
> /* Allocate a request for this batch buffer nice and early. */
> - req = i915_gem_request_alloc(engine, ctx);
> - if (IS_ERR(req)) {
> - ret = PTR_ERR(req);
> + params->request = i915_gem_request_alloc(engine, ctx);
> + if (IS_ERR(params->request)) {
> + ret = PTR_ERR(params->request);
> goto err_batch_unpin;
> }
>
> - ret = i915_gem_request_add_to_client(req, file);
> + ret = i915_gem_request_add_to_client(params->request, file);
> if (ret)
> goto err_request;
>
> @@ -1637,7 +1636,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> params->dispatch_flags = dispatch_flags;
> params->batch_obj = batch_obj;
> params->ctx = ctx;
> - params->request = req;
>
> ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
> err_request:
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 7b772d914e23..995ae99e9cf7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -292,10 +292,21 @@ static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
> return 0;
> }
>
> -static inline int
> -__i915_gem_request_alloc(struct intel_engine_cs *engine,
> - struct i915_gem_context *ctx,
> - struct drm_i915_gem_request **req_out)
> +/**
> + * i915_gem_request_alloc - allocate a request structure
> + *
> + * @engine: engine that we wish to issue the request on.
> + * @ctx: context that the request will be associated with.
> + * This can be NULL if the request is not directly related to
> + * any specific user context, in which case this function will
> + * choose an appropriate context to use.
> + *
> + * Returns a pointer to the allocated request if successful,
> + * or an error code if not.
> + */
> +struct drm_i915_gem_request *
> +i915_gem_request_alloc(struct intel_engine_cs *engine,
> + struct i915_gem_context *ctx)
> {
> struct drm_i915_private *dev_priv = engine->i915;
> unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
> @@ -303,18 +314,13 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
> u32 seqno;
> int ret;
>
> - if (!req_out)
> - return -EINVAL;
> -
> - *req_out = NULL;
> -
> /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
> * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
> * and restart.
> */
> ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
> if (ret)
> - return ret;
> + return ERR_PTR(ret);
>
> /* Move the oldest request to the slab-cache (if not in use!) */
> req = list_first_entry_or_null(&engine->request_list,
> @@ -324,7 +330,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
>
> req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
> if (!req)
> - return -ENOMEM;
> + return ERR_PTR(-ENOMEM);
>
> ret = i915_gem_get_seqno(dev_priv, &seqno);
> if (ret)
> @@ -357,39 +363,13 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
> if (ret)
> goto err_ctx;
>
> - *req_out = req;
> - return 0;
> + return req;
>
> err_ctx:
> i915_gem_context_put(ctx);
> err:
> kmem_cache_free(dev_priv->requests, req);
> - return ret;
> -}
> -
> -/**
> - * i915_gem_request_alloc - allocate a request structure
> - *
> - * @engine: engine that we wish to issue the request on.
> - * @ctx: context that the request will be associated with.
> - * This can be NULL if the request is not directly related to
> - * any specific user context, in which case this function will
> - * choose an appropriate context to use.
> - *
> - * Returns a pointer to the allocated request if successful,
> - * or an error code if not.
> - */
> -struct drm_i915_gem_request *
> -i915_gem_request_alloc(struct intel_engine_cs *engine,
> - struct i915_gem_context *ctx)
> -{
> - struct drm_i915_gem_request *req;
> - int err;
> -
> - if (!ctx)
> - ctx = engine->i915->kernel_context;
> - err = __i915_gem_request_alloc(engine, ctx, &req);
> - return err ? ERR_PTR(err) : req;
> + return ERR_PTR(ret);
> }
>
> static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
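With __i915_gem_request_alloc() folded in, the ERR_PTR() convention now
applies end to end, so every caller reduces to the same simple pattern
(sketch, matching the execbuffer and overlay hunks):

	req = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(req))
		return PTR_ERR(req);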
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 007112d1e049..9e43c0aa6e3b 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -449,10 +449,9 @@ TRACE_EVENT(i915_gem_evict_vm,
> );
>
> TRACE_EVENT(i915_gem_ring_sync_to,
> - TP_PROTO(struct drm_i915_gem_request *to_req,
> - struct intel_engine_cs *from,
> - struct drm_i915_gem_request *req),
> - TP_ARGS(to_req, from, req),
> + TP_PROTO(struct drm_i915_gem_request *to,
> + struct drm_i915_gem_request *from),
> + TP_ARGS(to, from),
>
> TP_STRUCT__entry(
> __field(u32, dev)
> @@ -463,9 +462,9 @@ TRACE_EVENT(i915_gem_ring_sync_to,
>
> TP_fast_assign(
> __entry->dev = from->i915->drm.primary->index;
> - __entry->sync_from = from->id;
> - __entry->sync_to = to_req->engine->id;
> - __entry->seqno = req->fence.seqno;
> + __entry->sync_from = from->engine->id;
> + __entry->sync_to = to->engine->id;
> + __entry->seqno = from->fence.seqno;
> ),
>
> TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index bff172c45ff7..5d4420b67642 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11583,7 +11583,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
> struct intel_flip_work *work;
> struct intel_engine_cs *engine;
> bool mmio_flip;
> - struct drm_i915_gem_request *request = NULL;
> + struct drm_i915_gem_request *request;
> int ret;
>
> /*
> @@ -11690,22 +11690,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>
> mmio_flip = use_mmio_flip(engine, obj);
>
> - /* When using CS flips, we want to emit semaphores between rings.
> - * However, when using mmio flips we will create a task to do the
> - * synchronisation, so all we want here is to pin the framebuffer
> - * into the display plane and skip any waits.
> - */
> - if (!mmio_flip) {
> - ret = i915_gem_object_sync(obj, engine, &request);
> - if (!ret && !request) {
> - request = i915_gem_request_alloc(engine, NULL);
> - ret = PTR_ERR_OR_ZERO(request);
> - }
> -
> - if (ret)
> - goto cleanup_pending;
> - }
> -
> ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation);
> if (ret)
> goto cleanup_pending;
> @@ -11723,14 +11707,24 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>
> schedule_work(&work->mmio_work);
> } else {
> - i915_gem_request_assign(&work->flip_queued_req, request);
> + request = i915_gem_request_alloc(engine, engine->last_context);
> + if (IS_ERR(request)) {
> + ret = PTR_ERR(request);
> + goto cleanup_unpin;
> + }
> +
> + ret = i915_gem_object_sync(obj, request);
> + if (ret)
> + goto cleanup_request;
> +
> ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request,
> page_flip_flags);
> if (ret)
> - goto cleanup_unpin;
> + goto cleanup_request;
>
> intel_mark_page_flip_active(intel_crtc, work);
>
> + work->flip_queued_req = i915_gem_request_get(request);
> i915_add_request_no_flush(request);
> }
>
> @@ -11745,11 +11739,11 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>
> return 0;
>
> +cleanup_request:
> + i915_add_request_no_flush(request);
> cleanup_unpin:
> intel_unpin_fb_obj(fb, crtc->primary->state->rotation);
> cleanup_pending:
> - if (!IS_ERR_OR_NULL(request))
> - i915_add_request_no_flush(request);
> atomic_dec(&intel_crtc->unpin_work_count);
> mutex_unlock(&dev->struct_mutex);
> cleanup:
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 6487794ac96c..a37af1533618 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -655,7 +655,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
> struct drm_i915_gem_object *obj = vma->obj;
>
> if (obj->active & other_rings) {
> - ret = i915_gem_object_sync(obj, req->engine, &req);
> + ret = i915_gem_object_sync(obj, req);
> if (ret)
> return ret;
> }
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 8f1d4d9ef345..651efe4e468e 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -229,11 +229,18 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> return 0;
> }
>
> +static struct drm_i915_gem_request *alloc_request(struct intel_overlay *overlay)
> +{
> + struct drm_i915_private *dev_priv = overlay->i915;
> + struct intel_engine_cs *engine = &dev_priv->engine[RCS];
> +
> + return i915_gem_request_alloc(engine, dev_priv->kernel_context);
> +}
> +
> /* overlay needs to be disable in OCMD reg */
> static int intel_overlay_on(struct intel_overlay *overlay)
> {
> struct drm_i915_private *dev_priv = overlay->i915;
> - struct intel_engine_cs *engine = &dev_priv->engine[RCS];
> struct drm_i915_gem_request *req;
> struct intel_ring *ring;
> int ret;
> @@ -241,7 +248,7 @@ static int intel_overlay_on(struct intel_overlay *overlay)
> WARN_ON(overlay->active);
> WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
>
> - req = i915_gem_request_alloc(engine, NULL);
> + req = alloc_request(overlay);
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> @@ -268,7 +275,6 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
> bool load_polyphase_filter)
> {
> struct drm_i915_private *dev_priv = overlay->i915;
> - struct intel_engine_cs *engine = &dev_priv->engine[RCS];
> struct drm_i915_gem_request *req;
> struct intel_ring *ring;
> u32 flip_addr = overlay->flip_addr;
> @@ -285,7 +291,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
> if (tmp & (1 << 17))
> DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
>
> - req = i915_gem_request_alloc(engine, NULL);
> + req = alloc_request(overlay);
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> @@ -338,7 +344,6 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay)
> static int intel_overlay_off(struct intel_overlay *overlay)
> {
> struct drm_i915_private *dev_priv = overlay->i915;
> - struct intel_engine_cs *engine = &dev_priv->engine[RCS];
> struct drm_i915_gem_request *req;
> struct intel_ring *ring;
> u32 flip_addr = overlay->flip_addr;
> @@ -352,7 +357,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
> * of the hw. Do it in both cases */
> flip_addr |= OFC_UPDATE;
>
> - req = i915_gem_request_alloc(engine, NULL);
> + req = alloc_request(overlay);
> if (IS_ERR(req))
> return PTR_ERR(req);
>
> @@ -412,7 +417,6 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
> static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
> {
> struct drm_i915_private *dev_priv = overlay->i915;
> - struct intel_engine_cs *engine = &dev_priv->engine[RCS];
> int ret;
>
> lockdep_assert_held(&dev_priv->drm.struct_mutex);
> @@ -428,7 +432,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
> struct drm_i915_gem_request *req;
> struct intel_ring *ring;
>
> - req = i915_gem_request_alloc(engine, NULL);
> + req = alloc_request(overlay);
> if (IS_ERR(req))
> return PTR_ERR(req);
>
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation