[Intel-gfx] [PATCH 08/22] drm/i915: Explicitly pin the logical context for execbuf
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Apr 2 16:09:18 UTC 2019
On 25/03/2019 09:03, Chris Wilson wrote:
> In order to separate the reservation phase of building a request from
> its emission phase, we need to pull some of the request alloc activities
> from deep inside i915_request to the surface, GEM_EXECBUFFER.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 106 +++++++++++++--------
> drivers/gpu/drm/i915/i915_request.c | 9 --
> 2 files changed, 67 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3d672c9edb94..8754bb02c6ec 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -36,6 +36,7 @@
>
> #include "i915_drv.h"
> #include "i915_gem_clflush.h"
> +#include "i915_gem_pm.h"
> #include "i915_trace.h"
> #include "intel_drv.h"
> #include "intel_frontbuffer.h"
> @@ -236,7 +237,8 @@ struct i915_execbuffer {
> unsigned int *flags;
>
> struct intel_engine_cs *engine; /** engine to queue the request to */
> - struct i915_gem_context *ctx; /** context for building the request */
> + struct intel_context *context; /* logical state for the request */
> + struct i915_gem_context *gem_context; /** caller's context */
> struct i915_address_space *vm; /** GTT and vma for the request */
>
> struct i915_request *request; /** our request to build */
> @@ -738,7 +740,7 @@ static int eb_select_context(struct i915_execbuffer *eb)
> if (unlikely(!ctx))
> return -ENOENT;
>
> - eb->ctx = ctx;
> + eb->gem_context = ctx;
> if (ctx->ppgtt) {
> eb->vm = &ctx->ppgtt->vm;
> eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
> @@ -761,7 +763,7 @@ static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
> * Completely unscientific finger-in-the-air estimates for suitable
> * maximum user request size (to avoid blocking) and then backoff.
> */
> - if (intel_ring_update_space(ring) >= PAGE_SIZE)
> + if (!ring || intel_ring_update_space(ring) >= PAGE_SIZE)
Move the comment explaining why ring can be NULL while moving the code?
> return NULL;
>
> /*
> @@ -784,7 +786,6 @@ static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
>
> static int eb_wait_for_ring(const struct i915_execbuffer *eb)
> {
> - const struct intel_context *ce;
> struct i915_request *rq;
> int ret = 0;
>
> @@ -794,11 +795,7 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb)
> * keeping all of their resources pinned.
> */
>
> - ce = intel_context_lookup(eb->ctx, eb->engine);
> - if (!ce || !ce->ring) /* first use, assume empty! */
> - return 0;
> -
> - rq = __eb_wait_for_ring(ce->ring);
> + rq = __eb_wait_for_ring(eb->context->ring);
> if (rq) {
> mutex_unlock(&eb->i915->drm.struct_mutex);
>
> @@ -817,15 +814,15 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb)
>
> static int eb_lookup_vmas(struct i915_execbuffer *eb)
> {
> - struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
> + struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
> struct drm_i915_gem_object *obj;
> unsigned int i, batch;
> int err;
>
> - if (unlikely(i915_gem_context_is_closed(eb->ctx)))
> + if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
> return -ENOENT;
>
> - if (unlikely(i915_gem_context_is_banned(eb->ctx)))
> + if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
> return -EIO;
>
> INIT_LIST_HEAD(&eb->relocs);
> @@ -870,8 +867,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
> if (!vma->open_count++)
> i915_vma_reopen(vma);
> list_add(&lut->obj_link, &obj->lut_list);
> - list_add(&lut->ctx_link, &eb->ctx->handles_list);
> - lut->ctx = eb->ctx;
> + list_add(&lut->ctx_link, &eb->gem_context->handles_list);
> + lut->ctx = eb->gem_context;
> lut->handle = handle;
>
> add_vma:
> @@ -1227,7 +1224,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
> if (err)
> goto err_unmap;
>
> - rq = i915_request_alloc(eb->engine, eb->ctx);
> + rq = i915_request_create(eb->context);
> if (IS_ERR(rq)) {
> err = PTR_ERR(rq);
> goto err_unpin;
> @@ -2088,8 +2085,41 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
> [I915_EXEC_VEBOX] = VECS0
> };
>
> -static struct intel_engine_cs *
> -eb_select_engine(struct drm_i915_private *dev_priv,
> +static int eb_pin_context(struct i915_execbuffer *eb,
> + struct intel_engine_cs *engine)
> +{
> + struct intel_context *ce;
> + int err;
> +
> + /*
> + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
> + * EIO if the GPU is already wedged.
> + */
> + err = i915_terminally_wedged(eb->i915);
> + if (err)
> + return err;
> +
> + /*
> + * Pinning the contexts may generate requests in order to acquire
> + * GGTT space, so do this first before we reserve a seqno for
> + * ourselves.
> + */
> + ce = intel_context_pin(eb->gem_context, engine);
> + if (IS_ERR(ce))
> + return PTR_ERR(ce);
> +
> + eb->engine = engine;
> + eb->context = ce;
> + return 0;
> +}
> +
> +static void eb_unpin_context(struct i915_execbuffer *eb)
> +{
> + intel_context_unpin(eb->context);
> +}
> +
> +static int
> +eb_select_engine(struct i915_execbuffer *eb,
> struct drm_file *file,
> struct drm_i915_gem_execbuffer2 *args)
> {
> @@ -2098,21 +2128,21 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>
> if (user_ring_id > I915_USER_RINGS) {
> DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
> - return NULL;
> + return -EINVAL;
> }
>
> if ((user_ring_id != I915_EXEC_BSD) &&
> ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
> DRM_DEBUG("execbuf with non bsd ring but with invalid "
> "bsd dispatch flags: %d\n", (int)(args->flags));
> - return NULL;
> + return -EINVAL;
> }
>
> - if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
> + if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(eb->i915, VCS1)) {
> unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
>
> if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
> - bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
> + bsd_idx = gen8_dispatch_bsd_engine(eb->i915, file);
> } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
> bsd_idx <= I915_EXEC_BSD_RING2) {
> bsd_idx >>= I915_EXEC_BSD_SHIFT;
> @@ -2120,20 +2150,20 @@ eb_select_engine(struct drm_i915_private *dev_priv,
> } else {
> DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
> bsd_idx);
> - return NULL;
> + return -EINVAL;
> }
>
> - engine = dev_priv->engine[_VCS(bsd_idx)];
> + engine = eb->i915->engine[_VCS(bsd_idx)];
> } else {
> - engine = dev_priv->engine[user_ring_map[user_ring_id]];
> + engine = eb->i915->engine[user_ring_map[user_ring_id]];
Cache dev_priv/i915 in a local?
> }
>
> if (!engine) {
> DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
> - return NULL;
> + return -EINVAL;
> }
>
> - return engine;
> + return eb_pin_context(eb, engine);
> }
>
> static void
> @@ -2275,7 +2305,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> struct i915_execbuffer eb;
> struct dma_fence *in_fence = NULL;
> struct sync_file *out_fence = NULL;
> - intel_wakeref_t wakeref;
> int out_fence_fd = -1;
> int err;
>
> @@ -2335,12 +2364,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> if (unlikely(err))
> goto err_destroy;
>
> - eb.engine = eb_select_engine(eb.i915, file, args);
> - if (!eb.engine) {
> - err = -EINVAL;
> - goto err_engine;
> - }
> -
> /*
> * Take a local wakeref for preparing to dispatch the execbuf as
> * we expect to access the hardware fairly frequently in the
> @@ -2348,16 +2371,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> * wakeref that we hold until the GPU has been idle for at least
> * 100ms.
> */
> - wakeref = intel_runtime_pm_get(eb.i915);
> + i915_gem_unpark(eb.i915);
>
> err = i915_mutex_lock_interruptible(dev);
> if (err)
> goto err_rpm;
>
> - err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
> + err = eb_select_engine(&eb, file, args);
> if (unlikely(err))
> goto err_unlock;
>
> + err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
> + if (unlikely(err))
> + goto err_engine;
> +
> err = eb_relocate(&eb);
> if (err) {
> /*
> @@ -2441,7 +2468,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> GEM_BUG_ON(eb.reloc_cache.rq);
>
> /* Allocate a request for this batch buffer nice and early. */
> - eb.request = i915_request_alloc(eb.engine, eb.ctx);
> + eb.request = i915_request_create(eb.context);
> if (IS_ERR(eb.request)) {
> err = PTR_ERR(eb.request);
> goto err_batch_unpin;
> @@ -2502,12 +2529,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> err_vma:
> if (eb.exec)
> eb_release_vmas(&eb);
> +err_engine:
> + eb_unpin_context(&eb);
> err_unlock:
> mutex_unlock(&dev->struct_mutex);
> err_rpm:
> - intel_runtime_pm_put(eb.i915, wakeref);
> -err_engine:
> - i915_gem_context_put(eb.ctx);
> + i915_gem_park(eb.i915);
> + i915_gem_context_put(eb.gem_context);
> err_destroy:
> eb_destroy(&eb);
> err_out_fence:
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index fd24f576ca61..10edeb285870 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -741,7 +741,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> struct drm_i915_private *i915 = engine->i915;
> struct intel_context *ce;
> struct i915_request *rq;
> - int ret;
>
> /*
> * Preempt contexts are reserved for exclusive use to inject a
> @@ -750,14 +749,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> */
> GEM_BUG_ON(ctx == i915->preempt_context);
>
> - /*
> - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
> - * EIO if the GPU is already wedged.
> - */
> - ret = i915_terminally_wedged(i915);
> - if (ret)
> - return ERR_PTR(ret);
> -
> /*
> * Pinning the contexts may generate requests in order to acquire
> * GGTT space, so do this first before we reserve a seqno for
>
With the comment preserved:
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list