[Intel-gfx] [RFC 5/9] drm/i915: Add per context timelines to fence object
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jul 23 06:50:37 PDT 2015
Hi,
On 07/17/2015 03:31 PM, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> The fence object used inside the request structure requires a sequence number.
> Although this is not used by the i915 driver itself, it could potentially be
> used by non-i915 code if the fence is passed outside of the driver. This is the
> intention as it allows external kernel drivers and user applications to wait on
> batch buffer completion asynchronously via the dma-buff fence API.
>
> To ensure that such external users are not confused by strange things happening
> with the seqno, this patch adds in a per context timeline that can provide a
> guaranteed in-order seqno value for the fence. This is safe because the
> scheduler will not re-order batch buffers within a context - they are considered
> to be mutually dependent.
>
> [new patch in series]
>
> For: VIZ-5190
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 25 ++++++++----
> drivers/gpu/drm/i915/i915_gem.c | 69 ++++++++++++++++++++++++++++++---
> drivers/gpu/drm/i915/i915_gem_context.c | 15 ++++++-
> drivers/gpu/drm/i915/intel_lrc.c | 8 ++++
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 -
> 5 files changed, 103 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0c7df46..88a4746 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -840,6 +840,15 @@ struct i915_ctx_hang_stats {
> bool banned;
> };
>
> +struct i915_fence_timeline {
> + unsigned fence_context;
> + uint32_t context;
Unused field?
> + uint32_t next;
fence.h defines seqnos as 'unsigned', which matches this in practice,
but maybe it would be nicer to use the same type name.
> +
> + struct intel_context *ctx;
> + struct intel_engine_cs *ring;
> +};
> +
> /* This must match up with the value previously used for execbuf2.rsvd1. */
> #define DEFAULT_CONTEXT_HANDLE 0
>
> @@ -885,6 +894,7 @@ struct intel_context {
> struct drm_i915_gem_object *state;
> struct intel_ringbuffer *ringbuf;
> int pin_count;
> + struct i915_fence_timeline fence_timeline;
> } engine[I915_NUM_RINGS];
>
> struct list_head link;
> @@ -2153,13 +2163,10 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
> struct drm_i915_gem_request {
> /**
> * Underlying object for implementing the signal/wait stuff.
> - * NB: Never call fence_later() or return this fence object to user
> - * land! Due to lazy allocation, scheduler re-ordering, pre-emption,
> - * etc., there is no guarantee at all about the validity or
> - * sequentiality of the fence's seqno! It is also unsafe to let
> - * anything outside of the i915 driver get hold of the fence object
> - * as the clean up when decrementing the reference count requires
> - * holding the driver mutex lock.
> + * NB: Never return this fence object to user land! It is unsafe to
> + * let anything outside of the i915 driver get hold of the fence
> + * object as the clean up when decrementing the reference count
> + * requires holding the driver mutex lock.
> */
> struct fence fence;
>
> @@ -2239,6 +2246,10 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
> struct drm_i915_gem_request **req_out);
> void i915_gem_request_cancel(struct drm_i915_gem_request *req);
>
> +int i915_create_fence_timeline(struct drm_device *dev,
> + struct intel_context *ctx,
> + struct intel_engine_cs *ring);
> +
> static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
> {
> return fence_is_signaled(&req->fence);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 3970250..af79716 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2671,6 +2671,25 @@ static bool i915_gem_request_is_completed(struct fence *req_fence)
> return i915_seqno_passed(seqno, req->seqno);
> }
>
> +static void i915_fence_timeline_value_str(struct fence *fence, char *str, int size)
> +{
> + struct drm_i915_gem_request *req;
> +
> + req = container_of(fence, typeof(*req), fence);
> +
> + /* Last signalled timeline value ??? */
> + snprintf(str, size, "? [%d]"/*, tl->value*/, req->ring->get_seqno(req->ring, true));
> +}
If timelines are per context now maybe we should update
i915_gem_request_get_timeline_name to be per context instead of per
engine as well? Like this we have a name space overlap / seqno
collisions from userspace point of view.
> +static void i915_fence_value_str(struct fence *fence, char *str, int size)
> +{
> + struct drm_i915_gem_request *req;
> +
> + req = container_of(fence, typeof(*req), fence);
> +
> + snprintf(str, size, "%d [%d]", req->fence.seqno, req->seqno);
> +}
> +
> static const struct fence_ops i915_gem_request_fops = {
> .get_driver_name = i915_gem_request_get_driver_name,
> .get_timeline_name = i915_gem_request_get_timeline_name,
> @@ -2678,8 +2697,48 @@ static const struct fence_ops i915_gem_request_fops = {
> .signaled = i915_gem_request_is_completed,
> .wait = fence_default_wait,
> .release = i915_gem_request_free,
> + .fence_value_str = i915_fence_value_str,
> + .timeline_value_str = i915_fence_timeline_value_str,
> };
>
> +int i915_create_fence_timeline(struct drm_device *dev,
> + struct intel_context *ctx,
> + struct intel_engine_cs *ring)
> +{
> + struct i915_fence_timeline *timeline;
> +
> + timeline = &ctx->engine[ring->id].fence_timeline;
> +
> + if (timeline->ring)
> + return 0;
> +
> + timeline->fence_context = fence_context_alloc(1);
> +
> + /*
> + * Start the timeline from seqno 0 as this is a special value
> + * that is reserved for invalid sync points.
> + */
> + timeline->next = 1;
> + timeline->ctx = ctx;
> + timeline->ring = ring;
> +
> + return 0;
> +}
> +
> +static uint32_t i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *timeline)
> +{
> + uint32_t seqno;
> +
> + seqno = timeline->next;
> +
> + /* Reserve zero for invalid */
> + if (++timeline->next == 0 ) {
> + timeline->next = 1;
> + }
> +
> + return seqno;
> +}
> +
> int i915_gem_request_alloc(struct intel_engine_cs *ring,
> struct intel_context *ctx,
> struct drm_i915_gem_request **req_out)
> @@ -2715,7 +2774,9 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
> goto err;
> }
>
> - fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock, ring->fence_context, req->seqno);
> + fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock,
> + ctx->engine[ring->id].fence_timeline.fence_context,
> + i915_fence_timeline_get_next_seqno(&ctx->engine[ring->id].fence_timeline));
I suppose for debugging it could be useful to add this new seqno in
i915_gem_request_info to have visibility at both sides. To map userspace
seqnos to driver state.
> /*
> * Reserve space in the ring buffer for all the commands required to
> @@ -5065,7 +5126,7 @@ i915_gem_init_hw(struct drm_device *dev)
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> struct intel_engine_cs *ring;
> - int ret, i, j, fence_base;
> + int ret, i, j;
>
> if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
> return -EIO;
> @@ -5117,16 +5178,12 @@ i915_gem_init_hw(struct drm_device *dev)
> goto out;
> }
>
> - fence_base = fence_context_alloc(I915_NUM_RINGS);
> -
> /* Now it is safe to go back round and do everything else: */
> for_each_ring(ring, dev_priv, i) {
> struct drm_i915_gem_request *req;
>
> WARN_ON(!ring->default_context);
>
> - ring->fence_context = fence_base + i;
> -
> ret = i915_gem_request_alloc(ring, ring->default_context, &req);
> if (ret) {
> i915_gem_cleanup_ringbuffer(dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index b77a8f7..7eb8694 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -242,7 +242,7 @@ i915_gem_create_context(struct drm_device *dev,
> {
> const bool is_global_default_ctx = file_priv == NULL;
> struct intel_context *ctx;
> - int ret = 0;
> + int i, ret = 0;
>
> BUG_ON(!mutex_is_locked(&dev->struct_mutex));
>
> @@ -250,6 +250,19 @@ i915_gem_create_context(struct drm_device *dev,
> if (IS_ERR(ctx))
> return ctx;
>
> + if (!i915.enable_execlists) {
> + struct intel_engine_cs *ring;
> +
> + /* Create a per context timeline for fences */
> + for_each_ring(ring, to_i915(dev), i) {
> + ret = i915_create_fence_timeline(dev, ctx, ring);
> + if (ret) {
> + DRM_ERROR("Fence timeline creation failed for legacy %s: %p\n", ring->name, ctx);
> + goto err_destroy;
> + }
> + }
> + }
> +
> if (is_global_default_ctx && ctx->legacy_hw_ctx.rcs_state) {
> /* We may need to do things with the shrinker which
> * require us to immediately switch back to the default
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ee4aecd..8f255de 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2376,6 +2376,14 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
> goto error;
> }
>
> + /* Create a per context timeline for fences */
> + ret = i915_create_fence_timeline(dev, ctx, ring);
> + if (ret) {
> + DRM_ERROR("Fence timeline creation failed for ring %s, ctx %p\n",
> + ring->name, ctx);
> + goto error;
> + }
> +
We must be 100% sure userspace cannot provoke context creation failure
by accident or deliberately. Otherwise we would leak fence contexts
until overflow which would be bad.
Perhaps matching fence_context_release for existing fence_context_alloc
should be added?
Regards,
Tvrtko
More information about the Intel-gfx
mailing list