[Intel-gfx] [RFC 5/9] drm/i915: Add per context timelines to fence object
John Harrison
John.C.Harrison at Intel.com
Wed Oct 28 05:59:31 PDT 2015
Have finally had some time to come back to this and respond
to/incorporate the comments made some while ago...
On 23/07/2015 14:50, Tvrtko Ursulin wrote:
> Hi,
>
> On 07/17/2015 03:31 PM, John.C.Harrison at Intel.com wrote:
>> From: John Harrison <John.C.Harrison at Intel.com>
>>
>> The fence object used inside the request structure requires a
>> sequence number.
>> Although this is not used by the i915 driver itself, it could
>> potentially be
>> used by non-i915 code if the fence is passed outside of the driver.
>> This is the
>> intention as it allows external kernel drivers and user applications
>> to wait on
>> batch buffer completion asynchronously via the dma-buf fence API.
>>
>> To ensure that such external users are not confused by strange things
>> happening
>> with the seqno, this patch adds in a per context timeline that can
>> provide a
>> guaranteed in-order seqno value for the fence. This is safe because the
>> scheduler will not re-order batch buffers within a context - they are
>> considered
>> to be mutually dependent.
>>
>> [new patch in series]
>>
>> For: VIZ-5190
>> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 25 ++++++++----
>> drivers/gpu/drm/i915/i915_gem.c | 69
>> ++++++++++++++++++++++++++++++---
>> drivers/gpu/drm/i915/i915_gem_context.c | 15 ++++++-
>> drivers/gpu/drm/i915/intel_lrc.c | 8 ++++
>> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 -
>> 5 files changed, 103 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 0c7df46..88a4746 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -840,6 +840,15 @@ struct i915_ctx_hang_stats {
>> bool banned;
>> };
>>
>> +struct i915_fence_timeline {
>> + unsigned fence_context;
>> + uint32_t context;
>
> Unused field?
>
>> + uint32_t next;
>
> fence.h defines seqnos as 'unsigned', which matches this in practice,
> but maybe it would be nicer to use the same type name.
>
>> +
>> + struct intel_context *ctx;
>> + struct intel_engine_cs *ring;
>> +};
>> +
>> /* This must match up with the value previously used for
>> execbuf2.rsvd1. */
>> #define DEFAULT_CONTEXT_HANDLE 0
>>
>> @@ -885,6 +894,7 @@ struct intel_context {
>> struct drm_i915_gem_object *state;
>> struct intel_ringbuffer *ringbuf;
>> int pin_count;
>> + struct i915_fence_timeline fence_timeline;
>> } engine[I915_NUM_RINGS];
>>
>> struct list_head link;
>> @@ -2153,13 +2163,10 @@ void i915_gem_track_fb(struct
>> drm_i915_gem_object *old,
>> struct drm_i915_gem_request {
>> /**
>> * Underlying object for implementing the signal/wait stuff.
>> - * NB: Never call fence_later() or return this fence object to user
>> - * land! Due to lazy allocation, scheduler re-ordering,
>> pre-emption,
>> - * etc., there is no guarantee at all about the validity or
>> - * sequentiality of the fence's seqno! It is also unsafe to let
>> - * anything outside of the i915 driver get hold of the fence object
>> - * as the clean up when decrementing the reference count requires
>> - * holding the driver mutex lock.
>> + * NB: Never return this fence object to user land! It is unsafe to
>> + * let anything outside of the i915 driver get hold of the fence
>> + * object as the clean up when decrementing the reference count
>> + * requires holding the driver mutex lock.
>> */
>> struct fence fence;
>>
>> @@ -2239,6 +2246,10 @@ int i915_gem_request_alloc(struct
>> intel_engine_cs *ring,
>> struct drm_i915_gem_request **req_out);
>> void i915_gem_request_cancel(struct drm_i915_gem_request *req);
>>
>> +int i915_create_fence_timeline(struct drm_device *dev,
>> + struct intel_context *ctx,
>> + struct intel_engine_cs *ring);
>> +
>> static inline bool i915_gem_request_completed(struct
>> drm_i915_gem_request *req)
>> {
>> return fence_is_signaled(&req->fence);
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index 3970250..af79716 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2671,6 +2671,25 @@ static bool
>> i915_gem_request_is_completed(struct fence *req_fence)
>> return i915_seqno_passed(seqno, req->seqno);
>> }
>>
>> +static void i915_fence_timeline_value_str(struct fence *fence, char
>> *str, int size)
>> +{
>> + struct drm_i915_gem_request *req;
>> +
>> + req = container_of(fence, typeof(*req), fence);
>> +
>> + /* Last signalled timeline value ??? */
>> + snprintf(str, size, "? [%d]"/*, tl->value*/,
>> req->ring->get_seqno(req->ring, true));
>> +}
>
> If timelines are per context now maybe we should update
> i915_gem_request_get_timeline_name to be per context instead of per
> engine as well? Like this we have a name space overlap / seqno
> collisions from userspace point of view.
>
>> +static void i915_fence_value_str(struct fence *fence, char *str, int
>> size)
>> +{
>> + struct drm_i915_gem_request *req;
>> +
>> + req = container_of(fence, typeof(*req), fence);
>> +
>> + snprintf(str, size, "%d [%d]", req->fence.seqno, req->seqno);
>> +}
>> +
>> static const struct fence_ops i915_gem_request_fops = {
>> .get_driver_name = i915_gem_request_get_driver_name,
>> .get_timeline_name = i915_gem_request_get_timeline_name,
>> @@ -2678,8 +2697,48 @@ static const struct fence_ops
>> i915_gem_request_fops = {
>> .signaled = i915_gem_request_is_completed,
>> .wait = fence_default_wait,
>> .release = i915_gem_request_free,
>> + .fence_value_str = i915_fence_value_str,
>> + .timeline_value_str = i915_fence_timeline_value_str,
>> };
>>
>> +int i915_create_fence_timeline(struct drm_device *dev,
>> + struct intel_context *ctx,
>> + struct intel_engine_cs *ring)
>> +{
>> + struct i915_fence_timeline *timeline;
>> +
>> + timeline = &ctx->engine[ring->id].fence_timeline;
>> +
>> + if (timeline->ring)
>> + return 0;
>> +
>> + timeline->fence_context = fence_context_alloc(1);
>> +
>> + /*
>> + * Start the timeline from seqno 0 as this is a special value
>> + * that is reserved for invalid sync points.
>> + */
>> + timeline->next = 1;
>> + timeline->ctx = ctx;
>> + timeline->ring = ring;
>> +
>> + return 0;
>> +}
>> +
>> +static uint32_t i915_fence_timeline_get_next_seqno(struct
>> i915_fence_timeline *timeline)
>> +{
>> + uint32_t seqno;
>> +
>> + seqno = timeline->next;
>> +
>> + /* Reserve zero for invalid */
>> + if (++timeline->next == 0 ) {
>> + timeline->next = 1;
>> + }
>> +
>> + return seqno;
>> +}
>> +
>> int i915_gem_request_alloc(struct intel_engine_cs *ring,
>> struct intel_context *ctx,
>> struct drm_i915_gem_request **req_out)
>> @@ -2715,7 +2774,9 @@ int i915_gem_request_alloc(struct
>> intel_engine_cs *ring,
>> goto err;
>> }
>>
>> - fence_init(&req->fence, &i915_gem_request_fops,
>> &ring->fence_lock, ring->fence_context, req->seqno);
>> + fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock,
>> + ctx->engine[ring->id].fence_timeline.fence_context,
>> +
>> i915_fence_timeline_get_next_seqno(&ctx->engine[ring->id].fence_timeline));
>
> I suppose for debugging it could be useful to add this new seqno in
> i915_gem_request_info to have visibility at both sides. To map
> userspace seqnos to driver state.
>
>> /*
>> * Reserve space in the ring buffer for all the commands
>> required to
>> @@ -5065,7 +5126,7 @@ i915_gem_init_hw(struct drm_device *dev)
>> {
>> struct drm_i915_private *dev_priv = dev->dev_private;
>> struct intel_engine_cs *ring;
>> - int ret, i, j, fence_base;
>> + int ret, i, j;
>>
>> if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
>> return -EIO;
>> @@ -5117,16 +5178,12 @@ i915_gem_init_hw(struct drm_device *dev)
>> goto out;
>> }
>>
>> - fence_base = fence_context_alloc(I915_NUM_RINGS);
>> -
>> /* Now it is safe to go back round and do everything else: */
>> for_each_ring(ring, dev_priv, i) {
>> struct drm_i915_gem_request *req;
>>
>> WARN_ON(!ring->default_context);
>>
>> - ring->fence_context = fence_base + i;
>> -
>> ret = i915_gem_request_alloc(ring, ring->default_context,
>> &req);
>> if (ret) {
>> i915_gem_cleanup_ringbuffer(dev);
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
>> b/drivers/gpu/drm/i915/i915_gem_context.c
>> index b77a8f7..7eb8694 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -242,7 +242,7 @@ i915_gem_create_context(struct drm_device *dev,
>> {
>> const bool is_global_default_ctx = file_priv == NULL;
>> struct intel_context *ctx;
>> - int ret = 0;
>> + int i, ret = 0;
>>
>> BUG_ON(!mutex_is_locked(&dev->struct_mutex));
>>
>> @@ -250,6 +250,19 @@ i915_gem_create_context(struct drm_device *dev,
>> if (IS_ERR(ctx))
>> return ctx;
>>
>> + if (!i915.enable_execlists) {
>> + struct intel_engine_cs *ring;
>> +
>> + /* Create a per context timeline for fences */
>> + for_each_ring(ring, to_i915(dev), i) {
>> + ret = i915_create_fence_timeline(dev, ctx, ring);
>> + if (ret) {
>> + DRM_ERROR("Fence timeline creation failed for legacy
>> %s: %p\n", ring->name, ctx);
>> + goto err_destroy;
>> + }
>> + }
>> + }
>> +
>> if (is_global_default_ctx && ctx->legacy_hw_ctx.rcs_state) {
>> /* We may need to do things with the shrinker which
>> * require us to immediately switch back to the default
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index ee4aecd..8f255de 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2376,6 +2376,14 @@ int intel_lr_context_deferred_create(struct
>> intel_context *ctx,
>> goto error;
>> }
>>
>> + /* Create a per context timeline for fences */
>> + ret = i915_create_fence_timeline(dev, ctx, ring);
>> + if (ret) {
>> + DRM_ERROR("Fence timeline creation failed for ring %s, ctx
>> %p\n",
>> + ring->name, ctx);
>> + goto error;
>> + }
>> +
>
> We must be 100% sure userspace cannot provoke context creation failure
> by accident or deliberately. Otherwise we would leak fence contexts
> until overflow which would be bad.
>
> Perhaps matching fence_context_release for existing
> fence_context_alloc should be added?
Note that there is no fence_context_release. The fence_context_alloc
code is simply 'return static_count++;'. There is no overflow checking.
There is no anti-reuse checking. When roughly four billion (2^32) contexts have been allocated,
the old ones will get re-allocated and if they are still in use then
tough. It's a really bad API! On the other hand, the context is not
actually used for anything. So it doesn't really matter.
>
> Regards,
>
> Tvrtko
More information about the Intel-gfx
mailing list