[Intel-gfx] [PATCH 13/62] drm/i915: Derive GEM requests from dma-fence
Daniel Vetter
daniel at ffwll.ch
Wed Jun 8 09:14:23 UTC 2016
On Fri, Jun 03, 2016 at 05:36:38PM +0100, Chris Wilson wrote:
> dma-buf provides a generic fence class for interoperation between
> drivers. Internally we use the request structure as a fence, and so with
> only a little bit of interfacing we can rebase those requests on top of
> dma-buf fences. This will allow us, in the future, to pass those fences
> back to userspace or between drivers.
>
> v2: The fence_context needs to be globally unique, not just unique to
> this device.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Jesse Barnes <jbarnes at virtuousgeek.org>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
> drivers/gpu/drm/i915/i915_gem_request.c | 116 ++++++++++++++++++++++++++---
> drivers/gpu/drm/i915/i915_gem_request.h | 33 ++++----
> drivers/gpu/drm/i915/i915_gpu_error.c | 2 +-
> drivers/gpu/drm/i915/i915_guc_submission.c | 4 +-
> drivers/gpu/drm/i915/i915_trace.h | 10 +--
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 7 +-
> drivers/gpu/drm/i915/intel_lrc.c | 3 +-
> drivers/gpu/drm/i915/intel_ringbuffer.c | 11 +--
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
> 10 files changed, 143 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 8f576b443ff6..8e37315443f3 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -768,7 +768,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
> if (req->pid)
> task = pid_task(req->pid, PIDTYPE_PID);
> seq_printf(m, " %x @ %d: %s [%d]\n",
> - req->seqno,
> + req->fence.seqno,
> (int) (jiffies - req->emitted_jiffies),
> task ? task->comm : "<unknown>",
> task ? task->pid : -1);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 34b2f151cdfc..512b15153ac6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -24,6 +24,98 @@
>
> #include "i915_drv.h"
>
> +static inline struct drm_i915_gem_request *
> +to_i915_request(struct fence *fence)
> +{
> + return container_of(fence, struct drm_i915_gem_request, fence);
> +}
> +
> +static const char *i915_fence_get_driver_name(struct fence *fence)
> +{
> + return "i915";
> +}
> +
> +static const char *i915_fence_get_timeline_name(struct fence *fence)
> +{
> + /* Timelines are bound by eviction to a VM. However, since
> + * we only have a global seqno at the moment, we only have
> + * a single timeline. Note that each timeline will have
> + * multiple execution contexts (fence contexts) as we allow
> + * engines within a single timeline to execute in parallel.
> + */
> + return "global";
> +}
> +
> +static bool i915_fence_signaled(struct fence *fence)
> +{
> + return i915_gem_request_completed(to_i915_request(fence));
> +}
> +
> +static bool i915_fence_enable_signaling(struct fence *fence)
> +{
> + if (i915_fence_signaled(fence))
> + return false;
> +
> + return intel_engine_enable_signaling(to_i915_request(fence)) == 0;
> +}
> +
> +static signed long i915_fence_wait(struct fence *fence,
> + bool interruptible,
> + signed long timeout_jiffies)
> +{
> + s64 timeout_ns, *timeout;
> + int ret;
> +
> + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
> + timeout_ns = jiffies_to_nsecs(timeout_jiffies);
> + timeout = &timeout_ns;
> + } else
> + timeout = NULL;
> +
> + ret = __i915_wait_request(to_i915_request(fence),
> + interruptible, timeout,
> + NULL);
> + if (ret == -ETIME)
> + return 0;
> +
> + if (ret < 0)
> + return ret;
> +
> + if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
> + timeout_jiffies = nsecs_to_jiffies(timeout_ns);
> +
> + return timeout_jiffies;
> +}
> +
> +static void i915_fence_value_str(struct fence *fence, char *str, int size)
> +{
> + snprintf(str, size, "%u", fence->seqno);
> +}
> +
> +static void i915_fence_timeline_value_str(struct fence *fence, char *str,
> + int size)
> +{
> + snprintf(str, size, "%u",
> + intel_engine_get_seqno(to_i915_request(fence)->engine));
> +}
> +
> +static void i915_fence_release(struct fence *fence)
> +{
> + struct drm_i915_gem_request *req = to_i915_request(fence);
> + kmem_cache_free(req->i915->requests, req);
> +}
> +
> +static const struct fence_ops i915_fence_ops = {
> + .get_driver_name = i915_fence_get_driver_name,
> + .get_timeline_name = i915_fence_get_timeline_name,
> + .enable_signaling = i915_fence_enable_signaling,
> + .signaled = i915_fence_signaled,
> + .wait = i915_fence_wait,
> + .release = i915_fence_release,
> + .fence_value_str = i915_fence_value_str,
> + .timeline_value_str = i915_fence_timeline_value_str,
> +};
> +
> static int i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
> {
> if (__i915_terminally_wedged(reset_counter))
> @@ -117,6 +209,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
> struct drm_i915_private *dev_priv = engine->i915;
> unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);
> struct drm_i915_gem_request *req;
> + u32 seqno;
> int ret;
>
> if (!req_out)
> @@ -136,11 +229,17 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
> if (req == NULL)
> return -ENOMEM;
>
> - ret = i915_gem_get_seqno(dev_priv, &req->seqno);
> + ret = i915_gem_get_seqno(dev_priv, &seqno);
> if (ret)
> goto err;
>
> - kref_init(&req->ref);
> + spin_lock_init(&req->lock);
> + fence_init(&req->fence,
> + &i915_fence_ops,
> + &req->lock,
> + engine->fence_context,
> + seqno);
> +
> req->i915 = dev_priv;
> req->engine = engine;
> req->reset_counter = reset_counter;
> @@ -376,7 +475,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
> */
> request->emitted_jiffies = jiffies;
> request->previous_seqno = engine->last_submitted_seqno;
> - smp_store_mb(engine->last_submitted_seqno, request->seqno);
> + smp_store_mb(engine->last_submitted_seqno, request->fence.seqno);
> list_add_tail(&request->list, &engine->request_list);
>
> /* Record the position of the start of the request so that
> @@ -543,7 +642,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
> if (i915_spin_request(req, state, 5))
> goto complete;
>
> - intel_wait_init(&wait, req->seqno);
> + intel_wait_init(&wait, req->fence.seqno);
> set_current_state(state);
> if (intel_engine_add_wait(req->engine, &wait))
> /* In order to check that we haven't missed the interrupt
> @@ -609,7 +708,7 @@ complete:
> *timeout = 0;
> }
>
> - if (rps && req->seqno == req->engine->last_submitted_seqno) {
> + if (rps && req->fence.seqno == req->engine->last_submitted_seqno) {
> /* The GPU is now idle and this client has stalled.
> * Since no other client has submitted a request in the
> * meantime, assume that this client is the only one
> @@ -650,10 +749,3 @@ int i915_wait_request(struct drm_i915_gem_request *req)
>
> return 0;
> }
> -
> -void i915_gem_request_free(struct kref *req_ref)
> -{
> - struct drm_i915_gem_request *req =
> - container_of(req_ref, typeof(*req), ref);
> - kmem_cache_free(req->i915->requests, req);
> -}
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 166e0733d2d8..248aec2c09b7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -25,6 +25,8 @@
> #ifndef I915_GEM_REQUEST_H
> #define I915_GEM_REQUEST_H
>
> +#include <linux/fence.h>
> +
> /**
> * Request queue structure.
> *
> @@ -36,11 +38,11 @@
> * emission time to be associated with the request for tracking how far ahead
> * of the GPU the submission is.
> *
> - * The requests are reference counted, so upon creation they should have an
> - * initial reference taken using kref_init
> + * The requests are reference counted.
> */
> struct drm_i915_gem_request {
> - struct kref ref;
> + struct fence fence;
> + spinlock_t lock;
>
> /** On Which ring this request was generated */
> struct drm_i915_private *i915;
> @@ -68,12 +70,6 @@ struct drm_i915_gem_request {
> */
> u32 previous_seqno;
>
> - /** GEM sequence number associated with this request,
> - * when the HWS breadcrumb is equal or greater than this the GPU
> - * has finished processing this request.
> - */
> - u32 seqno;
> -
> /** Position in the ringbuffer of the start of the request */
> u32 head;
>
> @@ -152,7 +148,6 @@ __request_to_i915(const struct drm_i915_gem_request *request)
> struct drm_i915_gem_request * __must_check
> i915_gem_request_alloc(struct intel_engine_cs *engine,
> struct i915_gem_context *ctx);
> -void i915_gem_request_free(struct kref *req_ref);
> int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
> struct drm_file *file);
> void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
> @@ -160,7 +155,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
> static inline uint32_t
> i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
> {
> - return req ? req->seqno : 0;
> + return req ? req->fence.seqno : 0;
> }
>
> static inline struct intel_engine_cs *
> @@ -170,17 +165,23 @@ i915_gem_request_get_engine(struct drm_i915_gem_request *req)
> }
>
> static inline struct drm_i915_gem_request *
> +to_request(struct fence *fence)
> +{
> + /* We assume that NULL fence/request are interoperable */
> + BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
> + return container_of(fence, struct drm_i915_gem_request, fence);
For future-proofing to make sure we don't accidentally call this on a
foreign fence:
BUG_ON(fence->ops != &i915_fence_ops);
BUG_ON since I don't want to splatter all callers with handlers for this.
And if we ever get this wrong, debugging it with just some random
corruption would be serious pain, so I think the overhead is justified.
-Daniel
> +}
> +
> +static inline struct drm_i915_gem_request *
> i915_gem_request_reference(struct drm_i915_gem_request *req)
> {
> - if (req)
> - kref_get(&req->ref);
> - return req;
> + return to_request(fence_get(&req->fence));
> }
>
> static inline void
> i915_gem_request_unreference(struct drm_i915_gem_request *req)
> {
> - kref_put(&req->ref, i915_gem_request_free);
> + fence_put(&req->fence);
> }
>
> static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
> @@ -230,7 +231,7 @@ static inline bool i915_gem_request_started(const struct drm_i915_gem_request *r
> static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req)
> {
> return i915_seqno_passed(intel_engine_get_seqno(req->engine),
> - req->seqno);
> + req->fence.seqno);
> }
>
> bool __i915_spin_request(const struct drm_i915_gem_request *request,
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 3ba5302ce19f..5332bd32c555 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1181,7 +1181,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
> }
>
> erq = &error->ring[i].requests[count++];
> - erq->seqno = request->seqno;
> + erq->seqno = request->fence.seqno;
> erq->jiffies = request->emitted_jiffies;
> erq->tail = request->postfix;
> }
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index ac72451c571c..629111d42ce0 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -538,7 +538,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
> rq->engine);
>
> wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
> - wqi->fence_id = rq->seqno;
> + wqi->fence_id = rq->fence.seqno;
>
> kunmap_atomic(base);
> }
> @@ -578,7 +578,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq)
> client->b_fail += 1;
>
> guc->submissions[engine_id] += 1;
> - guc->last_seqno[engine_id] = rq->seqno;
> + guc->last_seqno[engine_id] = rq->fence.seqno;
>
> return b_ret;
> }
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index f59cf07184ae..0296a77b586a 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -465,7 +465,7 @@ TRACE_EVENT(i915_gem_ring_sync_to,
> __entry->dev = from->i915->dev->primary->index;
> __entry->sync_from = from->id;
> __entry->sync_to = to_req->engine->id;
> - __entry->seqno = i915_gem_request_get_seqno(req);
> + __entry->seqno = req->fence.seqno;
> ),
>
> TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
> @@ -488,9 +488,9 @@ TRACE_EVENT(i915_gem_ring_dispatch,
> TP_fast_assign(
> __entry->dev = req->i915->dev->primary->index;
> __entry->ring = req->engine->id;
> - __entry->seqno = req->seqno;
> + __entry->seqno = req->fence.seqno;
> __entry->flags = flags;
> - intel_engine_enable_signaling(req);
> + fence_enable_sw_signaling(&req->fence);
> ),
>
> TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
> @@ -533,7 +533,7 @@ DECLARE_EVENT_CLASS(i915_gem_request,
> TP_fast_assign(
> __entry->dev = req->i915->dev->primary->index;
> __entry->ring = req->engine->id;
> - __entry->seqno = req->seqno;
> + __entry->seqno = req->fence.seqno;
> ),
>
> TP_printk("dev=%u, ring=%u, seqno=%u",
> @@ -595,7 +595,7 @@ TRACE_EVENT(i915_gem_request_wait_begin,
> TP_fast_assign(
> __entry->dev = req->i915->dev->primary->index;
> __entry->ring = req->engine->id;
> - __entry->seqno = req->seqno;
> + __entry->seqno = req->fence.seqno;
> __entry->blocking =
> mutex_is_locked(&req->i915->dev->struct_mutex);
> ),
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index dc65a007fa20..05f62f706897 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -396,6 +396,7 @@ static int intel_breadcrumbs_signaler(void *arg)
> */
> intel_engine_remove_wait(engine,
> &request->signaling.wait);
> + fence_signal(&request->fence);
>
> /* Find the next oldest signal. Note that as we have
> * not been holding the lock, another client may
> @@ -444,7 +445,7 @@ int intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> }
>
> request->signaling.wait.task = b->signaler;
> - request->signaling.wait.seqno = request->seqno;
> + request->signaling.wait.seqno = request->fence.seqno;
> i915_gem_request_reference(request);
>
> /* First add ourselves into the list of waiters, but register our
> @@ -466,8 +467,8 @@ int intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> p = &b->signals.rb_node;
> while (*p) {
> parent = *p;
> - if (i915_seqno_passed(request->seqno,
> - to_signal(parent)->seqno)) {
> + if (i915_seqno_passed(request->fence.seqno,
> + to_signal(parent)->fence.seqno)) {
> p = &parent->rb_right;
> first = false;
> } else
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 0742a849acce..c7a9ebdb0811 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1731,7 +1731,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
> intel_hws_seqno_address(request->engine) |
> MI_FLUSH_DW_USE_GTT);
> intel_logical_ring_emit(ringbuf, 0);
> - intel_logical_ring_emit(ringbuf, request->seqno);
> + intel_logical_ring_emit(ringbuf, request->fence.seqno);
> intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
> intel_logical_ring_emit(ringbuf, MI_NOOP);
> return intel_logical_ring_advance_and_submit(request);
> @@ -1964,6 +1964,7 @@ logical_ring_setup(struct drm_device *dev, enum intel_engine_id id)
> engine->exec_id = info->exec_id;
> engine->guc_id = info->guc_id;
> engine->mmio_base = info->mmio_base;
> + engine->fence_context = fence_context_alloc(1);
>
> engine->i915 = dev_priv;
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 327ad7fdf118..c3d6345aa2c1 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1266,7 +1266,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
> PIPE_CONTROL_CS_STALL);
> intel_ring_emit(signaller, lower_32_bits(gtt_offset));
> intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> - intel_ring_emit(signaller, signaller_req->seqno);
> + intel_ring_emit(signaller, signaller_req->fence.seqno);
> intel_ring_emit(signaller, 0);
> intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
> MI_SEMAPHORE_TARGET(waiter->hw_id));
> @@ -1304,7 +1304,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
> intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
> MI_FLUSH_DW_USE_GTT);
> intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> - intel_ring_emit(signaller, signaller_req->seqno);
> + intel_ring_emit(signaller, signaller_req->fence.seqno);
> intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
> MI_SEMAPHORE_TARGET(waiter->hw_id));
> intel_ring_emit(signaller, 0);
> @@ -1337,7 +1337,7 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req,
> if (i915_mmio_reg_valid(mbox_reg)) {
> intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
> intel_ring_emit_reg(signaller, mbox_reg);
> - intel_ring_emit(signaller, signaller_req->seqno);
> + intel_ring_emit(signaller, signaller_req->fence.seqno);
> }
> }
>
> @@ -1373,7 +1373,7 @@ gen6_add_request(struct drm_i915_gem_request *req)
> intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
> intel_ring_emit(engine,
> I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> - intel_ring_emit(engine, req->seqno);
> + intel_ring_emit(engine, req->fence.seqno);
> intel_ring_emit(engine, MI_USER_INTERRUPT);
> __intel_ring_advance(engine);
>
> @@ -1623,7 +1623,7 @@ i9xx_add_request(struct drm_i915_gem_request *req)
> intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
> intel_ring_emit(engine,
> I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> - intel_ring_emit(engine, req->seqno);
> + intel_ring_emit(engine, req->fence.seqno);
> intel_ring_emit(engine, MI_USER_INTERRUPT);
> __intel_ring_advance(engine);
>
> @@ -2092,6 +2092,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
> WARN_ON(engine->buffer);
>
> engine->i915 = dev_priv;
> + engine->fence_context = fence_context_alloc(1);
> INIT_LIST_HEAD(&engine->active_list);
> INIT_LIST_HEAD(&engine->request_list);
> INIT_LIST_HEAD(&engine->execlist_queue);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 6017367e94fb..b041fb6a6d01 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -158,6 +158,7 @@ struct intel_engine_cs {
> unsigned int exec_id;
> unsigned int hw_id;
> unsigned int guc_id; /* XXX same as hw_id? */
> + unsigned fence_context;
> u32 mmio_base;
> struct intel_ringbuffer *buffer;
> struct list_head buffers;
> --
> 2.8.1
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
More information about the Intel-gfx
mailing list