[Intel-gfx] [PATCH 03/13] drm/i915: Derive GEM requests from dma-fence
Chris Wilson
chris at chris-wilson.co.uk
Mon Jul 4 07:49:11 UTC 2016
dma-buf provides a generic fence class for interoperation between
drivers. Internally we use the request structure as a fence, and so with
only a little bit of interfacing we can rebase those requests on top of
dma-buf fences. This will allow us, in the future, to pass those fences
back to userspace or between drivers.
v2: The fence_context needs to be globally unique, not just unique to
this device.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Jesse Barnes <jbarnes at virtuousgeek.org>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
---
drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
drivers/gpu/drm/i915/i915_gem_request.c | 119 ++++++++++++++++++++++++++---
drivers/gpu/drm/i915/i915_gem_request.h | 43 ++++++-----
drivers/gpu/drm/i915/i915_gpu_error.c | 2 +-
drivers/gpu/drm/i915/i915_guc_submission.c | 4 +-
drivers/gpu/drm/i915/i915_trace.h | 10 +--
drivers/gpu/drm/i915/intel_breadcrumbs.c | 7 +-
drivers/gpu/drm/i915/intel_lrc.c | 3 +-
drivers/gpu/drm/i915/intel_ringbuffer.c | 11 +--
drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
10 files changed, 153 insertions(+), 49 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 7c4a60b87eb5..ec0162585238 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -768,7 +768,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
if (req->pid)
task = pid_task(req->pid, PIDTYPE_PID);
seq_printf(m, " %x @ %d: %s [%d]\n",
- req->seqno,
+ req->fence.seqno,
(int) (jiffies - req->emitted_jiffies),
task ? task->comm : "<unknown>",
task ? task->pid : -1);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 25fd91aad74b..3c4b1af42a46 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -24,6 +24,101 @@
#include "i915_drv.h"
+static inline struct drm_i915_gem_request *
+to_i915_request(struct fence *fence)
+{
+ return container_of(fence, struct drm_i915_gem_request, fence);
+}
+
+static const char *i915_fence_get_driver_name(struct fence *fence)
+{
+ return "i915";
+}
+
+static const char *i915_fence_get_timeline_name(struct fence *fence)
+{
+ /* Timelines are bound by eviction to a VM. However, since
+ * we only have a global seqno at the moment, we only have
+ * a single timeline. Note that each timeline will have
+ * multiple execution contexts (fence contexts) as we allow
+ * engines within a single timeline to execute in parallel.
+ */
+ return "global";
+}
+
+static bool i915_fence_signaled(struct fence *fence)
+{
+ return i915_gem_request_completed(to_i915_request(fence));
+}
+
+static bool i915_fence_enable_signaling(struct fence *fence)
+{
+ if (i915_fence_signaled(fence))
+ return false;
+
+ intel_engine_enable_signaling(to_i915_request(fence));
+ return true;
+}
+
+static signed long i915_fence_wait(struct fence *fence,
+ bool interruptible,
+ signed long timeout_jiffies)
+{
+ s64 timeout_ns, *timeout;
+ int ret;
+
+ if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
+ timeout_ns = jiffies_to_nsecs(timeout_jiffies);
+ timeout = &timeout_ns;
+ } else {
+ timeout = NULL;
+ }
+
+ ret = __i915_wait_request(to_i915_request(fence),
+ interruptible, timeout,
+ NULL);
+ if (ret == -ETIME)
+ return 0;
+
+ if (ret < 0)
+ return ret;
+
+ if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
+ timeout_jiffies = nsecs_to_jiffies(timeout_ns);
+
+ return timeout_jiffies;
+}
+
+static void i915_fence_value_str(struct fence *fence, char *str, int size)
+{
+ snprintf(str, size, "%u", fence->seqno);
+}
+
+static void i915_fence_timeline_value_str(struct fence *fence, char *str,
+ int size)
+{
+ snprintf(str, size, "%u",
+ intel_engine_get_seqno(to_i915_request(fence)->engine));
+}
+
+static void i915_fence_release(struct fence *fence)
+{
+ struct drm_i915_gem_request *req = to_i915_request(fence);
+
+ kmem_cache_free(req->i915->requests, req);
+}
+
+const struct fence_ops i915_fence_ops = {
+ .get_driver_name = i915_fence_get_driver_name,
+ .get_timeline_name = i915_fence_get_timeline_name,
+ .enable_signaling = i915_fence_enable_signaling,
+ .signaled = i915_fence_signaled,
+ .wait = i915_fence_wait,
+ .release = i915_fence_release,
+ .fence_value_str = i915_fence_value_str,
+ .timeline_value_str = i915_fence_timeline_value_str,
+};
+
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
struct drm_file *file)
{
@@ -211,6 +306,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
struct drm_i915_private *dev_priv = engine->i915;
unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
struct drm_i915_gem_request *req;
+ u32 seqno;
int ret;
if (!req_out)
@@ -237,11 +333,17 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
if (!req)
return -ENOMEM;
- ret = i915_gem_get_seqno(dev_priv, &req->seqno);
+ ret = i915_gem_get_seqno(dev_priv, &seqno);
if (ret)
goto err;
- kref_init(&req->ref);
+ spin_lock_init(&req->lock);
+ fence_init(&req->fence,
+ &i915_fence_ops,
+ &req->lock,
+ engine->fence_context,
+ seqno);
+
req->i915 = dev_priv;
req->engine = engine;
req->ctx = ctx;
@@ -384,7 +486,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
*/
request->emitted_jiffies = jiffies;
request->previous_seqno = engine->last_submitted_seqno;
- smp_store_mb(engine->last_submitted_seqno, request->seqno);
+ smp_store_mb(engine->last_submitted_seqno, request->fence.seqno);
list_add_tail(&request->list, &engine->request_list);
/* Record the position of the start of the request so that
@@ -555,7 +657,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
set_current_state(state);
add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
- intel_wait_init(&wait, req->seqno);
+ intel_wait_init(&wait, req->fence.seqno);
if (intel_engine_add_wait(req->engine, &wait))
/* In order to check that we haven't missed the interrupt
* as we enabled it, we need to kick ourselves to do a
@@ -625,7 +727,7 @@ complete:
*timeout = 0;
}
- if (rps && req->seqno == req->engine->last_submitted_seqno) {
+ if (rps && req->fence.seqno == req->engine->last_submitted_seqno) {
/* The GPU is now idle and this client has stalled.
* Since no other client has submitted a request in the
* meantime, assume that this client is the only one
@@ -665,10 +767,3 @@ int i915_wait_request(struct drm_i915_gem_request *req)
return 0;
}
-
-void i915_gem_request_free(struct kref *req_ref)
-{
- struct drm_i915_gem_request *req =
- container_of(req_ref, typeof(*req), ref);
- kmem_cache_free(req->i915->requests, req);
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index d90db63553a2..6f2c820785f3 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -25,6 +25,10 @@
#ifndef I915_GEM_REQUEST_H
#define I915_GEM_REQUEST_H
+#include <linux/fence.h>
+
+#include "i915_gem.h"
+
/**
* Request queue structure.
*
@@ -36,11 +40,11 @@
* emission time to be associated with the request for tracking how far ahead
* of the GPU the submission is.
*
- * The requests are reference counted, so upon creation they should have an
- * initial reference taken using kref_init
+ * The requests are reference counted.
*/
struct drm_i915_gem_request {
- struct kref ref;
+ struct fence fence;
+ spinlock_t lock;
/** On Which ring this request was generated */
struct drm_i915_private *i915;
@@ -66,12 +70,6 @@ struct drm_i915_gem_request {
*/
u32 previous_seqno;
- /** GEM sequence number associated with this request,
- * when the HWS breadcrumb is equal or greater than this the GPU
- * has finished processing this request.
- */
- u32 seqno;
-
/** Position in the ringbuffer of the start of the request */
u32 head;
@@ -140,16 +138,16 @@ struct drm_i915_gem_request {
unsigned int ctx_hw_id;
};
-static inline struct drm_i915_private *
-__request_to_i915(const struct drm_i915_gem_request *request)
+extern const struct fence_ops i915_fence_ops;
+
+static inline bool fence_is_i915(struct fence *fence)
{
- return request->i915;
+ return fence->ops == &i915_fence_ops;
}
struct drm_i915_gem_request * __must_check
i915_gem_request_alloc(struct intel_engine_cs *engine,
struct i915_gem_context *ctx);
-void i915_gem_request_free(struct kref *req_ref);
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
struct drm_file *file);
void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
@@ -157,7 +155,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
static inline u32
i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
{
- return req ? req->seqno : 0;
+ return req ? req->fence.seqno : 0;
}
static inline struct intel_engine_cs *
@@ -167,17 +165,24 @@ i915_gem_request_get_engine(struct drm_i915_gem_request *req)
}
static inline struct drm_i915_gem_request *
+to_request(struct fence *fence)
+{
+ /* We assume that NULL fence/request are interoperable */
+ BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
+ GEM_BUG_ON(fence && !fence_is_i915(fence));
+ return container_of(fence, struct drm_i915_gem_request, fence);
+}
+
+static inline struct drm_i915_gem_request *
i915_gem_request_reference(struct drm_i915_gem_request *req)
{
- if (req)
- kref_get(&req->ref);
- return req;
+ return to_request(fence_get(&req->fence));
}
static inline void
i915_gem_request_unreference(struct drm_i915_gem_request *req)
{
- kref_put(&req->ref, i915_gem_request_free);
+ fence_put(&req->fence);
}
static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
@@ -229,7 +234,7 @@ static inline bool
i915_gem_request_completed(const struct drm_i915_gem_request *req)
{
return i915_seqno_passed(intel_engine_get_seqno(req->engine),
- req->seqno);
+ req->fence.seqno);
}
bool __i915_spin_request(const struct drm_i915_gem_request *request,
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 3a43b434c92e..27739226a1d7 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1182,7 +1182,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
}
erq = &error->ring[i].requests[count++];
- erq->seqno = request->seqno;
+ erq->seqno = request->fence.seqno;
erq->jiffies = request->emitted_jiffies;
erq->tail = request->postfix;
}
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 355b6475e74c..34a502c02fd6 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -500,7 +500,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
rq->engine);
wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
- wqi->fence_id = rq->seqno;
+ wqi->fence_id = rq->fence.seqno;
kunmap_atomic(base);
}
@@ -595,7 +595,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq)
client->b_fail += 1;
guc->submissions[engine_id] += 1;
- guc->last_seqno[engine_id] = rq->seqno;
+ guc->last_seqno[engine_id] = rq->fence.seqno;
return b_ret;
}
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index f59cf07184ae..0296a77b586a 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -465,7 +465,7 @@ TRACE_EVENT(i915_gem_ring_sync_to,
__entry->dev = from->i915->dev->primary->index;
__entry->sync_from = from->id;
__entry->sync_to = to_req->engine->id;
- __entry->seqno = i915_gem_request_get_seqno(req);
+ __entry->seqno = req->fence.seqno;
),
TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
@@ -488,9 +488,9 @@ TRACE_EVENT(i915_gem_ring_dispatch,
TP_fast_assign(
__entry->dev = req->i915->dev->primary->index;
__entry->ring = req->engine->id;
- __entry->seqno = req->seqno;
+ __entry->seqno = req->fence.seqno;
__entry->flags = flags;
- intel_engine_enable_signaling(req);
+ fence_enable_sw_signaling(&req->fence);
),
TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
@@ -533,7 +533,7 @@ DECLARE_EVENT_CLASS(i915_gem_request,
TP_fast_assign(
__entry->dev = req->i915->dev->primary->index;
__entry->ring = req->engine->id;
- __entry->seqno = req->seqno;
+ __entry->seqno = req->fence.seqno;
),
TP_printk("dev=%u, ring=%u, seqno=%u",
@@ -595,7 +595,7 @@ TRACE_EVENT(i915_gem_request_wait_begin,
TP_fast_assign(
__entry->dev = req->i915->dev->primary->index;
__entry->ring = req->engine->id;
- __entry->seqno = req->seqno;
+ __entry->seqno = req->fence.seqno;
__entry->blocking =
mutex_is_locked(&req->i915->dev->struct_mutex);
),
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 009d6e121767..b7a86ac53eae 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -421,6 +421,7 @@ static int intel_breadcrumbs_signaler(void *arg)
*/
intel_engine_remove_wait(engine,
&request->signaling.wait);
+ fence_signal(&request->fence);
/* Find the next oldest signal. Note that as we have
* not been holding the lock, another client may
@@ -467,7 +468,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
}
request->signaling.wait.tsk = b->signaler;
- request->signaling.wait.seqno = request->seqno;
+ request->signaling.wait.seqno = request->fence.seqno;
i915_gem_request_reference(request);
/* First add ourselves into the list of waiters, but register our
@@ -489,8 +490,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
p = &b->signals.rb_node;
while (*p) {
parent = *p;
- if (i915_seqno_passed(request->seqno,
- to_signaler(parent)->seqno)) {
+ if (i915_seqno_passed(request->fence.seqno,
+ to_signaler(parent)->fence.seqno)) {
p = &parent->rb_right;
first = false;
} else {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2e66f310595f..370b912aed1b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1802,7 +1802,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
intel_hws_seqno_address(request->engine) |
MI_FLUSH_DW_USE_GTT);
intel_logical_ring_emit(ringbuf, 0);
- intel_logical_ring_emit(ringbuf, request->seqno);
+ intel_logical_ring_emit(ringbuf, request->fence.seqno);
intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
intel_logical_ring_emit(ringbuf, MI_NOOP);
return intel_logical_ring_advance_and_submit(request);
@@ -2120,6 +2120,7 @@ logical_ring_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id)
engine->exec_id = info->exec_id;
engine->guc_id = info->guc_id;
engine->mmio_base = info->mmio_base;
+ engine->fence_context = fence_context_alloc(1);
engine->i915 = dev_priv;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 3fafc35d99c1..3bca2d3a285e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1352,7 +1352,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
PIPE_CONTROL_CS_STALL);
intel_ring_emit(signaller, lower_32_bits(gtt_offset));
intel_ring_emit(signaller, upper_32_bits(gtt_offset));
- intel_ring_emit(signaller, signaller_req->seqno);
+ intel_ring_emit(signaller, signaller_req->fence.seqno);
intel_ring_emit(signaller, 0);
intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
MI_SEMAPHORE_TARGET(waiter->hw_id));
@@ -1390,7 +1390,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
MI_FLUSH_DW_USE_GTT);
intel_ring_emit(signaller, upper_32_bits(gtt_offset));
- intel_ring_emit(signaller, signaller_req->seqno);
+ intel_ring_emit(signaller, signaller_req->fence.seqno);
intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
MI_SEMAPHORE_TARGET(waiter->hw_id));
intel_ring_emit(signaller, 0);
@@ -1423,7 +1423,7 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req,
if (i915_mmio_reg_valid(mbox_reg)) {
intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit_reg(signaller, mbox_reg);
- intel_ring_emit(signaller, signaller_req->seqno);
+ intel_ring_emit(signaller, signaller_req->fence.seqno);
}
}
@@ -1459,7 +1459,7 @@ gen6_add_request(struct drm_i915_gem_request *req)
intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
intel_ring_emit(engine,
I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
- intel_ring_emit(engine, req->seqno);
+ intel_ring_emit(engine, req->fence.seqno);
intel_ring_emit(engine, MI_USER_INTERRUPT);
__intel_ring_advance(engine);
@@ -1708,7 +1708,7 @@ i9xx_add_request(struct drm_i915_gem_request *req)
intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
intel_ring_emit(engine,
I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
- intel_ring_emit(engine, req->seqno);
+ intel_ring_emit(engine, req->fence.seqno);
intel_ring_emit(engine, MI_USER_INTERRUPT);
__intel_ring_advance(engine);
@@ -2177,6 +2177,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
WARN_ON(engine->buffer);
engine->i915 = dev_priv;
+ engine->fence_context = fence_context_alloc(1);
INIT_LIST_HEAD(&engine->active_list);
INIT_LIST_HEAD(&engine->request_list);
INIT_LIST_HEAD(&engine->execlist_queue);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 121294c602c3..3d90fc79be8c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -146,6 +146,7 @@ struct intel_engine_cs {
unsigned int exec_id;
unsigned int hw_id;
unsigned int guc_id; /* XXX same as hw_id? */
+ u64 fence_context;
u32 mmio_base;
struct intel_ringbuffer *buffer;
struct list_head buffers;
--
2.8.1
More information about the Intel-gfx
mailing list