[PATCH 10/15] global-seqno
Chris Wilson
chris at chris-wilson.co.uk
Tue Sep 13 09:32:46 UTC 2016
---
drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
drivers/gpu/drm/i915/i915_drv.h | 4 ++--
drivers/gpu/drm/i915/i915_gem.c | 4 ++--
drivers/gpu/drm/i915/i915_gem_request.c | 23 ++++++++++++++++-------
drivers/gpu/drm/i915/i915_gem_request.h | 30 +++++++++++++++++++++++++-----
drivers/gpu/drm/i915/i915_gpu_error.c | 2 +-
drivers/gpu/drm/i915/i915_guc_submission.c | 4 ++--
drivers/gpu/drm/i915/i915_trace.h | 8 ++++----
drivers/gpu/drm/i915/intel_breadcrumbs.c | 8 +++++---
drivers/gpu/drm/i915/intel_lrc.c | 4 ++--
drivers/gpu/drm/i915/intel_ringbuffer.c | 14 +++++++-------
11 files changed, 67 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d1752b0fee37..b325ec3b5a09 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -661,7 +661,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
rcu_read_lock();
task = pid ? pid_task(pid, PIDTYPE_PID) : NULL;
seq_printf(m, " %x @ %d: %s [%d]\n",
- req->fence.seqno,
+ req->global_seqno,
(int) (jiffies - req->emitted_jiffies),
task ? task->comm : "<unknown>",
task ? task->pid : -1);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6fa1a2bac0a9..f9447b1c81fa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3844,7 +3844,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
/* Before we do the heavier coherent read of the seqno,
* check the value (hopefully) in the CPU cacheline.
*/
- if (i915_gem_request_completed(req))
+ if (__i915_gem_request_completed(req))
return true;
/* Ensure our read of the seqno is coherent so that we
@@ -3895,7 +3895,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
wake_up_process(tsk);
rcu_read_unlock();
- if (i915_gem_request_completed(req))
+ if (__i915_gem_request_completed(req))
return true;
}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b570c1fbddb0..40b338677eba 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2601,7 +2601,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
* not need an engine->irq_seqno_barrier() before the seqno reads.
*/
list_for_each_entry(request, &engine->timeline->requests, link) {
- if (i915_gem_request_completed(request))
+ if (__i915_gem_request_completed(request))
continue;
if (!i915_sw_fence_done(&request->submit))
@@ -2651,7 +2651,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
return;
DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
- engine->name, request->fence.seqno);
+ engine->name, request->global_seqno);
/* Setup the CS to resume from the breadcrumb of the hung request */
engine->reset_hw(engine, request);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 445978721024..d09cfb368a5d 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -356,7 +356,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
/* Move the oldest request to the slab-cache (if not in use!) */
req = list_first_entry_or_null(&engine->timeline->requests,
typeof(*req), link);
- if (req && i915_gem_request_completed(req))
+ if (req && __i915_gem_request_completed(req))
i915_gem_request_retire(req);
/* Beware: Dragons be flying overhead.
@@ -367,7 +367,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
* of being read by __i915_gem_active_get_rcu(). As such,
* we have to be very careful when overwriting the contents. During
* the RCU lookup, we change chase the request->engine pointer,
- * read the request->fence.seqno and increment the reference count.
+ * read the request->global_seqno and increment the reference count.
*
* The reference count is incremented atomically. If it is zero,
* the lookup knows the request is unallocated and complete. Otherwise,
@@ -408,6 +408,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
req->i915 = dev_priv;
req->engine = engine;
req->timeline = engine->timeline;
+ req->global_seqno = seqno;
req->ctx = i915_gem_context_get(ctx);
/* No zalloc, must clear what we need by hand */
@@ -464,8 +465,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
return ret < 0 ? ret : 0;
}
+ if (from->global_seqno == 0) {
+ ret = i915_sw_fence_await_dma_fence(&to->submit,
+ &from->fence, 0,
+ GFP_KERNEL);
+ return ret < 0 ? ret : 0;
+ }
+
idx = intel_engine_sync_index(from->engine, to->engine);
- if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
+ if (from->global_seqno <= from->engine->semaphore.sync_seqno[idx])
return 0;
trace_i915_gem_ring_sync_to(to, from);
@@ -483,7 +491,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
return ret;
}
- from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
+ from->engine->semaphore.sync_seqno[idx] = from->global_seqno;
return 0;
}
@@ -754,7 +762,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
timeout_us += local_clock_us(&cpu);
do {
- if (i915_gem_request_completed(req))
+ if (__i915_gem_request_completed(req))
return true;
if (signal_pending_state(state, current))
@@ -855,6 +863,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
if (timeout < 0)
goto complete;
}
+ GEM_BUG_ON(!req->global_seqno);
/* Optimistic short spin before touching IRQs */
if (i915_spin_request(req, state, 5))
@@ -864,7 +873,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
if (flags & I915_WAIT_LOCKED)
add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
- intel_wait_init(&wait, req->fence.seqno);
+ intel_wait_init(&wait, req->global_seqno);
if (intel_engine_add_wait(req->engine, &wait))
/* In order to check that we haven't missed the interrupt
* as we enabled it, we need to kick ourselves to do a
@@ -937,7 +946,7 @@ static bool engine_retire_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe(request, next,
&engine->timeline->requests, link) {
- if (!i915_gem_request_completed(request))
+ if (!__i915_gem_request_completed(request))
return false;
i915_gem_request_retire(request);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 6c78ca0d85a9..22dd7ac270e6 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -87,6 +87,8 @@ struct drm_i915_gem_request {
struct i915_sw_fence submit;
wait_queue_t submitq;
+ u32 global_seqno;
+
/** GEM sequence number associated with the previous request,
* when the HWS breadcrumb is equal to this the GPU is processing
* this request.
@@ -163,7 +165,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
static inline u32
i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
{
- return req ? req->fence.seqno : 0;
+ return req ? req->global_seqno : 0;
}
static inline struct intel_engine_cs *
@@ -248,17 +250,35 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
}
static inline bool
-i915_gem_request_started(const struct drm_i915_gem_request *req)
+__i915_gem_request_started(const struct drm_i915_gem_request *req)
{
return i915_seqno_passed(intel_engine_get_seqno(req->engine),
req->previous_seqno);
}
static inline bool
-i915_gem_request_completed(const struct drm_i915_gem_request *req)
+i915_gem_request_started(const struct drm_i915_gem_request *req)
+{
+ if (!req->global_seqno)
+ return false;
+
+ return __i915_gem_request_started(req);
+}
+
+static inline bool
+__i915_gem_request_completed(const struct drm_i915_gem_request *req)
{
return i915_seqno_passed(intel_engine_get_seqno(req->engine),
- req->fence.seqno);
+ req->global_seqno);
+}
+
+static inline bool
+i915_gem_request_completed(const struct drm_i915_gem_request *req)
+{
+ if (!req->global_seqno)
+ return false;
+
+ return __i915_gem_request_completed(req);
}
bool __i915_spin_request(const struct drm_i915_gem_request *request,
@@ -266,7 +286,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *request,
static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
int state, unsigned long timeout_us)
{
- return (i915_gem_request_started(request) &&
+ return (__i915_gem_request_started(request) &&
__i915_spin_request(request, state, timeout_us));
}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 1e271008f1ef..65761c16ac48 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1124,7 +1124,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
}
erq = &ee->requests[count++];
- erq->seqno = request->fence.seqno;
+ erq->seqno = request->global_seqno;
erq->jiffies = request->emitted_jiffies;
erq->head = request->head;
erq->tail = request->tail;
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index b73e9020cbd2..d7b44e7afe59 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -508,7 +508,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
- wqi->fence_id = rq->fence.seqno;
+ wqi->fence_id = rq->global_seqno;
kunmap_atomic(base);
}
@@ -604,7 +604,7 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
client->b_fail += 1;
guc->submissions[engine_id] += 1;
- guc->last_seqno[engine_id] = rq->fence.seqno;
+ guc->last_seqno[engine_id] = rq->global_seqno;
spin_unlock(&client->wq_lock);
}
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 178798002a73..4c46f7c00323 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -466,7 +466,7 @@ TRACE_EVENT(i915_gem_ring_sync_to,
__entry->dev = from->i915->drm.primary->index;
__entry->sync_from = from->engine->id;
__entry->sync_to = to->engine->id;
- __entry->seqno = from->fence.seqno;
+ __entry->seqno = from->global_seqno;
),
TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
@@ -489,7 +489,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
TP_fast_assign(
__entry->dev = req->i915->drm.primary->index;
__entry->ring = req->engine->id;
- __entry->seqno = req->fence.seqno;
+ __entry->seqno = req->global_seqno;
__entry->flags = flags;
fence_enable_sw_signaling(&req->fence);
),
@@ -534,7 +534,7 @@ DECLARE_EVENT_CLASS(i915_gem_request,
TP_fast_assign(
__entry->dev = req->i915->drm.primary->index;
__entry->ring = req->engine->id;
- __entry->seqno = req->fence.seqno;
+ __entry->seqno = req->global_seqno;
),
TP_printk("dev=%u, ring=%u, seqno=%u",
@@ -596,7 +596,7 @@ TRACE_EVENT(i915_gem_request_wait_begin,
TP_fast_assign(
__entry->dev = req->i915->drm.primary->index;
__entry->ring = req->engine->id;
- __entry->seqno = req->fence.seqno;
+ __entry->seqno = req->global_seqno;
__entry->blocking =
mutex_is_locked(&req->i915->drm.struct_mutex);
),
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 9bad14d22c95..9ad1028681cf 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -504,9 +504,11 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
/* locked by fence_enable_sw_signaling() */
assert_spin_locked(&request->lock);
+ if (!request->global_seqno)
+ return;
request->signaling.wait.tsk = b->signaler;
- request->signaling.wait.seqno = request->fence.seqno;
+ request->signaling.wait.seqno = request->global_seqno;
i915_gem_request_get(request);
spin_lock(&b->lock);
@@ -530,8 +532,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
p = &b->signals.rb_node;
while (*p) {
parent = *p;
- if (i915_seqno_passed(request->fence.seqno,
- to_signaler(parent)->fence.seqno)) {
+ if (i915_seqno_passed(request->global_seqno,
+ to_signaler(parent)->global_seqno)) {
p = &parent->rb_right;
first = false;
} else {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 16d7cdd11082..00fcf36ba919 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1566,7 +1566,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
intel_hws_seqno_address(request->engine) |
MI_FLUSH_DW_USE_GTT);
intel_ring_emit(ring, 0);
- intel_ring_emit(ring, request->fence.seqno);
+ intel_ring_emit(ring, request->global_seqno);
intel_ring_emit(ring, MI_USER_INTERRUPT);
intel_ring_emit(ring, MI_NOOP);
return intel_logical_ring_advance(request);
@@ -1595,7 +1595,7 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request)
PIPE_CONTROL_QW_WRITE));
intel_ring_emit(ring, intel_hws_seqno_address(request->engine));
intel_ring_emit(ring, 0);
- intel_ring_emit(ring, i915_gem_request_get_seqno(request));
+ intel_ring_emit(ring, request->global_seqno);
/* We're thrashing one dword of HWS. */
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_USER_INTERRUPT);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 4bf6bd056b26..597e35c9b699 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1296,7 +1296,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *req)
PIPE_CONTROL_CS_STALL);
intel_ring_emit(ring, lower_32_bits(gtt_offset));
intel_ring_emit(ring, upper_32_bits(gtt_offset));
- intel_ring_emit(ring, req->fence.seqno);
+ intel_ring_emit(ring, req->global_seqno);
intel_ring_emit(ring, 0);
intel_ring_emit(ring,
MI_SEMAPHORE_SIGNAL |
@@ -1332,7 +1332,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *req)
lower_32_bits(gtt_offset) |
MI_FLUSH_DW_USE_GTT);
intel_ring_emit(ring, upper_32_bits(gtt_offset));
- intel_ring_emit(ring, req->fence.seqno);
+ intel_ring_emit(ring, req->global_seqno);
intel_ring_emit(ring,
MI_SEMAPHORE_SIGNAL |
MI_SEMAPHORE_TARGET(waiter->hw_id));
@@ -1365,7 +1365,7 @@ static int gen6_signal(struct drm_i915_gem_request *req)
if (i915_mmio_reg_valid(mbox_reg)) {
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit_reg(ring, mbox_reg);
- intel_ring_emit(ring, req->fence.seqno);
+ intel_ring_emit(ring, req->global_seqno);
}
}
@@ -1396,7 +1396,7 @@ static int i9xx_emit_request(struct drm_i915_gem_request *req)
intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
- intel_ring_emit(ring, req->fence.seqno);
+ intel_ring_emit(ring, req->global_seqno);
intel_ring_emit(ring, MI_USER_INTERRUPT);
intel_ring_advance(ring);
@@ -1446,7 +1446,7 @@ static int gen8_render_emit_request(struct drm_i915_gem_request *req)
PIPE_CONTROL_QW_WRITE));
intel_ring_emit(ring, intel_hws_seqno_address(engine));
intel_ring_emit(ring, 0);
- intel_ring_emit(ring, i915_gem_request_get_seqno(req));
+ intel_ring_emit(ring, req->global_seqno);
/* We're thrashing one dword of HWS. */
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_USER_INTERRUPT);
@@ -1484,7 +1484,7 @@ gen8_ring_sync_to(struct drm_i915_gem_request *req,
MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_SAD_GTE_SDD);
- intel_ring_emit(ring, signal->fence.seqno);
+ intel_ring_emit(ring, signal->global_seqno);
intel_ring_emit(ring, lower_32_bits(offset));
intel_ring_emit(ring, upper_32_bits(offset));
intel_ring_advance(ring);
@@ -1522,7 +1522,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req,
* seqno is >= the last seqno executed. However for hardware the
* comparison is strictly greater than.
*/
- intel_ring_emit(ring, signal->fence.seqno - 1);
+ intel_ring_emit(ring, signal->global_seqno - 1);
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
--
2.9.3
More information about the Intel-gfx-trybot
mailing list