[Intel-gfx] [PATCH 2/5] drm/i915: Separate out the seqno-barrier from engine->get_seqno
Mika Kuoppala
mika.kuoppala at linux.intel.com
Tue Feb 16 12:51:13 UTC 2016
Chris Wilson <chris at chris-wilson.co.uk> writes:
> In order to simplify future patches, extract the
> lazy_coherency optimisation our of the engine->get_seqno() vfunc into
> its own callback.
>
> v2: Rename the barrier to engine->irq_seqno_barrier to try and better
> reflect that the barrier is only required after the user interrupt before
> reading the seqno (to ensure that the seqno update lands in time as we
> do not have strict seqno-irq ordering on all platforms).
>
> Reviewed-by: Dave Gordon <david.s.gordon at intel.com> [#v2]
>
> v3: Comments for hangcheck paranoia. Mika wanted to keep the extra
> barrier inside the hangcheck, just in case. I can argue that it doesn't
> provide a barrier against anything, but the side-effects of applying the
> barrier may prevent a false declaration of a hung GPU.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at intel.com>
> Cc: Dave Gordon <david.s.gordon at intel.com>
> ---
Reviewed-by: Mika Kuoppala <mika.kuoppala at intel.com>
> drivers/gpu/drm/i915/i915_debugfs.c | 6 +++---
> drivers/gpu/drm/i915/i915_drv.h | 12 ++++++++----
> drivers/gpu/drm/i915/i915_gpu_error.c | 2 +-
> drivers/gpu/drm/i915/i915_irq.c | 14 ++++++++++++--
> drivers/gpu/drm/i915/i915_trace.h | 2 +-
> drivers/gpu/drm/i915/intel_lrc.c | 18 ++++++-----------
> drivers/gpu/drm/i915/intel_ringbuffer.c | 34 +++++++++++++++++----------------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++--
> 8 files changed, 51 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index ec0c2a05eed6..c4df580ed0de 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -600,7 +600,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
> ring->name,
> i915_gem_request_get_seqno(work->flip_queued_req),
> dev_priv->next_seqno,
> - ring->get_seqno(ring, true),
> + ring->get_seqno(ring),
> i915_gem_request_completed(work->flip_queued_req, true));
> } else
> seq_printf(m, "Flip not associated with any ring\n");
> @@ -732,7 +732,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
> {
> if (ring->get_seqno) {
> seq_printf(m, "Current sequence (%s): %x\n",
> - ring->name, ring->get_seqno(ring, false));
> + ring->name, ring->get_seqno(ring));
> }
> }
>
> @@ -1342,8 +1342,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
> intel_runtime_pm_get(dev_priv);
>
> for_each_ring(ring, dev_priv, i) {
> - seqno[i] = ring->get_seqno(ring, false);
> acthd[i] = intel_ring_get_active_head(ring);
> + seqno[i] = ring->get_seqno(ring);
> }
>
> i915_get_extra_instdone(dev, instdone);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 20d9dbd9f9cf..525f39c0551b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3001,15 +3001,19 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
> bool lazy_coherency)
> {
> - u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
> - return i915_seqno_passed(seqno, req->previous_seqno);
> + if (!lazy_coherency && req->ring->irq_seqno_barrier)
> + req->ring->irq_seqno_barrier(req->ring);
> + return i915_seqno_passed(req->ring->get_seqno(req->ring),
> + req->previous_seqno);
> }
>
> static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
> bool lazy_coherency)
> {
> - u32 seqno = req->ring->get_seqno(req->ring, lazy_coherency);
> - return i915_seqno_passed(seqno, req->seqno);
> + if (!lazy_coherency && req->ring->irq_seqno_barrier)
> + req->ring->irq_seqno_barrier(req->ring);
> + return i915_seqno_passed(req->ring->get_seqno(req->ring),
> + req->seqno);
> }
>
> int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 978c026963b8..33f1e0636a03 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -906,8 +906,8 @@ static void i915_record_ring_state(struct drm_device *dev,
>
> ering->waiting = waitqueue_active(&ring->irq_queue);
> ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
> - ering->seqno = ring->get_seqno(ring, false);
> ering->acthd = intel_ring_get_active_head(ring);
> + ering->seqno = ring->get_seqno(ring);
> ering->start = I915_READ_START(ring);
> ering->head = I915_READ_HEAD(ring);
> ering->tail = I915_READ_TAIL(ring);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 25a89373df63..07bc2cdd6252 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2937,7 +2937,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
> if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
> return -1;
>
> - if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
> + if (i915_seqno_passed(signaller->get_seqno(signaller), seqno))
> return 1;
>
> /* cursory check for an unkickable deadlock */
> @@ -3101,8 +3101,18 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>
> semaphore_clear_deadlocks(dev_priv);
>
> - seqno = ring->get_seqno(ring, false);
> + /* We don't strictly need an irq-barrier here, as we are not
> + * serving an interrupt request, be paranoid in case the
> + * barrier has side-effects (such as preventing a broken
> + * cacheline snoop) and so be sure that we can see the seqno
> + * advance. If the seqno should stick, due to a stale
> + * cacheline, we would erroneously declare the GPU hung.
> + */
> + if (ring->irq_seqno_barrier)
> + ring->irq_seqno_barrier(ring);
> +
> acthd = intel_ring_get_active_head(ring);
> + seqno = ring->get_seqno(ring);
>
> if (ring->hangcheck.seqno == seqno) {
> if (ring_idle(ring, seqno)) {
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index 52b2d409945d..cfb5f78a6e84 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -573,7 +573,7 @@ TRACE_EVENT(i915_gem_request_notify,
> TP_fast_assign(
> __entry->dev = ring->dev->primary->index;
> __entry->ring = ring->id;
> - __entry->seqno = ring->get_seqno(ring, false);
> + __entry->seqno = ring->get_seqno(ring);
> ),
>
> TP_printk("dev=%u, ring=%u, seqno=%u",
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 3a03646e343d..0e833d470c5e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1837,7 +1837,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
> return 0;
> }
>
> -static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +static u32 gen8_get_seqno(struct intel_engine_cs *ring)
> {
> return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> }
> @@ -1847,9 +1847,8 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
> intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
> }
>
> -static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +static void bxt_seqno_barrier(struct intel_engine_cs *ring)
> {
> -
> /*
> * On BXT A steppings there is a HW coherency issue whereby the
> * MI_STORE_DATA_IMM storing the completed request's seqno
> @@ -1860,11 +1859,7 @@ static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> * bxt_a_set_seqno(), where we also do a clflush after the write. So
> * this clflush in practice becomes an invalidate operation.
> */
> -
> - if (!lazy_coherency)
> - intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
> -
> - return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> + intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
> }
>
> static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
> @@ -2034,12 +2029,11 @@ logical_ring_default_vfuncs(struct drm_device *dev,
> ring->irq_get = gen8_logical_ring_get_irq;
> ring->irq_put = gen8_logical_ring_put_irq;
> ring->emit_bb_start = gen8_emit_bb_start;
> + ring->get_seqno = gen8_get_seqno;
> + ring->set_seqno = gen8_set_seqno;
> if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
> - ring->get_seqno = bxt_a_get_seqno;
> + ring->irq_seqno_barrier = bxt_seqno_barrier;
> ring->set_seqno = bxt_a_set_seqno;
> - } else {
> - ring->get_seqno = gen8_get_seqno;
> - ring->set_seqno = gen8_set_seqno;
> }
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index abac0560d075..573203618dfe 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1548,8 +1548,8 @@ pc_render_add_request(struct drm_i915_gem_request *req)
> return 0;
> }
>
> -static u32
> -gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +static void
> +gen6_seqno_barrier(struct intel_engine_cs *ring)
> {
> /* Workaround to force correct ordering between irq and seqno writes on
> * ivb (and maybe also on snb) by reading from a CS register (like
> @@ -1563,16 +1563,12 @@ gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> * batch i.e. much more frequent than a delay when waiting for the
> * interrupt (with the same net latency).
> */
> - if (!lazy_coherency) {
> - struct drm_i915_private *dev_priv = ring->dev->dev_private;
> - POSTING_READ_FW(RING_ACTHD(ring->mmio_base));
> - }
> -
> - return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> + struct drm_i915_private *dev_priv = to_i915(ring->dev);
> + POSTING_READ_FW(RING_ACTHD(ring->mmio_base));
> }
>
> static u32
> -ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +ring_get_seqno(struct intel_engine_cs *ring)
> {
> return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> }
> @@ -1584,7 +1580,7 @@ ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
> }
>
> static u32
> -pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
> +pc_render_get_seqno(struct intel_engine_cs *ring)
> {
> return ring->scratch.cpu_page[0];
> }
> @@ -2783,7 +2779,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
> ring->irq_get = gen8_ring_get_irq;
> ring->irq_put = gen8_ring_put_irq;
> ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
> - ring->get_seqno = gen6_ring_get_seqno;
> + ring->irq_seqno_barrier = gen6_seqno_barrier;
> + ring->get_seqno = ring_get_seqno;
> ring->set_seqno = ring_set_seqno;
> if (i915_semaphore_is_enabled(dev)) {
> WARN_ON(!dev_priv->semaphore_obj);
> @@ -2800,7 +2797,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
> ring->irq_get = gen6_ring_get_irq;
> ring->irq_put = gen6_ring_put_irq;
> ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
> - ring->get_seqno = gen6_ring_get_seqno;
> + ring->irq_seqno_barrier = gen6_seqno_barrier;
> + ring->get_seqno = ring_get_seqno;
> ring->set_seqno = ring_set_seqno;
> if (i915_semaphore_is_enabled(dev)) {
> ring->semaphore.sync_to = gen6_ring_sync;
> @@ -2915,7 +2913,8 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
> ring->write_tail = gen6_bsd_ring_write_tail;
> ring->flush = gen6_bsd_ring_flush;
> ring->add_request = gen6_add_request;
> - ring->get_seqno = gen6_ring_get_seqno;
> + ring->irq_seqno_barrier = gen6_seqno_barrier;
> + ring->get_seqno = ring_get_seqno;
> ring->set_seqno = ring_set_seqno;
> if (INTEL_INFO(dev)->gen >= 8) {
> ring->irq_enable_mask =
> @@ -2988,7 +2987,8 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev)
> ring->mmio_base = GEN8_BSD2_RING_BASE;
> ring->flush = gen6_bsd_ring_flush;
> ring->add_request = gen6_add_request;
> - ring->get_seqno = gen6_ring_get_seqno;
> + ring->irq_seqno_barrier = gen6_seqno_barrier;
> + ring->get_seqno = ring_get_seqno;
> ring->set_seqno = ring_set_seqno;
> ring->irq_enable_mask =
> GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
> @@ -3019,7 +3019,8 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
> ring->write_tail = ring_write_tail;
> ring->flush = gen6_ring_flush;
> ring->add_request = gen6_add_request;
> - ring->get_seqno = gen6_ring_get_seqno;
> + ring->irq_seqno_barrier = gen6_seqno_barrier;
> + ring->get_seqno = ring_get_seqno;
> ring->set_seqno = ring_set_seqno;
> if (INTEL_INFO(dev)->gen >= 8) {
> ring->irq_enable_mask =
> @@ -3077,7 +3078,8 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
> ring->write_tail = ring_write_tail;
> ring->flush = gen6_ring_flush;
> ring->add_request = gen6_add_request;
> - ring->get_seqno = gen6_ring_get_seqno;
> + ring->irq_seqno_barrier = gen6_seqno_barrier;
> + ring->get_seqno = ring_get_seqno;
> ring->set_seqno = ring_set_seqno;
>
> if (INTEL_INFO(dev)->gen >= 8) {
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 566b0ae10ce0..4cea04491392 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -196,8 +196,8 @@ struct intel_engine_cs {
> * seen value is good enough. Note that the seqno will always be
> * monotonic, even if not coherent.
> */
> - u32 (*get_seqno)(struct intel_engine_cs *ring,
> - bool lazy_coherency);
> + void (*irq_seqno_barrier)(struct intel_engine_cs *ring);
> + u32 (*get_seqno)(struct intel_engine_cs *ring);
> void (*set_seqno)(struct intel_engine_cs *ring,
> u32 seqno);
> int (*dispatch_execbuffer)(struct drm_i915_gem_request *req,
> --
> 2.7.0
More information about the Intel-gfx
mailing list