[Intel-gfx] [PATCH] drm/i915: Lazily apply the SNB+ seqno w/a
Daniel Vetter
daniel at ffwll.ch
Thu Aug 9 12:29:16 CEST 2012
On Thu, Aug 09, 2012 at 10:58:30AM +0100, Chris Wilson wrote:
> Avoid the forcewake overhead when simply retiring requests, as often the
> last seen seqno is good enough to satisfy the retirement process and will
> be promptly re-run in any case. Only ensure that we force the coherent
> seqno read when we are explicitly waiting upon a completion event to be
> sure that none go missing, and also for when we are reporting seqno
> values in case of error or debugging.
>
> This greatly reduces the load for userspace using the busy-ioctl to
> track active buffers, for instance halving the CPU used by X in pushing
> the pixels from a software render (flash). The effect will be even more
> magnified with userptr and so providing a zero-copy upload path in that
> instance, or in similar instances where X is simply compositing DRI
> buffers.
>
> v2: Reverse the polarity of the tachyon stream. Daniel suggested that
> 'force' was too generic for the parameter name and that 'lazy_coherency'
> better encapsulated the semantics of it being an optimization and its
> purpose. Also notice that gen6_get_seqno() is only used by gen6/7
> chipsets and so the test for IS_GEN6 || IS_GEN7 is redundant in that
> function.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Yeah, I like the new color.
Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
I'll muse over this some more before picking it up, just in case I
notice a place where this could blow up ...
-Daniel
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
> drivers/gpu/drm/i915/i915_gem.c | 6 +++---
> drivers/gpu/drm/i915/i915_irq.c | 9 +++++----
> drivers/gpu/drm/i915/intel_ringbuffer.c | 10 ++++------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 9 ++++++++-
> 5 files changed, 21 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 86444fe..544abf5 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -426,7 +426,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
> {
> if (ring->get_seqno) {
> seq_printf(m, "Current sequence (%s): %d\n",
> - ring->name, ring->get_seqno(ring));
> + ring->name, ring->get_seqno(ring, false));
> }
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 153c533..0582e22 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1888,7 +1888,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
>
> WARN_ON(i915_verify_lists(ring->dev));
>
> - seqno = ring->get_seqno(ring);
> + seqno = ring->get_seqno(ring, true);
>
> for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
> if (seqno >= ring->sync_seqno[i])
> @@ -2060,7 +2060,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
> bool wait_forever = true;
> int ret;
>
> - if (i915_seqno_passed(ring->get_seqno(ring), seqno))
> + if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
> return 0;
>
> trace_i915_gem_request_wait_begin(ring, seqno);
> @@ -2079,7 +2079,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
> getrawmonotonic(&before);
>
> #define EXIT_COND \
> - (i915_seqno_passed(ring->get_seqno(ring), seqno) || \
> + (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
> atomic_read(&dev_priv->mm.wedged))
> do {
> if (interruptible)
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 57e4f2b..0ba15e8 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -335,7 +335,7 @@ static void notify_ring(struct drm_device *dev,
> if (ring->obj == NULL)
> return;
>
> - trace_i915_gem_request_complete(ring, ring->get_seqno(ring));
> + trace_i915_gem_request_complete(ring, ring->get_seqno(ring, false));
>
> wake_up_all(&ring->irq_queue);
> if (i915_enable_hangcheck) {
> @@ -1052,7 +1052,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
> if (!ring->get_seqno)
> return NULL;
>
> - seqno = ring->get_seqno(ring);
> + seqno = ring->get_seqno(ring, false);
> list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
> if (obj->ring != ring)
> continue;
> @@ -1106,7 +1106,7 @@ static void i915_record_ring_state(struct drm_device *dev,
>
> error->waiting[ring->id] = waitqueue_active(&ring->irq_queue);
> error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base));
> - error->seqno[ring->id] = ring->get_seqno(ring);
> + error->seqno[ring->id] = ring->get_seqno(ring, false);
> error->acthd[ring->id] = intel_ring_get_active_head(ring);
> error->head[ring->id] = I915_READ_HEAD(ring);
> error->tail[ring->id] = I915_READ_TAIL(ring);
> @@ -1603,7 +1603,8 @@ ring_last_seqno(struct intel_ring_buffer *ring)
> static bool i915_hangcheck_ring_idle(struct intel_ring_buffer *ring, bool *err)
> {
> if (list_empty(&ring->request_list) ||
> - i915_seqno_passed(ring->get_seqno(ring), ring_last_seqno(ring))) {
> + i915_seqno_passed(ring->get_seqno(ring, false),
> + ring_last_seqno(ring))) {
> /* Issue a wake-up to catch stuck h/w. */
> if (waitqueue_active(&ring->irq_queue)) {
> DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 6c0f504..c77bcd8 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -618,26 +618,24 @@ pc_render_add_request(struct intel_ring_buffer *ring,
> }
>
> static u32
> -gen6_ring_get_seqno(struct intel_ring_buffer *ring)
> +gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> {
> - struct drm_device *dev = ring->dev;
> -
> /* Workaround to force correct ordering between irq and seqno writes on
> * ivb (and maybe also on snb) by reading from a CS register (like
> * ACTHD) before reading the status page. */
> - if (IS_GEN6(dev) || IS_GEN7(dev))
> + if (!lazy_coherency)
> intel_ring_get_active_head(ring);
> return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> }
>
> static u32
> -ring_get_seqno(struct intel_ring_buffer *ring)
> +ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> {
> return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> }
>
> static u32
> -pc_render_get_seqno(struct intel_ring_buffer *ring)
> +pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
> {
> struct pipe_control *pc = ring->private;
> return pc->cpu_page[0];
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 8b2b92e..2ea7a31 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -72,7 +72,14 @@ struct intel_ring_buffer {
> u32 flush_domains);
> int (*add_request)(struct intel_ring_buffer *ring,
> u32 *seqno);
> - u32 (*get_seqno)(struct intel_ring_buffer *ring);
> + /* Some chipsets are not quite as coherent as advertised and need
> + * an expensive kick to force a true read of the up-to-date seqno.
> + * However, the up-to-date seqno is not always required and the last
> + * seen value is good enough. Note that the seqno will always be
> + * monotonic, even if not coherent.
> + */
> + u32 (*get_seqno)(struct intel_ring_buffer *ring,
> + bool lazy_coherency);
> int (*dispatch_execbuffer)(struct intel_ring_buffer *ring,
> u32 offset, u32 length);
> void (*cleanup)(struct intel_ring_buffer *ring);
> --
> 1.7.10.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Mail: daniel at ffwll.ch
Mobile: +41 (0)79 365 57 48
More information about the Intel-gfx
mailing list