[Intel-gfx] [PATCH 18/43] drm/i915/bdw: New logical ring submission mechanism
Daniel Vetter
daniel at ffwll.ch
Mon Aug 11 22:40:18 CEST 2014
On Thu, Jul 24, 2014 at 05:04:26PM +0100, Thomas Daniel wrote:
> From: Oscar Mateo <oscar.mateo at intel.com>
>
> Well, new-ish: if all this code looks familiar, that's because it's
> a clone of the existing submission mechanism (with some modifications
> here and there to adapt it to LRCs and Execlists).
>
> And why did we do this instead of reusing code, one might wonder?
> Well, there are some fears that the differences are big enough that
> they will end up breaking all platforms.
>
> Also, Execlists offer several advantages, like control over when the
> GPU is done with a given workload, that can help simplify the
> submission mechanism, no doubt. I am interested in getting Execlists
> to work first and foremost, but in the future this parallel submission
> mechanism will help us to fine tune the mechanism without affecting
> old gens.
>
> v2: Pass the ringbuffer only (whenever possible).
>
> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
> ---
> drivers/gpu/drm/i915/intel_lrc.c | 193 +++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_lrc.h | 12 ++
> drivers/gpu/drm/i915/intel_ringbuffer.c | 20 ++--
> drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +
> 4 files changed, 218 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index f171fd5..bd37d51 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -106,6 +106,199 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
> /* TODO */
> }
>
> +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
> +{
> + intel_logical_ring_advance(ringbuf);
> +
> + if (intel_ring_stopped(ringbuf->ring))
> + return;
> +
> + /* TODO: how to submit a context to the ELSP is not here yet */
> +}
> +
> +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
> +{
> + if (ring->outstanding_lazy_seqno)
> + return 0;
> +
> + if (ring->preallocated_lazy_request == NULL) {
> + struct drm_i915_gem_request *request;
> +
> + request = kmalloc(sizeof(*request), GFP_KERNEL);
> + if (request == NULL)
> + return -ENOMEM;
> +
> + ring->preallocated_lazy_request = request;
> + }
> +
> + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
> +}
> +
> +static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, int bytes)
> +{
> + struct intel_engine_cs *ring = ringbuf->ring;
> + struct drm_i915_gem_request *request;
> + u32 seqno = 0;
> + int ret;
> +
> + if (ringbuf->last_retired_head != -1) {
> + ringbuf->head = ringbuf->last_retired_head;
> + ringbuf->last_retired_head = -1;
> +
> + ringbuf->space = intel_ring_space(ringbuf);
> + if (ringbuf->space >= bytes)
> + return 0;
> + }
> +
> + list_for_each_entry(request, &ring->request_list, list) {
> + if (__intel_ring_space(request->tail, ringbuf->tail,
> + ringbuf->size) >= bytes) {
> + seqno = request->seqno;
> + break;
> + }
> + }
> +
> + if (seqno == 0)
> + return -ENOSPC;
> +
> + ret = i915_wait_seqno(ring, seqno);
> + if (ret)
> + return ret;
> +
> + /* TODO: make sure we update the right ringbuffer's last_retired_head
> + * when retiring requests */
> + i915_gem_retire_requests_ring(ring);
> + ringbuf->head = ringbuf->last_retired_head;
> + ringbuf->last_retired_head = -1;
> +
> + ringbuf->space = intel_ring_space(ringbuf);
> + return 0;
> +}
> +
> +static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, int bytes)
> +{
> + struct intel_engine_cs *ring = ringbuf->ring;
> + struct drm_device *dev = ring->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + unsigned long end;
> + int ret;
> +
> + ret = logical_ring_wait_request(ringbuf, bytes);
> + if (ret != -ENOSPC)
> + return ret;
> +
> + /* Force the context submission in case we have been skipping it */
> + intel_logical_ring_advance_and_submit(ringbuf);
> +
> + /* With GEM the hangcheck timer should kick us out of the loop,
> + * leaving it early runs the risk of corrupting GEM state (due
> + * to running on almost untested codepaths). But on resume
> + * timers don't work yet, so prevent a complete hang in that
> + * case by choosing an insanely large timeout. */
> + end = jiffies + 60 * HZ;
> +
> + do {
> + ringbuf->head = I915_READ_HEAD(ring);
> + ringbuf->space = intel_ring_space(ringbuf);
> + if (ringbuf->space >= bytes) {
> + ret = 0;
> + break;
> + }
> +
> + if (!drm_core_check_feature(dev, DRIVER_MODESET) &&
> + dev->primary->master) {
> + struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
> + if (master_priv->sarea_priv)
> + master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
> + }
sarea is legacy gunk. Really bad legacy gunk. The DRIVER_MODESET check
should have been a give-away. Also, checkpatch complains.
Fixed while applying.
-Daniel
> +
> + msleep(1);
> +
> + if (dev_priv->mm.interruptible && signal_pending(current)) {
> + ret = -ERESTARTSYS;
> + break;
> + }
> +
> + ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> + dev_priv->mm.interruptible);
> + if (ret)
> + break;
> +
> + if (time_after(jiffies, end)) {
> + ret = -EBUSY;
> + break;
> + }
> + } while (1);
> +
> + return ret;
> +}
> +
> +static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
> +{
> + uint32_t __iomem *virt;
> + int rem = ringbuf->size - ringbuf->tail;
> +
> + if (ringbuf->space < rem) {
> + int ret = logical_ring_wait_for_space(ringbuf, rem);
> + if (ret)
> + return ret;
> + }
> +
> + virt = ringbuf->virtual_start + ringbuf->tail;
> + rem /= 4;
> + while (rem--)
> + iowrite32(MI_NOOP, virt++);
> +
> + ringbuf->tail = 0;
> + ringbuf->space = intel_ring_space(ringbuf);
> +
> + return 0;
> +}
> +
> +static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
> +{
> + int ret;
> +
> + if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
> + ret = logical_ring_wrap_buffer(ringbuf);
> + if (unlikely(ret))
> + return ret;
> + }
> +
> + if (unlikely(ringbuf->space < bytes)) {
> + ret = logical_ring_wait_for_space(ringbuf, bytes);
> + if (unlikely(ret))
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
> +{
> + struct intel_engine_cs *ring = ringbuf->ring;
> + struct drm_device *dev = ring->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + int ret;
> +
> + ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> + dev_priv->mm.interruptible);
> + if (ret)
> + return ret;
> +
> + ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
> + if (ret)
> + return ret;
> +
> + /* Preallocate the olr before touching the ring */
> + ret = logical_ring_alloc_seqno(ring);
> + if (ret)
> + return ret;
> +
> + ringbuf->space -= num_dwords * sizeof(uint32_t);
> + return 0;
> +}
> +
> static int gen8_init_common_ring(struct intel_engine_cs *ring)
> {
> struct drm_device *dev = ring->dev;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index bf0eff4..16798b6 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -29,6 +29,18 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring);
> void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
> int intel_logical_rings_init(struct drm_device *dev);
>
> +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
> +static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
> +{
> + ringbuf->tail &= ringbuf->size - 1;
> +}
> +static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, u32 data)
> +{
> + iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
> + ringbuf->tail += 4;
> +}
> +int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords);
> +
> /* Logical Ring Contexts */
> void intel_lr_context_free(struct intel_context *ctx);
> int intel_lr_context_deferred_create(struct intel_context *ctx,
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index ca45c58..dc2a991 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -57,7 +57,7 @@ intel_ring_initialized(struct intel_engine_cs *ring)
> return ring->buffer && ring->buffer->obj;
> }
>
> -static inline int __ring_space(int head, int tail, int size)
> +int __intel_ring_space(int head, int tail, int size)
> {
> int space = head - (tail + I915_RING_FREE_SPACE);
> if (space < 0)
> @@ -65,12 +65,12 @@ static inline int __ring_space(int head, int tail, int size)
> return space;
> }
>
> -static inline int ring_space(struct intel_ringbuffer *ringbuf)
> +int intel_ring_space(struct intel_ringbuffer *ringbuf)
> {
> - return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size);
> + return __intel_ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size);
> }
>
> -static bool intel_ring_stopped(struct intel_engine_cs *ring)
> +bool intel_ring_stopped(struct intel_engine_cs *ring)
> {
> struct drm_i915_private *dev_priv = ring->dev->dev_private;
> return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
> @@ -561,7 +561,7 @@ static int init_ring_common(struct intel_engine_cs *ring)
> else {
> ringbuf->head = I915_READ_HEAD(ring);
> ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
> - ringbuf->space = ring_space(ringbuf);
> + ringbuf->space = intel_ring_space(ringbuf);
> ringbuf->last_retired_head = -1;
> }
>
> @@ -1679,13 +1679,13 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
> ringbuf->head = ringbuf->last_retired_head;
> ringbuf->last_retired_head = -1;
>
> - ringbuf->space = ring_space(ringbuf);
> + ringbuf->space = intel_ring_space(ringbuf);
> if (ringbuf->space >= n)
> return 0;
> }
>
> list_for_each_entry(request, &ring->request_list, list) {
> - if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
> + if (__intel_ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
> seqno = request->seqno;
> break;
> }
> @@ -1702,7 +1702,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
> ringbuf->head = ringbuf->last_retired_head;
> ringbuf->last_retired_head = -1;
>
> - ringbuf->space = ring_space(ringbuf);
> + ringbuf->space = intel_ring_space(ringbuf);
> return 0;
> }
>
> @@ -1731,7 +1731,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
> trace_i915_ring_wait_begin(ring);
> do {
> ringbuf->head = I915_READ_HEAD(ring);
> - ringbuf->space = ring_space(ringbuf);
> + ringbuf->space = intel_ring_space(ringbuf);
> if (ringbuf->space >= n) {
> ret = 0;
> break;
> @@ -1783,7 +1783,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
> iowrite32(MI_NOOP, virt++);
>
> ringbuf->tail = 0;
> - ringbuf->space = ring_space(ringbuf);
> + ringbuf->space = intel_ring_space(ringbuf);
>
> return 0;
> }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c135334..c305df0 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -373,6 +373,9 @@ static inline void intel_ring_advance(struct intel_engine_cs *ring)
> struct intel_ringbuffer *ringbuf = ring->buffer;
> ringbuf->tail &= ringbuf->size - 1;
> }
> +int __intel_ring_space(int head, int tail, int size);
> +int intel_ring_space(struct intel_ringbuffer *ringbuf);
> +bool intel_ring_stopped(struct intel_engine_cs *ring);
> void __intel_ring_advance(struct intel_engine_cs *ring);
>
> int __must_check intel_ring_idle(struct intel_engine_cs *ring);
> --
> 1.7.9.5
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
More information about the Intel-gfx
mailing list