[Intel-gfx] [PATCH 26/53] drm/i915/bdw: New logical ring submission mechanism
Volkin, Bradley D
bradley.d.volkin at intel.com
Fri Jun 20 23:00:35 CEST 2014
On Fri, Jun 13, 2014 at 08:37:44AM -0700, oscar.mateo at intel.com wrote:
> From: Oscar Mateo <oscar.mateo at intel.com>
>
> Well, new-ish: if all this code looks familiar, that's because it's
> a clone of the existing submission mechanism (with some modifications
> here and there to adapt it to LRCs and Execlists).
>
> And why did we do this? Execlists offer several advantages, like
> control over when the GPU is done with a given workload, that can
> help simplify the submission mechanism, no doubt, but I am interested
> in getting Execlists to work first and foremost. As we are creating
> a parallel submission mechanism (even if itñś just a clone), we can
> now start improving it without the fear of breaking old gens.
>
> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
> ---
> drivers/gpu/drm/i915/intel_lrc.c | 214 +++++++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_lrc.h | 18 ++++
> 2 files changed, 232 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 02fc3d0..89aed7a 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -86,6 +86,220 @@ bool intel_enable_execlists(struct drm_device *dev)
> return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev);
> }
>
> +static inline struct intel_ringbuffer *
> +logical_ringbuf_get(struct intel_engine_cs *ring, struct intel_context *ctx)
> +{
> + return ctx->engine[ring->id].ringbuf;
> +}
> +
> +void intel_logical_ring_advance_and_submit(struct intel_engine_cs *ring,
> + struct intel_context *ctx)
> +{
> + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx);
> +
> + intel_logical_ring_advance(ringbuf);
> +
> + if (intel_ring_stopped(ring))
> + return;
> +
> + ring->submit_ctx(ring, ctx, ringbuf->tail);
> +}
> +
> +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
> + struct intel_context *ctx)
> +{
> + if (ring->outstanding_lazy_seqno)
> + return 0;
> +
> + if (ring->preallocated_lazy_request == NULL) {
> + struct drm_i915_gem_request *request;
> +
> + request = kmalloc(sizeof(*request), GFP_KERNEL);
> + if (request == NULL)
> + return -ENOMEM;
> +
> + ring->preallocated_lazy_request = request;
> + }
> +
> + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
> +}
> +
> +static int logical_ring_wait_request(struct intel_engine_cs *ring,
> + struct intel_ringbuffer *ringbuf,
> + struct intel_context *ctx,
> + int bytes)
> +{
> + struct drm_i915_gem_request *request;
> + u32 seqno = 0;
> + int ret;
> +
> + if (ringbuf->last_retired_head != -1) {
> + ringbuf->head = ringbuf->last_retired_head;
> + ringbuf->last_retired_head = -1;
> +
> + ringbuf->space = intel_ring_space(ringbuf);
> + if (ringbuf->space >= bytes)
> + return 0;
> + }
> +
> + list_for_each_entry(request, &ring->request_list, list) {
> + if (__intel_ring_space(request->tail, ringbuf->tail,
> + ringbuf->size) >= bytes) {
> + seqno = request->seqno;
> + break;
> + }
> + }
> +
> + if (seqno == 0)
> + return -ENOSPC;
> +
> + ret = i915_wait_seqno(ring, seqno);
> + if (ret)
> + return ret;
> +
> + /* TODO: make sure we update the right ringbuffer's last_retired_head
> + * when retiring requests */
> + i915_gem_retire_requests_ring(ring);
> + ringbuf->head = ringbuf->last_retired_head;
> + ringbuf->last_retired_head = -1;
> +
> + ringbuf->space = intel_ring_space(ringbuf);
> + return 0;
> +}
> +
> +static int logical_ring_wait_for_space(struct intel_engine_cs *ring,
> + struct intel_ringbuffer *ringbuf,
> + struct intel_context *ctx,
> + int bytes)
> +{
> + struct drm_device *dev = ring->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + unsigned long end;
> + int ret;
> +
> + ret = logical_ring_wait_request(ring, ringbuf, ctx, bytes);
> + if (ret != -ENOSPC)
> + return ret;
> +
> + /* Force the context submission in case we have been skipping it */
> + intel_logical_ring_advance_and_submit(ring, ctx);
> +
> + /* With GEM the hangcheck timer should kick us out of the loop,
> + * leaving it early runs the risk of corrupting GEM state (due
> + * to running on almost untested codepaths). But on resume
> + * timers don't work yet, so prevent a complete hang in that
> + * case by choosing an insanely large timeout. */
> + end = jiffies + 60 * HZ;
> +
In the legacy ringbuffer version, there are tracepoints around the do loop.
Should we keep those? Or add lrc specific equivalents?
> + do {
> + ringbuf->head = I915_READ_HEAD(ring);
> + ringbuf->space = intel_ring_space(ringbuf);
> + if (ringbuf->space >= bytes) {
> + ret = 0;
> + break;
> + }
> +
> + if (!drm_core_check_feature(dev, DRIVER_MODESET) &&
> + dev->primary->master) {
> + struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
> + if (master_priv->sarea_priv)
> + master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
> + }
> +
> + msleep(1);
> +
> + if (dev_priv->mm.interruptible && signal_pending(current)) {
> + ret = -ERESTARTSYS;
> + break;
> + }
> +
> + ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> + dev_priv->mm.interruptible);
> + if (ret)
> + break;
> +
> + if (time_after(jiffies, end)) {
> + ret = -EBUSY;
> + break;
> + }
> + } while (1);
> +
> + return ret;
> +}
> +
> +static int logical_ring_wrap_buffer(struct intel_engine_cs *ring,
> + struct intel_ringbuffer *ringbuf,
> + struct intel_context *ctx)
> +{
> + uint32_t __iomem *virt;
> + int rem = ringbuf->size - ringbuf->tail;
> +
> + if (ringbuf->space < rem) {
> + int ret = logical_ring_wait_for_space(ring, ringbuf, ctx, rem);
> + if (ret)
> + return ret;
> + }
> +
> + virt = ringbuf->virtual_start + ringbuf->tail;
> + rem /= 4;
> + while (rem--)
> + iowrite32(MI_NOOP, virt++);
> +
> + ringbuf->tail = 0;
> + ringbuf->space = intel_ring_space(ringbuf);
> +
> + return 0;
> +}
> +
> +static int logical_ring_prepare(struct intel_engine_cs *ring,
> + struct intel_ringbuffer *ringbuf,
> + struct intel_context *ctx,
> + int bytes)
> +{
> + int ret;
> +
> + if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
> + ret = logical_ring_wrap_buffer(ring, ringbuf, ctx);
> + if (unlikely(ret))
> + return ret;
> + }
> +
> + if (unlikely(ringbuf->space < bytes)) {
> + ret = logical_ring_wait_for_space(ring, ringbuf, ctx, bytes);
> + if (unlikely(ret))
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +int intel_logical_ring_begin(struct intel_engine_cs *ring,
> + struct intel_context *ctx,
> + int num_dwords)
> +{
> + struct drm_i915_private *dev_priv = ring->dev->dev_private;
> + struct intel_ringbuffer *ringbuf = logical_ringbuf_get(ring, ctx);
> + int ret;
> +
> + ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> + dev_priv->mm.interruptible);
> + if (ret)
> + return ret;
> +
> + ret = logical_ring_prepare(ring, ringbuf, ctx,
> + num_dwords * sizeof(uint32_t));
> + if (ret)
> + return ret;
> +
> + /* Preallocate the olr before touching the ring */
> + ret = logical_ring_alloc_seqno(ring, ctx);
> + if (ret)
> + return ret;
> +
> + ringbuf->space -= num_dwords * sizeof(uint32_t);
> + return 0;
> +}
> +
> static int gen8_init_common_ring(struct intel_engine_cs *ring)
> {
> struct drm_device *dev = ring->dev;
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 26b0949..686ebf5 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -5,6 +5,24 @@
> void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
> int intel_logical_rings_init(struct drm_device *dev);
>
> +void intel_logical_ring_advance_and_submit(struct intel_engine_cs *ring,
> + struct intel_context *ctx);
> +
> +static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
> +{
> + ringbuf->tail &= ringbuf->size - 1;
> +}
> +
> +static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, u32 data)
> +{
> + iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
> + ringbuf->tail += 4;
> +}
> +
> +int intel_logical_ring_begin(struct intel_engine_cs *ring,
> + struct intel_context *ctx,
> + int num_dwords);
> +
I think all of these are only used in intel_lrc.c, so don't need to be in
the header and could all be static. Right?
Brad
> /* Logical Ring Contexts */
> void intel_lr_context_free(struct intel_context *ctx);
> int intel_lr_context_deferred_create(struct intel_context *ctx,
> --
> 1.9.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list