[Intel-gfx] [PATCH 1/9] drm/i915: Remove HW semaphores for gen7 inter-engine synchronisation
Mika Kuoppala
mika.kuoppala at linux.intel.com
Fri Dec 28 11:37:32 UTC 2018
Chris Wilson <chris at chris-wilson.co.uk> writes:
> The writing is on the wall for the existence of a single execution queue
> along each engine, and as a consequence we will not be able to track
> dependencies along the HW queue itself, i.e. we will not be able to use
> HW semaphores on gen7 as they use a global set of registers (and unlike
> gen8+ we can not effectively target memory to keep per-context seqno and
> dependencies).
>
> On the positive side, when we implement request reordering for gen7 we
> could also not presume a simple execution queue and would also require
> removing the current semaphore generation code. So this bring us another
> step closer to request reordering!
>
> The negative side is that using interrupts to drive inter-engine
> synchronisation is much slower (4us -> 15us to do a nop on each of the 3
> engines on ivb). This is much better than it at the time of introducing
> the HW semaphores and equally importantly userspace weaned itself of
s/of/off ?
> intermixing dependent BLT/RENDER operations (the prime culprit was glyph
> rendering in UXA). So while we regress the microbenchmarks it should not
> impact the user.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 19 +--
> drivers/gpu/drm/i915/i915_drv.c | 2 +-
> drivers/gpu/drm/i915/i915_drv.h | 3 -
> drivers/gpu/drm/i915/i915_gem.c | 4 +-
> drivers/gpu/drm/i915/i915_request.c | 126 +---------------
> drivers/gpu/drm/i915/i915_timeline.h | 8 -
> drivers/gpu/drm/i915/intel_engine_cs.c | 29 +---
> drivers/gpu/drm/i915/intel_hangcheck.c | 155 --------------------
> drivers/gpu/drm/i915/intel_ringbuffer.c | 187 +-----------------------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 56 +------
> 10 files changed, 15 insertions(+), 574 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 77486a614614..b0bdf3c0d776 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1041,21 +1041,7 @@ static const struct file_operations i915_error_state_fops = {
> static int
> i915_next_seqno_set(void *data, u64 val)
> {
> - struct drm_i915_private *dev_priv = data;
> - struct drm_device *dev = &dev_priv->drm;
> - int ret;
> -
> - ret = mutex_lock_interruptible(&dev->struct_mutex);
> - if (ret)
> - return ret;
> -
> - intel_runtime_pm_get(dev_priv);
> - ret = i915_gem_set_global_seqno(dev, val);
> - intel_runtime_pm_put(dev_priv);
> -
> - mutex_unlock(&dev->struct_mutex);
> -
> - return ret;
> + return val ? 0 : -EINVAL;
This begs to meet it's maker soon.
I didn't find anything else in this patch, so looks
ok and what a clump of compilated code we can get rid off!
Code that we have dragged along with no small amount of
suffering. I am in favour.
Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> }
>
> DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
> @@ -4219,9 +4205,6 @@ i915_drop_caches_set(void *data, u64 val)
> I915_WAIT_LOCKED,
> MAX_SCHEDULE_TIMEOUT);
>
> - if (ret == 0 && val & DROP_RESET_SEQNO)
> - ret = i915_gem_set_global_seqno(&i915->drm, 1);
> -
> if (val & DROP_RETIRE)
> i915_retire_requests(i915);
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index caa055ac9472..dcb935338c63 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -349,7 +349,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
> value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL);
> break;
> case I915_PARAM_HAS_SEMAPHORES:
> - value = HAS_LEGACY_SEMAPHORES(dev_priv);
> + value = 0;
> break;
> case I915_PARAM_HAS_SECURE_BATCHES:
> value = capable(CAP_SYS_ADMIN);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 815db160b966..7da653ece4dd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1948,7 +1948,6 @@ struct drm_i915_private {
> struct list_head active_rings;
> struct list_head closed_vma;
> u32 active_requests;
> - u32 request_serial;
>
> /**
> * Is the GPU currently considered idle, or busy executing
> @@ -2394,8 +2393,6 @@ intel_info(const struct drm_i915_private *dev_priv)
> #define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS)
> #define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS)
>
> -#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN(dev_priv, 7)
> -
> #define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc)
> #define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop)
> #define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED))
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d92147ab4489..f4254e012620 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3318,7 +3318,7 @@ static void nop_submit_request(struct i915_request *request)
>
> spin_lock_irqsave(&request->engine->timeline.lock, flags);
> __i915_request_submit(request);
> - intel_engine_init_global_seqno(request->engine, request->global_seqno);
> + intel_engine_write_global_seqno(request->engine, request->global_seqno);
> spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
> }
>
> @@ -3359,7 +3359,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
>
> /*
> * Make sure no request can slip through without getting completed by
> - * either this call here to intel_engine_init_global_seqno, or the one
> + * either this call here to intel_engine_write_global_seqno, or the one
> * in nop_submit_request.
> */
> synchronize_rcu();
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 637909c59f1f..6cedcfea33b5 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -111,99 +111,10 @@ i915_request_remove_from_client(struct i915_request *request)
> spin_unlock(&file_priv->mm.lock);
> }
>
> -static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
> +static void reserve_gt(struct drm_i915_private *i915)
> {
> - struct intel_engine_cs *engine;
> - struct i915_timeline *timeline;
> - enum intel_engine_id id;
> - int ret;
> -
> - /* Carefully retire all requests without writing to the rings */
> - ret = i915_gem_wait_for_idle(i915,
> - I915_WAIT_INTERRUPTIBLE |
> - I915_WAIT_LOCKED,
> - MAX_SCHEDULE_TIMEOUT);
> - if (ret)
> - return ret;
> -
> - GEM_BUG_ON(i915->gt.active_requests);
> -
> - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
> - for_each_engine(engine, i915, id) {
> - GEM_TRACE("%s seqno %d (current %d) -> %d\n",
> - engine->name,
> - engine->timeline.seqno,
> - intel_engine_get_seqno(engine),
> - seqno);
> -
> - if (seqno == engine->timeline.seqno)
> - continue;
> -
> - kthread_park(engine->breadcrumbs.signaler);
> -
> - if (!i915_seqno_passed(seqno, engine->timeline.seqno)) {
> - /* Flush any waiters before we reuse the seqno */
> - intel_engine_disarm_breadcrumbs(engine);
> - intel_engine_init_hangcheck(engine);
> - GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals));
> - }
> -
> - /* Check we are idle before we fiddle with hw state! */
> - GEM_BUG_ON(!intel_engine_is_idle(engine));
> - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request));
> -
> - /* Finally reset hw state */
> - intel_engine_init_global_seqno(engine, seqno);
> - engine->timeline.seqno = seqno;
> -
> - kthread_unpark(engine->breadcrumbs.signaler);
> - }
> -
> - list_for_each_entry(timeline, &i915->gt.timelines, link)
> - memset(timeline->global_sync, 0, sizeof(timeline->global_sync));
> -
> - i915->gt.request_serial = seqno;
> -
> - return 0;
> -}
> -
> -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
> -{
> - struct drm_i915_private *i915 = to_i915(dev);
> -
> - lockdep_assert_held(&i915->drm.struct_mutex);
> -
> - if (seqno == 0)
> - return -EINVAL;
> -
> - /* HWS page needs to be set less than what we will inject to ring */
> - return reset_all_global_seqno(i915, seqno - 1);
> -}
> -
> -static int reserve_gt(struct drm_i915_private *i915)
> -{
> - int ret;
> -
> - /*
> - * Reservation is fine until we may need to wrap around
> - *
> - * By incrementing the serial for every request, we know that no
> - * individual engine may exceed that serial (as each is reset to 0
> - * on any wrap). This protects even the most pessimistic of migrations
> - * of every request from all engines onto just one.
> - */
> - while (unlikely(++i915->gt.request_serial == 0)) {
> - ret = reset_all_global_seqno(i915, 0);
> - if (ret) {
> - i915->gt.request_serial--;
> - return ret;
> - }
> - }
> -
> if (!i915->gt.active_requests++)
> i915_gem_unpark(i915);
> -
> - return 0;
> }
>
> static void unreserve_gt(struct drm_i915_private *i915)
> @@ -608,9 +519,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> if (IS_ERR(ce))
> return ERR_CAST(ce);
>
> - ret = reserve_gt(i915);
> - if (ret)
> - goto err_unpin;
> + reserve_gt(i915);
>
> ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST);
> if (ret)
> @@ -743,7 +652,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> kmem_cache_free(i915->requests, rq);
> err_unreserve:
> unreserve_gt(i915);
> -err_unpin:
> intel_context_unpin(ce);
> return ERR_PTR(ret);
> }
> @@ -771,34 +679,12 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
> ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
> &from->submit,
> I915_FENCE_GFP);
> - return ret < 0 ? ret : 0;
> - }
> -
> - if (to->engine->semaphore.sync_to) {
> - u32 seqno;
> -
> - GEM_BUG_ON(!from->engine->semaphore.signal);
> -
> - seqno = i915_request_global_seqno(from);
> - if (!seqno)
> - goto await_dma_fence;
> -
> - if (seqno <= to->timeline->global_sync[from->engine->id])
> - return 0;
> -
> - trace_i915_gem_ring_sync_to(to, from);
> - ret = to->engine->semaphore.sync_to(to, from);
> - if (ret)
> - return ret;
> -
> - to->timeline->global_sync[from->engine->id] = seqno;
> - return 0;
> + } else {
> + ret = i915_sw_fence_await_dma_fence(&to->submit,
> + &from->fence, 0,
> + I915_FENCE_GFP);
> }
>
> -await_dma_fence:
> - ret = i915_sw_fence_await_dma_fence(&to->submit,
> - &from->fence, 0,
> - I915_FENCE_GFP);
> return ret < 0 ? ret : 0;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
> index ebd71b487220..38c1e15e927a 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_timeline.h
> @@ -63,14 +63,6 @@ struct i915_timeline {
> * redundant and we can discard it without loss of generality.
> */
> struct i915_syncmap *sync;
> - /**
> - * Separately to the inter-context seqno map above, we track the last
> - * barrier (e.g. semaphore wait) to the global engine timelines. Note
> - * that this tracks global_seqno rather than the context.seqno, and
> - * so it is subject to the limitations of hw wraparound and that we
> - * may need to revoke global_seqno (on pre-emption).
> - */
> - u32 global_sync[I915_NUM_ENGINES];
>
> struct list_head link;
> const char *name;
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index d3dec31df123..1462bb49f420 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -454,25 +454,8 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
> return err;
> }
>
> -void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
> +void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> -
> - /* Our semaphore implementation is strictly monotonic (i.e. we proceed
> - * so long as the semaphore value in the register/page is greater
> - * than the sync value), so whenever we reset the seqno,
> - * so long as we reset the tracking semaphore value to 0, it will
> - * always be before the next request's seqno. If we don't reset
> - * the semaphore value, then when the seqno moves backwards all
> - * future waits will complete instantly (causing rendering corruption).
> - */
> - if (IS_GEN_RANGE(dev_priv, 6, 7)) {
> - I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
> - I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
> - if (HAS_VEBOX(dev_priv))
> - I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
> - }
> -
> intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
> clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
>
> @@ -1300,16 +1283,6 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
> drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine));
> }
>
> - if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
> - drm_printf(m, "\tSYNC_0: 0x%08x\n",
> - I915_READ(RING_SYNC_0(engine->mmio_base)));
> - drm_printf(m, "\tSYNC_1: 0x%08x\n",
> - I915_READ(RING_SYNC_1(engine->mmio_base)));
> - if (HAS_VEBOX(dev_priv))
> - drm_printf(m, "\tSYNC_2: 0x%08x\n",
> - I915_READ(RING_SYNC_2(engine->mmio_base)));
> - }
> -
> addr = intel_engine_get_active_head(engine);
> drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
> upper_32_bits(addr), lower_32_bits(addr));
> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
> index 495fa145f37f..c3f929f59424 100644
> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
> @@ -24,144 +24,6 @@
>
> #include "i915_drv.h"
>
> -static bool
> -ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
> -{
> - ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
> - return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
> - MI_SEMAPHORE_REGISTER);
> -}
> -
> -static struct intel_engine_cs *
> -semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr,
> - u64 offset)
> -{
> - struct drm_i915_private *dev_priv = engine->i915;
> - u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
> - struct intel_engine_cs *signaller;
> - enum intel_engine_id id;
> -
> - for_each_engine(signaller, dev_priv, id) {
> - if (engine == signaller)
> - continue;
> -
> - if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
> - return signaller;
> - }
> -
> - DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n",
> - engine->name, ipehr);
> -
> - return ERR_PTR(-ENODEV);
> -}
> -
> -static struct intel_engine_cs *
> -semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
> -{
> - struct drm_i915_private *dev_priv = engine->i915;
> - void __iomem *vaddr;
> - u32 cmd, ipehr, head;
> - u64 offset = 0;
> - int i, backwards;
> -
> - /*
> - * This function does not support execlist mode - any attempt to
> - * proceed further into this function will result in a kernel panic
> - * when dereferencing ring->buffer, which is not set up in execlist
> - * mode.
> - *
> - * The correct way of doing it would be to derive the currently
> - * executing ring buffer from the current context, which is derived
> - * from the currently running request. Unfortunately, to get the
> - * current request we would have to grab the struct_mutex before doing
> - * anything else, which would be ill-advised since some other thread
> - * might have grabbed it already and managed to hang itself, causing
> - * the hang checker to deadlock.
> - *
> - * Therefore, this function does not support execlist mode in its
> - * current form. Just return NULL and move on.
> - */
> - if (engine->buffer == NULL)
> - return NULL;
> -
> - ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
> - if (!ipehr_is_semaphore_wait(engine, ipehr))
> - return NULL;
> -
> - /*
> - * HEAD is likely pointing to the dword after the actual command,
> - * so scan backwards until we find the MBOX. But limit it to just 3
> - * or 4 dwords depending on the semaphore wait command size.
> - * Note that we don't care about ACTHD here since that might
> - * point at at batch, and semaphores are always emitted into the
> - * ringbuffer itself.
> - */
> - head = I915_READ_HEAD(engine) & HEAD_ADDR;
> - backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4;
> - vaddr = (void __iomem *)engine->buffer->vaddr;
> -
> - for (i = backwards; i; --i) {
> - /*
> - * Be paranoid and presume the hw has gone off into the wild -
> - * our ring is smaller than what the hardware (and hence
> - * HEAD_ADDR) allows. Also handles wrap-around.
> - */
> - head &= engine->buffer->size - 1;
> -
> - /* This here seems to blow up */
> - cmd = ioread32(vaddr + head);
> - if (cmd == ipehr)
> - break;
> -
> - head -= 4;
> - }
> -
> - if (!i)
> - return NULL;
> -
> - *seqno = ioread32(vaddr + head + 4) + 1;
> - return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
> -}
> -
> -static int semaphore_passed(struct intel_engine_cs *engine)
> -{
> - struct drm_i915_private *dev_priv = engine->i915;
> - struct intel_engine_cs *signaller;
> - u32 seqno;
> -
> - engine->hangcheck.deadlock++;
> -
> - signaller = semaphore_waits_for(engine, &seqno);
> - if (signaller == NULL)
> - return -1;
> -
> - if (IS_ERR(signaller))
> - return 0;
> -
> - /* Prevent pathological recursion due to driver bugs */
> - if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES)
> - return -1;
> -
> - if (intel_engine_signaled(signaller, seqno))
> - return 1;
> -
> - /* cursory check for an unkickable deadlock */
> - if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE &&
> - semaphore_passed(signaller) < 0)
> - return -1;
> -
> - return 0;
> -}
> -
> -static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
> -{
> - struct intel_engine_cs *engine;
> - enum intel_engine_id id;
> -
> - for_each_engine(engine, dev_priv, id)
> - engine->hangcheck.deadlock = 0;
> -}
> -
> static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
> {
> u32 tmp = current_instdone | *old_instdone;
> @@ -252,21 +114,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
> return ENGINE_WAIT_KICK;
> }
>
> - if (IS_GEN_RANGE(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) {
> - switch (semaphore_passed(engine)) {
> - default:
> - return ENGINE_DEAD;
> - case 1:
> - i915_handle_error(dev_priv, ALL_ENGINES, 0,
> - "stuck semaphore on %s",
> - engine->name);
> - I915_WRITE_CTL(engine, tmp);
> - return ENGINE_WAIT_KICK;
> - case 0:
> - return ENGINE_WAIT;
> - }
> - }
> -
> return ENGINE_DEAD;
> }
>
> @@ -433,8 +280,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
> for_each_engine(engine, dev_priv, id) {
> struct intel_engine_hangcheck hc;
>
> - semaphore_clear_deadlocks(dev_priv);
> -
> hangcheck_load_sample(engine, &hc);
> hangcheck_accumulate_sample(engine, &hc);
> hangcheck_store_sample(engine, &hc);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 65fd92eb071d..d5a9edbcf1be 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -556,13 +556,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
>
> intel_engine_reset_breadcrumbs(engine);
>
> - if (HAS_LEGACY_SEMAPHORES(engine->i915)) {
> - I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
> - I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
> - if (HAS_VEBOX(dev_priv))
> - I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
> - }
> -
> /* Enforce ordering by reading HEAD register back */
> I915_READ_HEAD(engine);
>
> @@ -745,33 +738,6 @@ static int init_render_ring(struct intel_engine_cs *engine)
> return 0;
> }
>
> -static u32 *gen6_signal(struct i915_request *rq, u32 *cs)
> -{
> - struct drm_i915_private *dev_priv = rq->i915;
> - struct intel_engine_cs *engine;
> - enum intel_engine_id id;
> - int num_rings = 0;
> -
> - for_each_engine(engine, dev_priv, id) {
> - i915_reg_t mbox_reg;
> -
> - if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK))
> - continue;
> -
> - mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id];
> - if (i915_mmio_reg_valid(mbox_reg)) {
> - *cs++ = MI_LOAD_REGISTER_IMM(1);
> - *cs++ = i915_mmio_reg_offset(mbox_reg);
> - *cs++ = rq->global_seqno;
> - num_rings++;
> - }
> - }
> - if (num_rings & 1)
> - *cs++ = MI_NOOP;
> -
> - return cs;
> -}
> -
> static void cancel_requests(struct intel_engine_cs *engine)
> {
> struct i915_request *request;
> @@ -822,39 +788,6 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
>
> static const int i9xx_emit_breadcrumb_sz = 4;
>
> -static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs)
> -{
> - return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs));
> -}
> -
> -static int
> -gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal)
> -{
> - u32 dw1 = MI_SEMAPHORE_MBOX |
> - MI_SEMAPHORE_COMPARE |
> - MI_SEMAPHORE_REGISTER;
> - u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id];
> - u32 *cs;
> -
> - WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
> -
> - cs = intel_ring_begin(rq, 4);
> - if (IS_ERR(cs))
> - return PTR_ERR(cs);
> -
> - *cs++ = dw1 | wait_mbox;
> - /* Throughout all of the GEM code, seqno passed implies our current
> - * seqno is >= the last seqno executed. However for hardware the
> - * comparison is strictly greater than.
> - */
> - *cs++ = signal->global_seqno - 1;
> - *cs++ = 0;
> - *cs++ = MI_NOOP;
> - intel_ring_advance(rq, cs);
> -
> - return 0;
> -}
> -
> static void
> gen5_seqno_barrier(struct intel_engine_cs *engine)
> {
> @@ -1602,12 +1535,6 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
> {
> struct drm_i915_private *i915 = rq->i915;
> struct intel_engine_cs *engine = rq->engine;
> - enum intel_engine_id id;
> - const int num_rings =
> - /* Use an extended w/a on gen7 if signalling from other rings */
> - (HAS_LEGACY_SEMAPHORES(i915) && IS_GEN(i915, 7)) ?
> - INTEL_INFO(i915)->num_rings - 1 :
> - 0;
> bool force_restore = false;
> int len;
> u32 *cs;
> @@ -1621,7 +1548,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
>
> len = 4;
> if (IS_GEN(i915, 7))
> - len += 2 + (num_rings ? 4*num_rings + 6 : 0);
> + len += 2;
> if (flags & MI_FORCE_RESTORE) {
> GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
> flags &= ~MI_FORCE_RESTORE;
> @@ -1634,23 +1561,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
> return PTR_ERR(cs);
>
> /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
> - if (IS_GEN(i915, 7)) {
> + if (IS_GEN(i915, 7))
> *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
> - if (num_rings) {
> - struct intel_engine_cs *signaller;
> -
> - *cs++ = MI_LOAD_REGISTER_IMM(num_rings);
> - for_each_engine(signaller, i915, id) {
> - if (signaller == engine)
> - continue;
> -
> - *cs++ = i915_mmio_reg_offset(
> - RING_PSMI_CTL(signaller->mmio_base));
> - *cs++ = _MASKED_BIT_ENABLE(
> - GEN6_PSMI_SLEEP_MSG_DISABLE);
> - }
> - }
> - }
>
> if (force_restore) {
> /*
> @@ -1681,30 +1593,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
> */
> *cs++ = MI_NOOP;
>
> - if (IS_GEN(i915, 7)) {
> - if (num_rings) {
> - struct intel_engine_cs *signaller;
> - i915_reg_t last_reg = {}; /* keep gcc quiet */
> -
> - *cs++ = MI_LOAD_REGISTER_IMM(num_rings);
> - for_each_engine(signaller, i915, id) {
> - if (signaller == engine)
> - continue;
> -
> - last_reg = RING_PSMI_CTL(signaller->mmio_base);
> - *cs++ = i915_mmio_reg_offset(last_reg);
> - *cs++ = _MASKED_BIT_DISABLE(
> - GEN6_PSMI_SLEEP_MSG_DISABLE);
> - }
> -
> - /* Insert a delay before the next switch! */
> - *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
> - *cs++ = i915_mmio_reg_offset(last_reg);
> - *cs++ = i915_scratch_offset(rq->i915);
> - *cs++ = MI_NOOP;
> - }
> + if (IS_GEN(i915, 7))
> *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> - }
>
> intel_ring_advance(rq, cs);
>
> @@ -2154,66 +2044,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
> return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
> }
>
> -static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
> - struct intel_engine_cs *engine)
> -{
> - int i;
> -
> - if (!HAS_LEGACY_SEMAPHORES(dev_priv))
> - return;
> -
> - GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
> - engine->semaphore.sync_to = gen6_ring_sync_to;
> - engine->semaphore.signal = gen6_signal;
> -
> - /*
> - * The current semaphore is only applied on pre-gen8
> - * platform. And there is no VCS2 ring on the pre-gen8
> - * platform. So the semaphore between RCS and VCS2 is
> - * initialized as INVALID.
> - */
> - for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) {
> - static const struct {
> - u32 wait_mbox;
> - i915_reg_t mbox_reg;
> - } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = {
> - [RCS_HW] = {
> - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC },
> - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC },
> - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC },
> - },
> - [VCS_HW] = {
> - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC },
> - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC },
> - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC },
> - },
> - [BCS_HW] = {
> - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC },
> - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC },
> - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC },
> - },
> - [VECS_HW] = {
> - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC },
> - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC },
> - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC },
> - },
> - };
> - u32 wait_mbox;
> - i915_reg_t mbox_reg;
> -
> - if (i == engine->hw_id) {
> - wait_mbox = MI_SEMAPHORE_SYNC_INVALID;
> - mbox_reg = GEN6_NOSYNC;
> - } else {
> - wait_mbox = sem_data[engine->hw_id][i].wait_mbox;
> - mbox_reg = sem_data[engine->hw_id][i].mbox_reg;
> - }
> -
> - engine->semaphore.mbox.wait[i] = wait_mbox;
> - engine->semaphore.mbox.signal[i] = mbox_reg;
> - }
> -}
> -
> static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
> struct intel_engine_cs *engine)
> {
> @@ -2256,7 +2086,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
> GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
>
> intel_ring_init_irq(dev_priv, engine);
> - intel_ring_init_semaphores(dev_priv, engine);
>
> engine->init_hw = init_ring_common;
> engine->reset.prepare = reset_prepare;
> @@ -2268,16 +2097,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>
> engine->emit_breadcrumb = i9xx_emit_breadcrumb;
> engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
> - if (HAS_LEGACY_SEMAPHORES(dev_priv)) {
> - int num_rings;
> -
> - engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
> -
> - num_rings = INTEL_INFO(dev_priv)->num_rings - 1;
> - engine->emit_breadcrumb_sz += num_rings * 3;
> - if (num_rings & 1)
> - engine->emit_breadcrumb_sz++;
> - }
>
> engine->set_default_submission = i9xx_set_default_submission;
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 6b41b9ce5f5b..c927bdfb1ed0 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -510,60 +510,6 @@ struct intel_engine_cs {
> void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
> void (*cleanup)(struct intel_engine_cs *engine);
>
> - /* GEN8 signal/wait table - never trust comments!
> - * signal to signal to signal to signal to signal to
> - * RCS VCS BCS VECS VCS2
> - * --------------------------------------------------------------------
> - * RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
> - * |-------------------------------------------------------------------
> - * VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
> - * |-------------------------------------------------------------------
> - * BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
> - * |-------------------------------------------------------------------
> - * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP (0x90) | VCS2 (0x98) |
> - * |-------------------------------------------------------------------
> - * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP (0xc0) |
> - * |-------------------------------------------------------------------
> - *
> - * Generalization:
> - * f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
> - * ie. transpose of g(x, y)
> - *
> - * sync from sync from sync from sync from sync from
> - * RCS VCS BCS VECS VCS2
> - * --------------------------------------------------------------------
> - * RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
> - * |-------------------------------------------------------------------
> - * VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
> - * |-------------------------------------------------------------------
> - * BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
> - * |-------------------------------------------------------------------
> - * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP (0x90) | VCS2 (0xb8) |
> - * |-------------------------------------------------------------------
> - * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP (0xc0) |
> - * |-------------------------------------------------------------------
> - *
> - * Generalization:
> - * g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
> - * ie. transpose of f(x, y)
> - */
> - struct {
> -#define GEN6_SEMAPHORE_LAST VECS_HW
> -#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1)
> -#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0)
> - struct {
> - /* our mbox written by others */
> - u32 wait[GEN6_NUM_SEMAPHORES];
> - /* mboxes this ring signals to */
> - i915_reg_t signal[GEN6_NUM_SEMAPHORES];
> - } mbox;
> -
> - /* AKA wait() */
> - int (*sync_to)(struct i915_request *rq,
> - struct i915_request *signal);
> - u32 *(*signal)(struct i915_request *rq, u32 *cs);
> - } semaphore;
> -
> struct intel_engine_execlists execlists;
>
> /* Contexts are pinned whilst they are active on the GPU. The last
> @@ -889,7 +835,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
> return tail;
> }
>
> -void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
> +void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
>
> void intel_engine_setup_common(struct intel_engine_cs *engine);
> int intel_engine_init_common(struct intel_engine_cs *engine);
> --
> 2.20.0
More information about the Intel-gfx
mailing list