[Intel-gfx] [PATCH 06/13] drm/i915/bdw: implement semaphore signal
Ville Syrjälä
ville.syrjala at linux.intel.com
Thu Jan 30 13:38:17 CET 2014
On Wed, Jan 29, 2014 at 11:55:26AM -0800, Ben Widawsky wrote:
> Semaphore signalling works similarly to previous GENs with the exception
> that the per ring mailboxes no longer exist. Instead you must define
> your own space, somewhere in the GTT.
>
> The comments in the code define the layout I've opted for, which should
> be fairly future proof. Ie. I tried to define offsets in abstract terms
> (NUM_RINGS, seqno size, etc).
>
> NOTE: If one wanted to move this to the HWSP they could. I've decided
> one 4k object would be easier to deal with, and provide potential wins
> with cache locality, but that's all speculative.
>
> v2: Update the macro to not need the other ring's ring->id (Chris)
> Update the comment to use the correct formula (Chris)
>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/i915_reg.h | 5 +-
> drivers/gpu/drm/i915/intel_ringbuffer.c | 199 +++++++++++++++++++++++++-------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 38 +++++-
> 4 files changed, 197 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3673ba1..f521059 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1380,6 +1380,7 @@ typedef struct drm_i915_private {
>
> struct pci_dev *bridge_dev;
> struct intel_ring_buffer ring[I915_NUM_RINGS];
> + struct drm_i915_gem_object *semaphore_obj;
> uint32_t last_seqno, next_seqno;
>
> drm_dma_handle_t *status_page_dmah;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index cbbaf26..8b745dc 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -216,7 +216,7 @@
> #define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19)
> #define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19)
> #define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19)
> -#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6+ */
> +#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6, gen7 */
> #define MI_SEMAPHORE_GLOBAL_GTT (1<<22)
> #define MI_SEMAPHORE_UPDATE (1<<21)
> #define MI_SEMAPHORE_COMPARE (1<<20)
> @@ -241,6 +241,8 @@
> #define MI_RESTORE_EXT_STATE_EN (1<<2)
> #define MI_FORCE_RESTORE (1<<1)
> #define MI_RESTORE_INHIBIT (1<<0)
> +#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */
> +#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
> #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
> #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */
> #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1)
> @@ -329,6 +331,7 @@
> #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */
> #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9)
> #define PIPE_CONTROL_NOTIFY (1<<8)
> +#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */
> #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4)
> #define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3)
> #define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 37ae2b1..b750835 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -619,6 +619,13 @@ static int init_render_ring(struct intel_ring_buffer *ring)
> static void render_ring_cleanup(struct intel_ring_buffer *ring)
> {
> struct drm_device *dev = ring->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> +
> + if (dev_priv->semaphore_obj) {
> + i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
> + drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
> + dev_priv->semaphore_obj = NULL;
> + }
>
> if (ring->scratch.obj == NULL)
> return;
> @@ -632,6 +639,86 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
> ring->scratch.obj = NULL;
> }
>
> +static int gen8_rcs_signal(struct intel_ring_buffer *signaller,
> + unsigned int num_dwords)
> +{
> +#define MBOX_UPDATE_DWORDS 8
> + struct drm_device *dev = signaller->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct intel_ring_buffer *waiter;
> + int i, ret, num_rings;
> +
> + num_rings = hweight_long(INTEL_INFO(dev)->ring_mask);
> + num_dwords = (num_rings-1) * MBOX_UPDATE_DWORDS;
Again, this should be "num_dwords +=" rather than "num_dwords =", so the dwords the caller asked for are kept,
> +#undef MBOX_UPDATE_DWORDS
> +
> + /* XXX: + 4 for the caller */
> + ret = intel_ring_begin(signaller, num_dwords + 4);
and then the "+ 4" in this intel_ring_begin() call goes away.
> + if (ret)
> + return ret;
> +
> + for_each_ring(waiter, dev_priv, i) {
> + u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
> + if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
> + continue;
> +
> + intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
> + intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
> + PIPE_CONTROL_QW_WRITE |
> + PIPE_CONTROL_FLUSH_ENABLE);
> + intel_ring_emit(signaller, lower_32_bits(gtt_offset));
> + intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> + intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> + intel_ring_emit(signaller, 0);
> + intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
> + MI_SEMAPHORE_TARGET(waiter->id));
> + intel_ring_emit(signaller, 0);
> + }
> +
> + WARN_ON(i != num_rings);
> +
> + return 0;
> +}
<snip>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c69ae10..f1e7a66 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -111,6 +111,39 @@ struct intel_ring_buffer {
> #define I915_DISPATCH_PINNED 0x2
> void (*cleanup)(struct intel_ring_buffer *ring);
>
> + /* GEN8 signal/wait table
> + * signal to signal to signal to signal to
> + * RCS VCS BCS VECS
> + * ------------------------------------------------------
> + * RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) |
> + * |-----------------------------------------------------
> + * VCS | RCS (0x20) | NOP (0x28) | BCS (0x30) | VECS (0x38) |
> + * |-----------------------------------------------------
> + * BCS | RCS (0x40) | VCS (0x48) | NOP (0x50) | VECS (0x58) |
> + * |-----------------------------------------------------
> + * VECS | RCS (0x60) | VCS (0x68) | BCS (0x70) | NOP (0x78) |
> + * |-----------------------------------------------------
> + *
> + * Generalization:
> + * f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
> + * ie. transpose of g(x, y)
> + *
> + * sync from sync from sync from sync from
> + * RCS VCS BCS VECS
> + * ------------------------------------------------------
> + * RCS | NOP (0x00) | VCS (0x20) | BCS (0x40) | VECS (0x60) |
> + * |-----------------------------------------------------
> + * VCS | RCS (0x08) | NOP (0x28) | BCS (0x48) | VECS (0x68) |
> + * |-----------------------------------------------------
> + * BCS | RCS (0x10) | VCS (0x30) | NOP (0x50) | VECS (0x70) |
> + * |-----------------------------------------------------
> + * VECS | RCS (0x18) | VCS (0x38) | BCS (0x58) | NOP (0x78) |
> + * |-----------------------------------------------------
> + *
> + * Generalization:
> + * g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
> + * ie. transpose of f(x, y)
> + */
> struct {
> u32 sync_seqno[I915_NUM_RINGS-1];
> /* AKA wait() */
> @@ -120,7 +153,10 @@ struct intel_ring_buffer {
> /* our mbox written by others */
> u32 mbox[I915_NUM_RINGS];
Shouldn't mbox also get a u64 counterpart, the same way signal_mbox gets signal_ggtt below?
> /* mboxes this ring signals to */
> - u32 signal_mbox[I915_NUM_RINGS];
> + union {
> + u32 signal_mbox[I915_NUM_RINGS];
> + u64 signal_ggtt[I915_NUM_RINGS];
> + };
>
> /* num_dwords is space the caller will need for atomic update */
> int (*signal)(struct intel_ring_buffer *signaller,
> --
> 1.8.5.3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Ville Syrjälä
Intel OTC
More information about the Intel-gfx
mailing list