[Intel-gfx] [PATCH 1/2 v3] drm/i915: Dumb down the semaphore logic

Daniel Vetter daniel at ffwll.ch
Wed Sep 14 09:40:50 CEST 2011


On Tue, Sep 13, 2011 at 09:11:46PM -0700, Ben Widawsky wrote:
> While I think the previous code is correct, it was hard to follow and
> hard to debug. Since we already have a ring abstraction, might as well
> use it to handle the semaphore updates and compares.
> 
> I don't expect this code to make semaphores better or worse, but you
> never know...
> 
> v2: Noting special
> * Took Keith's suggestions in.
> * Tried to take Daniel's suggestion in, but the change got too intrusive for
>   this supposedly simple cleanup.
> * Ran Daniel's gem_ring_sync_loop test on this.
> 
> v3: I thought I was smarter than Keith, which turned out to be wrong.
> 
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> Cc: Keith Packard <keithp at keithp.com>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    3 +-
>  drivers/gpu/drm/i915/i915_reg.h            |   13 ++
>  drivers/gpu/drm/i915/intel_ringbuffer.c    |  178 +++++++++++++++++++++-------
>  drivers/gpu/drm/i915/intel_ringbuffer.h    |    7 +-
>  4 files changed, 152 insertions(+), 49 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 4934cf8..3693e83 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -784,7 +784,8 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
>  	}
>  
>  	from->sync_seqno[idx] = seqno;
> -	return intel_ring_sync(to, from, seqno - 1);
> +
> +	return to->sync_to(to, from, seqno - 1);
>  }
>  
>  static int
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 542453f..6a92364 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -194,6 +194,13 @@
>  #define  MI_SEMAPHORE_UPDATE	    (1<<21)
>  #define  MI_SEMAPHORE_COMPARE	    (1<<20)
>  #define  MI_SEMAPHORE_REGISTER	    (1<<18)
> +#define  MI_SEMAPHORE_SYNC_RV	    (2<<16)
> +#define  MI_SEMAPHORE_SYNC_RB	    (0<<16)
> +#define  MI_SEMAPHORE_SYNC_VR	    (0<<16)
> +#define  MI_SEMAPHORE_SYNC_VB	    (2<<16)
> +#define  MI_SEMAPHORE_SYNC_BR	    (2<<16)
> +#define  MI_SEMAPHORE_SYNC_BV	    (0<<16)
> +#define  MI_SEMAPHORE_SYNC_INVALID  (1<<0)
>  /*
>   * 3D instructions used by the kernel
>   */
> @@ -296,6 +303,12 @@
>  #define RING_CTL(base)		((base)+0x3c)
>  #define RING_SYNC_0(base)	((base)+0x40)
>  #define RING_SYNC_1(base)	((base)+0x44)
> +#define RVSYNC (RING_SYNC_0(dev_priv->ring[RCS].mmio_base))
> +#define RBSYNC (RING_SYNC_1(dev_priv->ring[RCS].mmio_base))
> +#define VRSYNC (RING_SYNC_1(dev_priv->ring[VCS].mmio_base))
> +#define VBSYNC (RING_SYNC_0(dev_priv->ring[VCS].mmio_base))
> +#define BRSYNC (RING_SYNC_0(dev_priv->ring[BCS].mmio_base))
> +#define BVSYNC (RING_SYNC_1(dev_priv->ring[BCS].mmio_base))
>  #define RING_MAX_IDLE(base)	((base)+0x54)
>  #define RING_HWS_PGA(base)	((base)+0x80)
>  #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index c30626e..d25c275 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -315,79 +315,155 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
>  }
>  
>  static void
> -update_semaphore(struct intel_ring_buffer *ring, int i, u32 seqno)
> +update_mboxes(struct intel_ring_buffer *ring,
> +	    u32 seqno,
> +	    u32 mmio_offset)
>  {
> -	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	int id;
> -
> -	/*
> -	 * cs -> 1 = vcs, 0 = bcs
> -	 * vcs -> 1 = bcs, 0 = cs,
> -	 * bcs -> 1 = cs, 0 = vcs.
> -	 */
> -	id = ring - dev_priv->ring;
> -	id += 2 - i;
> -	id %= 3;
> -
> -	intel_ring_emit(ring,
> -			MI_SEMAPHORE_MBOX |
> -			MI_SEMAPHORE_REGISTER |
> -			MI_SEMAPHORE_UPDATE);
> +	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
> +			      MI_SEMAPHORE_GLOBAL_GTT |
> +			      MI_SEMAPHORE_REGISTER |
> +			      MI_SEMAPHORE_UPDATE);
>  	intel_ring_emit(ring, seqno);
> -	intel_ring_emit(ring,
> -			RING_SYNC_0(dev_priv->ring[id].mmio_base) + 4*i);
> +	intel_ring_emit(ring, mmio_offset);
>  }
>  
> -static int
> +/**
> + * gen6_add_request - Update the semaphore mailbox registers
> + * 
> + * @ring - ring that is adding a request
> + * @mbox1_reg - mailbox address for RCS or VCS ring
> + * @mbox2_reg - mailbox address for VCS or BCS ring
> + * @seqno - return seqno stuck into the ring
> + *
> + * Update the mailbox registers in the *other* rings with the current seqno.
> + * This acts like a signal in the canonical semaphore.
> + */
> +static u32
>  gen6_add_request(struct intel_ring_buffer *ring,
> -		 u32 *result)
> +		 u32 mbox1_reg,
> +		 u32 mbox2_reg,
> +		 u32 *seqno)
>  {
> -	u32 seqno;
>  	int ret;
>  
>  	ret = intel_ring_begin(ring, 10);
>  	if (ret)
>  		return ret;
>  
> -	seqno = i915_gem_get_seqno(ring->dev);
> -	update_semaphore(ring, 0, seqno);
> -	update_semaphore(ring, 1, seqno);
> +	*seqno = i915_gem_get_seqno(ring->dev);
>  
> +	update_mboxes(ring, *seqno, mbox1_reg);
> +	update_mboxes(ring, *seqno, mbox2_reg);
>  	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>  	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -	intel_ring_emit(ring, seqno);
> +	intel_ring_emit(ring, *seqno);
>  	intel_ring_emit(ring, MI_USER_INTERRUPT);
>  	intel_ring_advance(ring);
>  
> -	*result = seqno;
>  	return 0;
>  }
>  
> -int
> -intel_ring_sync(struct intel_ring_buffer *ring,
> -		struct intel_ring_buffer *to,
> +static int
> +gen6_blt_add_request(struct intel_ring_buffer *ring,
> +		     u32 *result)
> +{
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	return gen6_add_request(ring,
> +				RBSYNC, /* render->blitter sync */
> +				VBSYNC, /* video->blitter sync */
> +				result);
> +}
> +
> +static int
> +gen6_bsd_add_request(struct intel_ring_buffer *ring,
> +		     u32 *result)
> +{
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	return gen6_add_request(ring,
> +				RVSYNC, /* video->render sync */
> +				BVSYNC, /* video->blitter sync */
> +				result);
> +}
> +
> +static int
> +gen6_render_add_request(struct intel_ring_buffer *ring,
> +		        u32 *result)
> +{
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	return gen6_add_request(ring,
> +				VRSYNC, /* render->video sync */
> +				BRSYNC, /* render->blitter sync */
> +				result);
> +}

I don't get the point of this indirection. Relative to the ring mmio base,
you're always writing the same two ring sync registers. So why not just
drop the ..SYNC defines, kill these three functions and change the
update_mboxes calls in gen6_add_request to use the RING_SYNC_(0|1) macros?
Imo this code here is just bloat and actually adds to the confusion.

> +static int
> +intel_ring_sync(struct intel_ring_buffer *waiter,
> +		struct intel_ring_buffer *signaller,
> +		int ring,
>  		u32 seqno)
>  {
>  	int ret;
> +	u32 temp = MI_SEMAPHORE_MBOX |
> +		   MI_SEMAPHORE_COMPARE |
> +		   MI_SEMAPHORE_REGISTER;
>  
> -	ret = intel_ring_begin(ring, 4);
> +	ret = intel_ring_begin(waiter, 4);
>  	if (ret)
>  		return ret;
>  
> -	intel_ring_emit(ring,
> -			MI_SEMAPHORE_MBOX |
> -			MI_SEMAPHORE_REGISTER |
> -			intel_ring_sync_index(ring, to) << 17 |
> -			MI_SEMAPHORE_COMPARE);
> -	intel_ring_emit(ring, seqno);
> -	intel_ring_emit(ring, 0);
> -	intel_ring_emit(ring, MI_NOOP);
> -	intel_ring_advance(ring);
> +	intel_ring_emit(waiter, temp | signaller->semaphore_register[ring]);
> +	intel_ring_emit(waiter, seqno);
> +	intel_ring_emit(waiter, 0);
> +	intel_ring_emit(waiter, MI_NOOP);
> +	intel_ring_advance(waiter);
>  
>  	return 0;
>  }
>  
> +/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
> +int
> +render_ring_sync_to(struct intel_ring_buffer *waiter,
> +		    struct intel_ring_buffer *signaller,
> +		    u32 seqno)
> +{
> +	WARN_ON(signaller->semaphore_register[RCS] == MI_SEMAPHORE_SYNC_INVALID);
> +	return intel_ring_sync(waiter,
> +			       signaller,
> +			       RCS,
> +			       seqno);
> +}
> +
> +/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
> +int
> +gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
> +		      struct intel_ring_buffer *signaller,
> +		      u32 seqno)
> +{
> +	WARN_ON(signaller->semaphore_register[VCS] == MI_SEMAPHORE_SYNC_INVALID);
> +	return intel_ring_sync(waiter,
> +			       signaller,
> +			       VCS,
> +			       seqno);
> +}
> +
> +/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
> +int
> +gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
> +		      struct intel_ring_buffer *signaller,
> +		      u32 seqno)
> +{
> +	WARN_ON(signaller->semaphore_register[BCS] == MI_SEMAPHORE_SYNC_INVALID);
> +	return intel_ring_sync(waiter,
> +			       signaller,
> +			       BCS,
> +			       seqno);
> +}

What about storing the ring id (i.e. RCS, VCS, BCS) in the ring struct?
This way the above 4 functions would fold into one, with
	ring = waiter->id;
in intel_ring_sync. Now I know that there's already a ->id field, but
that's actually a ring flag (and used as such in all use-sites).

So my suggestion is to first change the values in ->id from flags to real
ids (i.e. RCS, VCS, BCS) and add a small inline helper
unsigned intel_ringbuffer_flag(ring)
{
	return (1<<ring->id);
}
for the current use-sites of ->id that actually want a flag. And then use
->id as suggested above.

With these 2 changes the bloat in your patch disappears and your cleanup
becomes much more clear. And I really like how this cleanup kills the
black magic from before.

>  #define PIPE_CONTROL_FLUSH(ring__, addr__)					\
>  do {									\
>  	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |		\
> @@ -1027,6 +1103,10 @@ static const struct intel_ring_buffer render_ring = {
>  	.irq_put		= render_ring_put_irq,
>  	.dispatch_execbuffer	= render_ring_dispatch_execbuffer,
>         .cleanup			= render_ring_cleanup,
> +	.sync_to		= render_ring_sync_to,
> +	.semaphore_register	= {MI_SEMAPHORE_SYNC_INVALID,
> +				   MI_SEMAPHORE_SYNC_RV,
> +				   MI_SEMAPHORE_SYNC_RB},
>  };
>  
>  /* ring buffer for bit-stream decoder */
> @@ -1149,11 +1229,15 @@ static const struct intel_ring_buffer gen6_bsd_ring = {
>  	.init			= init_ring_common,
>  	.write_tail		= gen6_bsd_ring_write_tail,
>  	.flush			= gen6_ring_flush,
> -	.add_request		= gen6_add_request,
> +	.add_request		= gen6_bsd_add_request,
>  	.get_seqno		= ring_get_seqno,
>  	.irq_get		= gen6_bsd_ring_get_irq,
>  	.irq_put		= gen6_bsd_ring_put_irq,
>  	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
> +	.sync_to		= gen6_bsd_ring_sync_to,
> +	.semaphore_register	= {MI_SEMAPHORE_SYNC_VR,
> +				   MI_SEMAPHORE_SYNC_INVALID,
> +				   MI_SEMAPHORE_SYNC_VB},
>  };
>  
>  /* Blitter support (SandyBridge+) */
> @@ -1279,12 +1363,16 @@ static const struct intel_ring_buffer gen6_blt_ring = {
>         .init			= blt_ring_init,
>         .write_tail		= ring_write_tail,
>         .flush			= blt_ring_flush,
> -       .add_request		= gen6_add_request,
> +       .add_request		= gen6_blt_add_request,
>         .get_seqno		= ring_get_seqno,
>         .irq_get			= blt_ring_get_irq,
>         .irq_put			= blt_ring_put_irq,
>         .dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
>         .cleanup			= blt_ring_cleanup,
> +       .sync_to			= gen6_blt_ring_sync_to,
> +       .semaphore_register	= {MI_SEMAPHORE_SYNC_BR,
> +				   MI_SEMAPHORE_SYNC_BV,
> +				   MI_SEMAPHORE_SYNC_INVALID},
>  };
>  
>  int intel_init_render_ring_buffer(struct drm_device *dev)
> @@ -1294,7 +1382,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  
>  	*ring = render_ring;
>  	if (INTEL_INFO(dev)->gen >= 6) {
> -		ring->add_request = gen6_add_request;
> +		ring->add_request = gen6_render_add_request;
>  		ring->irq_get = gen6_render_ring_get_irq;
>  		ring->irq_put = gen6_render_ring_put_irq;
>  	} else if (IS_GEN5(dev)) {
> @@ -1317,7 +1405,7 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
>  
>  	*ring = render_ring;
>  	if (INTEL_INFO(dev)->gen >= 6) {
> -		ring->add_request = gen6_add_request;
> +		ring->add_request = gen6_render_add_request;
>  		ring->irq_get = gen6_render_ring_get_irq;
>  		ring->irq_put = gen6_render_ring_put_irq;
>  	} else if (IS_GEN5(dev)) {

Unrelated rant: Patching up a static function table is just bad style. We
get away with this because we only ever have one intel gpu in a machine,
but still.

> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 39ac2b6..98052fd 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -75,7 +75,11 @@ struct  intel_ring_buffer {
>  	int		(*dispatch_execbuffer)(struct intel_ring_buffer *ring,
>  					       u32 offset, u32 length);
>  	void		(*cleanup)(struct intel_ring_buffer *ring);
> +	int		(*sync_to)(struct intel_ring_buffer *ring,
> +				   struct intel_ring_buffer *to,
> +				   u32 seqno);
>  
> +	u32		semaphore_register[3];
>  	/**
>  	 * List of objects currently involved in rendering from the
>  	 * ringbuffer.
> @@ -180,9 +184,6 @@ static inline void intel_ring_emit(struct intel_ring_buffer *ring,
>  void intel_ring_advance(struct intel_ring_buffer *ring);
>  
>  u32 intel_ring_get_seqno(struct intel_ring_buffer *ring);
> -int intel_ring_sync(struct intel_ring_buffer *ring,
> -		    struct intel_ring_buffer *to,
> -		    u32 seqno);
>  
>  int intel_init_render_ring_buffer(struct drm_device *dev);
>  int intel_init_bsd_ring_buffer(struct drm_device *dev);
> -- 
> 1.7.6.1
> 

-- 
Daniel Vetter
Mail: daniel at ffwll.ch
Mobile: +41 (0)79 365 57 48



More information about the Intel-gfx mailing list