[Intel-gfx] [PATCH 10/11] drm/i915: add irq_barrier operation for synchronising reads

Daniel Vetter daniel at ffwll.ch
Wed Jan 28 01:55:53 PST 2015


On Mon, Jan 26, 2015 at 04:43:24AM -0800, Rodrigo Vivi wrote:
> From: Dave Gordon <david.s.gordon at intel.com>
> 
> On some generations of chips, it is necessary to read an MMIO register
> before getting the sequence number from the status page in main memory,
> in order to ensure coherency; and on all generations this should be
> either helpful or harmless.
> 
> In general, we want this operation to be the cheapest possible, since
> we require only the side-effect of DMA completion and don't interpret
> the result of the read, and don't require any coordination with other
> threads, power domains, or anything else.
> 
> However, finding a suitable register may be problematic; on GEN6 chips
> the ACTHD register was used, but on VLV et al access to this register
> requires FORCEWAKE and therefore many complications involving spinlocks
> and polling.
> 
> So this commit introduces this synchronising operation as a distinct
> vfunc in the engine structure, so that it can be GEN- or chip-specific
> if needed.
> 
> And there are three implementations; a dummy one, for chips where no
> synchronising read is needed, a gen6(+) version that issues a posting
> read (to TAIL), and a VLV-specific one that issues a raw read instead,
> avoiding touching FORCEWAKE and GTFIFO and other such complications.
> 
> We then change gen6_ring_get_seqno() to use this new irq_barrier rather
> than a POSTING_READ of ACTHD. Note that both older (pre-GEN6) and newer
> (GEN8+) devices running in LRC mode do not currently include any posting
> read in their own get_seqno() implementations, so this change only
> makes a difference on VLV (and not CHV+).
> 
> Signed-off-by: Dave Gordon <david.s.gordon at intel.com>
> Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he at intel.com)
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 37 +++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
>  2 files changed, 36 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 23020d6..97473ed 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1227,6 +1227,28 @@ pc_render_add_request(struct intel_engine_cs *ring)
>  	return 0;
>  }
>  
> +static void
> +dummy_irq_barrier(struct intel_engine_cs *ring)
> +{
> +}
> +
> +static void
> +gen6_irq_barrier(struct intel_engine_cs *ring)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +	POSTING_READ(RING_TAIL(ring->mmio_base));
> +}
> +
> +#define __raw_i915_read32(dev_priv__, reg__)	readl((dev_priv__)->regs + (reg__))
> +#define RAW_POSTING_READ(reg__)			(void)__raw_i915_read32(dev_priv, reg__)
> +
> +static void
> +vlv_irq_barrier(struct intel_engine_cs *ring)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +	RAW_POSTING_READ(RING_TAIL(ring->mmio_base));
> +}
> +
>  static u32
>  gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
>  {
> @@ -1234,8 +1256,7 @@ gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
>  	 * ivb (and maybe also on snb) by reading from a CS register (like
>  	 * ACTHD) before reading the status page. */
>  	if (!lazy_coherency) {
> -		struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -		POSTING_READ(RING_ACTHD(ring->mmio_base));
> +		ring->irq_barrier(ring);
>  	}

Imo just do a vlv_ring_get_seqno if this is a problem. Adding a vfunc with
mostly empty or same implemenation to another very tiny vfunc isn't doing
a whole lot of good to the codebase.
-Daniel

>  
>  	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
> @@ -2393,6 +2414,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  		ring->irq_get = gen8_ring_get_irq;
>  		ring->irq_put = gen8_ring_put_irq;
>  		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
> +		ring->irq_barrier = gen6_irq_barrier;
>  		ring->get_seqno = gen6_ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
>  		if (i915_semaphore_is_enabled(dev)) {
> @@ -2409,6 +2431,10 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  		ring->irq_get = gen6_ring_get_irq;
>  		ring->irq_put = gen6_ring_put_irq;
>  		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
> +		if (IS_VALLEYVIEW(dev) && !IS_GEN8(dev))
> +			ring->irq_barrier = vlv_irq_barrier;
> +		else
> +			ring->irq_barrier = gen6_irq_barrier;
>  		ring->get_seqno = gen6_ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
>  		if (i915_semaphore_is_enabled(dev)) {
> @@ -2435,6 +2461,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  	} else if (IS_GEN5(dev)) {
>  		ring->add_request = pc_render_add_request;
>  		ring->flush = gen4_render_ring_flush;
> +		ring->irq_barrier = dummy_irq_barrier;
>  		ring->get_seqno = pc_render_get_seqno;
>  		ring->set_seqno = pc_render_set_seqno;
>  		ring->irq_get = gen5_ring_get_irq;
> @@ -2447,6 +2474,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>  			ring->flush = gen2_render_ring_flush;
>  		else
>  			ring->flush = gen4_render_ring_flush;
> +		ring->irq_barrier = dummy_irq_barrier;
>  		ring->get_seqno = ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
>  		if (IS_GEN2(dev)) {
> @@ -2523,6 +2551,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
>  			ring->write_tail = gen6_bsd_ring_write_tail;
>  		ring->flush = gen6_bsd_ring_flush;
>  		ring->add_request = gen6_add_request;
> +		ring->irq_barrier = gen6_irq_barrier;
>  		ring->get_seqno = gen6_ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
>  		if (INTEL_INFO(dev)->gen >= 8) {
> @@ -2562,6 +2591,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
>  		ring->mmio_base = BSD_RING_BASE;
>  		ring->flush = bsd_ring_flush;
>  		ring->add_request = i9xx_add_request;
> +		ring->irq_barrier = dummy_irq_barrier;
>  		ring->get_seqno = ring_get_seqno;
>  		ring->set_seqno = ring_set_seqno;
>  		if (IS_GEN5(dev)) {
> @@ -2601,6 +2631,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev)
>  	ring->mmio_base = GEN8_BSD2_RING_BASE;
>  	ring->flush = gen6_bsd_ring_flush;
>  	ring->add_request = gen6_add_request;
> +	ring->irq_barrier = gen6_irq_barrier;
>  	ring->get_seqno = gen6_ring_get_seqno;
>  	ring->set_seqno = ring_set_seqno;
>  	ring->irq_enable_mask =
> @@ -2631,6 +2662,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
>  	ring->write_tail = ring_write_tail;
>  	ring->flush = gen6_ring_flush;
>  	ring->add_request = gen6_add_request;
> +	ring->irq_barrier = gen6_irq_barrier;
>  	ring->get_seqno = gen6_ring_get_seqno;
>  	ring->set_seqno = ring_set_seqno;
>  	if (INTEL_INFO(dev)->gen >= 8) {
> @@ -2688,6 +2720,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
>  	ring->write_tail = ring_write_tail;
>  	ring->flush = gen6_ring_flush;
>  	ring->add_request = gen6_add_request;
> +	ring->irq_barrier = gen6_irq_barrier;
>  	ring->get_seqno = gen6_ring_get_seqno;
>  	ring->set_seqno = ring_set_seqno;
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 6dbb6f4..f686929 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -163,6 +163,7 @@ struct  intel_engine_cs {
>  	 * seen value is good enough. Note that the seqno will always be
>  	 * monotonic, even if not coherent.
>  	 */
> +	void		(*irq_barrier)(struct intel_engine_cs *ring);
>  	u32		(*get_seqno)(struct intel_engine_cs *ring,
>  				     bool lazy_coherency);
>  	void		(*set_seqno)(struct intel_engine_cs *ring,
> -- 
> 1.9.3
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch


More information about the Intel-gfx mailing list