[Intel-gfx] [PATCH v4 3/4] Introduce & use new lightweight SGL iterators

Fri May 20 08:15:06 UTC 2016

On Fri, May 20, 2016 at 01:31:29AM +0100, Dave Gordon wrote:
> The existing for_each_sg_page() iterator is somewhat heavyweight, and is
> limiting i915 driver performance in a few benchmarks. So here we
> introduce somewhat lighter weight iterators, primarily for use with GEM
> objects or other case where we need only deal with whole aligned pages.
> 
> Unlike the old iterator, the new iterators use an internal state
> structure which is not intended to be accessed by the caller; instead
> each takes as a parameter an output variable which is set before each
> iteration. This makes them particularly simple to use :)
> 
> One of the new iterators provides the caller with the DMA address of
> each page in turn; the other provides the 'struct page' pointer required
> by many memory management operations.
> 
> Various uses of for_each_sg_page() are then converted to the new macros.
> 
> Signed-off-by: Dave Gordon <david.s.gordon at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         | 62 +++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_gem.c         | 20 ++++-----
>  drivers/gpu/drm/i915/i915_gem_fence.c   | 14 +++---
>  drivers/gpu/drm/i915/i915_gem_gtt.c     | 76 ++++++++++++++++-----------------
>  drivers/gpu/drm/i915/i915_gem_userptr.c |  7 ++-
>  5 files changed, 116 insertions(+), 63 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6894d8e..01cde0f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2120,6 +2120,10 @@ struct drm_i915_gem_object_ops {
>  #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
>  	(0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
>  
> +void i915_gem_track_fb(struct drm_i915_gem_object *old,
> +		       struct drm_i915_gem_object *new,
> +		       unsigned frontbuffer_bits);
> +

That's not a much better spot, since this is now before the object it
acts upon. One day we'll get our types separated from their actors.

>  struct drm_i915_gem_object {
>  	struct drm_gem_object base;
>  
> @@ -2251,9 +2255,61 @@ struct drm_i915_gem_object {
>  };
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
> -void i915_gem_track_fb(struct drm_i915_gem_object *old,
> -		       struct drm_i915_gem_object *new,
> -		       unsigned frontbuffer_bits);
> +/*
> + * New optimised SGL iterator for GEM objects
> + */
> +struct sgt_iter {
> +	struct scatterlist *sgp;
> +	union {
> +		unsigned long pfn;
> +		unsigned long dma;
> +	} ix;

An anonymous union here would be slightly more pleasant. And we should
be using the correct types.

> +	unsigned int curr;
> +	unsigned int max;
> +};
> +
> +/* Constructor for new iterator */
> +static inline struct sgt_iter
> +__sgt_iter(struct scatterlist *sgl, bool dma)
> +{
> +	struct sgt_iter s = { .sgp = sgl };
> +
> +	if (s.sgp) {

Not acceptable just to GPF out if passed in a NULL? It's a BUG for all
our use cases. And would save a conditional every chain step.

> +		s.max = s.curr = s.sgp->offset;
> +		s.max += s.sgp->length;
> +		if (dma)
> +			s.ix.dma = sg_dma_address(s.sgp);
> +		else
> +			s.ix.pfn = page_to_pfn(sg_page(s.sgp));
> +	}
> +
> +	return s;
> +}
> +
> +/**
> + * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
> + * @__dmap:	DMA address (output)
> + * @__iter:	'struct sgt_iter' (iterator state, internal)
> + * @__sgt:	sg_table to iterate over (input)
> + */
> +#define for_each_sgt_dma(__dmap, __iter, __sgt)				\
> +	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
> +	     ((__dmap) = (__iter).ix.dma + (__iter).curr);		\

This prevents us using dma_addr_t 0, which is the error value iirc, so ok.

> +	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
> +		((__iter) = __sgt_iter(sg_next((__iter).sgp), true), 0))
> +
> +/**
> + * for_each_sgt_page - iterate over the pages of the given sg_table
> + * @__pp:	page pointer (output)
> + * @__iter:	'struct sgt_iter' (iterator state, internal)
> + * @__sgt:	sg_table to iterate over (input)
> + */
> +#define for_each_sgt_page(__pp, __iter, __sgt)				\
> +	for ((__iter) = __sgt_iter((__sgt)->sgl, false);		\
> +	     ((__pp) = (__iter).ix.pfn == 0 ? NULL :			\
> +	           pfn_to_page((__iter).ix.pfn + ((__iter).curr >> PAGE_SHIFT)));\

If we shifted in the __sgt_iter() we could do simpler instructions inside
the loop. The compiler won't know that pfn_to_page() is always true, so
maybe a pfn_to_page(),1 here will help?

> +	     (((__iter).curr += PAGE_SIZE) < (__iter).max) ||		\
> +		((__iter) = __sgt_iter(sg_next((__iter).sgp), false), 0))

-- 
Chris Wilson, Intel Open Source Technology Centre