[Intel-gfx] [PATCH v4 3/4] Introduce & use new lightweight SGL iterators
Chris Wilson
chris at chris-wilson.co.uk
Fri May 20 08:15:06 UTC 2016
On Fri, May 20, 2016 at 01:31:29AM +0100, Dave Gordon wrote:
> The existing for_each_sg_page() iterator is somewhat heavyweight, and is
> limiting i915 driver performance in a few benchmarks. So here we
> introduce somewhat lighter weight iterators, primarily for use with GEM
> objects or other case where we need only deal with whole aligned pages.
>
> Unlike the old iterator, the new iterators use an internal state
> structure which is not intended to be accessed by the caller; instead
> each takes as a parameter an output variable which is set before each
> iteration. This makes them particularly simple to use :)
>
> One of the new iterators provides the caller with the DMA address of
> each page in turn; the other provides the 'struct page' pointer required
> by many memory management operations.
>
> Various uses of for_each_sg_page() are then converted to the new macros.
>
> Signed-off-by: Dave Gordon <david.s.gordon at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 62 +++++++++++++++++++++++++--
> drivers/gpu/drm/i915/i915_gem.c | 20 ++++-----
> drivers/gpu/drm/i915/i915_gem_fence.c | 14 +++---
> drivers/gpu/drm/i915/i915_gem_gtt.c | 76 ++++++++++++++++-----------------
> drivers/gpu/drm/i915/i915_gem_userptr.c | 7 ++-
> 5 files changed, 116 insertions(+), 63 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6894d8e..01cde0f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2120,6 +2120,10 @@ struct drm_i915_gem_object_ops {
> #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
> (0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
>
> +void i915_gem_track_fb(struct drm_i915_gem_object *old,
> + struct drm_i915_gem_object *new,
> + unsigned frontbuffer_bits);
> +
That's not a much better spot, since this is now before the object it
acts upon. One day we'll get our types separated from their actors.
> struct drm_i915_gem_object {
> struct drm_gem_object base;
>
> @@ -2251,9 +2255,61 @@ struct drm_i915_gem_object {
> };
> #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>
> -void i915_gem_track_fb(struct drm_i915_gem_object *old,
> - struct drm_i915_gem_object *new,
> - unsigned frontbuffer_bits);
> +/*
> + * New optimised SGL iterator for GEM objects
> + */
> +struct sgt_iter {
> + struct scatterlist *sgp;
> + union {
> + unsigned long pfn;
> + unsigned long dma;
> + } ix;
An anonymous union here would be slightly more pleasant. And we should
be using the correct types.
> + unsigned int curr;
> + unsigned int max;
> +};
> +
> +/* Constructor for new iterator */
> +static inline struct sgt_iter
> +__sgt_iter(struct scatterlist *sgl, bool dma)
> +{
> + struct sgt_iter s = { .sgp = sgl };
> +
> + if (s.sgp) {
Not acceptable just to GPF out if passed in a NULL? It's a BUG for all
our use cases. And would save a conditional every chain step.
> + s.max = s.curr = s.sgp->offset;
> + s.max += s.sgp->length;
> + if (dma)
> + s.ix.dma = sg_dma_address(s.sgp);
> + else
> + s.ix.pfn = page_to_pfn(sg_page(s.sgp));
> + }
> +
> + return s;
> +}
> +
> +/**
> + * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
> + * @__dmap: DMA address (output)
> + * @__iter: 'struct sgt_iter' (iterator state, internal)
> + * @__sgt: sg_table to iterate over (input)
> + */
> +#define for_each_sgt_dma(__dmap, __iter, __sgt) \
> + for ((__iter) = __sgt_iter((__sgt)->sgl, true); \
> + ((__dmap) = (__iter).ix.dma + (__iter).curr); \
This prevents us using dma_addr_t 0, which is the error value iirc, so ok.
> + (((__iter).curr += PAGE_SIZE) < (__iter).max) || \
> + ((__iter) = __sgt_iter(sg_next((__iter).sgp), true), 0))
> +
> +/**
> + * for_each_sgt_page - iterate over the pages of the given sg_table
> + * @__pp: page pointer (output)
> + * @__iter: 'struct sgt_iter' (iterator state, internal)
> + * @__sgt: sg_table to iterate over (input)
> + */
> +#define for_each_sgt_page(__pp, __iter, __sgt) \
> + for ((__iter) = __sgt_iter((__sgt)->sgl, false); \
> + ((__pp) = (__iter).ix.pfn == 0 ? NULL : \
> + pfn_to_page((__iter).ix.pfn + ((__iter).curr >> PAGE_SHIFT)));\
If we shifted in the __sgt_iter() we could do simpler instructions inside
the loop. The compiler won't know that pfn_to_page() is always true, so
maybe a pfn_to_page(),1 here will help?
> + (((__iter).curr += PAGE_SIZE) < (__iter).max) || \
> + ((__iter) = __sgt_iter(sg_next((__iter).sgp), false), 0))
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list