[Intel-gfx] [PATCH v4 3/4] Introduce & use new lightweight SGL iterators
Chris Wilson
chris at chris-wilson.co.uk
Fri May 20 09:56:31 UTC 2016
On Fri, May 20, 2016 at 09:15:06AM +0100, Chris Wilson wrote:
> On Fri, May 20, 2016 at 01:31:29AM +0100, Dave Gordon wrote:
> > The existing for_each_sg_page() iterator is somewhat heavyweight, and is
> > limiting i915 driver performance in a few benchmarks. So here we
> > introduce somewhat lighter weight iterators, primarily for use with GEM
> > objects or other case where we need only deal with whole aligned pages.
> >
> > Unlike the old iterator, the new iterators use an internal state
> > structure which is not intended to be accessed by the caller; instead
> > each takes as a parameter an output variable which is set before each
> > iteration. This makes them particularly simple to use :)
> >
> > One of the new iterators provides the caller with the DMA address of
> > each page in turn; the other provides the 'struct page' pointer required
> > by many memory management operations.
> >
> > Various uses of for_each_sg_page() are then converted to the new macros.
> >
> > Signed-off-by: Dave Gordon <david.s.gordon at intel.com>
> > Cc: Chris Wilson <chris at chris-wilson.co.uk>
> > ---
> > drivers/gpu/drm/i915/i915_drv.h | 62 +++++++++++++++++++++++++--
> > drivers/gpu/drm/i915/i915_gem.c | 20 ++++-----
> > drivers/gpu/drm/i915/i915_gem_fence.c | 14 +++---
> > drivers/gpu/drm/i915/i915_gem_gtt.c | 76 ++++++++++++++++-----------------
> > drivers/gpu/drm/i915/i915_gem_userptr.c | 7 ++-
> > 5 files changed, 116 insertions(+), 63 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 6894d8e..01cde0f 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2120,6 +2120,10 @@ struct drm_i915_gem_object_ops {
> > #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
> > (0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
> >
> > +void i915_gem_track_fb(struct drm_i915_gem_object *old,
> > + struct drm_i915_gem_object *new,
> > + unsigned frontbuffer_bits);
> > +
>
> That's not a much better spot, since this is now before the object it
> acts upon. One day we'll get our types separated from their actors.
>
> > struct drm_i915_gem_object {
> > struct drm_gem_object base;
> >
> > @@ -2251,9 +2255,61 @@ struct drm_i915_gem_object {
> > };
> > #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
> >
> > -void i915_gem_track_fb(struct drm_i915_gem_object *old,
> > - struct drm_i915_gem_object *new,
> > - unsigned frontbuffer_bits);
> > +/*
> > + * New optimised SGL iterator for GEM objects
> > + */
> > +struct sgt_iter {
> > + struct scatterlist *sgp;
> > + union {
> > + unsigned long pfn;
> > + unsigned long dma;
> > + } ix;
>
> An anonymous union here would be slightly more pleasant. And we should
> be using the correct types.
>
> > + unsigned int curr;
> > + unsigned int max;
> > +};
> > +
> > +/* Constructor for new iterator */
> > +static inline struct sgt_iter
> > +__sgt_iter(struct scatterlist *sgl, bool dma)
> > +{
> > + struct sgt_iter s = { .sgp = sgl };
> > +
> > + if (s.sgp) {
>
> Not acceptable just to GPF out if passed in a NULL? It's a BUG for all
> our use cases. And would save a conditional every chain step.
Oh, it's how we stop after __sg_next().
Ok, played a bit and the only thing that sticks is
/*
- * New optimised SGL iterator for GEM objects
+ * Optimised SGL iterator for GEM objects
*/
struct sgt_iter {
struct scatterlist *sgp;
union {
unsigned long pfn;
- unsigned long dma;
- } ix;
+ dma_addr_t dma;
+ };
unsigned int curr;
unsigned int max;
};
-/* Constructor for new iterator */
-static inline struct sgt_iter
+static __always_inline struct sgt_iter
Nothing is ever new past the commit that introduces it!
Final diff (a few % in micro, consistently above the noise):
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 53ff4993f5b8..586f06646630 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2256,31 +2256,26 @@ struct drm_i915_gem_object {
#define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
/*
- * New optimised SGL iterator for GEM objects
+ * Optimised SGL iterator for GEM objects
*/
-struct sgt_iter {
+static __always_inline struct sgt_iter {
struct scatterlist *sgp;
union {
unsigned long pfn;
- unsigned long dma;
- } ix;
+ dma_addr_t dma;
+ };
unsigned int curr;
unsigned int max;
-};
-
-/* Constructor for new iterator */
-static inline struct sgt_iter
-__sgt_iter(struct scatterlist *sgl, bool dma)
-{
+} __sgt_iter(struct scatterlist *sgl, bool dma) {
struct sgt_iter s = { .sgp = sgl };
if (s.sgp) {
s.max = s.curr = s.sgp->offset;
s.max += s.sgp->length;
if (dma)
- s.ix.dma = sg_dma_address(s.sgp);
+ s.dma = sg_dma_address(s.sgp);
else
- s.ix.pfn = page_to_pfn(sg_page(s.sgp));
+ s.pfn = page_to_pfn(sg_page(s.sgp));
}
return s;
@@ -2313,9 +2308,9 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
*/
#define for_each_sgt_dma(__dmap, __iter, __sgt) \
for ((__iter) = __sgt_iter((__sgt)->sgl, true); \
- ((__dmap) = (__iter).ix.dma + (__iter).curr); \
+ ((__dmap) = (__iter).dma + (__iter).curr); \
(((__iter).curr += PAGE_SIZE) < (__iter).max) || \
- ((__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0))
+ ((__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0))
/**
* for_each_sgt_page - iterate over the pages of the given sg_table
@@ -2325,10 +2320,10 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
*/
#define for_each_sgt_page(__pp, __iter, __sgt) \
for ((__iter) = __sgt_iter((__sgt)->sgl, false); \
- ((__pp) = (__iter).ix.pfn == 0 ? NULL : \
- pfn_to_page((__iter).ix.pfn + ((__iter).curr >> PAGE_SHIFT)));\
+ ((__pp) = (__iter).pfn == 0 ? NULL : \
+ pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
(((__iter).curr += PAGE_SIZE) < (__iter).max) || \
- ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
+ ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
And with that,
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list