[Intel-gfx] [PATCH v2 2/2] drm/i915: Perform object clflushing asynchronously
Joonas Lahtinen
joonas.lahtinen at linux.intel.com
Tue Feb 21 14:49:56 UTC 2017
On ma, 2017-02-20 at 20:59 +0000, Chris Wilson wrote:
> Flushing the cachelines for an object is slow, can be as much as 100ms
> for a large framebuffer. We currently do this under the struct_mutex BKL
> on execution or on pageflip. But now with the ability to add fences to
> obj->resv for both flips and execbuf (and we naturally wait on the fence
> before CPU access), we can move the clflush operation to a workqueue and
> signal a fence for completion, thereby doing the work asynchronously and
> not blocking the driver or its clients.
>
> Suggested-by: Akash Goel <akash.goel at intel.com>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> Cc: Matthew Auld <matthew.auld at intel.com>
<SNIP>
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3389,7 +3389,13 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
> void i915_gem_reset(struct drm_i915_private *dev_priv);
> void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
> void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
> -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
i915_gem_clflush.h
> +
> +void i915_gem_clflush_init(struct drm_i915_private *i915);
> +void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
> > + unsigned int flags);
> +#define I915_CLFLUSH_FORCE BIT(0)
> +#define I915_CLFLUSH_SYNC BIT(1)
> +
> void i915_gem_init_mmio(struct drm_i915_private *i915);
> int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
> int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
<SNIP>
> +static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
> +{
> + return "i915";
Why not DRIVER_NAME?
> +static void i915_clflush_release(struct dma_fence *fence)
> +{
> + struct clflushf *f = container_of(fence, typeof(*f), dma);
Better variable and struct name needed.
> +
> + i915_sw_fence_fini(&f->wait);
> +
> + BUILD_BUG_ON(offsetof(typeof(*f), dma));
Maybe make a comment at the struct definition, too?
> +static bool cpu_cache_is_coherent(struct drm_device *dev,
> + enum i915_cache_level level)
> +{
> + return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
> +}
We're not using this elsewhere?
> +
> +static void __i915_do_clflush(struct drm_i915_gem_object *obj)
> +{
> + drm_clflush_sg(obj->mm.pages);
> + obj->cache_dirty = false;
> +
> + intel_fb_obj_flush(obj, false, ORIGIN_CPU);
This being hardcoded, use of ORIGIN_DIRTYFB disappears. Nobody is going
to miss it?
> +}
> +
> +static void i915_clflush_work(struct work_struct *work)
> +{
> + struct clflushf *f = container_of(work, typeof(*f), work);
> + struct drm_i915_gem_object *obj = f->obj;
> +
> + if (i915_gem_object_pin_pages(obj)) {
> + obj->cache_dirty = true;
> + goto out;
> + }
For the time being, I'd just WARN here.
> +
> + __i915_do_clflush(obj);
> +
> + i915_gem_object_unpin_pages(obj);
> +
> +out:
> + i915_gem_object_put(obj);
> +
> + dma_fence_signal(&f->dma);
> + dma_fence_put(&f->dma);
> +}
> +
<SNIP>
> @@ -14279,15 +14282,11 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
> struct drm_clip_rect *clips,
> unsigned num_clips)
> {
> - struct drm_device *dev = fb->dev;
> struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
> struct drm_i915_gem_object *obj = intel_fb->obj;
>
> - mutex_lock(&dev->struct_mutex);
> if (obj->pin_display && obj->cache_dirty)
> - i915_gem_clflush_object(obj, true);
> - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
> - mutex_unlock(&dev->struct_mutex);
> + i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
You removed mutex_lock, add READ_ONCE for code coherency.
With above addressed;
Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Regards, Joonas
>
> return 0;
> }
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
More information about the Intel-gfx
mailing list