[Intel-gfx] [PATCH 3/4] drm/i915: opt-out CPU and WC mmaps from FBC
Paulo Zanoni
paulo.r.zanoni at intel.com
Mon Mar 21 19:26:57 UTC 2016
FBC and the frontbuffer tracking infrastructure were designed assuming
that user space applications would follow a specific set of rules
regarding frontbuffer management and mmapping. I recently discovered
that current user space is not exactly following these rules: my
investigation led me to the conclusion that the generic backend from
SNA - used by SKL and the other new platforms without a specific
backend - is not issuing sw_finish/dirty_fb IOCTLs when using the CPU
and WC mmaps. I discovered this when running lightdm: I would type the
password and nothing would appear on the screen unless I moved the
mouse over the place where the letters were supposed to appear.
Since we can't detect whether the current DRM master is a well-behaved
application or not, let's use the sledgehammer approach and assume
that every user space client on every gen is bad by disabling FBC when
the frontbuffer is CPU/WC mmapped. This will allow us to enable FBC
on SKL, moving its FBC-related power savings from "always 0%" to "> 0%
in some cases".
There's also some additional code to disable the workaround for
frontbuffers that ever received a sw_finish or dirty_fb IOCTL, since
the current bad user space never issues those calls. This should help
for some specific cases and our IGT tests, but won't be enough for a
new xf86-video-intel using TearFree.
Notice that we may need an equivalent commit for PSR too. We also need
to investigate whether PSR needs to be disabled on GTT mmaps: if
that's the case, we'll have problems since we seem to always have GTT
mmaps on our current user space, so we would end up keeping PSR
disabled forever.
v2:
- Rename some things.
- Disable the WA on sw_finish/dirty_fb (Daniel).
- Fix NULL obj checking.
- Restric to Gen 9.
Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 9 +++++++++
drivers/gpu/drm/i915/i915_gem.c | 19 +++++++++++-------
drivers/gpu/drm/i915/intel_display.c | 1 +
drivers/gpu/drm/i915/intel_drv.h | 3 +++
drivers/gpu/drm/i915/intel_fbc.c | 33 ++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/intel_frontbuffer.c | 31 ++++++++++++++++++++++++++++++
6 files changed, 89 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index efca534..45e31d2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -873,6 +873,13 @@ enum fb_op_origin {
ORIGIN_DIRTYFB,
};
+enum fb_mmap_wa_flags {
+ FB_MMAP_WA_CPU = 1 << 0,
+ FB_MMAP_WA_GTT = 1 << 1,
+ FB_MMAP_WA_DISABLE = 1 << 2,
+ FB_MMAP_WA_FLAG_COUNT = 3,
+};
+
struct intel_fbc {
/* This is always the inner lock when overlapping with struct_mutex and
* it's the outer lock when overlapping with stolen_lock. */
@@ -910,6 +917,7 @@ struct intel_fbc {
unsigned int stride;
int fence_reg;
unsigned int tiling_mode;
+ unsigned int mmap_wa_flags;
} fb;
} state_cache;
@@ -2143,6 +2151,7 @@ struct drm_i915_gem_object {
unsigned int cache_dirty:1;
unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
+ unsigned int fb_mmap_wa_flags:FB_MMAP_WA_FLAG_COUNT;
unsigned int pin_display;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8588c83..d44f27e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1692,6 +1692,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
goto unlock;
}
+ intel_fb_obj_mmap_wa(obj, FB_MMAP_WA_DISABLE);
+
/* Pinned buffers may be scanout, so flush the cache */
if (obj->pin_display)
i915_gem_object_flush_cpu_write_domain(obj);
@@ -1724,7 +1726,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct drm_i915_gem_mmap *args = data;
- struct drm_gem_object *obj;
+ struct drm_i915_gem_object *obj;
unsigned long addr;
if (args->flags & ~(I915_MMAP_WC))
@@ -1733,19 +1735,19 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
if (args->flags & I915_MMAP_WC && !cpu_has_pat)
return -ENODEV;
- obj = drm_gem_object_lookup(dev, file, args->handle);
- if (obj == NULL)
+ obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
+ if (&obj->base == NULL)
return -ENOENT;
/* prime objects have no backing filp to GEM mmap
* pages from.
*/
- if (!obj->filp) {
- drm_gem_object_unreference_unlocked(obj);
+ if (!obj->base.filp) {
+ drm_gem_object_unreference_unlocked(&obj->base);
return -EINVAL;
}
- addr = vm_mmap(obj->filp, 0, args->size,
+ addr = vm_mmap(obj->base.filp, 0, args->size,
PROT_READ | PROT_WRITE, MAP_SHARED,
args->offset);
if (args->flags & I915_MMAP_WC) {
@@ -1761,10 +1763,12 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
addr = -ENOMEM;
up_write(&mm->mmap_sem);
}
- drm_gem_object_unreference_unlocked(obj);
+ drm_gem_object_unreference_unlocked(&obj->base);
if (IS_ERR((void *)addr))
return addr;
+ intel_fb_obj_mmap_wa(obj, FB_MMAP_WA_CPU);
+
args->addr_ptr = (uint64_t) addr;
return 0;
@@ -2099,6 +2103,7 @@ i915_gem_mmap_gtt(struct drm_file *file,
goto out;
*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+ intel_fb_obj_mmap_wa(obj, FB_MMAP_WA_GTT);
out:
drm_gem_object_unreference(&obj->base);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 28ead66..6e7aa9e 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14705,6 +14705,7 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
struct drm_i915_gem_object *obj = intel_fb->obj;
mutex_lock(&dev->struct_mutex);
+ intel_fb_obj_mmap_wa(obj, FB_MMAP_WA_DISABLE);
intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index ba45245..bbea7c4 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1082,6 +1082,7 @@ void intel_frontbuffer_flip_complete(struct drm_device *dev,
unsigned frontbuffer_bits);
void intel_frontbuffer_flip(struct drm_device *dev,
unsigned frontbuffer_bits);
+void intel_fb_obj_mmap_wa(struct drm_i915_gem_object *obj, unsigned int flags);
unsigned int intel_fb_align_height(struct drm_device *dev,
unsigned int height,
uint32_t pixel_format,
@@ -1371,6 +1372,8 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
enum fb_op_origin origin);
void intel_fbc_flush(struct drm_i915_private *dev_priv,
unsigned int frontbuffer_bits, enum fb_op_origin origin);
+void intel_fbc_mmap_wa(struct drm_i915_private *dev_priv,
+ unsigned int frontbuffer_bits, unsigned int flags);
void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);
/* intel_hdmi.c */
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 7101880..718ac38 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -745,6 +745,14 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc)
cache->fb.stride = fb->pitches[0];
cache->fb.fence_reg = obj->fence_reg;
cache->fb.tiling_mode = obj->tiling_mode;
+ cache->fb.mmap_wa_flags = obj->fb_mmap_wa_flags;
+}
+
+static bool need_mmap_disable_workaround(struct intel_fbc *fbc)
+{
+ unsigned int flags = fbc->state_cache.fb.mmap_wa_flags;
+
+ return (flags & FB_MMAP_WA_CPU) && !(flags & FB_MMAP_WA_DISABLE);
}
static bool intel_fbc_can_activate(struct intel_crtc *crtc)
@@ -816,6 +824,11 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
return false;
}
+ if (need_mmap_disable_workaround(fbc)) {
+ fbc->no_fbc_reason = "FB is CPU or WC mmapped";
+ return false;
+ }
+
return true;
}
@@ -1008,6 +1021,26 @@ out:
mutex_unlock(&fbc->lock);
}
+void intel_fbc_mmap_wa(struct drm_i915_private *dev_priv,
+ unsigned int frontbuffer_bits, unsigned int flags)
+{
+ struct intel_fbc *fbc = &dev_priv->fbc;
+
+ if (!fbc_supported(dev_priv))
+ return;
+
+ mutex_lock(&fbc->lock);
+
+ if (fbc->enabled &&
+ (intel_fbc_get_frontbuffer_bit(fbc) & frontbuffer_bits)) {
+ fbc->state_cache.fb.mmap_wa_flags = flags;
+ if (need_mmap_disable_workaround(fbc))
+ intel_fbc_deactivate(dev_priv);
+ }
+
+ mutex_unlock(&fbc->lock);
+}
+
/**
* intel_fbc_choose_crtc - select a CRTC to enable FBC on
* @dev_priv: i915 device instance
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index ac85357..8d9b327 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -241,3 +241,34 @@ void intel_frontbuffer_flip(struct drm_device *dev,
intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
}
+
+/**
+ * intel_fb_obj_mmap_wa - notifies about objects being mmapped
+ * @obj: GEM object being mmapped
+ *
+ * This function gets called whenever an object gets mmapped. Not every user
+ * space application follows the protocol assumed by the frontbuffer tracking
+ * subsystem when it was created, so this mmap notify callback can be used to
+ * completely disable frontbuffer features such as FBC and PSR. Even if at some
+ * point we fix ever user space application, there's still the possibility that
+ * the user may have a new Kernel with the old user space.
+ *
+ * Also notice that there's no munmap API because user space calls munmap()
+ * directly. Even if we had, it probably wouldn't help since munmap() calls are
+ * not common.
+ */
+void intel_fb_obj_mmap_wa(struct drm_i915_gem_object *obj, unsigned int flags)
+{
+ struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+
+ if (!IS_GEN9(dev_priv))
+ return;
+
+ obj->fb_mmap_wa_flags |= flags;
+
+ if (!obj->frontbuffer_bits)
+ return;
+
+ intel_fbc_mmap_wa(dev_priv, obj->frontbuffer_bits,
+ obj->fb_mmap_wa_flags);
+}
--
2.7.0
More information about the Intel-gfx
mailing list