[Intel-gfx] [PATCH 2/3] drm/i915: Use RPM as the barrier for controlling user mmap access
Daniel Vetter
daniel at ffwll.ch
Thu Oct 13 14:44:23 UTC 2016
On Tue, Oct 11, 2016 at 03:37:58PM +0100, Chris Wilson wrote:
> We can remove the false coupling between RPM and struct mutex by the
> observation that we can use the RPM wakeref as the barrier around user
> mmap access. That is as we tear down the user's PTE atomically from
> within rpm suspend and then to fault in new PTE requires the rpm
> wakeref, means that no user access is possible through those PTE without
> RPM being awake. Having made that observation, we can then remove the
> presumption of having to take rpm outside of struct_mutex and so allow
> fine grained acquisition of a wakeref around hw access rather than
> having to remember to acquire the wakeref early on.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Imre Deak <imre.deak at intel.com>
> Cc: Daniel Vetter <daniel at ffwll.ch>
> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 22 ----------------------
> drivers/gpu/drm/i915/i915_drv.c | 19 -------------------
> drivers/gpu/drm/i915/i915_gem.c | 14 +++++---------
> drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +++++++++++++----
> drivers/gpu/drm/i915/i915_gem_tiling.c | 4 ----
> drivers/gpu/drm/i915/intel_uncore.c | 6 +++---
> 6 files changed, 21 insertions(+), 61 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d4ecc5283c2a..d4779abd89e3 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1400,14 +1400,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
> static int ironlake_drpc_info(struct seq_file *m)
> {
> struct drm_i915_private *dev_priv = node_to_i915(m->private);
> - struct drm_device *dev = &dev_priv->drm;
> u32 rgvmodectl, rstdbyctl;
> u16 crstandvid;
> - int ret;
>
> - ret = mutex_lock_interruptible(&dev->struct_mutex);
> - if (ret)
> - return ret;
> intel_runtime_pm_get(dev_priv);
>
> rgvmodectl = I915_READ(MEMMODECTL);
> @@ -1415,7 +1410,6 @@ static int ironlake_drpc_info(struct seq_file *m)
> crstandvid = I915_READ16(CRSTANDVID);
>
> intel_runtime_pm_put(dev_priv);
> - mutex_unlock(&dev->struct_mutex);
>
> seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
> seq_printf(m, "Boost freq: %d\n",
> @@ -2093,12 +2087,7 @@ static const char *swizzle_string(unsigned swizzle)
> static int i915_swizzle_info(struct seq_file *m, void *data)
> {
> struct drm_i915_private *dev_priv = node_to_i915(m->private);
> - struct drm_device *dev = &dev_priv->drm;
> - int ret;
>
> - ret = mutex_lock_interruptible(&dev->struct_mutex);
> - if (ret)
> - return ret;
> intel_runtime_pm_get(dev_priv);
>
> seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
> @@ -2138,7 +2127,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
> seq_puts(m, "L-shaped memory detected\n");
>
> intel_runtime_pm_put(dev_priv);
> - mutex_unlock(&dev->struct_mutex);
>
> return 0;
> }
> @@ -4797,13 +4785,9 @@ i915_wedged_set(void *data, u64 val)
> if (i915_reset_in_progress(&dev_priv->gpu_error))
> return -EAGAIN;
>
> - intel_runtime_pm_get(dev_priv);
> -
> i915_handle_error(dev_priv, val,
> "Manually setting wedged to %llu", val);
>
> - intel_runtime_pm_put(dev_priv);
> -
> return 0;
> }
>
> @@ -5038,22 +5022,16 @@ static int
> i915_cache_sharing_get(void *data, u64 *val)
> {
> struct drm_i915_private *dev_priv = data;
> - struct drm_device *dev = &dev_priv->drm;
> u32 snpcr;
> - int ret;
>
> if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv)))
> return -ENODEV;
>
> - ret = mutex_lock_interruptible(&dev->struct_mutex);
> - if (ret)
> - return ret;
> intel_runtime_pm_get(dev_priv);
>
> snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
>
> intel_runtime_pm_put(dev_priv);
> - mutex_unlock(&dev->struct_mutex);
>
> *val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 89d322215c84..31eee32fcf6d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -2313,24 +2313,6 @@ static int intel_runtime_suspend(struct device *kdev)
>
> DRM_DEBUG_KMS("Suspending device\n");
>
> - /*
> - * We could deadlock here in case another thread holding struct_mutex
> - * calls RPM suspend concurrently, since the RPM suspend will wait
> - * first for this RPM suspend to finish. In this case the concurrent
> - * RPM resume will be followed by its RPM suspend counterpart. Still
> - * for consistency return -EAGAIN, which will reschedule this suspend.
> - */
> - if (!mutex_trylock(&dev->struct_mutex)) {
> - DRM_DEBUG_KMS("device lock contention, deffering suspend\n");
> - /*
> - * Bump the expiration timestamp, otherwise the suspend won't
> - * be rescheduled.
> - */
> - pm_runtime_mark_last_busy(kdev);
> -
> - return -EAGAIN;
> - }
> -
> disable_rpm_wakeref_asserts(dev_priv);
>
> /*
> @@ -2338,7 +2320,6 @@ static int intel_runtime_suspend(struct device *kdev)
> * an RPM reference.
> */
> i915_gem_release_all_mmaps(dev_priv);
> - mutex_unlock(&dev->struct_mutex);
>
> intel_guc_suspend(dev);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 91910ffe0964..587a91af5a3f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1469,7 +1469,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> */
> if (!i915_gem_object_has_struct_page(obj) ||
> cpu_write_needs_clflush(obj)) {
> + intel_runtime_pm_get(dev_priv);
> ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
> + intel_runtime_pm_put(dev_priv);
I'd move the rpm_get/put into gtt_pwrite_fast - there's only one caller,
and it's in the spirit of this patch of moving the rpm get/put closer to
where we really need it.
> /* Note that the gtt paths might fail with non-page-backed user
> * pointers (e.g. gtt mappings when moving data between
> * textures). Fallback to the shmem path in that case. */
> @@ -1850,6 +1852,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
> if (ret)
> goto err_unpin;
>
> + assert_rpm_wakelock_held(dev_priv);
> spin_lock(&dev_priv->mm.userfault_lock);
> list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
> spin_unlock(&dev_priv->mm.userfault_lock);
> @@ -1932,7 +1935,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
> * requirement that operations to the GGTT be made holding the RPM
> * wakeref.
> */
> - lockdep_assert_held(&obj->base.dev->struct_mutex);
> + assert_rpm_wakelock_held(i915);
>
> spin_lock(&i915->mm.userfault_lock);
> if (!list_empty(&obj->userfault_link)) {
> @@ -3469,7 +3472,6 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
> int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file)
> {
> - struct drm_i915_private *dev_priv = to_i915(dev);
> struct drm_i915_gem_caching *args = data;
> struct drm_i915_gem_object *obj;
> enum i915_cache_level level;
> @@ -3498,11 +3500,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
> return -EINVAL;
> }
>
> - intel_runtime_pm_get(dev_priv);
> -
> ret = i915_mutex_lock_interruptible(dev);
> if (ret)
> - goto rpm_put;
> + return ret;
>
> obj = i915_gem_object_lookup(file, args->handle);
> if (!obj) {
> @@ -3511,13 +3511,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
> }
>
> ret = i915_gem_object_set_cache_level(obj, level);
> -
> i915_gem_object_put(obj);
> unlock:
> mutex_unlock(&dev->struct_mutex);
> -rpm_put:
> - intel_runtime_pm_put(dev_priv);
> -
> return ret;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 2d846aa39ca5..c0a4ba478309 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2596,6 +2596,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
> enum i915_cache_level cache_level,
> u32 flags)
> {
> + struct drm_i915_private *i915 = to_i915(vma->vm->dev);
> struct drm_i915_gem_object *obj = vma->obj;
> u32 pte_flags = 0;
> int ret;
> @@ -2608,8 +2609,10 @@ static int ggtt_bind_vma(struct i915_vma *vma,
> if (obj->gt_ro)
> pte_flags |= PTE_READ_ONLY;
>
> + intel_runtime_pm_get(i915);
> vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
> cache_level, pte_flags);
> + intel_runtime_pm_put(i915);
>
> /*
> * Without aliasing PPGTT there's no difference between
> @@ -2625,6 +2628,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
> enum i915_cache_level cache_level,
> u32 flags)
> {
> + struct drm_i915_private *i915 = to_i915(vma->vm->dev);
> u32 pte_flags;
> int ret;
>
> @@ -2639,14 +2643,15 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
>
>
> if (flags & I915_VMA_GLOBAL_BIND) {
> + intel_runtime_pm_get(i915);
> vma->vm->insert_entries(vma->vm,
> vma->pages, vma->node.start,
> cache_level, pte_flags);
> + intel_runtime_pm_put(i915);
> }
>
> if (flags & I915_VMA_LOCAL_BIND) {
> - struct i915_hw_ppgtt *appgtt =
> - to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
> + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
> appgtt->base.insert_entries(&appgtt->base,
> vma->pages, vma->node.start,
> cache_level, pte_flags);
> @@ -2657,13 +2662,17 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
>
> static void ggtt_unbind_vma(struct i915_vma *vma)
> {
> - struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
> + struct drm_i915_private *i915 = to_i915(vma->vm->dev);
> + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
> const u64 size = min(vma->size, vma->node.size);
>
> - if (vma->flags & I915_VMA_GLOBAL_BIND)
> + if (vma->flags & I915_VMA_GLOBAL_BIND) {
> + intel_runtime_pm_get(i915);
> vma->vm->clear_range(vma->vm,
> vma->node.start, size,
> true);
> + intel_runtime_pm_put(i915);
> + }
>
> if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
> appgtt->base.clear_range(&appgtt->base,
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index a14b1e3d4c78..08f796a4f5f6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -204,8 +204,6 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
> return -EINVAL;
> }
>
> - intel_runtime_pm_get(dev_priv);
> -
> mutex_lock(&dev->struct_mutex);
> if (obj->pin_display || obj->framebuffer_references) {
> err = -EBUSY;
> @@ -301,8 +299,6 @@ err:
> i915_gem_object_put(obj);
> mutex_unlock(&dev->struct_mutex);
>
> - intel_runtime_pm_put(dev_priv);
> -
> return err;
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index e2b188dcf908..e0c3bd941e38 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1364,7 +1364,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
> struct register_whitelist const *entry = whitelist;
> unsigned size;
> i915_reg_t offset_ldw, offset_udw;
> - int i, ret = 0;
> + int i, ret;
>
> for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
> if (i915_mmio_reg_offset(entry->offset_ldw) == (reg->offset & -entry->size) &&
> @@ -1386,6 +1386,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>
> intel_runtime_pm_get(dev_priv);
>
> + ret = 0;
> switch (size) {
> case 8 | 1:
> reg->val = I915_READ64_2x32(offset_ldw, offset_udw);
> @@ -1404,10 +1405,9 @@ int i915_reg_read_ioctl(struct drm_device *dev,
> break;
> default:
> ret = -EINVAL;
> - goto out;
> + break;
> }
>
> -out:
> intel_runtime_pm_put(dev_priv);
> return ret;
Spurious hunks in i915_reg_read_ioctl, please remove from the patch. With
the userfault_list/lock I'm happy with this now. With the 2 nits
addressed.
Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
> }
> --
> 2.9.3
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
More information about the Intel-gfx
mailing list