[Intel-gfx] [PATCH 1/7] drm/i915/gtt: Defer address space cleanup to an RCU worker
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jun 19 14:51:18 UTC 2019
On 19/06/2019 12:23, Chris Wilson wrote:
> Enable RCU protection of i915_address_space and its ppgtt superclasses,
> and defer its cleanup into a worker executed after an RCU grace period.
>
> In the future we will be able to use the RCU protection to reduce the
> locking around VM lookups, but the immediate benefit is being able to
> defer the release into a kworker (process context). This is required as
> we may need to sleep to reap the WC pages stashed away inside the ppgtt.
I can't see that this actually adds RCU protection, apart from using
queue_rcu_work for some reason. It _seems_ like it could just as well
use a normal deferred worker? RCU may have the benefit of relaxed
timing, i.e. we don't care that it happens immediately but also don't
want to pick a made-up delay for it, so is it all about natural
batching only at this point?
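
For illustration, an untested sketch of the plain-worker variant I have
in mind (release_work and __i915_vm_release_work are made-up names, and
the matching INIT_WORK in i915_address_space_init is omitted); it still
gives process context for the sleeping cleanup, it just doesn't wait
for a grace period first:

/*
 * Untested sketch: plain work item instead of rcu_work. Same
 * process-context cleanup, minus the RCU grace period before the
 * callback runs.
 */
struct i915_address_space {
	struct kref ref;
	struct work_struct release_work; /* instead of struct rcu_work rcu */
	/* ... */
};

static void __i915_vm_release_work(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	ppgtt_destroy_vma(vm);
	vm->cleanup(vm);
	i915_address_space_fini(vm);
	kfree(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	vm->closed = true;
	queue_work(system_unbound_wq, &vm->release_work);
}
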
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110934
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/Makefile.header-test | 1 +
> drivers/gpu/drm/i915/i915_gem_gtt.c | 109 ++++++++++--------
> drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +
> drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 -
> 4 files changed, 66 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test
> index e6ba66f787f9..cb74242f9c3b 100644
> --- a/drivers/gpu/drm/i915/Makefile.header-test
> +++ b/drivers/gpu/drm/i915/Makefile.header-test
> @@ -6,6 +6,7 @@ header_test := \
> i915_active_types.h \
> i915_debugfs.h \
> i915_drv.h \
> + i915_gem_gtt.h \
> i915_irq.h \
> i915_params.h \
> i915_priolist_types.h \
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 8ab820145ea6..d9eddbd79670 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -482,9 +482,69 @@ static void vm_free_page(struct i915_address_space *vm, struct page *page)
> spin_unlock(&vm->free_pages.lock);
> }
>
> +static void i915_address_space_fini(struct i915_address_space *vm)
> +{
> + spin_lock(&vm->free_pages.lock);
> + if (pagevec_count(&vm->free_pages.pvec))
> + vm_free_pages_release(vm, true);
> + GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
> + spin_unlock(&vm->free_pages.lock);
> +
> + drm_mm_takedown(&vm->mm);
> +
> + mutex_destroy(&vm->mutex);
> +}
> +
> +static void ppgtt_destroy_vma(struct i915_address_space *vm)
> +{
> + struct list_head *phases[] = {
> + &vm->bound_list,
> + &vm->unbound_list,
> + NULL,
> + }, **phase;
> +
> + mutex_lock(&vm->i915->drm.struct_mutex);
> + for (phase = phases; *phase; phase++) {
> + struct i915_vma *vma, *vn;
> +
> + list_for_each_entry_safe(vma, vn, *phase, vm_link)
No one can access the lists at this point, right? So, inefficiency
aside, the strictly pedantic thing would be to take and release
struct_mutex around each i915_vma_destroy only, I think. It's fine to
optimize that, but maybe add a comment above mutex_lock.
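
Something like this is what I mean by pedantic (untested sketch,
purely illustrative):

	/*
	 * Untested sketch: take struct_mutex only around each
	 * i915_vma_destroy(), since nothing else can reach these lists
	 * once the last reference is gone.
	 */
	for (phase = phases; *phase; phase++) {
		struct i915_vma *vma, *vn;

		list_for_each_entry_safe(vma, vn, *phase, vm_link) {
			mutex_lock(&vm->i915->drm.struct_mutex);
			i915_vma_destroy(vma);
			mutex_unlock(&vm->i915->drm.struct_mutex);
		}
	}
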
> + i915_vma_destroy(vma);
> + }
> + mutex_unlock(&vm->i915->drm.struct_mutex);
> +}
> +
> +static void __i915_vm_release(struct work_struct *work)
> +{
> + struct i915_address_space *vm =
> + container_of(work, struct i915_address_space, rcu.work);
> +
> + ppgtt_destroy_vma(vm);
> +
> + GEM_BUG_ON(!list_empty(&vm->bound_list));
> + GEM_BUG_ON(!list_empty(&vm->unbound_list));
> +
> + vm->cleanup(vm);
> + i915_address_space_fini(vm);
> +
> + kfree(vm);
> +}
> +
> +void i915_vm_release(struct kref *kref)
> +{
> + struct i915_address_space *vm =
> + container_of(kref, struct i915_address_space, ref);
> +
> + GEM_BUG_ON(i915_is_ggtt(vm));
> + trace_i915_ppgtt_release(vm);
> +
> + vm->closed = true;
Setting vm->closed is now outside any locking, although I am not sure
if it matters. Feels weird at least.
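
If nothing else, maybe a WRITE_ONCE would document the intent (just a
thought, untested sketch):

	/*
	 * Lockless write; by this point the last reference is gone so
	 * nothing should race with it, but WRITE_ONCE() would at least
	 * document that this is intentional.
	 */
	WRITE_ONCE(vm->closed, true);
	queue_rcu_work(system_unbound_wq, &vm->rcu);
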
> + queue_rcu_work(system_unbound_wq, &vm->rcu);
> +}
> +
> static void i915_address_space_init(struct i915_address_space *vm, int subclass)
> {
> kref_init(&vm->ref);
> + INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
>
> /*
> * The vm->mutex must be reclaim safe (for use in the shrinker).
> @@ -505,19 +565,6 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
> INIT_LIST_HEAD(&vm->bound_list);
> }
>
> -static void i915_address_space_fini(struct i915_address_space *vm)
> -{
> - spin_lock(&vm->free_pages.lock);
> - if (pagevec_count(&vm->free_pages.pvec))
> - vm_free_pages_release(vm, true);
> - GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
> - spin_unlock(&vm->free_pages.lock);
> -
> - drm_mm_takedown(&vm->mm);
> -
> - mutex_destroy(&vm->mutex);
> -}
> -
> static int __setup_page_dma(struct i915_address_space *vm,
> struct i915_page_dma *p,
> gfp_t gfp)
> @@ -2250,42 +2297,6 @@ i915_ppgtt_create(struct drm_i915_private *i915)
> return ppgtt;
> }
>
> -static void ppgtt_destroy_vma(struct i915_address_space *vm)
> -{
> - struct list_head *phases[] = {
> - &vm->bound_list,
> - &vm->unbound_list,
> - NULL,
> - }, **phase;
> -
> - vm->closed = true;
> - for (phase = phases; *phase; phase++) {
> - struct i915_vma *vma, *vn;
> -
> - list_for_each_entry_safe(vma, vn, *phase, vm_link)
> - i915_vma_destroy(vma);
> - }
> -}
> -
> -void i915_vm_release(struct kref *kref)
> -{
> - struct i915_address_space *vm =
> - container_of(kref, struct i915_address_space, ref);
> -
> - GEM_BUG_ON(i915_is_ggtt(vm));
> - trace_i915_ppgtt_release(vm);
> -
> - ppgtt_destroy_vma(vm);
> -
> - GEM_BUG_ON(!list_empty(&vm->bound_list));
> - GEM_BUG_ON(!list_empty(&vm->unbound_list));
> -
> - vm->cleanup(vm);
> - i915_address_space_fini(vm);
> -
> - kfree(vm);
> -}
> -
> /* Certain Gen5 chipsets require require idling the GPU before
> * unmapping anything from the GTT when VT-d is enabled.
> */
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 812717ccc69b..8de57f67a911 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -35,8 +35,12 @@
> #define __I915_GEM_GTT_H__
>
> #include <linux/io-mapping.h>
> +#include <linux/kref.h>
> #include <linux/mm.h>
> #include <linux/pagevec.h>
> +#include <linux/workqueue.h>
> +
> +#include <drm/drm_mm.h>
>
> #include "gt/intel_reset.h"
> #include "i915_gem_fence_reg.h"
> @@ -280,6 +284,7 @@ struct pagestash {
>
> struct i915_address_space {
> struct kref ref;
> + struct rcu_work rcu;
>
> struct drm_mm mm;
> struct drm_i915_private *i915;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 1a60b9fe8221..0c47276ed5df 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -208,9 +208,7 @@ static int igt_ppgtt_alloc(void *arg)
> }
>
> err_ppgtt_cleanup:
> - mutex_lock(&dev_priv->drm.struct_mutex);
> i915_vm_put(&ppgtt->vm);
> - mutex_unlock(&dev_priv->drm.struct_mutex);
> return err;
> }
>
>
Regards,
Tvrtko