[Intel-gfx] [PATCH 03/29] drm/i915: Use static allocation for i915_globals_park()
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Apr 8 14:31:42 UTC 2019
On 08/04/2019 10:17, Chris Wilson wrote:
> In order to avoid the malloc inside i915_globals_park() occurring
> underneath a lock connected to the shrinker (thus causing circular
> lockdeps warnings), move the rcu_worker to a global.
>
> <4> [39.085073] ======================================================
> <4> [39.085273] WARNING: possible circular locking dependency detected
> <4> [39.085552] 5.1.0-rc3-CI-Trybot_4088+ #1 Tainted: G U
> <4> [39.085752] ------------------------------------------------------
> <4> [39.085949] kswapd0/32 is trying to acquire lock:
> <4> [39.086121] 00000000004b5f91 (wakeref#3){+.+.}, at: intel_engine_pm_put+0x1b/0x40 [i915]
> <4> [39.086493]
> but task is already holding lock:
> <4> [39.086682] 00000000dd009a9a (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x30
> <4> [39.086910]
> which lock already depends on the new lock.
>
> <4> [39.087139]
> the existing dependency chain (in reverse order) is:
> <4> [39.087356]
> -> #2 (fs_reclaim){+.+.}:
> <4> [39.087604] fs_reclaim_acquire.part.24+0x24/0x30
> <4> [39.087785] kmem_cache_alloc_trace+0x2a/0x290
> <4> [39.087998] i915_globals_park+0x22/0xa0 [i915]
> <4> [39.088478] idle_work_handler+0x1df/0x220 [i915]
> <4> [39.089016] process_one_work+0x245/0x610
> <4> [39.089447] worker_thread+0x37/0x380
> <4> [39.089956] kthread+0x119/0x130
> <4> [39.090374] ret_from_fork+0x3a/0x50
> <4> [39.090868]
> -> #1 (wakeref#4){+.+.}:
> <4> [39.091569] __mutex_lock+0x8c/0x960
> <4> [39.092054] atomic_dec_and_mutex_lock+0x33/0x50
> <4> [39.092521] intel_gt_pm_put+0x1b/0x40 [i915]
> <4> [39.093047] intel_engine_park+0xeb/0x1d0 [i915]
> <4> [39.093514] __intel_wakeref_put_once+0x10/0x30 [i915]
> <4> [39.094062] i915_request_retire+0x477/0xaf0 [i915]
> <4> [39.094547] ring_retire_requests+0x86/0x160 [i915]
> <4> [39.095110] i915_retire_requests+0x58/0xc0 [i915]
> <4> [39.095587] i915_gem_wait_for_idle.part.22+0xb2/0xf0 [i915]
> <4> [39.096142] switch_to_kernel_context_sync+0x2a/0x70 [i915]
> <4> [39.096633] i915_gem_init+0x59c/0x9c0 [i915]
> <4> [39.097174] i915_driver_load+0xd96/0x1880 [i915]
> <4> [39.097640] i915_pci_probe+0x29/0xa0 [i915]
> <4> [39.098145] pci_device_probe+0xa1/0x120
> <4> [39.098607] really_probe+0xf3/0x3e0
> <4> [39.099031] driver_probe_device+0x10a/0x120
> <4> [39.099599] device_driver_attach+0x4b/0x50
> <4> [39.100033] __driver_attach+0x97/0x130
> <4> [39.100525] bus_for_each_dev+0x74/0xc0
> <4> [39.100954] bus_add_driver+0x13f/0x210
> <4> [39.101441] driver_register+0x56/0xe0
> <4> [39.101891] do_one_initcall+0x58/0x2e0
> <4> [39.102319] do_init_module+0x56/0x1ea
> <4> [39.102805] load_module+0x2701/0x29e0
> <4> [39.103231] __se_sys_finit_module+0xd3/0xf0
> <4> [39.103727] do_syscall_64+0x55/0x190
> <4> [39.104153] entry_SYSCALL_64_after_hwframe+0x49/0xbe
> <4> [39.104736]
> -> #0 (wakeref#3){+.+.}:
> <4> [39.105437] lock_acquire+0xa6/0x1c0
> <4> [39.105923] __mutex_lock+0x8c/0x960
> <4> [39.106345] atomic_dec_and_mutex_lock+0x33/0x50
> <4> [39.106897] intel_engine_pm_put+0x1b/0x40 [i915]
> <4> [39.107375] i915_request_retire+0x477/0xaf0 [i915]
> <4> [39.107930] ring_retire_requests+0x86/0x160 [i915]
> <4> [39.108412] i915_retire_requests+0x58/0xc0 [i915]
> <4> [39.108934] i915_gem_shrink+0xd8/0x5b0 [i915]
> <4> [39.109431] i915_gem_shrinker_scan+0x59/0x130 [i915]
> <4> [39.109884] do_shrink_slab+0x131/0x3e0
> <4> [39.110380] shrink_slab+0x228/0x2c0
> <4> [39.110810] shrink_node+0x177/0x460
> <4> [39.111317] balance_pgdat+0x239/0x580
> <4> [39.111743] kswapd+0x186/0x570
> <4> [39.112221] kthread+0x119/0x130
> <4> [39.112641] ret_from_fork+0x3a/0x50
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_globals.c | 74 +++++++++++++----------------
> 1 file changed, 32 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
> index 2f5c72e2a9d1..81e5c2ce336b 100644
> --- a/drivers/gpu/drm/i915/i915_globals.c
> +++ b/drivers/gpu/drm/i915/i915_globals.c
> @@ -17,6 +17,33 @@
>
> static LIST_HEAD(globals);
>
> +static atomic_t active;
> +static atomic_t epoch;
> +static struct park_work {
> + struct rcu_work work;
> + int epoch;
> +} park;
> +
> +static void i915_globals_shrink(void)
> +{
> + struct i915_global *global;
> +
> + /*
> + * kmem_cache_shrink() discards empty slabs and reorders partially
> + * filled slabs to prioritise allocating from the mostly full slabs,
> + * with the aim of reducing fragmentation.
> + */
> + list_for_each_entry(global, &globals, link)
> + global->shrink();
> +}
> +
> +static void __i915_globals_park(struct work_struct *work)
> +{
> + /* Confirm nothing woke up in the last grace period */
> + if (park.epoch == atomic_read(&epoch))
> + i915_globals_shrink();
> +}
> +
> void __init i915_global_register(struct i915_global *global)
> {
> GEM_BUG_ON(!global->shrink);
> @@ -57,44 +84,12 @@ int __init i915_globals_init(void)
> }
> }
>
> + INIT_RCU_WORK(&park.work, __i915_globals_park);
> return 0;
> }
>
> -static void i915_globals_shrink(void)
> -{
> - struct i915_global *global;
> -
> - /*
> - * kmem_cache_shrink() discards empty slabs and reorders partially
> - * filled slabs to prioritise allocating from the mostly full slabs,
> - * with the aim of reducing fragmentation.
> - */
> - list_for_each_entry(global, &globals, link)
> - global->shrink();
> -}
> -
> -static atomic_t active;
> -static atomic_t epoch;
> -struct park_work {
> - struct rcu_work work;
> - int epoch;
> -};
> -
> -static void __i915_globals_park(struct work_struct *work)
> -{
> - struct park_work *wrk = container_of(work, typeof(*wrk), work.work);
> -
> - /* Confirm nothing woke up in the last grace period */
> - if (wrk->epoch == atomic_read(&epoch))
> - i915_globals_shrink();
> -
> - kfree(wrk);
> -}
> -
> void i915_globals_park(void)
> {
> - struct park_work *wrk;
> -
> /*
> * Defer shrinking the global slab caches (and other work) until
> * after a RCU grace period has completed with no activity. This
> @@ -107,13 +102,8 @@ void i915_globals_park(void)
> if (!atomic_dec_and_test(&active))
> return;
>
> - wrk = kmalloc(sizeof(*wrk), GFP_KERNEL);
> - if (!wrk)
> - return;
> -
> - wrk->epoch = atomic_inc_return(&epoch);
> - INIT_RCU_WORK(&wrk->work, __i915_globals_park);
> - queue_rcu_work(system_wq, &wrk->work);
> + park.epoch = atomic_inc_return(&epoch);
> + queue_rcu_work(system_wq, &park.work);
> }
>
> void i915_globals_unpark(void)
> @@ -125,8 +115,8 @@ void i915_globals_unpark(void)
> void __exit i915_globals_exit(void)
> {
> /* Flush any residual park_work */
> - rcu_barrier();
> - flush_scheduled_work();
> + atomic_inc(&epoch);
> + flush_rcu_work(&park.work);
>
> __i915_globals_cleanup();
>
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list