[Intel-gfx] [PATCH 04/11] drm/i915: Make request allocation caches global

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Feb 27 10:29:43 UTC 2019


On 26/02/2019 10:23, Chris Wilson wrote:
> As kmem_caches share the same properties (size, allocation/free behaviour)
> for all potential devices, we can use global caches. While this
> potentially has worse fragmentation behaviour (one can argue that
> different devices would have different activity lifetimes, but you can
> also argue that activity is temporal across the system), it is the
> default behaviour of the system at large to amalgamate matching caches.
> 
> The benefit for us is much reduced pointer dancing along the frequent
> allocation paths.
> 
> v2: Defer shrinking until after a global grace period for futureproofing
> multiple consumers of the slab caches, similar to the current strategy
> for avoiding shrinking too early.

I suggested calling i915_globals_park directly from __i915_gem_park for 
symmetry with how i915_gem_unpark calls i915_globals_unpark. 
i915_globals has its own delayed setup, so I don't think it benefits 
from the double indirection courtesy of being called from shrink_caches.

Otherwise I had no other complaints, but this needless asymmetry is 
bugging me.
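
Something like this is roughly what I had in mind (untested sketch; I 
am assuming the current __i915_gem_park signature here, and 
shrink_caches would then drop its i915_globals_park call):

static u32 __i915_gem_park(struct drm_i915_private *i915)
{
	/* ... existing parking steps, unchanged ... */

	/* Mirror i915_gem_unpark() -> i915_globals_unpark(). */
	i915_globals_park();

	/* ... */
}

That way park and unpark of the globals bracket the GT park/unpark 
points directly.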

Regards,

Tvrtko

> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/Makefile                 |   1 +
>   drivers/gpu/drm/i915/i915_active.c            |   7 +-
>   drivers/gpu/drm/i915/i915_active.h            |   1 +
>   drivers/gpu/drm/i915/i915_drv.h               |   3 -
>   drivers/gpu/drm/i915/i915_gem.c               |  34 +-----
>   drivers/gpu/drm/i915/i915_globals.c           | 113 ++++++++++++++++++
>   drivers/gpu/drm/i915/i915_globals.h           |  15 +++
>   drivers/gpu/drm/i915/i915_pci.c               |   8 +-
>   drivers/gpu/drm/i915/i915_request.c           |  53 ++++++--
>   drivers/gpu/drm/i915/i915_request.h           |  10 ++
>   drivers/gpu/drm/i915/i915_scheduler.c         |  66 +++++++---
>   drivers/gpu/drm/i915/i915_scheduler.h         |  34 +++++-
>   drivers/gpu/drm/i915/intel_guc_submission.c   |   3 +-
>   drivers/gpu/drm/i915/intel_lrc.c              |   6 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.h       |  17 ---
>   drivers/gpu/drm/i915/selftests/intel_lrc.c    |   2 +-
>   drivers/gpu/drm/i915/selftests/mock_engine.c  |  45 ++++---
>   .../gpu/drm/i915/selftests/mock_gem_device.c  |  26 ----
>   drivers/gpu/drm/i915/selftests/mock_request.c |  12 +-
>   drivers/gpu/drm/i915/selftests/mock_request.h |   7 --
>   20 files changed, 313 insertions(+), 150 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/i915_globals.c
>   create mode 100644 drivers/gpu/drm/i915/i915_globals.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 1787e1299b1b..a1d834068765 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -77,6 +77,7 @@ i915-y += \
>   	  i915_gem_tiling.o \
>   	  i915_gem_userptr.o \
>   	  i915_gemfs.o \
> +	  i915_globals.o \
>   	  i915_query.o \
>   	  i915_request.o \
>   	  i915_scheduler.o \
> diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
> index db7bb5bd5add..d9f6471ac16c 100644
> --- a/drivers/gpu/drm/i915/i915_active.c
> +++ b/drivers/gpu/drm/i915/i915_active.c
> @@ -294,7 +294,12 @@ int __init i915_global_active_init(void)
>   	return 0;
>   }
>   
> -void __exit i915_global_active_exit(void)
> +void i915_global_active_shrink(void)
> +{
> +	kmem_cache_shrink(global.slab_cache);
> +}
> +
> +void i915_global_active_exit(void)
>   {
>   	kmem_cache_destroy(global.slab_cache);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
> index 12b5c1d287d1..5fbd9102384b 100644
> --- a/drivers/gpu/drm/i915/i915_active.h
> +++ b/drivers/gpu/drm/i915/i915_active.h
> @@ -420,6 +420,7 @@ static inline void i915_active_fini(struct i915_active *ref) { }
>   #endif
>   
>   int i915_global_active_init(void);
> +void i915_global_active_shrink(void);
>   void i915_global_active_exit(void);
>   
>   #endif /* _I915_ACTIVE_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cc09caf3870e..f16016b330b3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1473,9 +1473,6 @@ struct drm_i915_private {
>   	struct kmem_cache *objects;
>   	struct kmem_cache *vmas;
>   	struct kmem_cache *luts;
> -	struct kmem_cache *requests;
> -	struct kmem_cache *dependencies;
> -	struct kmem_cache *priorities;
>   
>   	const struct intel_device_info __info; /* Use INTEL_INFO() to access. */
>   	struct intel_runtime_info __runtime; /* Use RUNTIME_INFO() to access. */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 2b261524cfa4..713ed6fbdcc8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -42,6 +42,7 @@
>   #include "i915_drv.h"
>   #include "i915_gem_clflush.h"
>   #include "i915_gemfs.h"
> +#include "i915_globals.h"
>   #include "i915_reset.h"
>   #include "i915_trace.h"
>   #include "i915_vgpu.h"
> @@ -187,6 +188,8 @@ void i915_gem_unpark(struct drm_i915_private *i915)
>   	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
>   		i915->gt.epoch = 1;
>   
> +	i915_globals_unpark();
> +
>   	intel_enable_gt_powersave(i915);
>   	i915_update_gfx_val(i915);
>   	if (INTEL_GEN(i915) >= 6)
> @@ -2892,12 +2895,11 @@ static void shrink_caches(struct drm_i915_private *i915)
>   	 * filled slabs to prioritise allocating from the mostly full slabs,
>   	 * with the aim of reducing fragmentation.
>   	 */
> -	kmem_cache_shrink(i915->priorities);
> -	kmem_cache_shrink(i915->dependencies);
> -	kmem_cache_shrink(i915->requests);
>   	kmem_cache_shrink(i915->luts);
>   	kmem_cache_shrink(i915->vmas);
>   	kmem_cache_shrink(i915->objects);
> +
> +	i915_globals_park();
>   }
>   
>   struct sleep_rcu_work {
> @@ -5235,23 +5237,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
>   	if (!dev_priv->luts)
>   		goto err_vmas;
>   
> -	dev_priv->requests = KMEM_CACHE(i915_request,
> -					SLAB_HWCACHE_ALIGN |
> -					SLAB_RECLAIM_ACCOUNT |
> -					SLAB_TYPESAFE_BY_RCU);
> -	if (!dev_priv->requests)
> -		goto err_luts;
> -
> -	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
> -					    SLAB_HWCACHE_ALIGN |
> -					    SLAB_RECLAIM_ACCOUNT);
> -	if (!dev_priv->dependencies)
> -		goto err_requests;
> -
> -	dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
> -	if (!dev_priv->priorities)
> -		goto err_dependencies;
> -
>   	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
>   	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
>   
> @@ -5276,12 +5261,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
>   
>   	return 0;
>   
> -err_dependencies:
> -	kmem_cache_destroy(dev_priv->dependencies);
> -err_requests:
> -	kmem_cache_destroy(dev_priv->requests);
> -err_luts:
> -	kmem_cache_destroy(dev_priv->luts);
>   err_vmas:
>   	kmem_cache_destroy(dev_priv->vmas);
>   err_objects:
> @@ -5299,9 +5278,6 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
>   
>   	cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
>   
> -	kmem_cache_destroy(dev_priv->priorities);
> -	kmem_cache_destroy(dev_priv->dependencies);
> -	kmem_cache_destroy(dev_priv->requests);
>   	kmem_cache_destroy(dev_priv->luts);
>   	kmem_cache_destroy(dev_priv->vmas);
>   	kmem_cache_destroy(dev_priv->objects);
> diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
> new file mode 100644
> index 000000000000..7fd1b3945a04
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_globals.c
> @@ -0,0 +1,113 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#include <linux/slab.h>
> +#include <linux/workqueue.h>
> +
> +#include "i915_active.h"
> +#include "i915_globals.h"
> +#include "i915_request.h"
> +#include "i915_scheduler.h"
> +
> +int __init i915_globals_init(void)
> +{
> +	int err;
> +
> +	err = i915_global_active_init();
> +	if (err)
> +		return err;
> +
> +	err = i915_global_request_init();
> +	if (err)
> +		goto err_active;
> +
> +	err = i915_global_scheduler_init();
> +	if (err)
> +		goto err_request;
> +
> +	return 0;
> +
> +err_request:
> +	i915_global_request_exit();
> +err_active:
> +	i915_global_active_exit();
> +	return err;
> +}
> +
> +static void i915_globals_shrink(void)
> +{
> +	/*
> +	 * kmem_cache_shrink() discards empty slabs and reorders partially
> +	 * filled slabs to prioritise allocating from the mostly full slabs,
> +	 * with the aim of reducing fragmentation.
> +	 */
> +	i915_global_active_shrink();
> +	i915_global_request_shrink();
> +	i915_global_scheduler_shrink();
> +}
> +
> +static atomic_t active;
> +static atomic_t epoch;
> +struct park_work {
> +	struct rcu_work work;
> +	int epoch;
> +};
> +
> +static void __i915_globals_park(struct work_struct *work)
> +{
> +	struct park_work *wrk = container_of(work, typeof(*wrk), work.work);
> +
> +	/* Confirm nothing woke up in the last grace period */
> +	if (wrk->epoch == atomic_read(&epoch))
> +		i915_globals_shrink();
> +
> +	kfree(wrk);
> +}
> +
> +void i915_globals_park(void)
> +{
> +	struct park_work *wrk;
> +
> +	/*
> +	 * Defer shrinking the global slab caches (and other work) until
> +	 * after a RCU grace period has completed with no activity. This
> +	 * is to try and reduce the latency impact on the consumers caused
> +	 * by us shrinking the caches the same time as they are trying to
> +	 * allocate, with the assumption being that if we idle long enough
> +	 * for an RCU grace period to elapse since the last use, it is likely
> +	 * to be longer until we need the caches again.
> +	 */
> +	if (!atomic_dec_and_test(&active))
> +		return;
> +
> +	wrk = kmalloc(sizeof(*wrk), GFP_KERNEL);
> +	if (!wrk)
> +		return;
> +
> +	wrk->epoch = atomic_inc_return(&epoch);
> +	INIT_RCU_WORK(&wrk->work, __i915_globals_park);
> +	queue_rcu_work(system_wq, &wrk->work);
> +}
> +
> +void i915_globals_unpark(void)
> +{
> +	atomic_inc(&epoch);
> +	atomic_inc(&active);
> +}
> +
> +void __exit i915_globals_exit(void)
> +{
> +	/* Flush any residual park_work */
> +	rcu_barrier();
> +	flush_scheduled_work();
> +
> +	i915_global_scheduler_exit();
> +	i915_global_request_exit();
> +	i915_global_active_exit();
> +
> +	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
> +	rcu_barrier();
> +}
> diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h
> new file mode 100644
> index 000000000000..e468f0413a73
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_globals.h
> @@ -0,0 +1,15 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef _I915_GLOBALS_H_
> +#define _I915_GLOBALS_H_
> +
> +int i915_globals_init(void);
> +void i915_globals_park(void);
> +void i915_globals_unpark(void);
> +void i915_globals_exit(void);
> +
> +#endif /* _I915_GLOBALS_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index c4d6b8da9b03..a9211c370cd1 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -28,8 +28,8 @@
>   
>   #include <drm/drm_drv.h>
>   
> -#include "i915_active.h"
>   #include "i915_drv.h"
> +#include "i915_globals.h"
>   #include "i915_selftest.h"
>   
>   #define PLATFORM(x) .platform = (x), .platform_mask = BIT(x)
> @@ -802,7 +802,9 @@ static int __init i915_init(void)
>   	bool use_kms = true;
>   	int err;
>   
> -	i915_global_active_init();
> +	err = i915_globals_init();
> +	if (err)
> +		return err;
>   
>   	err = i915_mock_selftests();
>   	if (err)
> @@ -835,7 +837,7 @@ static void __exit i915_exit(void)
>   		return;
>   
>   	pci_unregister_driver(&i915_pci_driver);
> -	i915_global_active_exit();
> +	i915_globals_exit();
>   }
>   
>   module_init(i915_init);
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 00a1ea7cd907..c65f6c990fdd 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -32,6 +32,11 @@
>   #include "i915_active.h"
>   #include "i915_reset.h"
>   
> +static struct i915_global_request {
> +	struct kmem_cache *slab_requests;
> +	struct kmem_cache *slab_dependencies;
> +} global;
> +
>   static const char *i915_fence_get_driver_name(struct dma_fence *fence)
>   {
>   	return "i915";
> @@ -86,7 +91,7 @@ static void i915_fence_release(struct dma_fence *fence)
>   	 */
>   	i915_sw_fence_fini(&rq->submit);
>   
> -	kmem_cache_free(rq->i915->requests, rq);
> +	kmem_cache_free(global.slab_requests, rq);
>   }
>   
>   const struct dma_fence_ops i915_fence_ops = {
> @@ -292,7 +297,7 @@ static void i915_request_retire(struct i915_request *request)
>   
>   	unreserve_gt(request->i915);
>   
> -	i915_sched_node_fini(request->i915, &request->sched);
> +	i915_sched_node_fini(&request->sched);
>   	i915_request_put(request);
>   }
>   
> @@ -506,7 +511,7 @@ i915_request_alloc_slow(struct intel_context *ce)
>   	ring_retire_requests(ring);
>   
>   out:
> -	return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
> +	return kmem_cache_alloc(global.slab_requests, GFP_KERNEL);
>   }
>   
>   static int add_timeline_barrier(struct i915_request *rq)
> @@ -594,7 +599,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   	 *
>   	 * Do not use kmem_cache_zalloc() here!
>   	 */
> -	rq = kmem_cache_alloc(i915->requests,
> +	rq = kmem_cache_alloc(global.slab_requests,
>   			      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
>   	if (unlikely(!rq)) {
>   		rq = i915_request_alloc_slow(ce);
> @@ -681,7 +686,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
>   	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
>   
> -	kmem_cache_free(i915->requests, rq);
> +	kmem_cache_free(global.slab_requests, rq);
>   err_unreserve:
>   	unreserve_gt(i915);
>   	intel_context_unpin(ce);
> @@ -700,9 +705,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
>   		return 0;
>   
>   	if (to->engine->schedule) {
> -		ret = i915_sched_node_add_dependency(to->i915,
> -						     &to->sched,
> -						     &from->sched);
> +		ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
>   		if (ret < 0)
>   			return ret;
>   	}
> @@ -1190,3 +1193,37 @@ void i915_retire_requests(struct drm_i915_private *i915)
>   #include "selftests/mock_request.c"
>   #include "selftests/i915_request.c"
>   #endif
> +
> +int __init i915_global_request_init(void)
> +{
> +	global.slab_requests = KMEM_CACHE(i915_request,
> +					  SLAB_HWCACHE_ALIGN |
> +					  SLAB_RECLAIM_ACCOUNT |
> +					  SLAB_TYPESAFE_BY_RCU);
> +	if (!global.slab_requests)
> +		return -ENOMEM;
> +
> +	global.slab_dependencies = KMEM_CACHE(i915_dependency,
> +					      SLAB_HWCACHE_ALIGN |
> +					      SLAB_RECLAIM_ACCOUNT);
> +	if (!global.slab_dependencies)
> +		goto err_requests;
> +
> +	return 0;
> +
> +err_requests:
> +	kmem_cache_destroy(global.slab_requests);
> +	return -ENOMEM;
> +}
> +
> +void i915_global_request_shrink(void)
> +{
> +	kmem_cache_shrink(global.slab_dependencies);
> +	kmem_cache_shrink(global.slab_requests);
> +}
> +
> +void i915_global_request_exit(void)
> +{
> +	kmem_cache_destroy(global.slab_dependencies);
> +	kmem_cache_destroy(global.slab_requests);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 1e127c1c53fa..be3ded6bcf56 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -29,6 +29,7 @@
>   
>   #include "i915_gem.h"
>   #include "i915_scheduler.h"
> +#include "i915_selftest.h"
>   #include "i915_sw_fence.h"
>   
>   #include <uapi/drm/i915_drm.h>
> @@ -196,6 +197,11 @@ struct i915_request {
>   	struct drm_i915_file_private *file_priv;
>   	/** file_priv list entry for this request */
>   	struct list_head client_link;
> +
> +	I915_SELFTEST_DECLARE(struct {
> +		struct list_head link;
> +		unsigned long delay;
> +	} mock;)
>   };
>   
>   #define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
> @@ -371,4 +377,8 @@ static inline void i915_request_mark_complete(struct i915_request *rq)
>   
>   void i915_retire_requests(struct drm_i915_private *i915);
>   
> +int i915_global_request_init(void);
> +void i915_global_request_shrink(void);
> +void i915_global_request_exit(void);
> +
>   #endif /* I915_REQUEST_H */
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 9fb96ff57a29..50018ad30233 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -10,6 +10,11 @@
>   #include "i915_request.h"
>   #include "i915_scheduler.h"
>   
> +static struct i915_global_scheduler {
> +	struct kmem_cache *slab_dependencies;
> +	struct kmem_cache *slab_priorities;
> +} global;
> +
>   static DEFINE_SPINLOCK(schedule_lock);
>   
>   static const struct i915_request *
> @@ -37,16 +42,15 @@ void i915_sched_node_init(struct i915_sched_node *node)
>   }
>   
>   static struct i915_dependency *
> -i915_dependency_alloc(struct drm_i915_private *i915)
> +i915_dependency_alloc(void)
>   {
> -	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
> +	return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
>   }
>   
>   static void
> -i915_dependency_free(struct drm_i915_private *i915,
> -		     struct i915_dependency *dep)
> +i915_dependency_free(struct i915_dependency *dep)
>   {
> -	kmem_cache_free(i915->dependencies, dep);
> +	kmem_cache_free(global.slab_dependencies, dep);
>   }
>   
>   bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
> @@ -73,25 +77,23 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
>   	return ret;
>   }
>   
> -int i915_sched_node_add_dependency(struct drm_i915_private *i915,
> -				   struct i915_sched_node *node,
> +int i915_sched_node_add_dependency(struct i915_sched_node *node,
>   				   struct i915_sched_node *signal)
>   {
>   	struct i915_dependency *dep;
>   
> -	dep = i915_dependency_alloc(i915);
> +	dep = i915_dependency_alloc();
>   	if (!dep)
>   		return -ENOMEM;
>   
>   	if (!__i915_sched_node_add_dependency(node, signal, dep,
>   					      I915_DEPENDENCY_ALLOC))
> -		i915_dependency_free(i915, dep);
> +		i915_dependency_free(dep);
>   
>   	return 0;
>   }
>   
> -void i915_sched_node_fini(struct drm_i915_private *i915,
> -			  struct i915_sched_node *node)
> +void i915_sched_node_fini(struct i915_sched_node *node)
>   {
>   	struct i915_dependency *dep, *tmp;
>   
> @@ -111,7 +113,7 @@ void i915_sched_node_fini(struct drm_i915_private *i915,
>   
>   		list_del(&dep->wait_link);
>   		if (dep->flags & I915_DEPENDENCY_ALLOC)
> -			i915_dependency_free(i915, dep);
> +			i915_dependency_free(dep);
>   	}
>   
>   	/* Remove ourselves from everyone who depends upon us */
> @@ -121,7 +123,7 @@ void i915_sched_node_fini(struct drm_i915_private *i915,
>   
>   		list_del(&dep->signal_link);
>   		if (dep->flags & I915_DEPENDENCY_ALLOC)
> -			i915_dependency_free(i915, dep);
> +			i915_dependency_free(dep);
>   	}
>   
>   	spin_unlock(&schedule_lock);
> @@ -198,7 +200,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
>   	if (prio == I915_PRIORITY_NORMAL) {
>   		p = &execlists->default_priolist;
>   	} else {
> -		p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC);
> +		p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
>   		/* Convert an allocation failure to a priority bump */
>   		if (unlikely(!p)) {
>   			prio = I915_PRIORITY_NORMAL; /* recurses just once */
> @@ -423,3 +425,39 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
>   
>   	spin_unlock_bh(&schedule_lock);
>   }
> +
> +void __i915_priolist_free(struct i915_priolist *p)
> +{
> +	kmem_cache_free(global.slab_priorities, p);
> +}
> +
> +int __init i915_global_scheduler_init(void)
> +{
> +	global.slab_dependencies = KMEM_CACHE(i915_dependency,
> +					      SLAB_HWCACHE_ALIGN);
> +	if (!global.slab_dependencies)
> +		return -ENOMEM;
> +
> +	global.slab_priorities = KMEM_CACHE(i915_priolist,
> +					    SLAB_HWCACHE_ALIGN);
> +	if (!global.slab_priorities)
> +		goto err_priorities;
> +
> +	return 0;
> +
> +err_priorities:
> +	kmem_cache_destroy(global.slab_dependencies);
> +	return -ENOMEM;
> +}
> +
> +void i915_global_scheduler_shrink(void)
> +{
> +	kmem_cache_shrink(global.slab_dependencies);
> +	kmem_cache_shrink(global.slab_priorities);
> +}
> +
> +void i915_global_scheduler_exit(void)
> +{
> +	kmem_cache_destroy(global.slab_dependencies);
> +	kmem_cache_destroy(global.slab_priorities);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 54bd6c89817e..5196ce07b6c2 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -85,6 +85,23 @@ struct i915_dependency {
>   #define I915_DEPENDENCY_ALLOC BIT(0)
>   };
>   
> +struct i915_priolist {
> +	struct list_head requests[I915_PRIORITY_COUNT];
> +	struct rb_node node;
> +	unsigned long used;
> +	int priority;
> +};
> +
> +#define priolist_for_each_request(it, plist, idx) \
> +	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
> +		list_for_each_entry(it, &(plist)->requests[idx], sched.link)
> +
> +#define priolist_for_each_request_consume(it, n, plist, idx) \
> +	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
> +		list_for_each_entry_safe(it, n, \
> +					 &(plist)->requests[idx - 1], \
> +					 sched.link)
> +
>   void i915_sched_node_init(struct i915_sched_node *node);
>   
>   bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
> @@ -92,12 +109,10 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
>   				      struct i915_dependency *dep,
>   				      unsigned long flags);
>   
> -int i915_sched_node_add_dependency(struct drm_i915_private *i915,
> -				   struct i915_sched_node *node,
> +int i915_sched_node_add_dependency(struct i915_sched_node *node,
>   				   struct i915_sched_node *signal);
>   
> -void i915_sched_node_fini(struct drm_i915_private *i915,
> -			  struct i915_sched_node *node);
> +void i915_sched_node_fini(struct i915_sched_node *node);
>   
>   void i915_schedule(struct i915_request *request,
>   		   const struct i915_sched_attr *attr);
> @@ -107,4 +122,15 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump);
>   struct list_head *
>   i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio);
>   
> +void __i915_priolist_free(struct i915_priolist *p);
> +static inline void i915_priolist_free(struct i915_priolist *p)
> +{
> +	if (p->priority != I915_PRIORITY_NORMAL)
> +		__i915_priolist_free(p);
> +}
> +
> +int i915_global_scheduler_init(void);
> +void i915_global_scheduler_shrink(void);
> +void i915_global_scheduler_exit(void);
> +
>   #endif /* _I915_SCHEDULER_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index a2846ea1e62c..56ba2fcbabe6 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -781,8 +781,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   		}
>   
>   		rb_erase_cached(&p->node, &execlists->queue);
> -		if (p->priority != I915_PRIORITY_NORMAL)
> -			kmem_cache_free(engine->i915->priorities, p);
> +		i915_priolist_free(p);
>   	}
>   done:
>   	execlists->queue_priority_hint =
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index dba19baf6808..29b2a2f34edb 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -818,8 +818,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		}
>   
>   		rb_erase_cached(&p->node, &execlists->queue);
> -		if (p->priority != I915_PRIORITY_NORMAL)
> -			kmem_cache_free(engine->i915->priorities, p);
> +		i915_priolist_free(p);
>   	}
>   
>   done:
> @@ -972,8 +971,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
>   		}
>   
>   		rb_erase_cached(&p->node, &execlists->queue);
> -		if (p->priority != I915_PRIORITY_NORMAL)
> -			kmem_cache_free(engine->i915->priorities, p);
> +		i915_priolist_free(p);
>   	}
>   
>   	/* Remaining _unready_ requests will be nop'ed when submitted */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index de8dba7565b0..5284f243931a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -187,23 +187,6 @@ enum intel_engine_id {
>   #define _VECS(n) (VECS + (n))
>   };
>   
> -struct i915_priolist {
> -	struct list_head requests[I915_PRIORITY_COUNT];
> -	struct rb_node node;
> -	unsigned long used;
> -	int priority;
> -};
> -
> -#define priolist_for_each_request(it, plist, idx) \
> -	for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
> -		list_for_each_entry(it, &(plist)->requests[idx], sched.link)
> -
> -#define priolist_for_each_request_consume(it, n, plist, idx) \
> -	for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
> -		list_for_each_entry_safe(it, n, \
> -					 &(plist)->requests[idx - 1], \
> -					 sched.link)
> -
>   struct st_preempt_hang {
>   	struct completion completion;
>   	unsigned int count;
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 7172f6c7f25a..2d582a21eba9 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -441,7 +441,7 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
>   static void dummy_request_free(struct i915_request *dummy)
>   {
>   	i915_request_mark_complete(dummy);
> -	i915_sched_node_fini(dummy->engine->i915, &dummy->sched);
> +	i915_sched_node_fini(&dummy->sched);
>   	i915_sw_fence_fini(&dummy->submit);
>   
>   	dma_fence_free(&dummy->fence);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index 6f3fb803c747..ec1ae948954c 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -76,26 +76,26 @@ static void mock_ring_free(struct intel_ring *base)
>   	kfree(ring);
>   }
>   
> -static struct mock_request *first_request(struct mock_engine *engine)
> +static struct i915_request *first_request(struct mock_engine *engine)
>   {
>   	return list_first_entry_or_null(&engine->hw_queue,
> -					struct mock_request,
> -					link);
> +					struct i915_request,
> +					mock.link);
>   }
>   
> -static void advance(struct mock_request *request)
> +static void advance(struct i915_request *request)
>   {
> -	list_del_init(&request->link);
> -	i915_request_mark_complete(&request->base);
> -	GEM_BUG_ON(!i915_request_completed(&request->base));
> +	list_del_init(&request->mock.link);
> +	i915_request_mark_complete(request);
> +	GEM_BUG_ON(!i915_request_completed(request));
>   
> -	intel_engine_queue_breadcrumbs(request->base.engine);
> +	intel_engine_queue_breadcrumbs(request->engine);
>   }
>   
>   static void hw_delay_complete(struct timer_list *t)
>   {
>   	struct mock_engine *engine = from_timer(engine, t, hw_delay);
> -	struct mock_request *request;
> +	struct i915_request *request;
>   	unsigned long flags;
>   
>   	spin_lock_irqsave(&engine->hw_lock, flags);
> @@ -110,8 +110,9 @@ static void hw_delay_complete(struct timer_list *t)
>   	 * requeue the timer for the next delayed request.
>   	 */
>   	while ((request = first_request(engine))) {
> -		if (request->delay) {
> -			mod_timer(&engine->hw_delay, jiffies + request->delay);
> +		if (request->mock.delay) {
> +			mod_timer(&engine->hw_delay,
> +				  jiffies + request->mock.delay);
>   			break;
>   		}
>   
> @@ -169,10 +170,8 @@ mock_context_pin(struct intel_engine_cs *engine,
>   
>   static int mock_request_alloc(struct i915_request *request)
>   {
> -	struct mock_request *mock = container_of(request, typeof(*mock), base);
> -
> -	INIT_LIST_HEAD(&mock->link);
> -	mock->delay = 0;
> +	INIT_LIST_HEAD(&request->mock.link);
> +	request->mock.delay = 0;
>   
>   	return 0;
>   }
> @@ -190,7 +189,6 @@ static u32 *mock_emit_breadcrumb(struct i915_request *request, u32 *cs)
>   
>   static void mock_submit_request(struct i915_request *request)
>   {
> -	struct mock_request *mock = container_of(request, typeof(*mock), base);
>   	struct mock_engine *engine =
>   		container_of(request->engine, typeof(*engine), base);
>   	unsigned long flags;
> @@ -198,12 +196,13 @@ static void mock_submit_request(struct i915_request *request)
>   	i915_request_submit(request);
>   
>   	spin_lock_irqsave(&engine->hw_lock, flags);
> -	list_add_tail(&mock->link, &engine->hw_queue);
> -	if (mock->link.prev == &engine->hw_queue) {
> -		if (mock->delay)
> -			mod_timer(&engine->hw_delay, jiffies + mock->delay);
> +	list_add_tail(&request->mock.link, &engine->hw_queue);
> +	if (list_is_first(&request->mock.link, &engine->hw_queue)) {
> +		if (request->mock.delay)
> +			mod_timer(&engine->hw_delay,
> +				  jiffies + request->mock.delay);
>   		else
> -			advance(mock);
> +			advance(request);
>   	}
>   	spin_unlock_irqrestore(&engine->hw_lock, flags);
>   }
> @@ -263,12 +262,12 @@ void mock_engine_flush(struct intel_engine_cs *engine)
>   {
>   	struct mock_engine *mock =
>   		container_of(engine, typeof(*mock), base);
> -	struct mock_request *request, *rn;
> +	struct i915_request *request, *rn;
>   
>   	del_timer_sync(&mock->hw_delay);
>   
>   	spin_lock_irq(&mock->hw_lock);
> -	list_for_each_entry_safe(request, rn, &mock->hw_queue, link)
> +	list_for_each_entry_safe(request, rn, &mock->hw_queue, mock.link)
>   		advance(request);
>   	spin_unlock_irq(&mock->hw_lock);
>   }
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index fc516a2970f4..5a98caba6d69 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -79,9 +79,6 @@ static void mock_device_release(struct drm_device *dev)
>   
>   	destroy_workqueue(i915->wq);
>   
> -	kmem_cache_destroy(i915->priorities);
> -	kmem_cache_destroy(i915->dependencies);
> -	kmem_cache_destroy(i915->requests);
>   	kmem_cache_destroy(i915->vmas);
>   	kmem_cache_destroy(i915->objects);
>   
> @@ -211,23 +208,6 @@ struct drm_i915_private *mock_gem_device(void)
>   	if (!i915->vmas)
>   		goto err_objects;
>   
> -	i915->requests = KMEM_CACHE(mock_request,
> -				    SLAB_HWCACHE_ALIGN |
> -				    SLAB_RECLAIM_ACCOUNT |
> -				    SLAB_TYPESAFE_BY_RCU);
> -	if (!i915->requests)
> -		goto err_vmas;
> -
> -	i915->dependencies = KMEM_CACHE(i915_dependency,
> -					SLAB_HWCACHE_ALIGN |
> -					SLAB_RECLAIM_ACCOUNT);
> -	if (!i915->dependencies)
> -		goto err_requests;
> -
> -	i915->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
> -	if (!i915->priorities)
> -		goto err_dependencies;
> -
>   	i915_timelines_init(i915);
>   
>   	INIT_LIST_HEAD(&i915->gt.active_rings);
> @@ -257,12 +237,6 @@ struct drm_i915_private *mock_gem_device(void)
>   err_unlock:
>   	mutex_unlock(&i915->drm.struct_mutex);
>   	i915_timelines_fini(i915);
> -	kmem_cache_destroy(i915->priorities);
> -err_dependencies:
> -	kmem_cache_destroy(i915->dependencies);
> -err_requests:
> -	kmem_cache_destroy(i915->requests);
> -err_vmas:
>   	kmem_cache_destroy(i915->vmas);
>   err_objects:
>   	kmem_cache_destroy(i915->objects);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
> index 0dc29e242597..d1a7c9608712 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_request.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_request.c
> @@ -31,29 +31,25 @@ mock_request(struct intel_engine_cs *engine,
>   	     unsigned long delay)
>   {
>   	struct i915_request *request;
> -	struct mock_request *mock;
>   
>   	/* NB the i915->requests slab cache is enlarged to fit mock_request */
>   	request = i915_request_alloc(engine, context);
>   	if (IS_ERR(request))
>   		return NULL;
>   
> -	mock = container_of(request, typeof(*mock), base);
> -	mock->delay = delay;
> -
> -	return &mock->base;
> +	request->mock.delay = delay;
> +	return request;
>   }
>   
>   bool mock_cancel_request(struct i915_request *request)
>   {
> -	struct mock_request *mock = container_of(request, typeof(*mock), base);
>   	struct mock_engine *engine =
>   		container_of(request->engine, typeof(*engine), base);
>   	bool was_queued;
>   
>   	spin_lock_irq(&engine->hw_lock);
> -	was_queued = !list_empty(&mock->link);
> -	list_del_init(&mock->link);
> +	was_queued = !list_empty(&request->mock.link);
> +	list_del_init(&request->mock.link);
>   	spin_unlock_irq(&engine->hw_lock);
>   
>   	if (was_queued)
> diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h
> index 995fb728380c..4acf0211df20 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_request.h
> +++ b/drivers/gpu/drm/i915/selftests/mock_request.h
> @@ -29,13 +29,6 @@
>   
>   #include "../i915_request.h"
>   
> -struct mock_request {
> -	struct i915_request base;
> -
> -	struct list_head link;
> -	unsigned long delay;
> -};
> -
>   struct i915_request *
>   mock_request(struct intel_engine_cs *engine,
>   	     struct i915_gem_context *context,
> 