[Intel-gfx] [PATCH 04/22] drm/i915: Guard unpark/park with the gt.active_mutex
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Apr 1 15:22:55 UTC 2019
On 25/03/2019 09:03, Chris Wilson wrote:
> If we introduce a new mutex for the exclusive use of GEM's runtime power
> management, we can remove its requirement of holding the struct_mutex.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 9 +--
> drivers/gpu/drm/i915/i915_drv.h | 3 +-
> drivers/gpu/drm/i915/i915_gem.c | 2 +-
> drivers/gpu/drm/i915/i915_gem.h | 3 -
> drivers/gpu/drm/i915/i915_gem_evict.c | 2 +-
> drivers/gpu/drm/i915/i915_gem_pm.c | 70 ++++++++++++-------
> drivers/gpu/drm/i915/i915_request.c | 24 ++-----
> .../gpu/drm/i915/selftests/i915_gem_context.c | 4 +-
> .../gpu/drm/i915/selftests/i915_gem_object.c | 13 ++--
> .../gpu/drm/i915/selftests/mock_gem_device.c | 3 +-
> 10 files changed, 68 insertions(+), 65 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 47bf07a59b5e..98ff1a14ccf3 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2034,7 +2034,8 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>
> seq_printf(m, "RPS enabled? %d\n", rps->enabled);
> seq_printf(m, "GPU busy? %s [%d requests]\n",
> - yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
> + yesno(dev_priv->gt.awake),
> + atomic_read(&dev_priv->gt.active_requests));
> seq_printf(m, "Boosts outstanding? %d\n",
> atomic_read(&rps->num_waiters));
> seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
> @@ -2055,7 +2056,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
>
> if (INTEL_GEN(dev_priv) >= 6 &&
> rps->enabled &&
> - dev_priv->gt.active_requests) {
> + atomic_read(&dev_priv->gt.active_requests)) {
> u32 rpup, rpupei;
> u32 rpdown, rpdownei;
>
> @@ -3087,7 +3088,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>
> seq_printf(m, "GT awake? %s\n", yesno(dev_priv->gt.awake));
> seq_printf(m, "Global active requests: %d\n",
> - dev_priv->gt.active_requests);
> + atomic_read(&dev_priv->gt.active_requests));
> seq_printf(m, "CS timestamp frequency: %u kHz\n",
> RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
>
> @@ -3933,7 +3934,7 @@ i915_drop_caches_set(void *data, u64 val)
>
> if (val & DROP_IDLE) {
> do {
> - if (READ_ONCE(i915->gt.active_requests))
> + if (atomic_read(&i915->gt.active_requests))
> flush_delayed_work(&i915->gt.retire_work);
> drain_delayed_work(&i915->gt.idle_work);
> } while (READ_ONCE(i915->gt.awake));
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 11803d485275..7c7afe99986c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2008,7 +2008,8 @@ struct drm_i915_private {
> intel_engine_mask_t active_engines;
> struct list_head active_rings;
> struct list_head closed_vma;
> - u32 active_requests;
> + atomic_t active_requests;
> + struct mutex active_mutex;
My initial reaction is why not gem_pm_mutex to match where code was
moved and what the commit message says?
>
> /**
> * Is the GPU currently considered idle, or busy executing
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 47c672432594..79919e0cf03d 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2914,7 +2914,7 @@ wait_for_timelines(struct drm_i915_private *i915,
> struct i915_gt_timelines *gt = &i915->gt.timelines;
> struct i915_timeline *tl;
>
> - if (!READ_ONCE(i915->gt.active_requests))
> + if (!atomic_read(&i915->gt.active_requests))
> return timeout;
>
> mutex_lock(>->mutex);
> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
> index 5c073fe73664..bd13198a9058 100644
> --- a/drivers/gpu/drm/i915/i915_gem.h
> +++ b/drivers/gpu/drm/i915/i915_gem.h
> @@ -77,9 +77,6 @@ struct drm_i915_private;
>
> #define I915_GEM_IDLE_TIMEOUT (HZ / 5)
>
> -void i915_gem_park(struct drm_i915_private *i915);
> -void i915_gem_unpark(struct drm_i915_private *i915);
> -
> static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
> {
> if (!atomic_fetch_inc(&t->count))
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 060f5903544a..20e835a7f116 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -38,7 +38,7 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
>
> static bool ggtt_is_idle(struct drm_i915_private *i915)
> {
> - return !i915->gt.active_requests;
> + return !atomic_read(&i915->gt.active_requests);
> }
>
> static int ggtt_flush(struct drm_i915_private *i915)
> diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c
> index faa4eb42ec0a..6ecd9f8ac87d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/i915_gem_pm.c
> @@ -14,8 +14,8 @@ static void __i915_gem_park(struct drm_i915_private *i915)
>
> GEM_TRACE("\n");
>
> - lockdep_assert_held(&i915->drm.struct_mutex);
> - GEM_BUG_ON(i915->gt.active_requests);
> + lockdep_assert_held(&i915->gt.active_mutex);
> + GEM_BUG_ON(atomic_read(&i915->gt.active_requests));
> GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
>
> if (!i915->gt.awake)
> @@ -94,12 +94,13 @@ static void idle_work_handler(struct work_struct *work)
> if (!READ_ONCE(i915->gt.awake))
> return;
>
> - if (READ_ONCE(i915->gt.active_requests))
> + if (atomic_read(&i915->gt.active_requests))
> return;
>
> rearm_hangcheck =
> cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
>
> + /* XXX need to support lockless kernel_context before removing! */
> if (!mutex_trylock(&i915->drm.struct_mutex)) {
> /* Currently busy, come back later */
> mod_delayed_work(i915->wq,
> @@ -114,18 +115,19 @@ static void idle_work_handler(struct work_struct *work)
> * while we are idle (such as the GPU being power cycled), no users
> * will be harmed.
> */
> + mutex_lock(&i915->gt.active_mutex);
> if (!work_pending(&i915->gt.idle_work.work) &&
> - !i915->gt.active_requests) {
> - ++i915->gt.active_requests; /* don't requeue idle */
> + !atomic_read(&i915->gt.active_requests)) {
> + atomic_inc(&i915->gt.active_requests); /* don't requeue idle */
atomic_inc_not_zero?
>
> switch_to_kernel_context_sync(i915, i915->gt.active_engines);
>
> - if (!--i915->gt.active_requests) {
> + if (atomic_dec_and_test(&i915->gt.active_requests)) {
> __i915_gem_park(i915);
> rearm_hangcheck = false;
> }
> }
> -
> + mutex_unlock(&i915->gt.active_mutex);
> mutex_unlock(&i915->drm.struct_mutex);
>
> out_rearm:
> @@ -137,26 +139,16 @@ static void idle_work_handler(struct work_struct *work)
>
> void i915_gem_park(struct drm_i915_private *i915)
> {
> - GEM_TRACE("\n");
> -
> - lockdep_assert_held(&i915->drm.struct_mutex);
> - GEM_BUG_ON(i915->gt.active_requests);
> -
> - if (!i915->gt.awake)
> - return;
> -
> /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
> - mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
> + GEM_BUG_ON(!atomic_read(&i915->gt.active_requests));
> + if (atomic_dec_and_test(&i915->gt.active_requests))
> + mod_delayed_work(i915->wq,
> + &i915->gt.idle_work,
> + msecs_to_jiffies(100));
> }
>
> -void i915_gem_unpark(struct drm_i915_private *i915)
> +static void __i915_gem_unpark(struct drm_i915_private *i915)
> {
> - GEM_TRACE("\n");
> -
> - lockdep_assert_held(&i915->drm.struct_mutex);
> - GEM_BUG_ON(!i915->gt.active_requests);
> - assert_rpm_wakelock_held(i915);
> -
> if (i915->gt.awake)
> return;
>
> @@ -191,11 +183,29 @@ void i915_gem_unpark(struct drm_i915_private *i915)
> round_jiffies_up_relative(HZ));
> }
>
> +void i915_gem_unpark(struct drm_i915_private *i915)
> +{
> + if (atomic_add_unless(&i915->gt.active_requests, 1, 0))
> + return;
This looks wrong - how can it be okay to maybe not increment
active_requests on unpark? What am I missing?
I would expect here you would need
"atomic_inc_and_return_true_if_OLD_value_was_zero" but I don't think
there is such API.
> +
> + mutex_lock(&i915->gt.active_mutex);
> + if (!atomic_read(&i915->gt.active_requests)) {
> + GEM_TRACE("\n");
> + __i915_gem_unpark(i915);
> + smp_mb__before_atomic();
Why is this needed? I have no idea.. but I think we want comments with
all memory barriers.
> + }
> + atomic_inc(&i915->gt.active_requests);
> + mutex_unlock(&i915->gt.active_mutex);
> +}
> +
> bool i915_gem_load_power_context(struct drm_i915_private *i915)
> {
> + mutex_lock(&i915->gt.active_mutex);
> + atomic_inc(&i915->gt.active_requests);
Why this function has to manually manage active_requests? Can it be
written in a simpler way?
> +
> /* Force loading the kernel context on all engines */
> if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
> - return false;
> + goto err_active;
>
> /*
> * Immediately park the GPU so that we enable powersaving and
> @@ -203,9 +213,20 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915)
> * unpark and start using the engine->pinned_default_state, otherwise
> * it is in limbo and an early reset may fail.
> */
> +
> + if (!atomic_dec_and_test(&i915->gt.active_requests))
> + goto err_unlock;
> +
> __i915_gem_park(i915);
> + mutex_unlock(&i915->gt.active_mutex);
>
> return true;
> +
> +err_active:
> + atomic_dec(&i915->gt.active_requests);
> +err_unlock:
> + mutex_unlock(&i915->gt.active_mutex);
> + return false;
> }
>
> void i915_gem_suspend(struct drm_i915_private *i915)
> @@ -337,5 +358,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
>
> void i915_gem_init__pm(struct drm_i915_private *i915)
> {
> + mutex_init(&i915->gt.active_mutex);
> INIT_DELAYED_WORK(&i915->gt.idle_work, idle_work_handler);
> }
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index e9c2094ab8ea..8d396f3c747d 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -31,6 +31,7 @@
>
> #include "i915_drv.h"
> #include "i915_active.h"
> +#include "i915_gem_pm.h" /* XXX layering violation! */
> #include "i915_globals.h"
> #include "i915_reset.h"
>
> @@ -130,19 +131,6 @@ i915_request_remove_from_client(struct i915_request *request)
> spin_unlock(&file_priv->mm.lock);
> }
>
> -static void reserve_gt(struct drm_i915_private *i915)
> -{
> - if (!i915->gt.active_requests++)
> - i915_gem_unpark(i915);
> -}
> -
> -static void unreserve_gt(struct drm_i915_private *i915)
> -{
> - GEM_BUG_ON(!i915->gt.active_requests);
> - if (!--i915->gt.active_requests)
> - i915_gem_park(i915);
> -}
> -
> static void advance_ring(struct i915_request *request)
> {
> struct intel_ring *ring = request->ring;
> @@ -304,7 +292,7 @@ static void i915_request_retire(struct i915_request *request)
>
> __retire_engine_upto(request->engine, request);
>
> - unreserve_gt(request->i915);
> + i915_gem_park(request->i915);
>
> i915_sched_node_fini(&request->sched);
> i915_request_put(request);
> @@ -607,8 +595,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> u32 seqno;
> int ret;
>
> - lockdep_assert_held(&i915->drm.struct_mutex);
> -
> /*
> * Preempt contexts are reserved for exclusive use to inject a
> * preemption context switch. They are never to be used for any trivial
> @@ -633,7 +619,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> if (IS_ERR(ce))
> return ERR_CAST(ce);
>
> - reserve_gt(i915);
> + i915_gem_unpark(i915);
> mutex_lock(&ce->ring->timeline->mutex);
>
> /* Move our oldest request to the slab-cache (if not in use!) */
> @@ -766,7 +752,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> kmem_cache_free(global.slab_requests, rq);
> err_unreserve:
> mutex_unlock(&ce->ring->timeline->mutex);
> - unreserve_gt(i915);
> + i915_gem_park(i915);
> intel_context_unpin(ce);
> return ERR_PTR(ret);
> }
> @@ -1356,7 +1342,7 @@ void i915_retire_requests(struct drm_i915_private *i915)
>
> lockdep_assert_held(&i915->drm.struct_mutex);
>
> - if (!i915->gt.active_requests)
> + if (!atomic_read(&i915->gt.active_requests))
> return;
>
> list_for_each_entry_safe(ring, tmp,
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 45f73b8b4e6d..6ce366091e0b 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -1646,9 +1646,9 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
> return err;
> }
>
> - if (i915->gt.active_requests) {
> + if (atomic_read(&i915->gt.active_requests)) {
> pr_err("%d active requests remain after switching to kernel context, pass %d (%s) on %s engine%s\n",
> - i915->gt.active_requests,
> + atomic_read(&i915->gt.active_requests),
> pass, from_idle ? "idle" : "busy",
> __engine_name(i915, engines),
> is_power_of_2(engines) ? "" : "s");
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> index 971148fbe6f5..c2b08fdf23cf 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
> @@ -506,12 +506,7 @@ static void disable_retire_worker(struct drm_i915_private *i915)
> i915_gem_shrinker_unregister(i915);
>
> mutex_lock(&i915->drm.struct_mutex);
> - if (!i915->gt.active_requests++) {
> - intel_wakeref_t wakeref;
> -
> - with_intel_runtime_pm(i915, wakeref)
> - i915_gem_unpark(i915);
> - }
> + i915_gem_unpark(i915);
> mutex_unlock(&i915->drm.struct_mutex);
>
> cancel_delayed_work_sync(&i915->gt.retire_work);
> @@ -616,10 +611,10 @@ static int igt_mmap_offset_exhaustion(void *arg)
> drm_mm_remove_node(&resv);
> out_park:
> mutex_lock(&i915->drm.struct_mutex);
> - if (--i915->gt.active_requests)
> - queue_delayed_work(i915->wq, &i915->gt.retire_work, 0);
> - else
> + if (atomic_dec_and_test(&i915->gt.active_requests))
> queue_delayed_work(i915->wq, &i915->gt.idle_work, 0);
> + else
> + queue_delayed_work(i915->wq, &i915->gt.retire_work, 0);
> mutex_unlock(&i915->drm.struct_mutex);
> i915_gem_shrinker_register(i915);
> return err;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 60bbf8b4df40..7afc5ee8dda5 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -44,7 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915)
> mock_engine_flush(engine);
>
> i915_retire_requests(i915);
> - GEM_BUG_ON(i915->gt.active_requests);
> + GEM_BUG_ON(atomic_read(&i915->gt.active_requests));
> }
>
> static void mock_device_release(struct drm_device *dev)
> @@ -203,6 +203,7 @@ struct drm_i915_private *mock_gem_device(void)
>
> i915_timelines_init(i915);
>
> + mutex_init(&i915->gt.active_mutex);
> INIT_LIST_HEAD(&i915->gt.active_rings);
> INIT_LIST_HEAD(&i915->gt.closed_vma);
>
>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list