On Thu, Dec 23, 2021 at 10:01:42AM +0000, Tvrtko Ursulin wrote:
On 22/12/2021 23:29, Matthew Brost wrote:
Use a lockless list structure for destroyed contexts to avoid hammering on global submission spin lock.
Thanks for looking into it quickly!
On the topic of "lockless" yes I agree the llist in principle is not a concern. That part looks fine to me. On the actual "integration" (how it slots in) with the GuC code I leave one comment below.
Agree.
Suggested-by: Tvrtko Ursulin tvrtko.ursulin@intel.com Signed-off-by: Matthew Brost matthew.brost@intel.com
drivers/gpu/drm/i915/gt/intel_context.c | 2 - drivers/gpu/drm/i915/gt/intel_context_types.h | 3 +- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 3 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 44 +++++-------------- 4 files changed, 16 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 5d0ec7c49b6a..4aacb4b0418d 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -403,8 +403,6 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ce->guc_id.id = GUC_INVALID_LRC_ID; INIT_LIST_HEAD(&ce->guc_id.link);
- INIT_LIST_HEAD(&ce->destroyed_link);
- INIT_LIST_HEAD(&ce->parallel.child_list); /*
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 30cd81ad8911..4532d43ec9c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -9,6 +9,7 @@ #include <linux/average.h> #include <linux/kref.h> #include <linux/list.h> +#include <linux/llist.h> #include <linux/mutex.h> #include <linux/types.h> @@ -224,7 +225,7 @@ struct intel_context { * list when context is pending to be destroyed (deregistered with the * GuC), protected by guc->submission_state.lock */
- struct list_head destroyed_link;
- struct llist_node destroyed_link; /** @parallel: sub-structure for parallel submission members */ struct {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index f9240d4baa69..705085058411 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -8,6 +8,7 @@ #include <linux/xarray.h> #include <linux/delay.h> +#include <linux/llist.h> #include "intel_uncore.h" #include "intel_guc_fw.h" @@ -112,7 +113,7 @@ struct intel_guc { * @destroyed_contexts: list of contexts waiting to be destroyed * (deregistered with the GuC) */
struct list_head destroyed_contexts;
/**struct llist_head destroyed_contexts;
- @destroyed_worker: worker to deregister contexts, need as we
- need to take a GT PM reference and can't from destroy
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 0a03a30e4c6d..6f7643edc139 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1771,7 +1771,7 @@ int intel_guc_submission_init(struct intel_guc *guc) spin_lock_init(&guc->submission_state.lock); INIT_LIST_HEAD(&guc->submission_state.guc_id_list); ida_init(&guc->submission_state.guc_ids);
- INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
- init_llist_head(&guc->submission_state.destroyed_contexts); INIT_WORK(&guc->submission_state.destroyed_worker, destroyed_worker_func);
@@ -2696,26 +2696,18 @@ static void __guc_context_destroy(struct intel_context *ce) } } +#define take_destroyed_contexts(guc) \
- llist_del_all(&guc->submission_state.destroyed_contexts)
- static void guc_flush_destroyed_contexts(struct intel_guc *guc) {
- struct intel_context *ce;
- unsigned long flags;
- struct intel_context *ce, *cn; GEM_BUG_ON(!submission_disabled(guc) && guc_submission_initialized(guc));
- while (!list_empty(&guc->submission_state.destroyed_contexts)) {
spin_lock_irqsave(&guc->submission_state.lock, flags);
ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
struct intel_context,
destroyed_link);
if (ce)
list_del_init(&ce->destroyed_link);
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
if (!ce)
break;
- llist_for_each_entry_safe(ce, cn, take_destroyed_contexts(guc),
release_guc_id(guc, ce); __guc_context_destroy(ce); }destroyed_link) {
@@ -2723,23 +2715,11 @@ static void guc_flush_destroyed_contexts(struct intel_guc *guc) static void deregister_destroyed_contexts(struct intel_guc *guc) {
- struct intel_context *ce;
- unsigned long flags;
- while (!list_empty(&guc->submission_state.destroyed_contexts)) {
spin_lock_irqsave(&guc->submission_state.lock, flags);
ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,
struct intel_context,
destroyed_link);
if (ce)
list_del_init(&ce->destroyed_link);
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
if (!ce)
break;
- struct intel_context *ce, *cn;
- llist_for_each_entry_safe(ce, cn, take_destroyed_contexts(guc),
guc_lrc_desc_unpin(ce);destroyed_link)
- } } static void destroyed_worker_func(struct work_struct *w)
@@ -2771,8 +2751,8 @@ static void guc_context_destroy(struct kref *kref) if (likely(!destroy)) { if (!list_empty(&ce->guc_id.link)) list_del_init(&ce->guc_id.link);
list_add_tail(&ce->destroyed_link,
&guc->submission_state.destroyed_contexts);
llist_add(&ce->destroyed_link,
&guc->submission_state.destroyed_contexts);
So here presumably submission lock is still needed for unlinking the from guc_id list.
Yes, the submission lock also protects the guc_id list.
According to the llist DoC the llist_add could be outside a lock (multiple producers can safely call llist_add simultaneously but it is also harmless to call within a lock.
Mechanical flows of the patch looks good to me, but I leave to you and John to decide on llist vs keeping the existing doubly linked list. I mean agreeing what fits better with the existing locking and data structure design.
Sounds good.
Matt
Regards,
Tvrtko
} else { __release_guc_id(guc, ce); }