[Intel-gfx] [PATCH 06/18] drm/i915/guc: Implement GuC context operations for new interface
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Wed Jul 21 23:57:21 UTC 2021
On 7/20/2021 6:51 PM, John Harrison wrote:
> On 7/20/2021 15:39, Matthew Brost wrote:
>> Implement the GuC context operations, which include the GuC specific
>> operations alloc, pin, unpin, and destroy.
>>
>> v2:
>> (Daniel Vetter)
>> - Use msleep_interruptible rather than cond_resched in busy loop
>> (Michal)
>> - Remove C++ style comment
>> v3:
>> (Matthew Brost)
>> - Drop GUC_ID_START
>> (John Harrison)
>> - Fix a bunch of typos
>> - Use drm_err rather than drm_dbg for G2H errors
>> (Daniele)
>> - Fix ;; typo
>> - Clean up sched state functions
>> - Add lockdep for guc_id functions
>> - Don't call __release_guc_id when guc_id is invalid
>> - Use MISSING_CASE
>> - Add comment in guc_context_pin
>> - Use shorter path to rpm
>> (Daniele / CI)
>> - Don't call release_guc_id on an invalid guc_id in destroy
>>
>> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
>> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
>> ---
>> drivers/gpu/drm/i915/gt/intel_context.c | 5 +
>> drivers/gpu/drm/i915/gt/intel_context_types.h | 22 +-
>> drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 1 -
>> drivers/gpu/drm/i915/gt/uc/intel_guc.h | 40 ++
>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 4 +
>> .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 667 ++++++++++++++++--
>> drivers/gpu/drm/i915/i915_reg.h | 1 +
>> drivers/gpu/drm/i915/i915_request.c | 1 +
>> 8 files changed, 686 insertions(+), 55 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
>> index bd63813c8a80..32fd6647154b 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
>> @@ -384,6 +384,11 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
>> mutex_init(&ce->pin_mutex);
>> + spin_lock_init(&ce->guc_state.lock);
>> +
>> + ce->guc_id = GUC_INVALID_LRC_ID;
>> + INIT_LIST_HEAD(&ce->guc_id_link);
>> +
>> i915_active_init(&ce->active,
>> __intel_context_active, __intel_context_retire, 0);
>> }
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> index 6d99631d19b9..606c480aec26 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
>> @@ -96,6 +96,7 @@ struct intel_context {
>> #define CONTEXT_BANNED 6
>> #define CONTEXT_FORCE_SINGLE_SUBMISSION 7
>> #define CONTEXT_NOPREEMPT 8
>> +#define CONTEXT_LRCA_DIRTY 9
>> struct {
>> u64 timeout_us;
>> @@ -138,14 +139,29 @@ struct intel_context {
>> u8 wa_bb_page; /* if set, page num reserved for context workarounds */
>> + struct {
>> + /** lock: protects everything in guc_state */
>> + spinlock_t lock;
>> + /**
>> + * sched_state: scheduling state of this context using GuC
>> + * submission
>> + */
>> + u8 sched_state;
>> + } guc_state;
>> +
>> /* GuC scheduling state flags that do not require a lock. */
>> atomic_t guc_sched_state_no_lock;
>> + /* GuC LRC descriptor ID */
>> + u16 guc_id;
>> +
>> + /* GuC LRC descriptor reference count */
>> + atomic_t guc_id_ref;
>> +
>> /*
>> - * GuC LRC descriptor ID - Not assigned in this patch but future patches
>> - * in the series will.
>> + * GuC ID link - in list when unpinned but guc_id still valid in GuC
>> */
>> - u16 guc_id;
>> + struct list_head guc_id_link;
>> };
>> #endif /* __INTEL_CONTEXT_TYPES__ */
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> index 41e5350a7a05..49d4857ad9b7 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
>> @@ -87,7 +87,6 @@
>> #define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0)
>> #define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */
>> -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
>> #define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */
>> /* in Gen12 ID 0x7FF is reserved to indicate idle */
>> #define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1)
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
>> index 8c7b92f699f1..30773cd699f5 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
>> @@ -7,6 +7,7 @@
>> #define _INTEL_GUC_H_
>> #include <linux/xarray.h>
>> +#include <linux/delay.h>
>> #include "intel_uncore.h"
>> #include "intel_guc_fw.h"
>> @@ -44,6 +45,14 @@ struct intel_guc {
>> void (*disable)(struct intel_guc *guc);
>> } interrupts;
>> + /*
>> + * contexts_lock protects the pool of free guc ids and a linked list
>> + * of guc ids available to be stolen
>> + */
>> + spinlock_t contexts_lock;
>> + struct ida guc_ids;
>> + struct list_head guc_id_list;
>> +
>> bool submission_selected;
>> struct i915_vma *ads_vma;
>> @@ -101,6 +110,34 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
>> response_buf, response_buf_size, 0);
>> }
>> +static inline int intel_guc_send_busy_loop(struct intel_guc *guc,
>> + const u32 *action,
>> + u32 len,
>> + bool loop)
>> +{
>> + int err;
>> + unsigned int sleep_period_ms = 1;
>> + bool not_atomic = !in_atomic() && !irqs_disabled();
>> +
>> + /* No sleeping with spin locks, just busy loop */
>> + might_sleep_if(loop && not_atomic);
>> +
>> +retry:
>> + err = intel_guc_send_nb(guc, action, len);
>> + if (unlikely(err == -EBUSY && loop)) {
>> + if (likely(not_atomic)) {
>> + if (msleep_interruptible(sleep_period_ms))
>> + return -EINTR;
>> + sleep_period_ms = sleep_period_ms << 1;
>> + } else {
>> + cpu_relax();
>> + }
>> + goto retry;
>> + }
>> +
>> + return err;
>> +}
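
One extra note on this helper while we're here: sleep_period_ms doubles on
every -EBUSY pass with no upper bound, so a long stretch of busy CTBs can
end up sleeping for seconds between attempts. Not a blocker, but if it ever
shows up in practice a cap would be cheap. Untested sketch, with a made-up
100ms ceiling:

	if (msleep_interruptible(sleep_period_ms))
		return -EINTR;
	/* double the backoff, but cap it (hypothetical bound) */
	sleep_period_ms = min(sleep_period_ms << 1, 100u);
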
>> +
>> static inline void intel_guc_to_host_event_handler(struct intel_guc *guc)
>> {
>> intel_guc_ct_event_handler(&guc->ct);
>> @@ -202,6 +239,9 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
>> int intel_guc_reset_engine(struct intel_guc *guc,
>> struct intel_engine_cs *engine);
>> +int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
>> + const u32 *msg, u32 len);
>> +
>> void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
>> #endif
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> index 83ec60ea3f89..28ff82c5be45 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> @@ -928,6 +928,10 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>> case INTEL_GUC_ACTION_DEFAULT:
>> ret = intel_guc_to_host_process_recv_msg(guc, payload, len);
>> break;
>> + case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
>> + ret = intel_guc_deregister_done_process_msg(guc, payload,
>> + len);
>> + break;
>> default:
>> ret = -EOPNOTSUPP;
>> break;
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> index 53b4a5eb4a85..6940b9d62118 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> @@ -13,7 +13,9 @@
>> #include "gt/intel_gt.h"
>> #include "gt/intel_gt_irq.h"
>> #include "gt/intel_gt_pm.h"
>> +#include "gt/intel_gt_requests.h"
>> #include "gt/intel_lrc.h"
>> +#include "gt/intel_lrc_reg.h"
>> #include "gt/intel_mocs.h"
>> #include "gt/intel_ring.h"
>> @@ -85,6 +87,72 @@ static inline void clr_context_enabled(struct intel_context *ce)
>> &ce->guc_sched_state_no_lock);
>> }
>> +/*
>> + * Below is a set of functions which control the GuC scheduling state and
>> + * which require a lock, aside from the special case where the functions
>> + * are called from guc_lrc_desc_pin(). In that case it isn't possible for
>> + * any other code path to be executing on the context.
>> + */
>> +#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0)
>> +#define SCHED_STATE_DESTROYED BIT(1)
>> +static inline void init_sched_state(struct intel_context *ce)
>> +{
>> + /* Only should be called from guc_lrc_desc_pin() */
>> + atomic_set(&ce->guc_sched_state_no_lock, 0);
>> + ce->guc_state.sched_state = 0;
>> +}
>> +
>> +static inline bool
>> +context_wait_for_deregister_to_register(struct intel_context *ce)
>> +{
>> + return ce->guc_state.sched_state &
>> + SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
>> +}
>> +
>> +static inline void
>> +set_context_wait_for_deregister_to_register(struct intel_context *ce)
>> +{
>> + /* Only should be called from guc_lrc_desc_pin() */
>> + ce->guc_state.sched_state |=
>> + SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
>> +}
>> +
>> +static inline void
>> +clr_context_wait_for_deregister_to_register(struct intel_context *ce)
>> +{
>> + lockdep_assert_held(&ce->guc_state.lock);
>> + ce->guc_state.sched_state &=
>> + ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER;
>> +}
>> +
>> +static inline bool
>> +context_destroyed(struct intel_context *ce)
>> +{
>> + return ce->guc_state.sched_state & SCHED_STATE_DESTROYED;
>> +}
>> +
>> +static inline void
>> +set_context_destroyed(struct intel_context *ce)
>> +{
>> + lockdep_assert_held(&ce->guc_state.lock);
>> + ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
>> +}
>> +
>> +static inline bool context_guc_id_invalid(struct intel_context *ce)
>> +{
>> + return (ce->guc_id == GUC_INVALID_LRC_ID);
> Could have dropped the brackets from this one too.
>
>> +}
>> +
>> +static inline void set_context_guc_id_invalid(struct intel_context *ce)
>> +{
>> + ce->guc_id = GUC_INVALID_LRC_ID;
>> +}
>> +
>> +static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
>> +{
>> + return &ce->engine->gt->uc.guc;
>> +}
>> +
>> static inline struct i915_priolist *to_priolist(struct rb_node *rb)
>> {
>> return rb_entry(rb, struct i915_priolist, node);
>> @@ -155,6 +223,9 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
>> int len = 0;
>> bool enabled = context_enabled(ce);
>> + GEM_BUG_ON(!atomic_read(&ce->guc_id_ref));
>> + GEM_BUG_ON(context_guc_id_invalid(ce));
>> +
>> if (!enabled) {
>> action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
>> action[len++] = ce->guc_id;
>> @@ -417,6 +488,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
>> xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
>> + spin_lock_init(&guc->contexts_lock);
>> + INIT_LIST_HEAD(&guc->guc_id_list);
>> + ida_init(&guc->guc_ids);
>> +
>> return 0;
>> }
>> @@ -429,9 +504,305 @@ void intel_guc_submission_fini(struct intel_guc *guc)
>> i915_sched_engine_put(guc->sched_engine);
>> }
>> -static int guc_context_alloc(struct intel_context *ce)
>> +static inline void queue_request(struct i915_sched_engine *sched_engine,
>> + struct i915_request *rq,
>> + int prio)
>> {
>> - return lrc_alloc(ce, ce->engine);
>> + GEM_BUG_ON(!list_empty(&rq->sched.link));
>> + list_add_tail(&rq->sched.link,
>> + i915_sched_lookup_priolist(sched_engine, prio));
>> + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>> +}
>> +
>> +static int guc_bypass_tasklet_submit(struct intel_guc *guc,
>> + struct i915_request *rq)
>> +{
>> + int ret;
>> +
>> + __i915_request_submit(rq);
>> +
>> + trace_i915_request_in(rq, 0);
>> +
>> + guc_set_lrc_tail(rq);
>> + ret = guc_add_request(guc, rq);
>> + if (ret == -EBUSY)
>> + guc->stalled_request = rq;
>> +
>> + return ret;
>> +}
>> +
>> +static void guc_submit_request(struct i915_request *rq)
>> +{
>> + struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
>> + struct intel_guc *guc = &rq->engine->gt->uc.guc;
>> + unsigned long flags;
>> +
>> + /* Will be called from irq-context when using foreign fences. */
>> + spin_lock_irqsave(&sched_engine->lock, flags);
>> +
>> + if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine))
>> + queue_request(sched_engine, rq, rq_prio(rq));
>> + else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
>> + tasklet_hi_schedule(&sched_engine->tasklet);
>> +
>> + spin_unlock_irqrestore(&sched_engine->lock, flags);
>> +}
>> +
>> +static int new_guc_id(struct intel_guc *guc)
>> +{
>> + return ida_simple_get(&guc->guc_ids, 0,
>> + GUC_MAX_LRC_DESCRIPTORS, GFP_KERNEL |
>> + __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
>> +}
>> +
>> +static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
>> +{
>> + if (!context_guc_id_invalid(ce)) {
>> + ida_simple_remove(&guc->guc_ids, ce->guc_id);
>> + reset_lrc_desc(guc, ce->guc_id);
>> + set_context_guc_id_invalid(ce);
>> + }
>> + if (!list_empty(&ce->guc_id_link))
>> + list_del_init(&ce->guc_id_link);
>> +}
>> +
>> +static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
>> +{
>> + unsigned long flags;
>> +
>> + spin_lock_irqsave(&guc->contexts_lock, flags);
>> + __release_guc_id(guc, ce);
>> + spin_unlock_irqrestore(&guc->contexts_lock, flags);
>> +}
>> +
>> +static int steal_guc_id(struct intel_guc *guc)
>> +{
>> + struct intel_context *ce;
>> + int guc_id;
>> +
>> + lockdep_assert_held(&guc->contexts_lock);
>> +
>> + if (!list_empty(&guc->guc_id_list)) {
>> + ce = list_first_entry(&guc->guc_id_list,
>> + struct intel_context,
>> + guc_id_link);
>> +
>> + GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
>> + GEM_BUG_ON(context_guc_id_invalid(ce));
>> +
>> + list_del_init(&ce->guc_id_link);
>> + guc_id = ce->guc_id;
>> + set_context_guc_id_invalid(ce);
>> + return guc_id;
>> + } else {
>> + return -EAGAIN;
>> + }
>> +}
>> +
>> +static int assign_guc_id(struct intel_guc *guc, u16 *out)
>> +{
>> + int ret;
>> +
>> + lockdep_assert_held(&guc->contexts_lock);
>> +
>> + ret = new_guc_id(guc);
>> + if (unlikely(ret < 0)) {
>> + ret = steal_guc_id(guc);
>> + if (ret < 0)
>> + return ret;
>> + }
>> +
>> + *out = ret;
>> + return 0;
>> +}
>> +
>> +#define PIN_GUC_ID_TRIES 4
>> +static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
>> +{
>> + int ret = 0;
>> + unsigned long flags, tries = PIN_GUC_ID_TRIES;
>> +
>> + GEM_BUG_ON(atomic_read(&ce->guc_id_ref));
>> +
>> +try_again:
>> + spin_lock_irqsave(&guc->contexts_lock, flags);
>> +
>> + if (context_guc_id_invalid(ce)) {
>> + ret = assign_guc_id(guc, &ce->guc_id);
>> + if (ret)
>> + goto out_unlock;
>> + ret = 1; /* Indicates newly assigned guc_id */
>> + }
>> + if (!list_empty(&ce->guc_id_link))
>> + list_del_init(&ce->guc_id_link);
>> + atomic_inc(&ce->guc_id_ref);
>> +
>> +out_unlock:
>> + spin_unlock_irqrestore(&guc->contexts_lock, flags);
>> +
>> + /*
>> + * -EAGAIN indicates no guc_ids are available, let's retire any
>> + * outstanding requests to see if that frees up a guc_id. If the first
>> + * retire didn't help, insert a sleep with the timeslice duration before
>> + * attempting to retire more requests. Double the sleep period each
>> + * subsequent pass before finally giving up. The sleep period has a max
>> + * of 100ms and a minimum of 1ms.
>> + */
>> + if (ret == -EAGAIN && --tries) {
>> + if (PIN_GUC_ID_TRIES - tries > 1) {
>> + unsigned int timeslice_shifted =
>> + ce->engine->props.timeslice_duration_ms <<
>> + (PIN_GUC_ID_TRIES - tries - 2);
>> + unsigned int max = min_t(unsigned int, 100,
>> + timeslice_shifted);
>> +
>> + msleep(max_t(unsigned int, max, 1));
>> + }
>> + intel_gt_retire_requests(guc_to_gt(guc));
>> + goto try_again;
>> + }
>> +
>> + return ret;
>> +}
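
For anyone following the arithmetic in the comment above, with
PIN_GUC_ID_TRIES == 4 and writing ts for the engine's
timeslice_duration_ms, the retry ladder works out to:

	attempt 1 fails -> retire requests, retry immediately
	attempt 2 fails -> msleep(clamp(ts << 0, 1, 100)), retire, retry
	attempt 3 fails -> msleep(clamp(ts << 1, 1, 100)), retire, retry
	attempt 4 fails -> return -EAGAIN
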
>> +
>> +static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
>> +{
>> + unsigned long flags;
>> +
>> + GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0);
>> +
>> + spin_lock_irqsave(&guc->contexts_lock, flags);
>> + if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) &&
>> + !atomic_read(&ce->guc_id_ref))
>> + list_add_tail(&ce->guc_id_link, &guc->guc_id_list);
>> + spin_unlock_irqrestore(&guc->contexts_lock, flags);
>> +}
>> +
>> +static int __guc_action_register_context(struct intel_guc *guc,
>> + u32 guc_id,
>> + u32 offset)
>> +{
>> + u32 action[] = {
>> + INTEL_GUC_ACTION_REGISTER_CONTEXT,
>> + guc_id,
>> + offset,
>> + };
>> +
>> + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true);
>> +}
>> +
>> +static int register_context(struct intel_context *ce)
>> +{
>> + struct intel_guc *guc = ce_to_guc(ce);
>> + u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) +
>> + ce->guc_id * sizeof(struct guc_lrc_desc);
>> +
>> + return __guc_action_register_context(guc, ce->guc_id, offset);
>> +}
>> +
>> +static int __guc_action_deregister_context(struct intel_guc *guc,
>> + u32 guc_id)
>> +{
>> + u32 action[] = {
>> + INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
>> + guc_id,
>> + };
>> +
>> + return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), true);
>> +}
>> +
>> +static int deregister_context(struct intel_context *ce, u32 guc_id)
>> +{
>> + struct intel_guc *guc = ce_to_guc(ce);
>> +
>> + return __guc_action_deregister_context(guc, guc_id);
>> +}
>> +
>> +static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask)
>> +{
>> + switch (class) {
>> + case RENDER_CLASS:
>> + return mask >> RCS0;
>> + case VIDEO_ENHANCEMENT_CLASS:
>> + return mask >> VECS0;
>> + case VIDEO_DECODE_CLASS:
>> + return mask >> VCS0;
>> + case COPY_ENGINE_CLASS:
>> + return mask >> BCS0;
>> + default:
>> + MISSING_CASE(class);
>> + return 0;
>> + }
>> +}
>> +
>> +static void guc_context_policy_init(struct intel_engine_cs *engine,
>> + struct guc_lrc_desc *desc)
>> +{
>> + desc->policy_flags = 0;
>> +
>> + desc->execution_quantum = CONTEXT_POLICY_DEFAULT_EXECUTION_QUANTUM_US;
>> + desc->preemption_timeout = CONTEXT_POLICY_DEFAULT_PREEMPTION_TIME_US;
>> +}
>> +
>> +static int guc_lrc_desc_pin(struct intel_context *ce)
>> +{
>> + struct intel_engine_cs *engine = ce->engine;
>> + struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
>> + struct intel_guc *guc = &engine->gt->uc.guc;
>> + u32 desc_idx = ce->guc_id;
>> + struct guc_lrc_desc *desc;
>> + bool context_registered;
>> + intel_wakeref_t wakeref;
>> + int ret = 0;
>> +
>> + GEM_BUG_ON(!engine->mask);
>> +
>> + /*
>> + * Ensure LRC + CT vmas are in same region as write barrier is done
>> + * based on CT vma region.
>> + */
>> + GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
>> + i915_gem_object_is_lmem(ce->ring->vma->obj));
>> +
>> + context_registered = lrc_desc_registered(guc, desc_idx);
>> +
>> + reset_lrc_desc(guc, desc_idx);
>> + set_lrc_desc_registered(guc, desc_idx, ce);
>> +
>> + desc = __get_lrc_desc(guc, desc_idx);
>> + desc->engine_class = engine_class_to_guc_class(engine->class);
>> + desc->engine_submit_mask = adjust_engine_mask(engine->class,
>> + engine->mask);
>> + desc->hw_context_desc = ce->lrc.lrca;
>> + desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
>> + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
>> + guc_context_policy_init(engine, desc);
>> + init_sched_state(ce);
>> +
>> + /*
>> + * The context_lookup xarray is used to determine if the hardware
>> + * context is currently registered. There are two cases in which it
>> + * could be registered: either the guc_id has been stolen from
>> + * another context or the lrc descriptor address of this context has
>> + * changed. In either case the context needs to be deregistered with
>> + * the GuC before registering this context.
>> + */
>> + if (context_registered) {
>> + set_context_wait_for_deregister_to_register(ce);
>> + intel_context_get(ce);
>> +
>> + /*
>> + * If stealing the guc_id, this ce has the same guc_id as the
>> + * context whose guc_id was stolen.
>> + */
>> + with_intel_runtime_pm(runtime_pm, wakeref)
>> + ret = deregister_context(ce, ce->guc_id);
>> + } else {
>> + with_intel_runtime_pm(runtime_pm, wakeref)
>> + ret = register_context(ce);
>> + }
>> +
>> + return ret;
>> }
>> static int guc_context_pre_pin(struct intel_context *ce,
>> @@ -443,36 +814,144 @@ static int guc_context_pre_pin(struct intel_context *ce,
>> static int guc_context_pin(struct intel_context *ce, void *vaddr)
>> {
>> + if (i915_ggtt_offset(ce->state) !=
>> + (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK))
>> + set_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
>> +
>> + /*
>> + * GuC context gets pinned in guc_request_alloc. See that function
>> + * for an explanation of why.
>> + */
>> +
>> return lrc_pin(ce, ce->engine, vaddr);
>> }
>> +static void guc_context_unpin(struct intel_context *ce)
>> +{
>> + struct intel_guc *guc = ce_to_guc(ce);
>> +
>> + unpin_guc_id(guc, ce);
>> + lrc_unpin(ce);
>> +}
>> +
>> +static void guc_context_post_unpin(struct intel_context *ce)
>> +{
>> + lrc_post_unpin(ce);
>> +}
>> +
>> +static inline void guc_lrc_desc_unpin(struct intel_context *ce)
>> +{
>> + struct intel_guc *guc = ce_to_guc(ce);
>> + unsigned long flags;
>> +
>> + GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id));
>> + GEM_BUG_ON(ce != __get_context(guc, ce->guc_id));
>> +
>> + spin_lock_irqsave(&ce->guc_state.lock, flags);
>> + set_context_destroyed(ce);
>> + spin_unlock_irqrestore(&ce->guc_state.lock, flags);
>> +
>> + deregister_context(ce, ce->guc_id);
>> +}
>> +
>> +static void guc_context_destroy(struct kref *kref)
>> +{
>> + struct intel_context *ce = container_of(kref, typeof(*ce), ref);
>> + struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
>> + struct intel_guc *guc = ce_to_guc(ce);
>> + intel_wakeref_t wakeref;
>> + unsigned long flags;
>> +
>> + /*
>> + * If the guc_id is invalid this context has been stolen and we can
>> + * free it immediately. It can also be freed immediately if the
>> + * context is not registered with the GuC.
>> + */
>> + if (context_guc_id_invalid(ce)) {
>> + lrc_destroy(kref);
>> + return;
>> + } else if (!lrc_desc_registered(guc, ce->guc_id)) {
>> + release_guc_id(guc, ce);
>> + lrc_destroy(kref);
>> + return;
>> + }
>> +
>> + /*
>> + * We have to acquire the context spinlock and check guc_id again;
>> + * if it is valid it hasn't been stolen and needs to be deregistered.
>> + * We delete this context from the list of unpinned guc_ids available
>> + * to steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB
>> + * returns indicating this context has been deregistered the guc_id
>> + * is returned to the pool of available guc_ids.
>> + */
>> + spin_lock_irqsave(&guc->contexts_lock, flags);
>> + if (context_guc_id_invalid(ce)) {
>> + spin_unlock_irqrestore(&guc->contexts_lock, flags);
>> + lrc_destroy(kref);
>> + return;
>> + }
>> +
>> + if (!list_empty(&ce->guc_id_link))
>> + list_del_init(&ce->guc_id_link);
>> + spin_unlock_irqrestore(&guc->contexts_lock, flags);
>> +
>> + /*
>> + * We defer GuC context deregistration until the context is destroyed
>> + * in order to save on CTBs. With this optimization ideally we only
>> + * need 1 CTB to register the context during the first pin and 1 CTB
>> + * to deregister the context when the context is destroyed. Without
>> + * this optimization, a CTB would be needed for every pin & unpin.
>> + *
>> + * XXX: Need to acquire the runtime wakeref as this can be triggered
>> + * from context_free_worker when the runtime wakeref is not held.
>> + * guc_lrc_desc_unpin requires the runtime wakeref as a GuC register
>> + * is written in the H2G CTB to deregister the context. A future
>> + * patch may defer this H2G CTB if the runtime wakeref is zero.
>> + */
>> + with_intel_runtime_pm(runtime_pm, wakeref)
>> + guc_lrc_desc_unpin(ce);
>> +}
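
To summarize the guc_id / LRC descriptor lifecycle these hooks implement,
as I read it:

	first request: pin_guc_id() + guc_lrc_desc_pin() -> H2G register
	               (preceded by an H2G deregister if the guc_id was
	               stolen or the LRCA changed)
	steady state : pin/unpin only touch guc_id_ref, no CTBs needed
	unpin        : context parked on guc->guc_id_list, guc_id stealable
	destroy      : guc_lrc_desc_unpin() -> H2G deregister -> G2H done
	               -> release_guc_id() + lrc_destroy()
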
>> +
>> +static int guc_context_alloc(struct intel_context *ce)
>> +{
>> + return lrc_alloc(ce, ce->engine);
>> +}
>> +
>> static const struct intel_context_ops guc_context_ops = {
>> .alloc = guc_context_alloc,
>> .pre_pin = guc_context_pre_pin,
>> .pin = guc_context_pin,
>> - .unpin = lrc_unpin,
>> - .post_unpin = lrc_post_unpin,
>> + .unpin = guc_context_unpin,
>> + .post_unpin = guc_context_post_unpin,
>> .enter = intel_context_enter_engine,
>> .exit = intel_context_exit_engine,
>> .reset = lrc_reset,
>> - .destroy = lrc_destroy,
>> + .destroy = guc_context_destroy,
>> };
>> -static int guc_request_alloc(struct i915_request *request)
>> +static bool context_needs_register(struct intel_context *ce, bool new_guc_id)
>> +{
>> + return new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) ||
>> + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id);
>> +}
>> +
>> +static int guc_request_alloc(struct i915_request *rq)
>> {
>> + struct intel_context *ce = rq->context;
>> + struct intel_guc *guc = ce_to_guc(ce);
>> int ret;
>> - GEM_BUG_ON(!intel_context_is_pinned(request->context));
>> + GEM_BUG_ON(!intel_context_is_pinned(rq->context));
>> /*
>> * Flush enough space to reduce the likelihood of waiting after
>> * we start building the request - in which case we will just
>> * have to repeat work.
>> */
>> - request->reserved_space += GUC_REQUEST_SIZE;
>> + rq->reserved_space += GUC_REQUEST_SIZE;
>> /*
>> * Note that after this point, we have committed to using
>> @@ -483,56 +962,47 @@ static int guc_request_alloc(struct i915_request *request)
>> */
>> /* Unconditionally invalidate GPU caches and TLBs. */
>> - ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
>> + ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
>> if (ret)
>> return ret;
>> - request->reserved_space -= GUC_REQUEST_SIZE;
>> - return 0;
>> -}
>> -
>> -static inline void queue_request(struct i915_sched_engine *sched_engine,
>> - struct i915_request *rq,
>> - int prio)
>> -{
>> - GEM_BUG_ON(!list_empty(&rq->sched.link));
>> - list_add_tail(&rq->sched.link,
>> - i915_sched_lookup_priolist(sched_engine, prio));
>> - set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>> -}
>> -
>> -static int guc_bypass_tasklet_submit(struct intel_guc *guc,
>> - struct i915_request *rq)
>> -{
>> - int ret;
>> -
>> - __i915_request_submit(rq);
>> -
>> - trace_i915_request_in(rq, 0);
>> -
>> - guc_set_lrc_tail(rq);
>> - ret = guc_add_request(guc, rq);
>> - if (ret == -EBUSY)
>> - guc->stalled_request = rq;
>> -
>> - return ret;
>> -}
>> + rq->reserved_space -= GUC_REQUEST_SIZE;
>> -static void guc_submit_request(struct i915_request *rq)
>> -{
>> - struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
>> - struct intel_guc *guc = &rq->engine->gt->uc.guc;
>> - unsigned long flags;
>> + /*
>> + * Call pin_guc_id here rather than in the pinning step as with
>> + * dma_resv, contexts can be repeatedly pinned / unpinned, thrashing
>> + * the guc_ids and creating horrible race conditions. This is
>> + * especially bad when guc_ids are being stolen due to over
>> + * subscription. By the time this function is reached, it is
>> + * guaranteed that the guc_id will be persistent until the generated
>> + * request is retired, thus sealing these race conditions. It is
>> + * still safe to fail here if guc_ids are exhausted and return
>> + * -EAGAIN to the user indicating that they can try again in the
>> + * future.
>> + *
>> + * There is no need for a lock here as the timeline mutex ensures at
>> + * most one context can be executing this code path at once. The
>> + * guc_id_ref is incremented once for every request in flight and
>> + * decremented on each retire. When it is zero, a lock around the
>> + * increment (in pin_guc_id) is needed to seal a race with
>> + * unpin_guc_id.
>> + */
>> + if (atomic_add_unless(&ce->guc_id_ref, 1, 0))
>> + return 0;
>> - /* Will be called from irq-context when using foreign fences. */
>> - spin_lock_irqsave(&sched_engine->lock, flags);
>> + ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
>> + if (unlikely(ret < 0))
>> + return ret;
>> + if (context_needs_register(ce, !!ret)) {
>> + ret = guc_lrc_desc_pin(ce);
>> + if (unlikely(ret)) { /* unwind */
>> + atomic_dec(&ce->guc_id_ref);
>> + unpin_guc_id(guc, ce);
>> + return ret;
>> + }
>> + }
>> - if (guc->stalled_request || !i915_sched_engine_is_empty(sched_engine))
>> - queue_request(sched_engine, rq, rq_prio(rq));
>> - else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY)
>> - tasklet_hi_schedule(&sched_engine->tasklet);
>> + clear_bit(CONTEXT_LRCA_DIRTY, &ce->flags);
>> - spin_unlock_irqrestore(&sched_engine->lock, flags);
>> + return 0;
>> }
>> static void sanitize_hwsp(struct intel_engine_cs *engine)
>> @@ -606,6 +1076,41 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
>> engine->submit_request = guc_submit_request;
>> }
>> +static inline void guc_kernel_context_pin(struct intel_guc *guc,
>> + struct intel_context *ce)
>> +{
>> + if (context_guc_id_invalid(ce))
>> + pin_guc_id(guc, ce);
>> + guc_lrc_desc_pin(ce);
>> +}
>> +
>> +static inline void guc_init_lrc_mapping(struct intel_guc *guc)
>> +{
>> + struct intel_gt *gt = guc_to_gt(guc);
>> + struct intel_engine_cs *engine;
>> + enum intel_engine_id id;
>> +
>> + /* make sure all descriptors are clean... */
>> + xa_destroy(&guc->context_lookup);
>> +
>> + /*
>> + * Some contexts might have been pinned before we enabled GuC
>> + * submission, so we need to add them to the GuC bookkeeping.
>> + * Also, after a reset the of GuC we want to make sure that the
> the of -> of the
>
>> + * information shared with GuC is properly reset. The kernel LRCs
>> + * are not attached to the gem_context, so they need to be added
>> + * separately.
>> + *
>> + * Note: we purposely do not check the return of guc_lrc_desc_pin,
> purposefully
>
> Just a bunch of nits, so maybe not worth respinning. I think it needs
> an r-b from Daniele as well, given that he had a bunch of comments on
> the previous rev too. But apart from the nits, looks good to me.
>
I didn't fully re-review the patch, but I've checked the things I had
commented on and I'm happy with how they've been addressed. My only
remaining concern is the potentially long wait in atomic context, but
that can be addressed as a follow up.
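
To make that concrete: with a spin lock held (loop && !not_atomic),
intel_guc_send_busy_loop() will cpu_relax() and retry for as long as the CT
channel stays busy. A follow-up could simply bound that path; rough,
untested sketch with an arbitrary retry budget:

	unsigned int atomic_retries = 1000; /* arbitrary, illustration only */

retry:
	err = intel_guc_send_nb(guc, action, len);
	if (unlikely(err == -EBUSY && loop)) {
		if (likely(not_atomic)) {
			if (msleep_interruptible(sleep_period_ms))
				return -EINTR;
			sleep_period_ms <<= 1;
		} else {
			/* don't spin forever under a spin lock */
			if (!--atomic_retries)
				return -EBUSY;
			cpu_relax();
		}
		goto retry;
	}
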
Daniele
> Reviewed-by: John Harrison <John.C.Harrison at Intel.com>
>
>> + * because that function can only fail if a reset is just starting.
>> + * This is at the end of reset so presumably another reset isn't
>> + * happening and even if it did this code would be run again.
>> + */
>> +
>> + for_each_engine(engine, gt, id)
>> + if (engine->kernel_context)
>> + guc_kernel_context_pin(guc, engine->kernel_context);
>> +}
>> +
>> static void guc_release(struct intel_engine_cs *engine)
>> {
>> engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
>> @@ -718,6 +1223,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
>> void intel_guc_submission_enable(struct intel_guc *guc)
>> {
>> + guc_init_lrc_mapping(guc);
>> }
>> void intel_guc_submission_disable(struct intel_guc *guc)
>> @@ -743,3 +1249,62 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
>> {
>> guc->submission_selected = __guc_submission_selected(guc);
>> }
>> +
>> +static inline struct intel_context *
>> +g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
>> +{
>> + struct intel_context *ce;
>> +
>> + if (unlikely(desc_idx >= GUC_MAX_LRC_DESCRIPTORS)) {
>> + drm_err(&guc_to_gt(guc)->i915->drm,
>> + "Invalid desc_idx %u", desc_idx);
>> + return NULL;
>> + }
>> +
>> + ce = __get_context(guc, desc_idx);
>> + if (unlikely(!ce)) {
>> + drm_err(&guc_to_gt(guc)->i915->drm,
>> + "Context is NULL, desc_idx %u", desc_idx);
>> + return NULL;
>> + }
>> +
>> + return ce;
>> +}
>> +
>> +int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
>> + const u32 *msg,
>> + u32 len)
>> +{
>> + struct intel_context *ce;
>> + u32 desc_idx = msg[0];
>> +
>> + if (unlikely(len < 1)) {
>> + drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
>> + return -EPROTO;
>> + }
>> +
>> + ce = g2h_context_lookup(guc, desc_idx);
>> + if (unlikely(!ce))
>> + return -EPROTO;
>> +
>> + if (context_wait_for_deregister_to_register(ce)) {
>> + struct intel_runtime_pm *runtime_pm =
>> + &ce->engine->gt->i915->runtime_pm;
>> + intel_wakeref_t wakeref;
>> +
>> + /*
>> + * Previous owner of this guc_id has been deregistered, now it is
>> + * safe to register this context.
>> + */
>> + with_intel_runtime_pm(runtime_pm, wakeref)
>> + register_context(ce);
>> + clr_context_wait_for_deregister_to_register(ce);
>> + intel_context_put(ce);
>> + } else if (context_destroyed(ce)) {
>> + /* Context has been destroyed */
>> + release_guc_id(guc, ce);
>> + lrc_destroy(&ce->ref);
>> + }
>> +
>> + return 0;
>> +}
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index 943fe485c662..204c95c39353 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -4142,6 +4142,7 @@ enum {
>> FAULT_AND_CONTINUE /* Unsupported */
>> };
>> +#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12)
>> #define GEN8_CTX_VALID (1 << 0)
>> #define GEN8_CTX_FORCE_PD_RESTORE (1 << 1)
>> #define GEN8_CTX_FORCE_RESTORE (1 << 2)
>> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
>> index 09ebea9a0090..ef26724fe980 100644
>> --- a/drivers/gpu/drm/i915/i915_request.c
>> +++ b/drivers/gpu/drm/i915/i915_request.c
>> @@ -407,6 +407,7 @@ bool i915_request_retire(struct i915_request *rq)
>> */
>> if (!list_empty(&rq->sched.link))
>> remove_from_engine(rq);
>> + atomic_dec(&rq->context->guc_id_ref);
>> GEM_BUG_ON(!llist_empty(&rq->execute_cb));
>> __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
>