[Intel-gfx] [PATCH 1/3] drm/i915/guc: Add non blocking CTB send function
Michal Wajdeczko
michal.wajdeczko at intel.com
Mon Nov 25 13:20:39 UTC 2019
On Fri, 22 Nov 2019 01:13:25 +0100, Matthew Brost
<matthew.brost at intel.com> wrote:
> On Thu, Nov 21, 2019 at 12:43:26PM +0100, Michal Wajdeczko wrote:
>> On Thu, 21 Nov 2019 00:56:02 +0100, <John.C.Harrison at intel.com> wrote:
>>
>>> From: Matthew Brost <matthew.brost at intel.com>
>>>
>>> Add non blocking CTB send fuction, intel_guc_send_nb. In order to
>>> support a non blocking CTB send fuction a spin lock is needed to
>>
>> 2x typos
>>
>>> protect the CTB descriptors fields. Also the non blocking call must not
>>> update the fence value as this value is owned by the blocking call
>>> (intel_guc_send).
>>
>> you probably mean "intel_guc_send_ct", as intel_guc_send is just a
>> wrapper around guc->send
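>> i.e. roughly this (quoting from memory, so treat as a sketch):
>>
>> static inline int intel_guc_send(struct intel_guc *guc,
>> 				 const u32 *action, u32 len)
>> {
>> 	return guc->send(guc, action, len, NULL, 0);
>> }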
>>
>
> Ah, yes.
>
>>>
>>> The blocking CTB now must have a flow control mechanism to ensure the
>>> buffer isn't overrun. A lazy spin wait is used as we believe the flow
>>> control condition should be rare with properly sized buffer. A retry
>>> counter is also implemented which fails H2G CTBs once a limit is
>>> reached to prevent deadlock.
>>>
>>> The function, intel_guc_send_nb, is exported in this patch but unused.
>>> Several patches later in the series make use of this function.
>>
>> It's likely in yet another series
>>
>
> Yes, it is.
>
>>>
>>> Cc: John Harrison <john.c.harrison at intel.com>
>>> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
>>> ---
>>> drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 +
>>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 97 +++++++++++++++++++----
>>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 10 ++-
>>> 3 files changed, 91 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
>>> index e6400204a2bd..77c5af919ace 100644
>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
>>> @@ -94,6 +94,8 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
>>> return guc->send(guc, action, len, response_buf, response_buf_size);
>>> }
>>> +int intel_guc_send_nb(struct intel_guc_ct *ct, const u32 *action, u32 len);
>>> +
>>
>> Hmm, this mismatch of guc/ct parameters breaks our layering.
>> But we can keep this layering intact by introducing some flags to
>> the existing guc_send() function. These flags could be passed as
>> high bits in action[0], like this:
>
> This seems reasonable.
>
>>
>> #define GUC_ACTION_FLAG_DONT_WAIT 0x80000000
>>
>> int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset)
>> {
>> u32 action[] = {
>> INTEL_GUC_ACTION_AUTHENTICATE_HUC | GUC_ACTION_FLAG_DONT_WAIT,
>> rsa_offset
>> };
>>
>> return intel_guc_send(guc, action, ARRAY_SIZE(action));
>> }
>>
>> then actual back-end of guc->send can take proper steps based on this
>> flag:
>>
>> @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len,
>> GEM_BUG_ON(!len);
>> GEM_BUG_ON(len > guc->send_regs.count);
>>
>> + if (*action & GUC_ACTION_FLAG_DONT_WAIT)
>> + return -EINVAL;
>> + *action &= ~GUC_ACTION_FLAG_DONT_WAIT;
>> +
>> /* We expect only action code */
>> GEM_BUG_ON(*action & ~INTEL_GUC_MSG_CODE_MASK);
>>
>> @@ int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len,
>> u32 status = ~0; /* undefined */
>> int ret;
>>
>> + if (*action & GUC_ACTION_FLAG_DONT_WAIT) {
>> + GEM_BUG_ON(response_buf);
>> + GEM_BUG_ON(response_buf_size);
>> + return ctch_send_nb(ct, ctch, action, len);
>> + }
>> +
>> mutex_lock(&guc->send_mutex);
>>
>> ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size,
>>
>>
>>> static inline void intel_guc_notify(struct intel_guc *guc)
>>> {
>>> guc->notify(guc);
>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>> index b49115517510..e50d968b15d5 100644
>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>> @@ -3,6 +3,8 @@
>>> * Copyright © 2016-2019 Intel Corporation
>>> */
>>> +#include <linux/circ_buf.h>
>>> +
>>> #include "i915_drv.h"
>>> #include "intel_guc_ct.h"
>>> @@ -12,6 +14,8 @@
>>> #define CT_DEBUG_DRIVER(...) do { } while (0)
>>> #endif
>>> +#define MAX_RETRY 0x1000000
>>> +
>>> struct ct_request {
>>> struct list_head link;
>>> u32 fence;
>>> @@ -40,7 +44,8 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct)
>>> /* we're using static channel owners */
>>> ct->host_channel.owner = CTB_OWNER_HOST;
>>> - spin_lock_init(&ct->lock);
>>> + spin_lock_init(&ct->request_lock);
>>> + spin_lock_init(&ct->send_lock);
>>> INIT_LIST_HEAD(&ct->pending_requests);
>>> INIT_LIST_HEAD(&ct->incoming_requests);
>>> INIT_WORK(&ct->worker, ct_incoming_request_worker_func);
>>> @@ -291,7 +296,8 @@ static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
>>> static int ctb_write(struct intel_guc_ct_buffer *ctb,
>>> const u32 *action,
>>> u32 len /* in dwords */,
>>> - u32 fence,
>>> + u32 fence_value,
>>> + bool enable_fence,
>>
>> maybe we can just guarantee that fence=0 will never be used as a valid
>> fence id, then this flag could be replaced with a (fence != 0) check.
>>
>
> Yes, again this seems reasonable. Initialize next_fence = 1, then
> increment by 2 each time, and this works.
As we need the fence only for a low volume of blocking messages that
require a response, I'm not sure we need to super-optimize get_fence to
use +2 all the time - maybe just check for zero wrap?
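e.g. something like this (just a sketch, reserving fence == 0 to mean
"no fence"):

static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
{
	/* fence == 0 is reserved for fence-less (non-blocking) messages */
	if (unlikely(++ctch->next_fence == 0))
		++ctch->next_fence;
	return ctch->next_fence;
}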
>
>>> bool want_response)
>>> {
>>> struct guc_ct_buffer_desc *desc = ctb->desc;
>>> @@ -328,18 +334,18 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb,
>>> * DW2+: action data
>>> */
>>> header = (len << GUC_CT_MSG_LEN_SHIFT) |
>>> - (GUC_CT_MSG_WRITE_FENCE_TO_DESC) |
>>> + (enable_fence ? GUC_CT_MSG_WRITE_FENCE_TO_DESC : 0) |
>>
>> Hmm, even if we ask the fw not to write back the fence to the
>> descriptor, IIRC the current firmware will unconditionally write back
>> the return status of this non-blocking call, possibly overwriting the
>> status of a blocked call.
>>
>
> Yes, known problem with the interface that needs to be fixed.
>
>>> (want_response ? GUC_CT_MSG_SEND_STATUS : 0) |
>>
>> btw, if we switch all requests to expect a reply sent back over the
>> CTB, then we can possibly drop the send_mutex in the CTB paths, and
>> block only when there is no DONT_WAIT flag and we have to wait for a
>> response.
>>
>
> I'd rather just wait for the GuC firmware to fix this.
But fixing that will probably require some extra flags, while switching
to replies over CTB should work today on the existing fw.
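e.g. it could be as simple as always setting the status flag in
ctb_write (just a sketch against the code quoted above, untested):

	header = (len << GUC_CT_MSG_LEN_SHIFT) |
		 GUC_CT_MSG_SEND_STATUS |	/* reply over CTB for every request */
		 (action[0] << GUC_CT_MSG_ACTION_SHIFT);

with the response then matched by fence in ct_handle_response(), so
nothing needs to poll the descriptor.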
>
>>> (action[0] << GUC_CT_MSG_ACTION_SHIFT);
>>> CT_DEBUG_DRIVER("CT: writing %*ph %*ph %*ph\n",
>>> - 4, &header, 4, &fence,
>>> + 4, &header, 4, &fence_value,
>>> 4 * (len - 1), &action[1]);
>>> cmds[tail] = header;
>>> tail = (tail + 1) % size;
>>> - cmds[tail] = fence;
>>> + cmds[tail] = fence_value;
>>> tail = (tail + 1) % size;
>>> for (i = 1; i < len; i++) {
>>> @@ -440,6 +446,47 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
>>> return err;
>>> }
>>> +static inline bool ctb_has_room(struct guc_ct_buffer_desc *desc, u32 len)
>>> +{
>>> + u32 head = READ_ONCE(desc->head);
>>> + u32 space;
>>> +
>>> + space = CIRC_SPACE(desc->tail, head, desc->size);
>>> +
>>> + return space >= len;
>>> +}
>>> +
>>> +int intel_guc_send_nb(struct intel_guc_ct *ct,
>>> + const u32 *action,
>>> + u32 len)
>>> +{
>>> + struct intel_guc_ct_channel *ctch = &ct->host_channel;
>>> + struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
>>> + struct guc_ct_buffer_desc *desc = ctb->desc;
>>> + int err;
>>> +
>>> + GEM_BUG_ON(!ctch->enabled);
>>> + GEM_BUG_ON(!len);
>>> + GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
>>> + lockdep_assert_held(&ct->send_lock);
>>
>> hmm, does it mean that it's now the caller's responsibility to take
>> the CT private spinlock? That is not how the other guc_send()
>> functions work.
>>
>
> Yes, that's how I would like this to work, as I feel it gives more
> flexibility to the caller in the -EBUSY case. The caller can call
> intel_guc_send_nb again while still holding the lock, or it can
> release the lock and use a different form of flow control. Perhaps
> locking / unlocking should be exposed via static inlines rather than
> the caller directly manipulating the lock?
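>
> Something like this (rough sketch, names not final):
>
> static inline void intel_guc_ct_send_lock(struct intel_guc_ct *ct)
> {
> 	spin_lock(&ct->send_lock);
> }
>
> static inline void intel_guc_ct_send_unlock(struct intel_guc_ct *ct)
> {
> 	spin_unlock(&ct->send_lock);
> }
>
> so a caller spinning on -EBUSY could do:
>
> 	intel_guc_ct_send_lock(ct);
> 	do {
> 		err = intel_guc_send_nb(ct, action, len);
> 		if (err == -EBUSY)
> 			cpu_relax();
> 	} while (err == -EBUSY);
> 	intel_guc_ct_send_unlock(ct);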
Hmm, I'm not sure that such flexibility, where one set of callers is
allowed to grab and hold the CTB internal send_lock while other callers
don't even know that such a lock exists, is a good direction.
Why do you need to sync your callers on the CTB internal lock ?
Are you afraid that other clients might steal the next free slot in the CTB ?
Who are these other clients ? Maybe you can lock on something else ?
>
>>> +
>>> + if (unlikely(!ctb_has_room(desc, (len + 1) * 4))) {
>>> + ct->retry++;
>>> + if (ct->retry >= MAX_RETRY)
>>> + return -EDEADLK;
>>> + else
>>> + return -EBUSY;
>>> + }
>>> +
>>> + ct->retry = 0;
>>> + err = ctb_write(ctb, action, len, 0, false, false);
>>> + if (unlikely(err))
>>> + return err;
>>> +
>>> + intel_guc_notify(ct_to_guc(ct));
>>> + return 0;
>>> +}
>>> +
>>> static int ctch_send(struct intel_guc_ct *ct,
>>> struct intel_guc_ct_channel *ctch,
>>> const u32 *action,
>>> @@ -460,17 +507,35 @@ static int ctch_send(struct intel_guc_ct *ct,
>>> GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
>>> GEM_BUG_ON(!response_buf && response_buf_size);
>>> + /*
>>> + * We use a lazy spin wait loop here as we believe that if the CT
>>> + * buffers are sized correctly the flow control condition should be
>>> + * rare.
>>> + */
>>> +retry:
>>> + spin_lock_irqsave(&ct->send_lock, flags);
>>> + if (unlikely(!ctb_has_room(desc, (len + 1) * 4))) {
>>> + spin_unlock_irqrestore(&ct->send_lock, flags);
>>> + ct->retry++;
>>> + if (ct->retry >= MAX_RETRY)
>>> + return -EDEADLK;
>>
>> I'm not sure what's better: having a secret deadlock that is hard to
>> reproduce, or deadlocks that are easier to catch and so help us become
>> deadlock-clean.
>
> This is covering the case where the GuC has died, to avoid deadlock.
> Eventually we will have some GuC health check code that will trigger a
> full GPU reset if the GuC has died. We need a way for code spinning on
> the CTBs to exit.
>
> I've already tweaked this code locally a bit to use an atomic too,
> with the idea being that the GuC health check code can set this value
> to have all code spinning on CTBs immediately return -EDEADLK when the
> GuC has died.
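>
> Roughly (local sketch, the name is not final):
>
> 	/* in struct intel_guc_ct: set once the GuC is declared dead */
> 	atomic_t dead;
>
> and at the top of the send paths:
>
> 	if (unlikely(atomic_read(&ct->dead)))
> 		return -EDEADLK;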
Will wait then to see the next revision.
>
>>> + cpu_relax();
>>> + goto retry;
>>> + }
>>> +
>>> + ct->retry = 0;
>>> fence = ctch_get_next_fence(ctch);
>>> request.fence = fence;
>>> request.status = 0;
>>> request.response_len = response_buf_size;
>>> request.response_buf = response_buf;
>>> - spin_lock_irqsave(&ct->lock, flags);
>>> + spin_lock(&ct->request_lock);
>>> list_add_tail(&request.link, &ct->pending_requests);
>>> - spin_unlock_irqrestore(&ct->lock, flags);
>>> + spin_unlock(&ct->request_lock);
>>> - err = ctb_write(ctb, action, len, fence, !!response_buf);
>>> + err = ctb_write(ctb, action, len, fence, true, !!response_buf);
>>> + spin_unlock_irqrestore(&ct->send_lock, flags);
>>> if (unlikely(err))
>>> goto unlink;
>>> @@ -501,9 +566,9 @@ static int ctch_send(struct intel_guc_ct *ct,
>>> }
>>> unlink:
>>> - spin_lock_irqsave(&ct->lock, flags);
>>> + spin_lock_irqsave(&ct->request_lock, flags);
>>> list_del(&request.link);
>>> - spin_unlock_irqrestore(&ct->lock, flags);
>>> + spin_unlock_irqrestore(&ct->request_lock, flags);
>>> return err;
>>> }
>>> @@ -653,7 +718,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
>>> CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status);
>>> - spin_lock(&ct->lock);
>>> + spin_lock(&ct->request_lock);
>>> list_for_each_entry(req, &ct->pending_requests, link) {
>>> if (unlikely(fence != req->fence)) {
>>> CT_DEBUG_DRIVER("CT: request %u awaits response\n",
>>> @@ -672,7 +737,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
>>> found = true;
>>> break;
>>> }
>>> - spin_unlock(&ct->lock);
>>> + spin_unlock(&ct->request_lock);
>>> if (!found)
>>> DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg);
>>> @@ -710,13 +775,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
>>> u32 *payload;
>>> bool done;
>>> - spin_lock_irqsave(&ct->lock, flags);
>>> + spin_lock_irqsave(&ct->request_lock, flags);
>>> request = list_first_entry_or_null(&ct->incoming_requests,
>>> struct ct_incoming_request, link);
>>> if (request)
>>> list_del(&request->link);
>>> done = !!list_empty(&ct->incoming_requests);
>>> - spin_unlock_irqrestore(&ct->lock, flags);
>>> + spin_unlock_irqrestore(&ct->request_lock, flags);
>>> if (!request)
>>> return true;
>>> @@ -777,9 +842,9 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg)
>>> }
>>> memcpy(request->msg, msg, 4 * msglen);
>>> - spin_lock_irqsave(&ct->lock, flags);
>>> + spin_lock_irqsave(&ct->request_lock, flags);
>>> list_add_tail(&request->link, &ct->incoming_requests);
>>> - spin_unlock_irqrestore(&ct->lock, flags);
>>> + spin_unlock_irqrestore(&ct->request_lock, flags);
>>> queue_work(system_unbound_wq, &ct->worker);
>>> return 0;
>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>> index 7c24d83f5c24..bc670a796bd8 100644
>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>> @@ -62,8 +62,11 @@ struct intel_guc_ct {
>>> struct intel_guc_ct_channel host_channel;
>>> /* other channels are tbd */
>>> - /** @lock: protects pending requests list */
>>> - spinlock_t lock;
>>> + /** @request_lock: protects pending requests list */
>>> + spinlock_t request_lock;
>>> +
>>> + /** @send_lock: protects h2g channel */
>>> + spinlock_t send_lock;
>>> /** @pending_requests: list of requests waiting for response */
>>> struct list_head pending_requests;
>>> @@ -73,6 +76,9 @@ struct intel_guc_ct {
>>> /** @worker: worker for handling incoming requests */
>>> struct work_struct worker;
>>> +
>>> + /** @retry: the number of times a H2G CTB has been retried */
>>> + u32 retry;
>>> };
>>> void intel_guc_ct_init_early(struct intel_guc_ct *ct);