[Intel-gfx] [PATCH 1/3] drm/i915/guc: Add non blocking CTB send function
Michal Wajdeczko
michal.wajdeczko at intel.com
Thu Nov 21 11:43:26 UTC 2019
On Thu, 21 Nov 2019 00:56:02 +0100, <John.C.Harrison at intel.com> wrote:
> From: Matthew Brost <matthew.brost at intel.com>
>
> Add non blocking CTB send fuction, intel_guc_send_nb. In order to
> support a non blocking CTB send fuction a spin lock is needed to
2x typos: "fuction" -> "function"
> protect the CTB descriptors fields. Also the non blocking call must not
> update the fence value as this value is owned by the blocking call
> (intel_guc_send).
You probably mean "intel_guc_send_ct", as intel_guc_send is just a wrapper
around guc->send.
>
> The blocking CTB now must have a flow control mechanism to ensure the
> buffer isn't overrun. A lazy spin wait is used as we believe the flow
> control condition should be rare with properly sized buffer. A retry
> counter is also implemented which fails H2G CTBs once a limit is
> reached to prevent deadlock.
>
> The function, intel_guc_send_nb, is exported in this patch but unused.
> Several patches later in the series make use of this function.
It's likely in yet another series
>
> Cc: John Harrison <john.c.harrison at intel.com>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/i915/gt/uc/intel_guc.h | 2 +
> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 97 +++++++++++++++++++----
> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 10 ++-
> 3 files changed, 91 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index e6400204a2bd..77c5af919ace 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -94,6 +94,8 @@ intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
> return guc->send(guc, action, len, response_buf, response_buf_size);
> }
> +int intel_guc_send_nb(struct intel_guc_ct *ct, const u32 *action, u32 len);
> +
Hmm, this mismatch of guc/ct parameters breaks our layering.
But we can keep this layering intact by introducing some flags to
the existing guc_send() function. These flags could be passed as
high bits in action[0], like this:
#define GUC_ACTION_FLAG_DONT_WAIT	0x80000000

int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset)
{
	u32 action[] = {
		INTEL_GUC_ACTION_AUTHENTICATE_HUC | GUC_ACTION_FLAG_DONT_WAIT,
		rsa_offset
	};

	return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
then the actual back-end of guc->send can take the proper steps based on this flag:
@@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len,
	GEM_BUG_ON(!len);
	GEM_BUG_ON(len > guc->send_regs.count);
+	if (*action & GUC_ACTION_FLAG_DONT_WAIT)
+		return -EINVAL;
+	*action &= ~GUC_ACTION_FLAG_DONT_WAIT;
+
	/* We expect only action code */
	GEM_BUG_ON(*action & ~INTEL_GUC_MSG_CODE_MASK);
@@ int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len,
	u32 status = ~0; /* undefined */
	int ret;
+	if (*action & GUC_ACTION_FLAG_DONT_WAIT) {
+		GEM_BUG_ON(response_buf);
+		GEM_BUG_ON(response_buf_size);
+		return ctch_send_nb(ct, ctch, action, len);
+	}
+
	mutex_lock(&guc->send_mutex);
	ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size,
> static inline void intel_guc_notify(struct intel_guc *guc)
> {
> guc->notify(guc);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> index b49115517510..e50d968b15d5 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> @@ -3,6 +3,8 @@
> * Copyright © 2016-2019 Intel Corporation
> */
> +#include <linux/circ_buf.h>
> +
> #include "i915_drv.h"
> #include "intel_guc_ct.h"
> @@ -12,6 +14,8 @@
> #define CT_DEBUG_DRIVER(...) do { } while (0)
> #endif
> +#define MAX_RETRY 0x1000000
> +
> struct ct_request {
> struct list_head link;
> u32 fence;
> @@ -40,7 +44,8 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct)
> /* we're using static channel owners */
> ct->host_channel.owner = CTB_OWNER_HOST;
> - spin_lock_init(&ct->lock);
> + spin_lock_init(&ct->request_lock);
> + spin_lock_init(&ct->send_lock);
> INIT_LIST_HEAD(&ct->pending_requests);
> INIT_LIST_HEAD(&ct->incoming_requests);
> INIT_WORK(&ct->worker, ct_incoming_request_worker_func);
> @@ -291,7 +296,8 @@ static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
> static int ctb_write(struct intel_guc_ct_buffer *ctb,
> const u32 *action,
> u32 len /* in dwords */,
> - u32 fence,
> + u32 fence_value,
> + bool enable_fence,
Maybe we can just guarantee that fence=0 will never be used as a valid
fence id; then this flag could be replaced with a (fence != 0) check.
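For illustration only, a minimal sketch of how the header could then be
derived from the fence value alone (ct_header() is a hypothetical helper,
not part of the posted patch, and assumes ctch_get_next_fence() never
returns 0 for a real blocking request):

static u32 ct_header(const u32 *action, u32 len, u32 fence, bool want_response)
{
	/* fence == 0 means "no fence", so no extra enable_fence flag needed */
	return (len << GUC_CT_MSG_LEN_SHIFT) |
	       (fence ? GUC_CT_MSG_WRITE_FENCE_TO_DESC : 0) |
	       (want_response ? GUC_CT_MSG_SEND_STATUS : 0) |
	       (action[0] << GUC_CT_MSG_ACTION_SHIFT);
}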
> bool want_response)
> {
> struct guc_ct_buffer_desc *desc = ctb->desc;
> @@ -328,18 +334,18 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb,
> * DW2+: action data
> */
> header = (len << GUC_CT_MSG_LEN_SHIFT) |
> - (GUC_CT_MSG_WRITE_FENCE_TO_DESC) |
> + (enable_fence ? GUC_CT_MSG_WRITE_FENCE_TO_DESC : 0) |
Hmm, even if we ask the fw not to write the fence back to the descriptor,
IIRC the current firmware will unconditionally write back the return status
of this non-blocking call, possibly overwriting the status of a blocked
call.
> (want_response ? GUC_CT_MSG_SEND_STATUS : 0) |
Btw, if we switch all requests to expect a reply sent back over CTB,
then we can possibly drop the send_mutex in the CTB paths and block
only when there is no DONT_WAIT flag and we have to wait for a response.
> (action[0] << GUC_CT_MSG_ACTION_SHIFT);
> CT_DEBUG_DRIVER("CT: writing %*ph %*ph %*ph\n",
> - 4, &header, 4, &fence,
> + 4, &header, 4, &fence_value,
> 4 * (len - 1), &action[1]);
> cmds[tail] = header;
> tail = (tail + 1) % size;
> - cmds[tail] = fence;
> + cmds[tail] = fence_value;
> tail = (tail + 1) % size;
> for (i = 1; i < len; i++) {
> @@ -440,6 +446,47 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
> return err;
> }
> +static inline bool ctb_has_room(struct guc_ct_buffer_desc *desc, u32 len)
> +{
> + u32 head = READ_ONCE(desc->head);
> + u32 space;
> +
> + space = CIRC_SPACE(desc->tail, head, desc->size);
> +
> + return space >= len;
> +}
> +
> +int intel_guc_send_nb(struct intel_guc_ct *ct,
> + const u32 *action,
> + u32 len)
> +{
> + struct intel_guc_ct_channel *ctch = &ct->host_channel;
> + struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
> + struct guc_ct_buffer_desc *desc = ctb->desc;
> + int err;
> +
> + GEM_BUG_ON(!ctch->enabled);
> + GEM_BUG_ON(!len);
> + GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
> + lockdep_assert_held(&ct->send_lock);
Hmm, does this mean that it's now the caller's responsibility to take
the CT private spinlock? That is not how the other guc_send() functions work.
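For illustration, a rough sketch of the alternative, with the lock taken
inside the send path itself so callers stay unaware of the CT private lock
(retry/flow-control handling elided; this is an assumption, not the posted
code):

int intel_guc_send_nb(struct intel_guc_ct *ct, const u32 *action, u32 len)
{
	struct intel_guc_ct_channel *ctch = &ct->host_channel;
	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
	unsigned long flags;
	int err;

	/* take send_lock internally, matching how ctch_send() guards the ring */
	spin_lock_irqsave(&ct->send_lock, flags);
	if (!ctb_has_room(ctb->desc, (len + 1) * 4))
		err = -EBUSY;
	else
		err = ctb_write(ctb, action, len, 0, false, false);
	spin_unlock_irqrestore(&ct->send_lock, flags);

	if (!err)
		intel_guc_notify(ct_to_guc(ct));
	return err;
}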
> +
> + if (unlikely(!ctb_has_room(desc, (len + 1) * 4))) {
> + ct->retry++;
> + if (ct->retry >= MAX_RETRY)
> + return -EDEADLK;
> + else
> + return -EBUSY;
> + }
> +
> + ct->retry = 0;
> + err = ctb_write(ctb, action, len, 0, false, false);
> + if (unlikely(err))
> + return err;
> +
> + intel_guc_notify(ct_to_guc(ct));
> + return 0;
> +}
> +
> static int ctch_send(struct intel_guc_ct *ct,
> struct intel_guc_ct_channel *ctch,
> const u32 *action,
> @@ -460,17 +507,35 @@ static int ctch_send(struct intel_guc_ct *ct,
> GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
> GEM_BUG_ON(!response_buf && response_buf_size);
> + /*
> + * We use a lazy spin wait loop here as we believe that if the CT
> + * buffers are sized correctly the flow control condition should be
> + * rare.
> + */
> +retry:
> + spin_lock_irqsave(&ct->send_lock, flags);
> + if (unlikely(!ctb_has_room(desc, (len + 1) * 4))) {
> + spin_unlock_irqrestore(&ct->send_lock, flags);
> + ct->retry++;
> + if (ct->retry >= MAX_RETRY)
> + return -EDEADLK;
I'm not sure what's better: a hidden deadlock that is hard to reproduce,
or deadlocks that are easier to catch and thus help us make the code
deadlock-clean.
> + cpu_relax();
> + goto retry;
> + }
> +
> + ct->retry = 0;
> fence = ctch_get_next_fence(ctch);
> request.fence = fence;
> request.status = 0;
> request.response_len = response_buf_size;
> request.response_buf = response_buf;
> - spin_lock_irqsave(&ct->lock, flags);
> + spin_lock(&ct->request_lock);
> list_add_tail(&request.link, &ct->pending_requests);
> - spin_unlock_irqrestore(&ct->lock, flags);
> + spin_unlock(&ct->request_lock);
> - err = ctb_write(ctb, action, len, fence, !!response_buf);
> + err = ctb_write(ctb, action, len, fence, true, !!response_buf);
> + spin_unlock_irqrestore(&ct->send_lock, flags);
> if (unlikely(err))
> goto unlink;
> @@ -501,9 +566,9 @@ static int ctch_send(struct intel_guc_ct *ct,
> }
> unlink:
> - spin_lock_irqsave(&ct->lock, flags);
> + spin_lock_irqsave(&ct->request_lock, flags);
> list_del(&request.link);
> - spin_unlock_irqrestore(&ct->lock, flags);
> + spin_unlock_irqrestore(&ct->request_lock, flags);
> return err;
> }
> @@ -653,7 +718,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
> CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status);
> - spin_lock(&ct->lock);
> + spin_lock(&ct->request_lock);
> list_for_each_entry(req, &ct->pending_requests, link) {
> if (unlikely(fence != req->fence)) {
> CT_DEBUG_DRIVER("CT: request %u awaits response\n",
> @@ -672,7 +737,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
> found = true;
> break;
> }
> - spin_unlock(&ct->lock);
> + spin_unlock(&ct->request_lock);
> if (!found)
> DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg);
> @@ -710,13 +775,13 @@ static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
> u32 *payload;
> bool done;
> - spin_lock_irqsave(&ct->lock, flags);
> + spin_lock_irqsave(&ct->request_lock, flags);
> request = list_first_entry_or_null(&ct->incoming_requests,
> struct ct_incoming_request, link);
> if (request)
> list_del(&request->link);
> done = !!list_empty(&ct->incoming_requests);
> - spin_unlock_irqrestore(&ct->lock, flags);
> + spin_unlock_irqrestore(&ct->request_lock, flags);
> if (!request)
> return true;
> @@ -777,9 +842,9 @@ static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg)
> }
> memcpy(request->msg, msg, 4 * msglen);
> - spin_lock_irqsave(&ct->lock, flags);
> + spin_lock_irqsave(&ct->request_lock, flags);
> list_add_tail(&request->link, &ct->incoming_requests);
> - spin_unlock_irqrestore(&ct->lock, flags);
> + spin_unlock_irqrestore(&ct->request_lock, flags);
> queue_work(system_unbound_wq, &ct->worker);
> return 0;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> index 7c24d83f5c24..bc670a796bd8 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> @@ -62,8 +62,11 @@ struct intel_guc_ct {
> struct intel_guc_ct_channel host_channel;
> /* other channels are tbd */
> - /** @lock: protects pending requests list */
> - spinlock_t lock;
> + /** @request_lock: protects pending requests list */
> + spinlock_t request_lock;
> +
> + /** @send_lock: protects h2g channel */
> + spinlock_t send_lock;
> /** @pending_requests: list of requests waiting for response */
> struct list_head pending_requests;
> @@ -73,6 +76,9 @@ struct intel_guc_ct {
> /** @worker: worker for handling incoming requests */
> struct work_struct worker;
> +
> + /** @retry: the number of times a H2G CTB has been retried */
> + u32 retry;
> };
> void intel_guc_ct_init_early(struct intel_guc_ct *ct);