[Intel-gfx] [PATCH 6/7] drm/i915/guc: Optimize CTB writes and reads
Matthew Brost
matthew.brost at intel.com
Tue Jul 6 21:22:48 UTC 2021
On Tue, Jul 06, 2021 at 09:33:23PM +0200, Michal Wajdeczko wrote:
>
>
> On 06.07.2021 21:19, John Harrison wrote:
> > On 7/6/2021 12:12, Michal Wajdeczko wrote:
> >> On 06.07.2021 21:00, John Harrison wrote:
> >>> On 7/1/2021 10:15, Matthew Brost wrote:
> >>>> CTB writes are now in the path of command submission and should be
> >>>> optimized for performance. Rather than reading CTB descriptor values
> >>>> (e.g. head, tail) which could result in accesses across the PCIe bus,
> >>>> store shadow local copies and only read/write the descriptor values
> >>>> when
> >>>> absolutely necessary. Also store the current space in the each channel
> >>>> locally.
> >>>>
> >>>> v2:
> >>>> (Michel)
> >>>> - Add additional sanity checks for head / tail pointers
> >>>> - Use GUC_CTB_HDR_LEN rather than magic 1
> >>>>
> >>>> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> >>>> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> >>>> ---
> >>>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 88
> >>>> +++++++++++++++--------
> >>>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 6 ++
> >>>> 2 files changed, 65 insertions(+), 29 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> >>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> >>>> index a9cb7b608520..5b8b4ff609e2 100644
> >>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> >>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> >>>> @@ -130,6 +130,10 @@ static void guc_ct_buffer_desc_init(struct
> >>>> guc_ct_buffer_desc *desc)
> >>>> static void guc_ct_buffer_reset(struct intel_guc_ct_buffer *ctb)
> >>>> {
> >>>> ctb->broken = false;
> >>>> + ctb->tail = 0;
> >>>> + ctb->head = 0;
> >>>> + ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
> >>>> +
> >>>> guc_ct_buffer_desc_init(ctb->desc);
> >>>> }
> >>>> @@ -383,10 +387,8 @@ static int ct_write(struct intel_guc_ct *ct,
> >>>> {
> >>>> struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
> >>>> struct guc_ct_buffer_desc *desc = ctb->desc;
> >>>> - u32 head = desc->head;
> >>>> - u32 tail = desc->tail;
> >>>> + u32 tail = ctb->tail;
> >>>> u32 size = ctb->size;
> >>>> - u32 used;
> >>>> u32 header;
> >>>> u32 hxg;
> >>>> u32 *cmds = ctb->cmds;
> >>>> @@ -395,25 +397,22 @@ static int ct_write(struct intel_guc_ct *ct,
> >>>> if (unlikely(desc->status))
> >>>> goto corrupted;
> >>>> - if (unlikely((tail | head) >= size)) {
> >>>> + GEM_BUG_ON(tail > size);
> >>>> +
> >>>> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
> >>>> + if (unlikely(tail != READ_ONCE(desc->tail))) {
> >>>> + CT_ERROR(ct, "Tail was modified %u != %u\n",
> >>>> + desc->tail, ctb->tail);
> >>>> + desc->status |= GUC_CTB_STATUS_MISMATCH;
> >>>> + goto corrupted;
> >>>> + }
> >>>> + if (unlikely((desc->tail | desc->head) >= size)) {
> >>>> CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
> >>>> - head, tail, size);
> >>>> + desc->head, desc->tail, size);
> >>>> desc->status |= GUC_CTB_STATUS_OVERFLOW;
> >>>> goto corrupted;
> >>>> }
> >>>> -
> >>>> - /*
> >>>> - * tail == head condition indicates empty. GuC FW does not support
> >>>> - * using up the entire buffer to get tail == head meaning full.
> >>>> - */
> >>>> - if (tail < head)
> >>>> - used = (size - head) + tail;
> >>>> - else
> >>>> - used = tail - head;
> >>>> -
> >>>> - /* make sure there is a space including extra dw for the fence */
> >>>> - if (unlikely(used + len + GUC_CTB_HDR_LEN >= size))
> >>>> - return -ENOSPC;
> >>>> +#endif
> >>>> /*
> >>>> * dw0: CT header (including fence)
> >>>> @@ -454,7 +453,9 @@ static int ct_write(struct intel_guc_ct *ct,
> >>>> write_barrier(ct);
> >>>> /* now update descriptor */
> >>>> + ctb->tail = tail;
> >>>> WRITE_ONCE(desc->tail, tail);
> >>>> + ctb->space -= len + GUC_CTB_HDR_LEN;
> >>>> return 0;
> >>>> @@ -470,7 +471,7 @@ static int ct_write(struct intel_guc_ct *ct,
> >>>> * @req: pointer to pending request
> >>>> * @status: placeholder for status
> >>>> *
> >>>> - * For each sent request, Guc shall send bac CT response message.
> >>>> + * For each sent request, GuC shall send back CT response message.
> >>>> * Our message handler will update status of tracked request once
> >>>> * response message with given fence is received. Wait here and
> >>>> * check for valid response status value.
> >>>> @@ -526,24 +527,35 @@ static inline bool ct_deadlocked(struct
> >>>> intel_guc_ct *ct)
> >>>> return ret;
> >>>> }
> >>>> -static inline bool h2g_has_room(struct intel_guc_ct_buffer *ctb,
> >>>> u32 len_dw)
> >>>> +static inline bool h2g_has_room(struct intel_guc_ct *ct, u32 len_dw)
> >>>> {
> >>>> - struct guc_ct_buffer_desc *desc = ctb->desc;
> >>>> - u32 head = READ_ONCE(desc->head);
> >>>> + struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
> >>>> + u32 head;
> >>>> u32 space;
> >>>> - space = CIRC_SPACE(desc->tail, head, ctb->size);
> >>>> + if (ctb->space >= len_dw)
> >>>> + return true;
> >>>> +
> >>>> + head = READ_ONCE(ctb->desc->head);
> >>>> + if (unlikely(head > ctb->size)) {
> >>>> + CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u size=%u\n",
> >>>> + ctb->desc->head, ctb->desc->tail, ctb->size);
> >>>> + ctb->desc->status |= GUC_CTB_STATUS_OVERFLOW;
> >>>> + ctb->broken = true;
> >>>> + return false;
> >>>> + }
> >>>> +
> >>>> + space = CIRC_SPACE(ctb->tail, head, ctb->size);
> >>>> + ctb->space = space;
> >>>> return space >= len_dw;
> >>>> }
> >>>> static int has_room_nb(struct intel_guc_ct *ct, u32 len_dw)
> >>>> {
> >>>> - struct intel_guc_ct_buffer *ctb = &ct->ctbs.send;
> >>>> -
> >>>> lockdep_assert_held(&ct->ctbs.send.lock);
> >>>> - if (unlikely(!h2g_has_room(ctb, len_dw))) {
> >>>> + if (unlikely(!h2g_has_room(ct, len_dw))) {
> >>>> if (ct->stall_time == KTIME_MAX)
> >>>> ct->stall_time = ktime_get();
> >>>> @@ -613,7 +625,7 @@ static int ct_send(struct intel_guc_ct *ct,
> >>>> */
> >>>> retry:
> >>>> spin_lock_irqsave(&ctb->lock, flags);
> >>>> - if (unlikely(!h2g_has_room(ctb, len + GUC_CTB_HDR_LEN))) {
> >>>> + if (unlikely(!h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
> >>>> if (ct->stall_time == KTIME_MAX)
> >>>> ct->stall_time = ktime_get();
> >>>> spin_unlock_irqrestore(&ctb->lock, flags);
> >>>> @@ -733,7 +745,7 @@ static int ct_read(struct intel_guc_ct *ct, struct
> >>>> ct_incoming_msg **msg)
> >>>> {
> >>>> struct intel_guc_ct_buffer *ctb = &ct->ctbs.recv;
> >>>> struct guc_ct_buffer_desc *desc = ctb->desc;
> >>>> - u32 head = desc->head;
> >>>> + u32 head = ctb->head;
> >>>> u32 tail = desc->tail;
> >>>> u32 size = ctb->size;
> >>>> u32 *cmds = ctb->cmds;
> >>>> @@ -748,12 +760,29 @@ static int ct_read(struct intel_guc_ct *ct,
> >>>> struct ct_incoming_msg **msg)
> >>>> if (unlikely(desc->status))
> >>>> goto corrupted;
> >>>> - if (unlikely((tail | head) >= size)) {
> >>>> + GEM_BUG_ON(head > size);
This is driver owned field so I think a GEM_BUG_ON is correct as if this
blows the driver apart we have a bug in the i915.
> >>> Is the BUG_ON necessary given that both options below do the same check
> >>> but as a corrupted buffer test (with subsequent recovery by GT reset?)
> >>> rather than killing the driver.
> >> "head" and "size" are now fully owned by the driver.
> >> BUGON here is to make sure driver is coded correctly.
> > The point is that both sides of the #if below also validate head. So
>
> but not the same "head"
>
> under DEBUG we are validating the one from descriptor (together with
> tail) - and that should be recoverable as if this fails it was clearly
> not our fault.
>
> but under non-DEBUG we were attempting to validate again the local one,
> pretending that this is recoverable, but it is not, as this is our fault
> (elsewhere in i915 we don't attempt to recover from obvious coding errors).
>
> > first there is a BUG_ON, then there is the same test but without blowing
> > the driver apart. One or the other is not required. My vote would be to
> > keep the recoverable test rather than the BUG_ON.
>
> IMHO we should keep GEMBUGON and drop redundant check in non-DEBUG.
>
> But let Matt decide.
>
I think I'll drop the testing of the head value and keep the BUG_ON.
Matt
> Michal
>
> >
> > John.
> >
> >>
> >>>> +
> >>>> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
> >>>> + if (unlikely(head != READ_ONCE(desc->head))) {
> >>>> + CT_ERROR(ct, "Head was modified %u != %u\n",
> >>>> + desc->head, ctb->head);
> >>>> + desc->status |= GUC_CTB_STATUS_MISMATCH;
> >>>> + goto corrupted;
> >>>> + }
> >>>> + if (unlikely((desc->tail | desc->head) >= size)) {
> >>>> + CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
> >>>> + head, tail, size);
> >>>> + desc->status |= GUC_CTB_STATUS_OVERFLOW;
> >>>> + goto corrupted;
> >>>> + }
> >>>> +#else
> >>>> + if (unlikely((tail | ctb->head) >= size)) {
> >>> Could just be 'head' rather than 'ctb->head'.
> >> or drop "ctb->head" completely since this is driver owned field and
> >> above you already have BUGON to test it
> >>
> >> Michal
> >>
> >>> John.
> >>>
> >>>> CT_ERROR(ct, "Invalid offsets head=%u tail=%u (size=%u)\n",
> >>>> head, tail, size);
> >>>> desc->status |= GUC_CTB_STATUS_OVERFLOW;
> >>>> goto corrupted;
> >>>> }
> >>>> +#endif
> >>>> /* tail == head condition indicates empty */
> >>>> available = tail - head;
> >>>> @@ -803,6 +832,7 @@ static int ct_read(struct intel_guc_ct *ct, struct
> >>>> ct_incoming_msg **msg)
> >>>> }
> >>>> CT_DEBUG(ct, "received %*ph\n", 4 * len, (*msg)->msg);
> >>>> + ctb->head = head;
> >>>> /* now update descriptor */
> >>>> WRITE_ONCE(desc->head, head);
> >>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> >>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> >>>> index bee03794c1eb..edd1bba0445d 100644
> >>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> >>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
> >>>> @@ -33,6 +33,9 @@ struct intel_guc;
> >>>> * @desc: pointer to the buffer descriptor
> >>>> * @cmds: pointer to the commands buffer
> >>>> * @size: size of the commands buffer in dwords
> >>>> + * @head: local shadow copy of head in dwords
> >>>> + * @tail: local shadow copy of tail in dwords
> >>>> + * @space: local shadow copy of space in dwords
> >>>> * @broken: flag to indicate if descriptor data is broken
> >>>> */
> >>>> struct intel_guc_ct_buffer {
> >>>> @@ -40,6 +43,9 @@ struct intel_guc_ct_buffer {
> >>>> struct guc_ct_buffer_desc *desc;
> >>>> u32 *cmds;
> >>>> u32 size;
> >>>> + u32 tail;
> >>>> + u32 head;
> >>>> + u32 space;
> >>>> bool broken;
> >>>> };
> >>>>
> >
More information about the Intel-gfx
mailing list