[Intel-gfx] [PATCH 2/3] drm/i915/guc: Optimized CTB writes and reads

Michal Wajdeczko michal.wajdeczko at intel.com
Mon Nov 25 13:48:17 UTC 2019


On Fri, 22 Nov 2019 02:29:59 +0100, Matthew Brost  
<matthew.brost at intel.com> wrote:

> On Thu, Nov 21, 2019 at 07:56:07AM -0800, Matthew Brost wrote:
>> On Thu, Nov 21, 2019 at 12:58:50PM +0100, Michal Wajdeczko wrote:
>>> On Thu, 21 Nov 2019 00:56:03 +0100, <John.C.Harrison at intel.com> wrote:
>>>
>>>> From: Matthew Brost <matthew.brost at intel.com>
>>>>
>>>> CTB writes are now in the path of command submission and should be
>>>> optimized for performance. Rather than reading CTB descriptor values
>>>> (e.g. head, tail, size) which could result in accesses across the PCIe
>>>> bus, store shadow local copies and only read/write the descriptor
>>>> values when absolutely necessary.
>>>>
>>>> Cc: John Harrison <john.c.harrison at intel.com>
>>>> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
>>>> ---
>>>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 79 +++++++++++------------
>>>> drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h |  8 +++
>>>> 2 files changed, 45 insertions(+), 42 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c  
>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>> index e50d968b15d5..4d8a4c6afd71 100644
>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>>>> @@ -68,23 +68,29 @@ static inline const char  
>>>> *guc_ct_buffer_type_to_str(u32 type)
>>>> 	}
>>>> }
>>>> -static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
>>>> +static void guc_ct_buffer_desc_init(struct intel_guc_ct_buffer *ctb,
>>>> 				    u32 cmds_addr, u32 size, u32 owner)
>>>
>>> as now this function takes ctb instead of desc, it should be renamed
>>> or make it separate from guc_ct_buffer_desc_init
>>>
>>
>> Yes, makes sense.
>>
>>>> {
>>>> +	struct guc_ct_buffer_desc *desc = ctb->desc;
>>>> 	CT_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
>>>> 			desc, cmds_addr, size, owner);
>>>> 	memset(desc, 0, sizeof(*desc));
>>>> 	desc->addr = cmds_addr;
>>>> -	desc->size = size;
>>>> +	ctb->size = desc->size = size;
>>>> 	desc->owner = owner;
>>>> +	ctb->tail = 0;
>>>> +	ctb->head = 0;
>>>> +	ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
>>>> }
>>>> -static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
>>>> +static void guc_ct_buffer_desc_reset(struct intel_guc_ct_buffer *ctb)
>>>
>>> same here
>>>
>>
>> Same.
>>
>>>> {
>>>> +	struct guc_ct_buffer_desc *desc = ctb->desc;
>>>> 	CT_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
>>>> 			desc, desc->head, desc->tail);
>>>> -	desc->head = 0;
>>>> -	desc->tail = 0;
>>>> +	ctb->head = desc->head = 0;
>>>> +	ctb->tail = desc->tail = 0;
>>>> +	ctb->space = CIRC_SPACE(ctb->tail, ctb->head, ctb->size);
>>>> 	desc->is_in_error = 0;
>>>> }
>>>> @@ -220,7 +226,7 @@ static int ctch_enable(struct intel_guc *guc,
>>>> 	 */
>>>> 	for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
>>>> 		GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
>>>> -		guc_ct_buffer_desc_init(ctch->ctbs[i].desc,
>>>> +		guc_ct_buffer_desc_init(&ctch->ctbs[i],
>>>> 					base + PAGE_SIZE/4 * i + PAGE_SIZE/2,
>>>> 					PAGE_SIZE/4,
>>>> 					ctch->owner);
>>>> @@ -301,32 +307,16 @@ static int ctb_write(struct intel_guc_ct_buffer  
>>>> *ctb,
>>>> 		     bool want_response)
>>>> {
>>>> 	struct guc_ct_buffer_desc *desc = ctb->desc;
>>>> -	u32 head = desc->head / 4;	/* in dwords */
>>>> -	u32 tail = desc->tail / 4;	/* in dwords */
>>>> -	u32 size = desc->size / 4;	/* in dwords */
>>>> -	u32 used;			/* in dwords */
>>>> +	u32 tail = ctb->tail / 4;	/* in dwords */
>>>> +	u32 size = ctb->size / 4;	/* in dwords */
>>>> 	u32 header;
>>>> 	u32 *cmds = ctb->cmds;
>>>> 	unsigned int i;
>>>> -	GEM_BUG_ON(desc->size % 4);
>>>> -	GEM_BUG_ON(desc->head % 4);
>>>> -	GEM_BUG_ON(desc->tail % 4);
>>>> +	GEM_BUG_ON(ctb->size % 4);
>>>> +	GEM_BUG_ON(ctb->tail % 4);
>>>> 	GEM_BUG_ON(tail >= size);
>>>> -	/*
>>>> -	 * tail == head condition indicates empty. GuC FW does not support
>>>> -	 * using up the entire buffer to get tail == head meaning full.
>>>> -	 */
>>>> -	if (tail < head)
>>>> -		used = (size - head) + tail;
>>>> -	else
>>>> -		used = tail - head;
>>>> -
>>>> -	/* make sure there is a space including extra dw for the fence */
>>>> -	if (unlikely(used + len + 1 >= size))
>>>> -		return -ENOSPC;
>>>> -
>>>> 	/*
>>>> 	 * Write the message. The format is the following:
>>>> 	 * DW0: header (including action code)
>>>> @@ -354,15 +344,16 @@ static int ctb_write(struct intel_guc_ct_buffer  
>>>> *ctb,
>>>> 	}
>>>> 	/* now update desc tail (back in bytes) */
>>>> -	desc->tail = tail * 4;
>>>> -	GEM_BUG_ON(desc->tail > desc->size);
>>>> +	ctb->tail = desc->tail = tail * 4;
>>>> +	ctb->space -= (len + 1) * 4;
>>>> +	GEM_BUG_ON(ctb->tail > ctb->size);
>>>> 	return 0;
>>>> }
>>>> /**
>>>> * wait_for_ctb_desc_update - Wait for the CT buffer descriptor update.
>>>> - * @desc:	buffer descriptor
>>>> + * @ctb:	ctb buffer
>>>> * @fence:	response fence
>>>> * @status:	placeholder for status
>>>> *
>>>> @@ -376,11 +367,12 @@ static int ctb_write(struct intel_guc_ct_buffer  
>>>> *ctb,
>>>> * *	-ETIMEDOUT no response within hardcoded timeout
>>>> * *	-EPROTO no response, CT buffer is in error
>>>> */
>>>> -static int wait_for_ctb_desc_update(struct guc_ct_buffer_desc *desc,
>>>> +static int wait_for_ctb_desc_update(struct intel_guc_ct_buffer *ctb,
>>>> 				    u32 fence,
>>>> 				    u32 *status)
>>>> {
>>>> 	int err;
>>>> +	struct guc_ct_buffer_desc *desc = ctb->desc;
>>>> 	/*
>>>> 	 * Fast commands should complete in less than 10us, so sample quickly
>>>> @@ -401,7 +393,7 @@ static int wait_for_ctb_desc_update(struct  
>>>> guc_ct_buffer_desc *desc,
>>>> 			/* Something went wrong with the messaging, try to reset
>>>> 			 * the buffer and hope for the best
>>>> 			 */
>>>> -			guc_ct_buffer_desc_reset(desc);
>>>> +			guc_ct_buffer_desc_reset(ctb);
>>>> 			err = -EPROTO;
>>>> 		}
>>>> 	}
>>>> @@ -446,12 +438,17 @@ static int wait_for_ct_request_update(struct  
>>>> ct_request *req, u32 *status)
>>>> 	return err;
>>>> }
>>>> -static inline bool ctb_has_room(struct guc_ct_buffer_desc *desc, u32  
>>>> len)
>>>> +static inline bool ctb_has_room(struct intel_guc_ct_buffer *ctb, u32  
>>>> len)
>>>> {
>>>> -	u32 head = READ_ONCE(desc->head);
>>>> +	u32 head;
>>>> 	u32 space;
>>>> -	space = CIRC_SPACE(desc->tail, head, desc->size);
>>>> +	if (ctb->space >= len)
>>>> +		return true;
>>>> +
>>>> +	head = READ_ONCE(ctb->desc->head);
>>>> +	space = CIRC_SPACE(ctb->tail, head, ctb->size);
>>>> +	ctb->space = space;
>>>> 	return space >= len;
>>>> }
>>>> @@ -462,7 +459,6 @@ int intel_guc_send_nb(struct intel_guc_ct *ct,
>>>> {
>>>> 	struct intel_guc_ct_channel *ctch = &ct->host_channel;
>>>> 	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
>>>> -	struct guc_ct_buffer_desc *desc = ctb->desc;
>>>> 	int err;
>>>> 	GEM_BUG_ON(!ctch->enabled);
>>>> @@ -470,7 +466,7 @@ int intel_guc_send_nb(struct intel_guc_ct *ct,
>>>> 	GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
>>>> 	lockdep_assert_held(&ct->send_lock);
>>>> -	if (unlikely(!ctb_has_room(desc, (len + 1) * 4))) {
>>>> +	if (unlikely(!ctb_has_room(ctb, (len + 1) * 4))) {
>>>> 		ct->retry++;
>>>> 		if (ct->retry >= MAX_RETRY)
>>>> 			return -EDEADLK;
>>>> @@ -496,7 +492,6 @@ static int ctch_send(struct intel_guc_ct *ct,
>>>> 		     u32 *status)
>>>> {
>>>> 	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
>>>> -	struct guc_ct_buffer_desc *desc = ctb->desc;
>>>> 	struct ct_request request;
>>>> 	unsigned long flags;
>>>> 	u32 fence;
>>>> @@ -514,7 +509,7 @@ static int ctch_send(struct intel_guc_ct *ct,
>>>> 	 */
>>>> retry:
>>>> 	spin_lock_irqsave(&ct->send_lock, flags);
>>>> -	if (unlikely(!ctb_has_room(desc, (len + 1) * 4))) {
>>>> +	if (unlikely(!ctb_has_room(ctb, (len + 1) * 4))) {
>>>> 		spin_unlock_irqrestore(&ct->send_lock, flags);
>>>> 		ct->retry++;
>>>> 		if (ct->retry >= MAX_RETRY)
>>>> @@ -544,7 +539,7 @@ static int ctch_send(struct intel_guc_ct *ct,
>>>> 	if (response_buf)
>>>> 		err = wait_for_ct_request_update(&request, status);
>>>> 	else
>>>> -		err = wait_for_ctb_desc_update(desc, fence, status);
>>>> +		err = wait_for_ctb_desc_update(ctb, fence, status);
>>>> 	if (unlikely(err))
>>>> 		goto unlink;
>>>> @@ -618,9 +613,9 @@ static inline bool ct_header_is_response(u32  
>>>> header)
>>>> static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data)
>>>> {
>>>> 	struct guc_ct_buffer_desc *desc = ctb->desc;
>>>> -	u32 head = desc->head / 4;	/* in dwords */
>>>> +	u32 head = ctb->head / 4;	/* in dwords */
>>>> 	u32 tail = desc->tail / 4;	/* in dwords */
>>>> -	u32 size = desc->size / 4;	/* in dwords */
>>>> +	u32 size = ctb->size / 4;	/* in dwords */
>>>> 	u32 *cmds = ctb->cmds;
>>>> 	s32 available;			/* in dwords */
>>>> 	unsigned int len;
>>>> @@ -664,7 +659,7 @@ static int ctb_read(struct intel_guc_ct_buffer  
>>>> *ctb, u32 *data)
>>>> 	}
>>>> 	CT_DEBUG_DRIVER("CT: received %*ph\n", 4 * len, data);
>>>> -	desc->head = head * 4;
>>>> +	ctb->head = desc->head = head * 4;
>>>> 	return 0;
>>>> }
>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h  
>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>>> index bc670a796bd8..1bff4f0b91f7 100644
>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
>>>> @@ -29,10 +29,18 @@ struct intel_guc;
>>>> *
>>>> * @desc: pointer to the buffer descriptor
>>>> * @cmds: pointer to the commands buffer
>>>> + * @size: local shadow copy of size
>>>
>>> I would rather expect this as real fixed size,
>>> note that size is not expected to change
>>>
>>
>> Yes, it is fixed over the life of the CTB but not all CTBs need to be  
>> the same
>> size. e.g. The H2G & G2H may and likely will be different sizes with  
>> the new Guc
>> interface.
>>
>>>> + * @head: local shadow copy of head
>>>> + * @tail: local shadow copy of tail
>>>> + * @space: local shadow copy of space
>>>> */
>>>> struct intel_guc_ct_buffer {
>>>> 	struct guc_ct_buffer_desc *desc;
>>>> 	u32 *cmds;
>>>> +	u32 size;
>>>> +	u32 tail;
>>>> +	u32 head;
>>>> +	u32 space;
>>>> };
>>>> /** Represents pair of command transport buffers.
>>>
>>> Can we reorder this patch to be first in the series ?
>>>
>>> Michal
>
> Tried this and I think it makes more sense the way it is. The 'space'  
> value
> doesn't have a meaning before the non blocking call is introduced.

hmm, blocking ctb_write also needs to check free space, and it is already
doing that on its own, so there is need/usage for "space"

> Also it ends
> up changing a bunch of code that is then deleted in the non blocking  
> call patch.
> Better to leave it as is.

this patch as 2nd also modifies some lines already touched by 1st patch
but after reordering there will be more chances to merge this patch earlier

>
> Matt
>
>>> _______________________________________________
>>
>> Yes.
>>
>>> Intel-gfx mailing list
>>> Intel-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


More information about the Intel-gfx mailing list