[PATCH 1/2] drm/xe: Add sent and recv counters for tlb invalidations
Nirmoy Das
nirmoy.das at intel.com
Tue Jul 23 13:07:05 UTC 2024
On 7/23/2024 2:22 PM, Michal Wajdeczko wrote:
>
> On 23.07.2024 13:16, Nirmoy Das wrote:
>> Add counters for TLB invalidation sent and received requests, which
>> can then be queried as sysfs files from userspace.
> s/sysfs/debugfs ?
I will fix it.
>
>> Cc: Matthew Brost <matthew.brost at intel.com>
>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>> Cc: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
>> Signed-off-by: Nirmoy Das <nirmoy.das at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 37 +++++++++++++++------
>> drivers/gpu/drm/xe/xe_gt_types.h | 4 +++
>> 2 files changed, 30 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
>> index 481d83d07367..f84717c1aafa 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
>> @@ -37,8 +37,11 @@ static long tlb_timeout_jiffies(struct xe_gt *gt)
>> }
>>
>> static void
>> -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
>> +__invalidation_fence_signal(struct xe_gt *gt,
>> + struct xe_gt_tlb_invalidation_fence *fence,
>> + bool failed)
>> {
>> + struct xe_device *xe = gt_to_xe(gt);
>> bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
>>
>> trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
>> @@ -46,13 +49,19 @@ __invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_
>> dma_fence_signal(&fence->base);
>> if (!stack)
>> dma_fence_put(&fence->base);
>> +
>> + /* Only increment the counter when tlb inval is done successfully */
>> + if (!failed)
>> + atomic64_inc(&gt->tlb_invalidation.received_count);
>> }
>>
>> static void
>> -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
>> +invalidation_fence_signal(struct xe_gt *gt,
>> + struct xe_gt_tlb_invalidation_fence *fence,
>> + bool failed)
>> {
>> list_del(&fence->link);
>> - __invalidation_fence_signal(xe, fence);
>> + __invalidation_fence_signal(gt, fence, failed);
>> }
>>
>> static void xe_gt_tlb_fence_timeout(struct work_struct *work)
>> @@ -76,7 +85,7 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
>> fence->seqno, gt->tlb_invalidation.seqno_recv);
>>
>> fence->base.error = -ETIME;
>> - invalidation_fence_signal(xe, fence);
>> + invalidation_fence_signal(gt, fence, true);
>> }
>> if (!list_empty(&gt->tlb_invalidation.pending_fences))
>> queue_delayed_work(system_wq,
>> @@ -102,6 +111,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
>> spin_lock_init(&gt->tlb_invalidation.lock);
>> INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
>> xe_gt_tlb_fence_timeout);
>> + atomic64_set(&gt->tlb_invalidation.sent_count, 0);
>> + atomic64_set(&gt->tlb_invalidation.received_count, 0);
>>
>> return 0;
>> }
>> @@ -140,7 +151,9 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
>>
>> list_for_each_entry_safe(fence, next,
>> &gt->tlb_invalidation.pending_fences, link)
>> - invalidation_fence_signal(gt_to_xe(gt), fence);
>> + invalidation_fence_signal(gt, fence, false);
>> + atomic64_set(&gt->tlb_invalidation.sent_count, 0);
>> + atomic64_set(&gt->tlb_invalidation.received_count, 0);
> hmm, any TLB invalidation timeouts/errors, which would make
> received_count != sent_count, should trigger a GT reset, which in turn
> will reset those counters, so under which condition you expect those two
> stats being not equal?
We tolerate GGTT tlb inval timeouts without needing to do a GT reset,
probably we shouldn't? If not, then I agree that we can have
a total sent counter and another for an inflight counter.
> is it just during the waiting for some ack?
>
> maybe better/cleaner option would be to track/display number of TLB
> invalidation requests in flight ?
Request from Sai was about having total tlb inval sent counter and I
think inflight would be a bonus and should be useful for debugging.
Regards,
Nirmoy
>
>> spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
>> mutex_unlock(&gt->uc.guc.ct.lock);
>> }
>> @@ -182,7 +195,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
>> action[1] = seqno;
>> ret = xe_guc_ct_send_locked(&guc->ct, action, len,
>> G2H_LEN_DW_TLB_INVALIDATE, 1);
>> - if (!ret && fence) {
>> + if (!ret) {
>> spin_lock_irq(&gt->tlb_invalidation.pending_lock);
>> /*
>> * We haven't actually published the TLB fence as per
>> @@ -191,7 +204,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
>> * we can just go ahead and signal the fence here.
>> */
>> if (tlb_invalidation_seqno_past(gt, seqno)) {
>> - __invalidation_fence_signal(xe, fence);
>> + __invalidation_fence_signal(gt, fence, false);
>> } else {
>> fence->invalidation_time = ktime_get();
>> list_add_tail(&fence->link,
>> @@ -203,14 +216,16 @@ static int send_tlb_invalidation(struct xe_guc *guc,
>> tlb_timeout_jiffies(gt));
>> }
>> spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
>> - } else if (ret < 0 && fence) {
>> - __invalidation_fence_signal(xe, fence);
>> + } else if (ret < 0) {
>> + __invalidation_fence_signal(gt, fence, true);
>> }
>> if (!ret) {
>> gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
>> TLB_INVALIDATION_SEQNO_MAX;
>> if (!gt->tlb_invalidation.seqno)
>> gt->tlb_invalidation.seqno = 1;
>> +
>> + atomic64_inc(&gt->tlb_invalidation.sent_count);
>> }
>> mutex_unlock(&guc->ct.lock);
>>
>> @@ -321,7 +336,7 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
>>
>> /* Execlists not supported */
>> if (gt_to_xe(gt)->info.force_execlist) {
>> - __invalidation_fence_signal(xe, fence);
>> + __invalidation_fence_signal(gt, fence, true);
>> return 0;
>> }
>>
>> @@ -455,7 +470,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
>> if (!tlb_invalidation_seqno_past(gt, fence->seqno))
>> break;
>>
>> - invalidation_fence_signal(xe, fence);
>> + invalidation_fence_signal(gt, fence, false);
>> }
>>
>> if (!list_empty(&gt->tlb_invalidation.pending_fences))
>> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
>> index ef68c4a92972..130d9f5cb5c2 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_types.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
>> @@ -199,6 +199,10 @@ struct xe_gt {
>> struct delayed_work fence_tdr;
>> /** @tlb_invalidation.lock: protects TLB invalidation fences */
>> spinlock_t lock;
>> + /** @tlb_invalidation.sent_count: counter for sent TLB inval requests */
>> + atomic64_t sent_count;
>> + /** @tlb_invalidation.received_count: counter for received TLB inval requests */
>> + atomic64_t received_count;
>> } tlb_invalidation;
>>
>> /**
More information about the Intel-xe
mailing list