[PATCH v5 3/4] drm/sched: trace dependencies for gpu jobs
Pierre-Eric Pelloux-Prayer
pierre-eric at damsy.net
Wed Jul 3 12:43:18 UTC 2024
Le 03/07/2024 à 13:05, Lucas Stach a écrit :
> Am Freitag, dem 14.06.2024 um 10:16 +0200 schrieb Pierre-Eric Pelloux-
> Prayer:
>> Trace the fence dependencies similarly to how we print fences:
>>
>> ... , dependencies:{(context:606, seqno:38006)}
>>
>> This allows tools to analyze the dependencies between the jobs (previously
>> it was only possible for fences traced by drm_sched_job_wait_dep).
>>
>> Since drm_sched_job and drm_run_job use the same base event class,
>> the caller of trace_drm_run_job has to pass a dep_count of 0 (which
>> is ignored because dependencies are only considered at submit time).
>>
>> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
>> ---
>> .../gpu/drm/scheduler/gpu_scheduler_trace.h | 59 ++++++++++++++++---
>> drivers/gpu/drm/scheduler/sched_entity.c | 8 ++-
>> drivers/gpu/drm/scheduler/sched_main.c | 2 +-
>> 3 files changed, 58 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
>> index f6da377cdb99..0abcad26839c 100644
>> --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
>> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
>> @@ -26,15 +26,41 @@
>>
>> #include <linux/stringify.h>
>> #include <linux/types.h>
>> +#include <linux/trace_seq.h>
>> #include <linux/tracepoint.h>
>>
>> #undef TRACE_SYSTEM
>> #define TRACE_SYSTEM gpu_scheduler
>> #define TRACE_INCLUDE_FILE gpu_scheduler_trace
>>
>> +#ifndef __TRACE_EVENT_GPU_SCHEDULER_PRINT_FN
>> +#define __TRACE_EVENT_GPU_SCHEDULER_PRINT_FN
>> +/* Similar to trace_print_array_seq but for fences. */
>> +static inline const char *__print_dma_fence_array(struct trace_seq *p, const void *buf, int count)
>> +{
>> + const char *ret = trace_seq_buffer_ptr(p);
>> + u64 *fences = (u64 *) buf;
>> + const char *prefix = "";
>> +
>> + trace_seq_putc(p, '{');
>> + for (int i = 0; i < count; i++) {
>> + u64 context = fences[2 * i], seqno = fences[2 * i + 1];
>> +
>> + trace_seq_printf(p, "%s(context:%llu, seqno:%lld)",
>> + prefix, context, seqno);
>> + prefix = ",";
>> + }
>> + trace_seq_putc(p, '}');
>> + trace_seq_putc(p, 0);
>> +
>> + return ret;
>> +}
>> +#endif
>> +
>> DECLARE_EVENT_CLASS(drm_sched_job,
>> - TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
>> - TP_ARGS(sched_job, entity),
>> + TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity,
>> + unsigned int dep_count),
>> + TP_ARGS(sched_job, entity, dep_count),
>> TP_STRUCT__entry(
>> __string(name, sched_job->sched->name)
>> __field(uint64_t, id)
>> @@ -43,9 +69,14 @@ DECLARE_EVENT_CLASS(drm_sched_job,
>> __string(dev, dev_name(sched_job->sched->dev))
>> __field(uint64_t, fence_context)
>> __field(uint64_t, fence_seqno)
>> + __field(int, n_deps)
>> + __dynamic_array(u64, deps, dep_count * 2)
>> ),
>>
>> TP_fast_assign(
>> + unsigned long idx;
>> + struct dma_fence *fence;
>> + u64 *dyn_arr;
>> __entry->id = sched_job->id;
>> __assign_str(name);
>> __entry->job_count = spsc_queue_count(&entity->job_queue);
>> @@ -54,22 +85,32 @@ DECLARE_EVENT_CLASS(drm_sched_job,
>> __assign_str(dev);
>> __entry->fence_context = sched_job->s_fence->finished.context;
>> __entry->fence_seqno = sched_job->s_fence->finished.seqno;
>> -
>> + __entry->n_deps = dep_count;
>> + if (dep_count) {
>> + dyn_arr = __get_dynamic_array(deps);
>> + xa_for_each(&sched_job->dependencies, idx, fence) {
>> + dyn_arr[2 * idx] = fence->context;
>> + dyn_arr[2 * idx + 1] = fence->seqno;
>> + }
>> + }
>> ),
>> - TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d",
>> + TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d, dependencies:%s",
>> __entry->id,
>> __entry->fence_context, __entry->fence_seqno, __get_str(name),
>> - __entry->job_count, __entry->hw_job_count)
>> + __entry->job_count, __entry->hw_job_count,
>> + __print_dma_fence_array(p, __get_dynamic_array(deps), __entry->n_deps))
>> );
>>
>> DEFINE_EVENT(drm_sched_job, drm_sched_job,
>> - TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
>> - TP_ARGS(sched_job, entity)
>> + TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity,
>> + unsigned int dep_count),
>> + TP_ARGS(sched_job, entity, dep_count)
>> );
>>
>> DEFINE_EVENT_PRINT(drm_sched_job, drm_run_job,
>> - TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
>> - TP_ARGS(sched_job, entity),
>> + TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity,
>> + unsigned int dep_count),
>> + TP_ARGS(sched_job, entity, 0),
>> TP_printk("dev=%s, id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d",
>> __get_str(dev), __entry->id,
>> __entry->fence_context, __entry->fence_seqno, __get_str(name),
>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
>> index 58c8161289fe..aa33a9d29b14 100644
>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>> @@ -583,7 +583,13 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
>> bool first;
>> ktime_t submit_ts;
>>
>> - trace_drm_sched_job(sched_job, entity);
>> + if (trace_drm_sched_job_enabled()) {
>> + unsigned long index;
>> + void *f;
>> +
>> + xa_for_each(&sched_job->dependencies, index, f) { }
>
> I did need to do a double take on that one. While using index for the
> total number of dependencies is correct for the dense and at this point
> in time fully populated dependencies array, it is a bit non-obvious.
> Maybe add a small comment here or have an actual counter in the loop
> body?
Makes sense. I'll add a counter in v6.
thanks,
Pierre-Eric
>
> Regards,
> Lucas
>
>> + trace_drm_sched_job(sched_job, entity, index);
>> + }
>> atomic_inc(entity->rq->sched->score);
>> WRITE_ONCE(entity->last_user, current->group_leader);
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>> index 7e90c9f95611..0bc9b7b24b34 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -1201,7 +1201,7 @@ static void drm_sched_run_job_work(struct work_struct *w)
>> atomic_add(sched_job->credits, &sched->credit_count);
>> drm_sched_job_begin(sched_job);
>>
>> - trace_drm_run_job(sched_job, entity);
>> + trace_drm_run_job(sched_job, entity, 0);
>> fence = sched->ops->run_job(sched_job);
>> complete_all(&entity->entity_idle);
>> drm_sched_fence_scheduled(s_fence, fence);
More information about the dri-devel
mailing list