[PATCH 1/4] drm/scheduler: implement hardware time accounting
Tvrtko Ursulin
tursulin at ursulin.net
Tue Jul 2 08:42:28 UTC 2024
Hi,
I few questions below.
On 01/07/2024 18:14, Lucas Stach wrote:
> From: Christian König <ckoenig.leichtzumerken at gmail.com>
>
> Multiple drivers came up with the requirement to measure how
> much runtime each entity accumulated on the HW.
>
> A previous attempt of accounting this had to be reverted because
> HW submissions can have a lifetime exceeding that of the entity
> originally issuing them.
>
> Amdgpu on the other hand solves this task by keeping track of
> all the submissions and calculating how much time they have used
> on demand.
>
> Move this approach over into the scheduler to provide an easy to
> use interface for all drivers.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> Signed-off-by: Lucas Stach <l.stach at pengutronix.de>
> ---
> v2:
> - rebase to v6.10-rc1
> - fix for non-power-of-two number of HW submission
> - add comment explaining the logic behind the fence tracking array
> - rename some function and fix documentation
> ---
> drivers/gpu/drm/scheduler/sched_entity.c | 82 +++++++++++++++++++++++-
> drivers/gpu/drm/scheduler/sched_fence.c | 19 ++++++
> include/drm/gpu_scheduler.h | 31 +++++++++
> 3 files changed, 131 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index 58c8161289fe..d678d0b9b29e 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -62,7 +62,9 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
> unsigned int num_sched_list,
> atomic_t *guilty)
> {
> - if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])))
> + unsigned int i, num_submissions = 0;
> +
> + if (!entity || !sched_list)
> return -EINVAL;
>
> memset(entity, 0, sizeof(struct drm_sched_entity));
> @@ -98,6 +100,11 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
> (s32) DRM_SCHED_PRIORITY_KERNEL);
> }
> entity->rq = sched_list[0]->sched_rq[entity->priority];
> +
> + for (i = 0; i < num_sched_list; ++i) {
> + num_submissions = max(num_submissions,
> + sched_list[i]->credit_limit);
> + }
Does this work (in concept and naming) for all drivers if introduction
of credits broke the 1:1 between jobs and hw "ring" capacity?
How big is the array for different drivers?
> }
>
> init_completion(&entity->entity_idle);
> @@ -110,11 +117,52 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
>
> atomic_set(&entity->fence_seq, 0);
> entity->fence_context = dma_fence_context_alloc(2);
> + spin_lock_init(&entity->accounting_lock);
> +
> + if (!num_submissions)
> + return 0;
> +
> + entity->max_hw_submissions = num_submissions;
> + entity->hw_submissions = kcalloc(num_submissions, sizeof(void *),
> + GFP_KERNEL);
> + if (!entity->hw_submissions)
> + return -ENOMEM;
>
> return 0;
> }
> EXPORT_SYMBOL(drm_sched_entity_init);
>
> +/**
> + * drm_sched_entity_time_spent - Accumulated HW runtime used by this entity
> + * @entity: scheduler entity to check
> + *
> + * Get the current accumulated HW runtime used by all submissions made through
> + * this entity.
> + */
> +ktime_t drm_sched_entity_time_spent(struct drm_sched_entity *entity)
> +{
> + ktime_t result;
> + unsigned int i;
> +
> + if (!entity->max_hw_submissions)
> + return ns_to_ktime(0);
> +
> + spin_lock(&entity->accounting_lock);
> + result = entity->hw_time_used;
> + for (i = 0; i < entity->max_hw_submissions; ++i) {
> + struct drm_sched_fence *fence = entity->hw_submissions[i];
> +
> + if (!fence)
> + continue;
> +
> + result = ktime_add(result, drm_sched_fence_get_runtime(fence));
Does this end up counting from when jobs have been submitted to the hw
backend and may not be actually executing?
Say if a driver configures a backend N deep and is filled with N jobs,
while in actuality they are executed sequentially one at a time, the
time as reported here would over-account by some series such as
(job[0].finish - job[0].submit) + ... + (job[N].finish - job[N].submit)?
Or in other words if one submits N jobs to a ring serving an 1-wide hw
backend, will we see "N*100%" utilisation instead of "100%" if sampling
while first job is still executing, the rest queued behind it?
Regards,
Tvrtko
> + }
> + spin_unlock(&entity->accounting_lock);
> +
> + return result;
> +}
> +EXPORT_SYMBOL(drm_sched_entity_time_spent);
> +
> /**
> * drm_sched_entity_modify_sched - Modify sched of an entity
> * @entity: scheduler entity to init
> @@ -326,6 +374,8 @@ EXPORT_SYMBOL(drm_sched_entity_flush);
> */
> void drm_sched_entity_fini(struct drm_sched_entity *entity)
> {
> + unsigned int i;
> +
> /*
> * If consumption of existing IBs wasn't completed. Forcefully remove
> * them here. Also makes sure that the scheduler won't touch this entity
> @@ -341,6 +391,9 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
>
> dma_fence_put(rcu_dereference_check(entity->last_scheduled, true));
> RCU_INIT_POINTER(entity->last_scheduled, NULL);
> + for (i = 0; i < entity->max_hw_submissions; ++i)
> + dma_fence_put(&entity->hw_submissions[i]->scheduled);
> + kfree(entity->hw_submissions);
> }
> EXPORT_SYMBOL(drm_sched_entity_fini);
>
> @@ -522,6 +575,33 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
> */
> sched_job->entity = NULL;
>
> + if (entity->max_hw_submissions) {
> + struct drm_sched_fence *fence = sched_job->s_fence;
> + unsigned int idx = fence->scheduled.seqno;
> +
> + dma_fence_get(&fence->scheduled);
> + idx %= entity->max_hw_submissions;
> +
> + spin_lock(&entity->accounting_lock);
> + /*
> + * The fence seqno is dense and monotonically increasing. By
> + * cycling through a array sized to match the maximum number of
> + * submissions queued in the HW we can be sure that once we need
> + * to reuse a slot the fence stored in this slot refers to a
> + * retired submission and we can safely sum up the accumulated
> + * runtime and dispose the fence.
> + */
> + swap(fence, entity->hw_submissions[idx]);
> + if (fence) {
> + ktime_t runtime = drm_sched_fence_get_runtime(fence);
> +
> + entity->hw_time_used = ktime_add(entity->hw_time_used,
> + runtime);
> + dma_fence_put(&fence->scheduled);
> + }
> + spin_unlock(&entity->accounting_lock);
> + }
> +
> return sched_job;
> }
>
> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> index 0f35f009b9d3..55981ada1829 100644
> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> @@ -82,6 +82,25 @@ void drm_sched_fence_finished(struct drm_sched_fence *fence, int result)
> dma_fence_signal(&fence->finished);
> }
>
> +/**
> + * drm_sched_fence_get_runtime - accumulated runtime on HW
> + * @fence: fence
> + *
> + * Calculate how much runtime this fence has accumulated on the HW.
> + */
> +ktime_t drm_sched_fence_get_runtime(struct drm_sched_fence *fence)
> +{
> + /* When the fence is not scheduled, it can't have accumulated runtime */
> + if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->scheduled.flags))
> + return ns_to_ktime(0);
> +
> + /* When it is still running, calculate runtime until now */
> + if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->finished.flags))
> + return ktime_sub(ktime_get(), fence->scheduled.timestamp);
> +
> + return ktime_sub(fence->finished.timestamp, fence->scheduled.timestamp);
> +}
> +
> static const char *drm_sched_fence_get_driver_name(struct dma_fence *fence)
> {
> return "drm_sched";
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 5acc64954a88..52bcff324a92 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -238,6 +238,35 @@ struct drm_sched_entity {
> */
> struct rb_node rb_tree_node;
>
> + /**
> + * @accounting_lock:
> + *
> + * Protects the array of fences tracking the in-flight HW submissions
> + * and the accumulator counter.
> + */
> + spinlock_t accounting_lock;
> +
> + /**
> + * @hw_time_used:
> + *
> + * How much HW runtime has been accumulated by retired submissions
> + * from this entity.
> + */
> + ktime_t hw_time_used;
> +
> + /**
> + * @max_hw_submissions:
> + *
> + * Maximum number of submissions queued in the HW.
> + */
> + unsigned int max_hw_submissions;
> +
> + /**
> + * @hw_submissions:
> + *
> + * Scheduler fences of the HW submissions in flight.
> + */
> + struct drm_sched_fence **hw_submissions;
> };
>
> /**
> @@ -600,6 +629,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
> struct drm_gpu_scheduler **sched_list,
> unsigned int num_sched_list,
> atomic_t *guilty);
> +ktime_t drm_sched_entity_time_spent(struct drm_sched_entity *entity);
> long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout);
> void drm_sched_entity_fini(struct drm_sched_entity *entity);
> void drm_sched_entity_destroy(struct drm_sched_entity *entity);
> @@ -620,6 +650,7 @@ void drm_sched_fence_free(struct drm_sched_fence *fence);
> void drm_sched_fence_scheduled(struct drm_sched_fence *fence,
> struct dma_fence *parent);
> void drm_sched_fence_finished(struct drm_sched_fence *fence, int result);
> +ktime_t drm_sched_fence_get_runtime(struct drm_sched_fence *fence);
>
> unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
> void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
>
> base-commit: 1613e604df0cd359cf2a7fbd9be7a0bcfacfabd0
More information about the dri-devel
mailing list