[PATCH v8 09/10] drm: get rid of drm_sched_job::id

Tvrtko Ursulin tursulin at ursulin.net
Thu Mar 20 10:55:04 UTC 2025


On 20/03/2025 09:58, Pierre-Eric Pelloux-Prayer wrote:
> Its only purpose was for trace events, but jobs can already be
> uniquely identified using their fence.
> 
> The downside of using the fence is that it's only available after
> 'drm_sched_job_arm' has been called. That is the case for all trace
> events that used job.id, so they can safely switch to using it.
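
Just to spell out the identity for anyone following along: once
drm_sched_job_arm() has run, the scheduler fence is initialised, so the
finished fence's (context, seqno) pair is a unique handle for the job.
Roughly (a sketch; field names as used by the tracepoints in the patch):

	drm_sched_job_arm(job);
	/* From here on this pair uniquely identifies the job: */
	u64 ctx   = job->s_fence->finished.context;
	u64 seqno = job->s_fence->finished.seqno;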

Perhaps it would make more sense to move this patch to before the one 
declaring the tracepoints a stable ABI.

Also note that said patch declared job->id as stable, and this one 
forgot to remove that declaration.

The rest LGTM.

Regards,

Tvrtko

> 
> Suggested-by: Tvrtko Ursulin <tursulin at igalia.com>
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h      | 18 ++++++------------
>   .../gpu/drm/scheduler/gpu_scheduler_trace.h    | 18 ++++++------------
>   drivers/gpu/drm/scheduler/sched_main.c         |  1 -
>   include/drm/gpu_scheduler.h                    |  3 ---
>   4 files changed, 12 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> index 383fce40d4dd..a4f394d827bc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> @@ -167,7 +167,6 @@ TRACE_EVENT(amdgpu_cs_ioctl,
>   	    TP_PROTO(struct amdgpu_job *job),
>   	    TP_ARGS(job),
>   	    TP_STRUCT__entry(
> -			     __field(uint64_t, sched_job_id)
>   			     __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
>   			     __field(unsigned int, context)
>   			     __field(unsigned int, seqno)
> @@ -177,15 +176,14 @@ TRACE_EVENT(amdgpu_cs_ioctl,
>   			     ),
>   
>   	    TP_fast_assign(
> -			   __entry->sched_job_id = job->base.id;
>   			   __assign_str(timeline);
>   			   __entry->context = job->base.s_fence->finished.context;
>   			   __entry->seqno = job->base.s_fence->finished.seqno;
>   			   __assign_str(ring);
>   			   __entry->num_ibs = job->num_ibs;
>   			   ),
> -	    TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
> -		      __entry->sched_job_id, __get_str(timeline), __entry->context,
> +	    TP_printk("timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
> +		      __get_str(timeline), __entry->context,
>   		      __entry->seqno, __get_str(ring), __entry->num_ibs)
>   );
>   
> @@ -193,7 +191,6 @@ TRACE_EVENT(amdgpu_sched_run_job,
>   	    TP_PROTO(struct amdgpu_job *job),
>   	    TP_ARGS(job),
>   	    TP_STRUCT__entry(
> -			     __field(uint64_t, sched_job_id)
>   			     __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
>   			     __field(unsigned int, context)
>   			     __field(unsigned int, seqno)
> @@ -202,15 +199,14 @@ TRACE_EVENT(amdgpu_sched_run_job,
>   			     ),
>   
>   	    TP_fast_assign(
> -			   __entry->sched_job_id = job->base.id;
>   			   __assign_str(timeline);
>   			   __entry->context = job->base.s_fence->finished.context;
>   			   __entry->seqno = job->base.s_fence->finished.seqno;
>   			   __assign_str(ring);
>   			   __entry->num_ibs = job->num_ibs;
>   			   ),
> -	    TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
> -		      __entry->sched_job_id, __get_str(timeline), __entry->context,
> +	    TP_printk("timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
> +		      __get_str(timeline), __entry->context,
>   		      __entry->seqno, __get_str(ring), __entry->num_ibs)
>   );
>   
> @@ -519,7 +515,6 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
>   	    TP_ARGS(sched_job, fence),
>   	    TP_STRUCT__entry(
>   			     __string(ring, sched_job->base.sched->name)
> -			     __field(uint64_t, id)
>   			     __field(struct dma_fence *, fence)
>   			     __field(uint64_t, ctx)
>   			     __field(unsigned, seqno)
> @@ -527,13 +522,12 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
>   
>   	    TP_fast_assign(
>   			   __assign_str(ring);
> -			   __entry->id = sched_job->base.id;
>   			   __entry->fence = fence;
>   			   __entry->ctx = fence->context;
>   			   __entry->seqno = fence->seqno;
>   			   ),
> -	    TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
> -		      __get_str(ring), __entry->id,
> +	    TP_printk("job ring=%s need pipe sync to fence=%p, context=%llu, seq=%u",
> +		      __get_str(ring),
>   		      __entry->fence, __entry->ctx,
>   		      __entry->seqno)
>   );
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> index 3c7f6a39cf91..ad03240b2f03 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> @@ -58,7 +58,6 @@ DECLARE_EVENT_CLASS(drm_sched_job,
>   	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
>   	    TP_ARGS(sched_job, entity),
>   	    TP_STRUCT__entry(
> -			     __field(uint64_t, id)
>   			     __string(name, sched_job->sched->name)
>   			     __field(u32, job_count)
>   			     __field(int, hw_job_count)
> @@ -69,7 +68,6 @@ DECLARE_EVENT_CLASS(drm_sched_job,
>   			     ),
>   
>   	    TP_fast_assign(
> -			   __entry->id = sched_job->id;
>   			   __assign_str(name);
>   			   __entry->job_count = spsc_queue_count(&entity->job_queue);
>   			   __entry->hw_job_count = atomic_read(
> @@ -79,8 +77,8 @@ DECLARE_EVENT_CLASS(drm_sched_job,
>   			   __entry->fence_seqno = sched_job->s_fence->finished.seqno;
>   			   __entry->client_id = sched_job->s_fence->drm_client_id;
>   			   ),
> -	    TP_printk("dev=%s, id=%llu, fence=%llu:%llu, ring=%s, job count:%u, hw job count:%d, client_id:%llu",
> -		      __get_str(dev), __entry->id,
> +	    TP_printk("dev=%s, fence=%llu:%llu, ring=%s, job count:%u, hw job count:%d, client_id:%llu",
> +		      __get_str(dev),
>   		      __entry->fence_context, __entry->fence_seqno, __get_str(name),
>   		      __entry->job_count, __entry->hw_job_count, __entry->client_id)
>   );
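
Side note for anyone reading the trace output: with the id gone,
correlation is done on the fence alone, e.g. matching an amdgpu_cs_ioctl
event against a drm_run_job event from this class (hypothetical values,
formats per the TP_printk strings):

	amdgpu_cs_ioctl: timeline=gfx_0.0.0, context=42, seqno=7, ring_name=gfx_0.0.0, num_ibs=1
	drm_run_job: dev=0000:0a:00.0, fence=42:7, ring=gfx_0.0.0, job count:0, hw job count:1, client_id:5

Both lines carry the same 42:7 fence identity, so no separate job id is
needed.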
> @@ -117,7 +115,6 @@ TRACE_EVENT(drm_sched_job_add_dep,
>   	TP_STRUCT__entry(
>   		    __field(u64, fence_context)
>   		    __field(u64, fence_seqno)
> -		    __field(u64, id)
>   		    __field(u64, ctx)
>   		    __field(u64, seqno)
>   		    ),
> @@ -125,12 +122,11 @@ TRACE_EVENT(drm_sched_job_add_dep,
>   	TP_fast_assign(
>   		    __entry->fence_context = sched_job->s_fence->finished.context;
>   		    __entry->fence_seqno = sched_job->s_fence->finished.seqno;
> -		    __entry->id = sched_job->id;
>   		    __entry->ctx = fence->context;
>   		    __entry->seqno = fence->seqno;
>   		    ),
> -	TP_printk("fence=%llu:%llu, id=%llu depends on fence=%llu:%llu",
> -		  __entry->fence_context, __entry->fence_seqno, __entry->id,
> +	TP_printk("fence=%llu:%llu depends on fence=%llu:%llu",
> +		  __entry->fence_context, __entry->fence_seqno,
>   		  __entry->ctx, __entry->seqno)
>   );
>   
> @@ -140,7 +136,6 @@ TRACE_EVENT(drm_sched_job_unschedulable,
>   	    TP_STRUCT__entry(
>   			     __field(u64, fence_context)
>   			     __field(u64, fence_seqno)
> -			     __field(uint64_t, id)
>   			     __field(u64, ctx)
>   			     __field(u64, seqno)
>   			     ),
> @@ -148,12 +143,11 @@ TRACE_EVENT(drm_sched_job_unschedulable,
>   	    TP_fast_assign(
>   			   __entry->fence_context = sched_job->s_fence->finished.context;
>   			   __entry->fence_seqno = sched_job->s_fence->finished.seqno;
> -			   __entry->id = sched_job->id;
>   			   __entry->ctx = fence->context;
>   			   __entry->seqno = fence->seqno;
>   			   ),
> -	    TP_printk("fence=%llu:%llu, id=%llu depends on unsignalled fence=%llu:%llu",
> -		      __entry->fence_context, __entry->fence_seqno, __entry->id,
> +	    TP_printk("fence=%llu:%llu depends on unsignalled fence=%llu:%llu",
> +		      __entry->fence_context, __entry->fence_seqno,
>   		      __entry->ctx, __entry->seqno)
>   );
>   
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 85c2111e5500..85e0ba850746 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -848,7 +848,6 @@ void drm_sched_job_arm(struct drm_sched_job *job)
>   
>   	job->sched = sched;
>   	job->s_priority = entity->priority;
> -	job->id = atomic64_inc_return(&sched->job_id_count);
>   
>   	drm_sched_fence_init(job->s_fence, job->entity);
>   }
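
Removing the assignment here sits right next to the reason it is safe:
the drm_sched_fence_init() call just below is what establishes the
replacement identity. Simplified sketch, from memory (see
drivers/gpu/drm/scheduler/sched_fence.c for the real code):

	seq = atomic_inc_return(&entity->fence_seq);
	dma_fence_init(&fence->finished, &drm_sched_fence_ops_finished,
		       &fence->lock, entity->fence_context + 1, seq);

Fence contexts are allocated globally, so the (context, seqno) pair is
unique across entities as well.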
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 032214a49138..ddc24512c281 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -326,7 +326,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
>    * @finish_cb: the callback for the finished fence.
>    * @credits: the number of credits this job contributes to the scheduler
>    * @work: Helper to reschedule job kill to different context.
> - * @id: a unique id assigned to each job scheduled on the scheduler.
>    * @karma: increment on every hang caused by this job. If this exceeds the hang
>    *         limit of the scheduler then the job is marked guilty and will not
>    *         be scheduled further.
> @@ -339,8 +338,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
>    * to schedule the job.
>    */
>   struct drm_sched_job {
> -	u64				id;
> -
>   	/**
>   	 * @submit_ts:
>   	 *


