[PATCH 05/29] drm/amdgpu: switch job hw_fence to amdgpu_fence
Christian König
christian.koenig at amd.com
Fri Jun 6 11:39:40 UTC 2025
On 6/6/25 08:43, Alex Deucher wrote:
> Use the amdgpu fence container so we can store additional
> data in the fence.
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 30 +++++----------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 12 ++++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 16 +++++++++++
> 6 files changed, 32 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index 8e626f50b362e..f81608330a3d0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1902,7 +1902,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
> continue;
> }
> job = to_amdgpu_job(s_job);
> - if (preempted && (&job->hw_fence) == fence)
> + if (preempted && (&job->hw_fence.base) == fence)
> /* mark the job as preempted */
> job->preemption_status |= AMDGPU_IB_PREEMPTED;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index ea565651f7459..8298e95e4543e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -6375,7 +6375,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
> *
> * job->base holds a reference to parent fence
> */
> - if (job && dma_fence_is_signaled(&job->hw_fence)) {
> + if (job && dma_fence_is_signaled(&job->hw_fence.base)) {
> job_signaled = true;
> dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
> goto skip_hw_reset;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 2f24a6aa13bf6..569e0e5373927 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -41,22 +41,6 @@
> #include "amdgpu_trace.h"
> #include "amdgpu_reset.h"
>
> -/*
> - * Fences mark an event in the GPUs pipeline and are used
> - * for GPU/CPU synchronization. When the fence is written,
> - * it is expected that all buffers associated with that fence
> - * are no longer in use by the associated ring on the GPU and
> - * that the relevant GPU caches have been flushed.
> - */
> -
> -struct amdgpu_fence {
> - struct dma_fence base;
> -
> - /* RB, DMA, etc. */
> - struct amdgpu_ring *ring;
> - ktime_t start_timestamp;
> -};
> -
Oh, that handling here is completely broken.
The MCBP muxer overwrites fields in the job because of this ^^: the container type was private to amdgpu_fence.c, so the job could only embed a bare dma_fence.
I think this patch needs to be treated as a bug fix that we even backport.
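To make that concrete: a minimal sketch of the failure mode, modeled on the start_timestamp bookkeeping the mux drives through amdgpu_fence.c (helper name and body here are illustrative, not the exact upstream code):

	/* Illustrative stand-in for the mux timestamp update path: */
	static void mux_update_start_timestamp(struct dma_fence *fence,
					       ktime_t timestamp)
	{
		struct amdgpu_fence *am_fence =
			container_of(fence, struct amdgpu_fence, base);

		/*
		 * While job->hw_fence is a bare struct dma_fence, this
		 * cast on a job-embedded fence points into the middle of
		 * struct amdgpu_job, so the store below scribbles over
		 * the job fields that follow hw_fence instead of hitting
		 * a real start_timestamp slot.
		 */
		am_fence->start_timestamp = timestamp;
	}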
Regards,
Christian.
> static struct kmem_cache *amdgpu_fence_slab;
>
> int amdgpu_fence_slab_init(void)
> @@ -151,12 +135,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
> am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC);
> if (am_fence == NULL)
> return -ENOMEM;
> - fence = &am_fence->base;
> - am_fence->ring = ring;
> } else {
> /* take use of job-embedded fence */
> - fence = &job->hw_fence;
> + am_fence = &job->hw_fence;
> }
> + fence = &am_fence->base;
> + am_fence->ring = ring;
>
> seq = ++ring->fence_drv.sync_seq;
> if (job && job->job_run_counter) {
> @@ -718,7 +702,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
> * it right here or we won't be able to track them in fence_drv
> * and they will remain unsignaled during sa_bo free.
> */
> - job = container_of(old, struct amdgpu_job, hw_fence);
> + job = container_of(old, struct amdgpu_job, hw_fence.base);
> if (!job->base.s_fence && !dma_fence_is_signaled(old))
> dma_fence_signal(old);
> RCU_INIT_POINTER(*ptr, NULL);
> @@ -780,7 +764,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
>
> static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
> {
> - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
> + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
>
> return (const char *)to_amdgpu_ring(job->base.sched)->name;
> }
> @@ -810,7 +794,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
> */
> static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
> {
> - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
> + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base);
>
> if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
> amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
> @@ -845,7 +829,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu)
> struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
>
> /* free job if fence has a parent job */
> - kfree(container_of(f, struct amdgpu_job, hw_fence));
> + kfree(container_of(f, struct amdgpu_job, hw_fence.base));
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index acb21fc8b3ce5..ddb9d3269357c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -272,8 +272,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
> /* Check if any fences where initialized */
> if (job->base.s_fence && job->base.s_fence->finished.ops)
> f = &job->base.s_fence->finished;
> - else if (job->hw_fence.ops)
> - f = &job->hw_fence;
> + else if (job->hw_fence.base.ops)
> + f = &job->hw_fence.base;
> else
> f = NULL;
>
> @@ -290,10 +290,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
> amdgpu_sync_free(&job->explicit_sync);
>
> /* only put the hw fence if has embedded fence */
> - if (!job->hw_fence.ops)
> + if (!job->hw_fence.base.ops)
> kfree(job);
> else
> - dma_fence_put(&job->hw_fence);
> + dma_fence_put(&job->hw_fence.base);
> }
>
> void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
> @@ -322,10 +322,10 @@ void amdgpu_job_free(struct amdgpu_job *job)
> if (job->gang_submit != &job->base.s_fence->scheduled)
> dma_fence_put(job->gang_submit);
>
> - if (!job->hw_fence.ops)
> + if (!job->hw_fence.base.ops)
> kfree(job);
> else
> - dma_fence_put(&job->hw_fence);
> + dma_fence_put(&job->hw_fence.base);
> }
>
> struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> index f2c049129661f..931fed8892cc1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> @@ -48,7 +48,7 @@ struct amdgpu_job {
> struct drm_sched_job base;
> struct amdgpu_vm *vm;
> struct amdgpu_sync explicit_sync;
> - struct dma_fence hw_fence;
> + struct amdgpu_fence hw_fence;
> struct dma_fence *gang_submit;
> uint32_t preamble_status;
> uint32_t preemption_status;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index b95b471107692..e1f25218943a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -127,6 +127,22 @@ struct amdgpu_fence_driver {
> struct dma_fence **fences;
> };
>
> +/*
> + * Fences mark an event in the GPUs pipeline and are used
> + * for GPU/CPU synchronization. When the fence is written,
> + * it is expected that all buffers associated with that fence
> + * are no longer in use by the associated ring on the GPU and
> + * that the relevant GPU caches have been flushed.
> + */
> +
> +struct amdgpu_fence {
> + struct dma_fence base;
> +
> + /* RB, DMA, etc. */
> + struct amdgpu_ring *ring;
> + ktime_t start_timestamp;
> +};
> +
> extern const struct drm_sched_backend_ops amdgpu_sched_ops;
>
> void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
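P.S.: for whoever picks this up for stable, the reason the stray store hits real state is plain struct layout. With the field order from amdgpu_job.h above (offsets modulo padding):

	/*
	 * Pre-patch overlap when amdgpu_fence members are written through
	 * a job-embedded fence:
	 *
	 *   struct amdgpu_job {
	 *           ...
	 *           struct dma_fence hw_fence;     treated as amdgpu_fence::base
	 *           struct dma_fence *gang_submit; overlaps amdgpu_fence::ring
	 *           uint32_t preamble_status;      overlaps ::start_timestamp
	 *           uint32_t preemption_status;    (ktime_t spans both u32s)
	 *           ...
	 *   };
	 */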