[PATCH v11 21/28] drm/amdgpu: add gfx eviction fence helpers
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Sep 16 14:14:16 UTC 2024
Am 09.09.24 um 22:06 schrieb Shashank Sharma:
> This patch adds basic eviction fence framework for the gfx buffers.
> The idea is to:
> - One eviction fence is created per gfx process, at kms_open.
> - This fence is attached to all the gem buffers created
> by this process.
> - This fence is detached to all the gem buffers at postclose_kms.
>
> This framework will be further used for usermode queues.
>
> V2: Addressed review comments from Christian
> - keep fence_ctx and fence_seq directly in fpriv
> - evcition_fence should be dynamically allocated
> - do not save eviction fence instance in BO, there could be many
> such fences attached to one BO
> - use dma_resv_replace_fence() in detach
>
> V3: Addressed review comments from Christian
> - eviction fence create and destroy functions should be called only once
> from fpriv create/destroy
> - use dma_fence_put() in eviction_fence_destroy
>
> V4: Addressed review comments from Christian:
> - create a separate ev_fence_mgr structure
> - cleanup fence init part
> - do not add a domain for fence owner KGD
>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
> Signed-off-by: Arvind Yadav <arvind.yadav at amd.com>
> Change-Id: I7a8d27d7172bafbfe34aa9decf2cd36655948275
> ---
> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +-
> .../drm/amd/amdgpu/amdgpu_eviction_fence.c | 148 ++++++++++++++++++
> .../drm/amd/amdgpu/amdgpu_eviction_fence.h | 65 ++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 9 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 +
> 6 files changed, 231 insertions(+), 2 deletions(-)
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index ff5621697c68..0643078d1225 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -66,7 +66,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
> amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
> - amdgpu_userq_fence.o
> + amdgpu_userq_fence.o amdgpu_eviction_fence.o
>
> amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 76ada47b1875..0013bfc74024 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -113,6 +113,7 @@
> #include "amdgpu_seq64.h"
> #include "amdgpu_reg_state.h"
> #include "amdgpu_userqueue.h"
> +#include "amdgpu_eviction_fence.h"
> #if defined(CONFIG_DRM_AMD_ISP)
> #include "amdgpu_isp.h"
> #endif
> @@ -481,7 +482,6 @@ struct amdgpu_flip_work {
> bool async;
> };
>
> -
> /*
> * file private structure
> */
> @@ -495,6 +495,10 @@ struct amdgpu_fpriv {
> struct idr bo_list_handles;
> struct amdgpu_ctx_mgr ctx_mgr;
> struct amdgpu_userq_mgr userq_mgr;
> +
> + /* Eviction fence infra */
> + struct amdgpu_eviction_fence_mgr evf_mgr;
> +
> /** GPU partition selection */
> uint32_t xcp_id;
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> new file mode 100644
> index 000000000000..2d474cb11cf9
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c
> @@ -0,0 +1,148 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright 2024 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +#include <linux/sched.h>
> +#include "amdgpu.h"
> +
> +static const char *
> +amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence)
> +{
> + return "amdgpu";
> +}
> +
> +static const char *
> +amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f)
> +{
> + struct amdgpu_eviction_fence *ef;
> +
> + ef = container_of(f, struct amdgpu_eviction_fence, base);
> + return ef->timeline_name;
> +}
> +
> +static const struct dma_fence_ops amdgpu_eviction_fence_ops = {
> + .use_64bit_seqno = true,
> + .get_driver_name = amdgpu_eviction_fence_get_driver_name,
> + .get_timeline_name = amdgpu_eviction_fence_get_timeline_name,
> +};
> +
> +int amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr)
> +{
> + int ret;
> +
> + spin_lock(&evf_mgr->ev_fence_lock);
> + ret = dma_fence_signal(&evf_mgr->ev_fence->base);
> + spin_unlock(&evf_mgr->ev_fence_lock);
> + return ret;
> +}
> +
> +struct amdgpu_eviction_fence *
> +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr)
> +{
> + struct amdgpu_eviction_fence *ev_fence;
> +
> + ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL);
> + if (!ev_fence)
> + return NULL;
> +
> + get_task_comm(ev_fence->timeline_name, current);
> + spin_lock_init(&ev_fence->lock);
> + dma_fence_init(&ev_fence->base, &amdgpu_eviction_fence_ops,
> + &ev_fence->lock, evf_mgr->ev_fence_ctx,
> + atomic_inc_return(&evf_mgr->ev_fence_seq));
> + return ev_fence;
> +}
> +
> +void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr)
> +{
> + if (!evf_mgr->ev_fence)
> + return;
> +
> + if (!dma_fence_is_signaled(&evf_mgr->ev_fence->base))
You can drop that if, dma_fence_wait() will check that anyway.
> + dma_fence_wait(&evf_mgr->ev_fence->base, false);
> +
> + /* Last unref of ev_fence */
> + spin_lock(&evf_mgr->ev_fence_lock);
> + dma_fence_put(&evf_mgr->ev_fence->base);
> + evf_mgr->ev_fence = NULL;
> + spin_unlock(&evf_mgr->ev_fence_lock);
> +}
> +
> +int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
> + struct amdgpu_bo *bo)
> +{
> + struct dma_fence *ef;
> + struct amdgpu_eviction_fence *ev_fence = evf_mgr->ev_fence;
> + struct dma_resv *resv = bo->tbo.base.resv;
> + int ret;
> +
> + if (!ev_fence || !resv)
> + return 0;
> +
> + ef = &ev_fence->base;
> + ret = dma_resv_reserve_fences(resv, 1);
> + if (ret) {
> + dma_fence_wait(ef, false);
> + return ret;
> + }
> +
> + spin_lock(&evf_mgr->ev_fence_lock);
> + dma_resv_add_fence(resv, ef, DMA_RESV_USAGE_BOOKKEEP);
> + spin_unlock(&evf_mgr->ev_fence_lock);
That spinlock is protecting evf_mgr->ev_fence, isn't it?
In that case you probably shouldn't dereference it outside of the spinlock.
> + return 0;
> +}
> +
> +void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
> + struct amdgpu_bo *bo)
> +{
> + struct dma_fence *stub;
> + struct amdgpu_eviction_fence *ev_fence = evf_mgr->ev_fence;
> +
> + if (!ev_fence)
> + return;
> +
> + spin_lock(&evf_mgr->ev_fence_lock);
> + stub = dma_fence_get_stub();
> + dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx,
> + stub, DMA_RESV_USAGE_BOOKKEEP);
> + dma_fence_put(stub);
> + spin_unlock(&evf_mgr->ev_fence_lock);
This operation doesn't need the spinlock since we are not accessing
evf_mgr->ev_fence.
> +}
> +
> +void amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr)
> +{
> +
> + /* This needs to be done one time per open */
> + atomic_set(&evf_mgr->ev_fence_seq, 0);
> + evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1);
> + spin_lock_init(&evf_mgr->ev_fence_lock);
> +
> + spin_lock(&evf_mgr->ev_fence_lock);
> + evf_mgr->ev_fence = amdgpu_eviction_fence_create(evf_mgr);
amdgpu_eviction_fence_create() will call kmalloc, doing that while
holding the spinlock is a bad idea.
You need to do something like:
tmp = amdgpu_eviction_fence_create(....);
spin_lock(...);
evf_mgr->ev_fence = tmp;
...
> + if (!evf_mgr->ev_fence) {
> + DRM_ERROR("Failed to craete eviction fence\n");
> + goto unlock;
> + }
> +
> +unlock:
> + spin_unlock(&evf_mgr->ev_fence_lock);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
> new file mode 100644
> index 000000000000..b47ab1307ec5
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h
> @@ -0,0 +1,65 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright 2023 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef AMDGPU_EV_FENCE_H_
> +#define AMDGPU_EV_FENCE_H_
> +
> +struct amdgpu_eviction_fence {
> + struct dma_fence base;
> + spinlock_t lock;
> + char timeline_name[TASK_COMM_LEN];
> + struct amdgpu_userq_mgr *uq_mgr;
> +};
> +
> +struct amdgpu_eviction_fence_mgr {
> + u64 ev_fence_ctx;
> + atomic_t ev_fence_seq;
> + spinlock_t ev_fence_lock;
> + struct amdgpu_eviction_fence *ev_fence;
> +};
> +
> +/* Eviction fence helper functions */
> +struct amdgpu_eviction_fence *
> +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr);
> +
> +void
> +amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr);
> +
> +int
> +amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr,
> + struct amdgpu_bo *bo);
> +
> +void
> +amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr,
> + struct amdgpu_bo *bo);
> +
> +void
> +amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr);
> +
> +int
> +amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr);
> +
> +int
> +amdgpu_eviction_fence_replace_fence(struct amdgpu_fpriv *fpriv);
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index f4529f2fad97..c9b4a6ce3f14 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -186,6 +186,13 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
> bo_va = amdgpu_vm_bo_add(adev, vm, abo);
> else
> ++bo_va->ref_count;
> +
> + if (!vm->is_compute_context || !vm->process_info) {
I said it before, we should really drop this line since the user queues
are completely independent of that.
> + /* attach gfx eviction fence */
> + if (amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo))
> + DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n");
> + }
> +
> amdgpu_bo_unreserve(abo);
>
> /* Validate and add eviction fence to DMABuf imports with dynamic
> @@ -236,6 +243,8 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj,
> struct drm_exec exec;
> long r;
>
> + amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo);
> +
We should probably skip that call for per VM BOs, or otherwise we will
also detach the page tables accidentally.
BTW: Were do we attach the eviction fence to the page tables?
Regards,
Christian.
> drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
> drm_exec_until_all_locked(&exec) {
> r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 019a377620ce..e7fb13e20197 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -1391,6 +1391,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
> mutex_init(&fpriv->bo_list_lock);
> idr_init_base(&fpriv->bo_list_handles, 1);
>
> + amdgpu_eviction_fence_init(&fpriv->evf_mgr);
> +
> amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev);
>
> r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, adev);
> @@ -1464,6 +1466,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
> amdgpu_bo_unreserve(pd);
> }
>
> + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr);
> amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
> amdgpu_vm_fini(adev, &fpriv->vm);
> amdgpu_userq_mgr_fini(&fpriv->userq_mgr);
More information about the amd-gfx
mailing list