[PATCH] drm/amdgpu: revert "take runtime pm reference when we attach a buffer"

Alex Deucher alexdeucher at gmail.com
Thu Jun 6 13:20:43 UTC 2024


On Thu, Jun 6, 2024 at 2:19 AM Christian König <christian.koenig at amd.com> wrote:
>
> On 05.06.24 at 15:20, Alex Deucher wrote:
>
> On Wed, Jun 5, 2024 at 8:32 AM Christian König
> <ckoenig.leichtzumerken at gmail.com> wrote:
>
> This reverts commit b8c415e3bf989be1b749409951debe6b36f5c78c and
> commit 425285d39afddaf4a9dab36045b816af0cc3e400.
>
> Taking a runtime pm reference for DMA-buf is actually completely
> unnecessary.
>
> When the buffer is in GTT, it is still accessible even when the GPU
> is powered down, and when it is in VRAM, the buffer gets migrated to
> GTT before powering down.
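
(A minimal sketch of the mechanism being relied on here, assuming
hypothetical my_gpu_* helpers rather than the real amdgpu suspend path:
before the ASIC powers down, VRAM-resident buffers are moved to GTT so
importers can still reach them.)

#include <linux/device.h>

/*
 * Illustrative only: on runtime suspend, migrate every VRAM-resident
 * buffer to GTT (system memory) so it stays accessible while the GPU
 * is powered off.  struct my_gpu and the helpers are hypothetical
 * stand-ins, not amdgpu functions.
 */
static int sketch_runtime_suspend(struct device *dev)
{
        struct my_gpu *gpu = dev_get_drvdata(dev);
        int r;

        r = my_gpu_evict_vram_to_gtt(gpu);      /* keep contents reachable */
        if (r)
                return r;       /* abort suspend rather than lose buffers */

        return my_gpu_power_down(gpu);
}
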
>
> Won't that kind of defeat the purpose of P2P DMA?  I guess it's a
> trade-off between performance and power savings.
>
>
> Not really. P2P is useful because it avoids the extra bounce through system memory.
>
> But when the ASIC is powered down and not producing any new data, there really is no extra bounce.
>
> The only use case which would make it mandatory to keep the runtime
> pm reference would be if we pin the buffer into VRAM, and that's not
> something we currently do.
>
> We'll need to bring this back if we ever support that?  I think we'll
> want that for P2P DMA with RDMA NICs that don't support ODP
> (on-demand paging).  That's one of the big blockers for a lot of ROCm
> customers migrating to the in-box drivers.
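
(For reference, that pinning case would come in through the dynamic
importer side of the dma-buf API, roughly like the sketch below.  The
helper name is hypothetical, and this assumes a dynamic attachment
where the importer is allowed to call dma_buf_pin().)

#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/err.h>

/*
 * Sketch: an importer that cannot handle invalidations (an RDMA NIC
 * without ODP) pins the buffer under the reservation lock before
 * mapping it, so the exporter may not migrate it afterwards.  The
 * matching teardown would dma_buf_unpin() after unmapping.
 */
static struct sg_table *sketch_map_pinned(struct dma_buf *dmabuf,
                                          struct dma_buf_attachment *attach)
{
        struct sg_table *sgt;
        int r;

        dma_resv_lock(dmabuf->resv, NULL);

        r = dma_buf_pin(attach);
        if (r) {
                dma_resv_unlock(dmabuf->resv);
                return ERR_PTR(r);
        }

        sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);

        dma_resv_unlock(dmabuf->resv);
        return sgt;
}

If such a pin ever lands the buffer in VRAM, the device can no longer
power off underneath it, which is where some form of runtime-pm
reference would have to return, though via a different mechanism than
the attach callback, per Christian's point below.
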
>
>
> Yeah, but we need a completely different approach in that case.
>
> The problem is that calling pm_runtime_get_sync() from the DMA-buf callbacks is illegal in the first place, because we have the reservation lock taken here, which is also taken during resume.
>
> So this never actually triggered, or we would have seen a deadlock (I should probably mention that in the commit message).
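
(To spell out the deadlock being described, a sketch with hypothetical
exporter_device() and restore_buffers_locked() helpers, not actual
amdgpu code:)

#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/pm_runtime.h>

/*
 * The dma-buf core invokes ->attach with dmabuf->resv already locked.
 */
static int sketch_attach(struct dma_buf *dmabuf,
                         struct dma_buf_attachment *attach)
{
        struct device *gpu = exporter_device(dmabuf);   /* hypothetical */

        /*
         * If the GPU is runtime-suspended, this runs the resume
         * handler synchronously, in this context, with the
         * reservation lock still held ...
         */
        return pm_runtime_get_sync(gpu);
}

static int sketch_runtime_resume(struct device *dev)
{
        /*
         * ... and a resume handler that needs that same reservation
         * lock (e.g. to shuffle buffers back into place) now waits on
         * the lock held by sketch_attach() above: deadlock.
         */
        return restore_buffers_locked(dev);     /* hypothetical */
}
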

Thanks.  With that added to the commit message, the patch is:
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

>
> Christian.
>
>
> Alex
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 33 ---------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c   |  2 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h   | 15 ----------
>  3 files changed, 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index 0b3b10d21952..ab848047204c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -41,8 +41,6 @@
>  #include <linux/dma-buf.h>
>  #include <linux/dma-fence-array.h>
>  #include <linux/pci-p2pdma.h>
> -#include <linux/pm_runtime.h>
> -#include "amdgpu_trace.h"
>
>  /**
>   * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
> @@ -63,37 +61,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
>         if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
>                 attach->peer2peer = false;
>
> -       r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> -       trace_amdgpu_runpm_reference_dumps(1, __func__);
> -       if (r < 0)
> -               goto out;
> -
>         return 0;
> -
> -out:
> -       pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> -       trace_amdgpu_runpm_reference_dumps(0, __func__);
> -       return r;
> -}
> -
> -/**
> - * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
> - *
> - * @dmabuf: DMA-buf where we remove the attachment from
> - * @attach: the attachment to remove
> - *
> - * Called when an attachment is removed from the DMA-buf.
> - */
> -static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
> -                                 struct dma_buf_attachment *attach)
> -{
> -       struct drm_gem_object *obj = dmabuf->priv;
> -       struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> -       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> -
> -       pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> -       pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> -       trace_amdgpu_runpm_reference_dumps(0, __func__);
>  }
>
>  /**
> @@ -266,7 +234,6 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
>
>  const struct dma_buf_ops amdgpu_dmabuf_ops = {
>         .attach = amdgpu_dma_buf_attach,
> -       .detach = amdgpu_dma_buf_detach,
>         .pin = amdgpu_dma_buf_pin,
>         .unpin = amdgpu_dma_buf_unpin,
>         .map_dma_buf = amdgpu_dma_buf_map,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 10832b470448..bc3ac73b6b8d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -181,7 +181,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
>         amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>                                seq, flags | AMDGPU_FENCE_FLAG_INT);
>         pm_runtime_get_noresume(adev_to_drm(adev)->dev);
> -       trace_amdgpu_runpm_reference_dumps(1, __func__);
>         ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
>         if (unlikely(rcu_dereference_protected(*ptr, 1))) {
>                 struct dma_fence *old;
> @@ -309,7 +308,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
>                 dma_fence_put(fence);
>                 pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>                 pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> -               trace_amdgpu_runpm_reference_dumps(0, __func__);
>         } while (last_seq != seq);
>
>         return true;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> index f539b1d00234..2fd1bfb35916 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> @@ -554,21 +554,6 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
>                       __entry->value)
>  );
>
> -TRACE_EVENT(amdgpu_runpm_reference_dumps,
> -           TP_PROTO(uint32_t index, const char *func),
> -           TP_ARGS(index, func),
> -           TP_STRUCT__entry(
> -                            __field(uint32_t, index)
> -                            __string(func, func)
> -                            ),
> -           TP_fast_assign(
> -                          __entry->index = index;
> -                          __assign_str(func, func);
> -                          ),
> -           TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n",
> -                     __entry->index,
> -                     __get_str(func))
> -);
>  #undef AMDGPU_JOB_GET_TIMELINE_NAME
>  #endif
>
> --
> 2.34.1
>
>

