[RFC PATCH 2/2] drm/amdgpu/uvd: Ensure vcpu bos are within the uvd segment

Wed Apr 30 21:39:47 UTC 2025

+ Christian

On Tue, Apr 29, 2025 at 7:25 AM John Olender <john.olender at gmail.com> wrote:
>
> If the vcpu bos are allocated outside the uvd segment,
> amdgpu_uvd_ring_test_ib() times out waiting on the ring's fence.
>
> See amdgpu_fence_driver_start_ring() for more context.
>
> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3851
> Signed-off-by: John Olender <john.olender at gmail.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 36 ++++++++++++++-----------
>  1 file changed, 21 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index 74758b5ffc6c..a6b3e75ffa2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -139,15 +139,20 @@ static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo);
>
>  static int amdgpu_uvd_create_msg_bo_helper(struct amdgpu_device *adev,
>                                            uint32_t size,
> -                                          struct amdgpu_bo **bo_ptr)
> +                                          struct amdgpu_bo **bo_ptr,
> +                                          bool interruptible)
>  {
> -       struct ttm_operation_ctx ctx = { true, false };
> +       struct ttm_operation_ctx ctx = { interruptible, false };
>         struct amdgpu_bo *bo = NULL;
> +       u32 initial_domain = AMDGPU_GEM_DOMAIN_GTT;
>         void *addr;
>         int r;
>
> +       if (!interruptible && adev->uvd.address_64_bit)
> +               initial_domain |= AMDGPU_GEM_DOMAIN_VRAM;
> +
>         r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
> -                                     AMDGPU_GEM_DOMAIN_GTT,
> +                                     initial_domain,
>                                       &bo, NULL, &addr);
>         if (r)
>                 return r;
> @@ -319,19 +324,23 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
>                 bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
>
> +       /* from uvd v5.0 HW addressing capacity increased to 64 bits */
> +       if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
> +               adev->uvd.address_64_bit = true;
> +
>         for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
>                 if (adev->uvd.harvest_config & (1 << j))
>                         continue;
> -               r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
> -                                           AMDGPU_GEM_DOMAIN_VRAM |
> -                                           AMDGPU_GEM_DOMAIN_GTT,

I think we can just make this VRAM only.  Or something like:
adev->uvd.address_64_bit ? AMDGPU_GEM_DOMAIN_GTT : AMDGPU_GEM_DOMAIN_VRAM

If that fixes it, this should be tagged with:
Fixes: 58ab2c08d708 ("drm/amdgpu: use VRAM|GTT for a bunch of kernel
allocations")

Alex

> -                                           &adev->uvd.inst[j].vcpu_bo,
> -                                           &adev->uvd.inst[j].gpu_addr,
> -                                           &adev->uvd.inst[j].cpu_addr);
> +               r = amdgpu_uvd_create_msg_bo_helper(adev, bo_size,
> +                               &adev->uvd.inst[j].vcpu_bo, false);
>                 if (r) {
>                         dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
>                         return r;
>                 }
> +               adev->uvd.inst[j].gpu_addr =
> +                               amdgpu_bo_gpu_offset(adev->uvd.inst[j].vcpu_bo);
> +               adev->uvd.inst[j].cpu_addr =
> +                               amdgpu_bo_kptr(adev->uvd.inst[j].vcpu_bo);
>         }
>
>         for (i = 0; i < adev->uvd.max_handles; ++i) {
> @@ -339,11 +348,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>                 adev->uvd.filp[i] = NULL;
>         }
>
> -       /* from uvd v5.0 HW addressing capacity increased to 64 bits */
> -       if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
> -               adev->uvd.address_64_bit = true;
> -
> -       r = amdgpu_uvd_create_msg_bo_helper(adev, 128 << 10, &adev->uvd.ib_bo);
> +       r = amdgpu_uvd_create_msg_bo_helper(adev, 128 << 10, &adev->uvd.ib_bo,
> +                       true);
>         if (r)
>                 return r;
>
> @@ -1236,7 +1242,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>         if (direct) {
>                 bo = adev->uvd.ib_bo;
>         } else {
> -               r = amdgpu_uvd_create_msg_bo_helper(adev, 4096, &bo);
> +               r = amdgpu_uvd_create_msg_bo_helper(adev, 4096, &bo, true);
>                 if (r)
>                         return r;
>         }
> --
> 2.47.2
>