Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
Pan, Xinhui
Xinhui.Pan at amd.com
Fri Sep 10 10:10:37 UTC 2021
[AMD Official Use Only]
We need to take this lock.
The IB test can be triggered through debugfs. These days I usually test it
by cat'ing the gpu recovery and amdgpu_test_ib files in debugfs.
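
To make that concrete, here is a minimal sketch of the locking this implies,
assuming the pre-allocated adev->uvd.ib_bo introduced by the patch below and
the amdgpu_bo_reserve()/amdgpu_bo_unreserve() wrappers suggested further down
in this thread (uvd_use_shared_ib_bo() is a hypothetical helper, not part of
the patch):

#include "amdgpu.h"
#include "amdgpu_object.h"

/* Sketch only: a debugfs-triggered IB test and the delayed free handler can
 * race on the single pre-allocated IB BO, so reserve it for the whole
 * write-message-and-submit sequence.
 */
static int uvd_use_shared_ib_bo(struct amdgpu_ring *ring)
{
	struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
	int r;

	r = amdgpu_bo_reserve(bo, false);	/* interruptible, blocking */
	if (r)
		return r;

	/* ... fill the message via amdgpu_bo_kptr(bo) and submit the IB ... */

	amdgpu_bo_unreserve(bo);
	return 0;
}
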
________________________________________
From: Koenig, Christian <Christian.Koenig at amd.com>
Sent: September 10, 2021 18:02
To: Pan, Xinhui; amd-gfx at lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
*sigh* yeah, you are probably right. It wouldn't be that simple if it were
easy, would it?
In this case you can also skip taking the reservation lock for the
pre-allocated BO, since there should absolutely be only one user at a time.
Christian.
On 10.09.21 at 11:42, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> Oh, god. The UVD free handler submits a delayed message which depends on the scheduler while the reservation lock is held.
> This is not allowed, as commit c8e42d57859d ("drm/amdgpu: implement more ib pools (v2)") says:
> any jobs which schedule IBs without dependence on the gpu scheduler should use the DIRECT pool.
>
> Looks like we should only use the reserved BO for direct IB submission.
> As for delayed IB submission, we could allocate a new one dynamically.
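
A rough, purely illustrative sketch of that split; uvd_get_msg_bo() is a
hypothetical helper, not something this patch adds:

/* Illustrative only: keep the pre-allocated BO for direct submission and
 * fall back to a freshly allocated, throw-away BO for delayed
 * (scheduler-dependent) submission, e.g. from the free handler.
 */
static int uvd_get_msg_bo(struct amdgpu_device *adev, bool direct,
			  struct amdgpu_bo **bo, void **cpu_addr)
{
	int r;

	if (direct) {
		*bo = adev->uvd.ib_bo;		/* pre-allocated in sw_init */
		r = amdgpu_bo_reserve(*bo, false);
		if (!r)
			*cpu_addr = amdgpu_bo_kptr(*bo);
		return r;
	}

	/* Delayed path: allocate, pin and map a fresh BO, reserved on return. */
	return amdgpu_bo_create_reserved(adev, PAGE_SIZE, PAGE_SIZE,
					 AMDGPU_GEM_DOMAIN_GTT,
					 bo, NULL, cpu_addr);
}
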
> ________________________________________
> From: Koenig, Christian <Christian.Koenig at amd.com>
> Sent: September 10, 2021 16:53
> To: Pan, Xinhui; amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> It should not, unless we are OOM, which should not happen during driver
> initialization.
>
> But there is another problem I'm thinking about: Do we still use UVD IB
> submissions to forcefully tear down UVD sessions when a process crashes?
>
> If yes, that stuff could easily deadlock with an IB test executed during
> GPU reset.
>
> Christian.
>
> On 10.09.21 at 10:18, Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
>> For now, the new placement is calculated as new = old ∩ new.
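
To illustrate how I read that intersection (conceptual only, this is not the
actual amdgpu_bo_pin() code):

#include <linux/types.h>

/* Conceptual illustration of "new = old ∩ new" on domain masks; just the
 * reasoning above expressed as a bitwise AND, not driver code.
 */
static u32 effective_pin_domain(u32 current_domains, u32 requested_domain)
{
	return current_domains & requested_domain;
}
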
>>
>> ________________________________________
>> From: Koenig, Christian <Christian.Koenig at amd.com>
>> Sent: September 10, 2021 14:24
>> To: Pan, Xinhui; amd-gfx at lists.freedesktop.org
>> Cc: Deucher, Alexander
>> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>
>> On 10.09.21 at 02:38, xinhui pan wrote:
>>> Move the BO allocation into sw_init.
>>>
>>> Signed-off-by: xinhui pan <xinhui.pan at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 1 +
>>> drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 8 +--
>>> drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 8 +--
>>> 4 files changed, 49 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> index d451c359606a..e2eaac941d37 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>> const char *fw_name;
>>> const struct common_firmware_header *hdr;
>>> unsigned family_id;
>>> + struct amdgpu_bo *bo = NULL;
>>> + void *addr;
>>> int i, j, r;
>>>
>>> INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
>>> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>> adev->uvd.filp[i] = NULL;
>>> }
>>>
>>> + r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
>>> + AMDGPU_GEM_DOMAIN_GTT,
>>> + &bo, NULL, &addr);
>>> + if (r)
>>> + return r;
>>> +
>>> /* from uvd v5.0 HW addressing capacity increased to 64 bits */
>>> - if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
>>> + if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>>> adev->uvd.address_64_bit = true;
>>> + amdgpu_bo_kunmap(bo);
>>> + amdgpu_bo_unpin(bo);
>>> + r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>>> + 0, 256 << 20);
>> Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
>> because I want to remove amdgpu_bo_pin_restricted() sooner or later.
>>
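
For reference, roughly what that would look like, modeled on the code this
patch removes from amdgpu_uvd_send_msg() below (error handling trimmed, and
the error label is hypothetical):

/* Sketch of the suggested alternative to amdgpu_bo_pin_restricted():
 * request VRAM placement, force the BO into the segment UVD can address,
 * then validate it. Mirrors the code removed from amdgpu_uvd_send_msg().
 */
struct ttm_operation_ctx ctx = { true, false };

amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_uvd_force_into_uvd_segment(bo);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
	goto err_unreserve;
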
>>> + if (r) {
>>> + amdgpu_bo_unreserve(bo);
>>> + amdgpu_bo_unref(&bo);
>>> + return r;
>>> + }
>>> + r = amdgpu_bo_kmap(bo, &addr);
>>> + if (r) {
>>> + amdgpu_bo_unpin(bo);
>>> + amdgpu_bo_unreserve(bo);
>>> + amdgpu_bo_unref(&bo);
>>> + return r;
>>> + }
>>> + }
>>> + adev->uvd.ib_bo = bo;
>>> + amdgpu_bo_unreserve(bo);
>>>
>>> switch (adev->asic_type) {
>>> case CHIP_TONGA:
>>> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>> for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>>> amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>>> }
>>> + amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>>> release_firmware(adev->uvd.fw);
>>>
>>> return 0;
>>> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>> unsigned offset_idx = 0;
>>> unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>>>
>>> - amdgpu_bo_kunmap(bo);
>>> - amdgpu_bo_unpin(bo);
>>> -
>>> - if (!ring->adev->uvd.address_64_bit) {
>>> - struct ttm_operation_ctx ctx = { true, false };
>>> -
>>> - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>>> - amdgpu_uvd_force_into_uvd_segment(bo);
>>> - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>> - if (r)
>>> - goto err;
>>> - }
>>> -
>>> r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>>> AMDGPU_IB_POOL_DELAYED, &job);
>>> if (r)
>>> - goto err;
>>> + return r;
>>>
>>> if (adev->asic_type >= CHIP_VEGA10) {
>>> offset_idx = 1 + ring->me;
>>> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>> }
>>>
>>> amdgpu_bo_fence(bo, f, false);
>>> - amdgpu_bo_unreserve(bo);
>>> - amdgpu_bo_unref(&bo);
>>>
>>> if (fence)
>>> *fence = dma_fence_get(f);
>>> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>
>>> err_free:
>>> amdgpu_job_free(job);
>>> -
>>> -err:
>>> - amdgpu_bo_unreserve(bo);
>>> - amdgpu_bo_unref(&bo);
>>> return r;
>>> }
>>>
>>> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>> struct dma_fence **fence)
>>> {
>>> struct amdgpu_device *adev = ring->adev;
>>> - struct amdgpu_bo *bo = NULL;
>>> + struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>> uint32_t *msg;
>>> int r, i;
>>>
>>> - r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>> - AMDGPU_GEM_DOMAIN_GTT,
>>> - &bo, NULL, (void **)&msg);
>>> + r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>> if (r)
>>> return r;
>>>
>>> + msg = amdgpu_bo_kptr(bo);
>>> /* stitch together an UVD create msg */
>>> msg[0] = cpu_to_le32(0x00000de4);
>>> msg[1] = cpu_to_le32(0x00000000);
>>> @@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>> for (i = 11; i < 1024; ++i)
>>> msg[i] = cpu_to_le32(0x0);
>>>
>>> - return amdgpu_uvd_send_msg(ring, bo, true, fence);
>>> + r = amdgpu_uvd_send_msg(ring, bo, true, fence);
>>> +
>>> + amdgpu_bo_unreserve(bo);
>>> + return r;
>>> }
>>>
>>> int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>> bool direct, struct dma_fence **fence)
>>> {
>>> struct amdgpu_device *adev = ring->adev;
>>> - struct amdgpu_bo *bo = NULL;
>>> + struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>> uint32_t *msg;
>>> int r, i;
>>>
>>> - r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>> - AMDGPU_GEM_DOMAIN_GTT,
>>> - &bo, NULL, (void **)&msg);
>>> + r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>> Please use amdgpu_bo_reserve() here and elsewhere as well just to be on
>> the clean side.
>>
>> Lockdep will sooner or later complain that we reserve a BO in the reset
>> path, but that is mostly a theoretical problem which existed before as
>> well, so I'm fine with sticking with this for now.
>>
>> It's just that trylock might not work because the BO is busy with
>> indirect submission.
>>
>> Apart from those two minor issues the patch looks good to me.
>>
>> Thanks,
>> Christian.
>>
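
To make the difference concrete (surrounding code trimmed; amdgpu_bo_reserve()
is the existing wrapper around ttm_bo_reserve()):

/* As in the patch: a trylock, which fails immediately when the BO cannot
 * be reserved right away. */
r = ttm_bo_reserve(&bo->tbo, true, true, NULL);

/* As suggested: the amdgpu wrapper, blocking until the BO can be reserved;
 * no_intr = false keeps the wait interruptible. */
r = amdgpu_bo_reserve(bo, false);
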
>>> if (r)
>>> return r;
>>>
>>> + msg = amdgpu_bo_kptr(bo);
>>> /* stitch together an UVD destroy msg */
>>> msg[0] = cpu_to_le32(0x00000de4);
>>> msg[1] = cpu_to_le32(0x00000002);
>>> @@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>> for (i = 4; i < 1024; ++i)
>>> msg[i] = cpu_to_le32(0x0);
>>>
>>> - return amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>> + r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>> +
>>> + amdgpu_bo_unreserve(bo);
>>> + return r;
>>> }
>>>
>>> static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> index edbb8194ee81..76ac9699885d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> @@ -68,6 +68,7 @@ struct amdgpu_uvd {
>>> /* store image width to adjust nb memory state */
>>> unsigned decode_image_width;
>>> uint32_t keyselect;
>>> + struct amdgpu_bo *ib_bo;
>>> };
>>>
>>> int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> index bc571833632e..301c0cea7164 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> @@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
>>> static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>> {
>>> struct dma_fence *fence = NULL;
>>> - struct amdgpu_bo *bo = NULL;
>>> + struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>> long r;
>>>
>>> - r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>> - AMDGPU_GEM_DOMAIN_VRAM,
>>> - &bo, NULL, NULL);
>>> + r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>> if (r)
>>> return r;
>>>
>>> @@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>
>>> error:
>>> dma_fence_put(fence);
>>> - amdgpu_bo_unpin(bo);
>>> amdgpu_bo_unreserve(bo);
>>> - amdgpu_bo_unref(&bo);
>>> return r;
>>> }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> index b6e82d75561f..efa270288029 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> @@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
>>> static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>> {
>>> struct dma_fence *fence = NULL;
>>> - struct amdgpu_bo *bo = NULL;
>>> + struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>> long r;
>>>
>>> - r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>> - AMDGPU_GEM_DOMAIN_VRAM,
>>> - &bo, NULL, NULL);
>>> + r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>> if (r)
>>> return r;
>>>
>>> @@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>
>>> error:
>>> dma_fence_put(fence);
>>> - amdgpu_bo_unpin(bo);
>>> amdgpu_bo_unreserve(bo);
>>> - amdgpu_bo_unref(&bo);
>>> return r;
>>> }
>>>