Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
Pan, Xinhui
Xinhui.Pan at amd.com
Fri Sep 10 10:10:37 UTC 2021
[AMD Official Use Only]
We need to take this lock.
The IB test can be triggered through debugfs. These days I usually test it
by cat'ing the gpu recovery and amdgpu_test_ib files in debugfs.
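
To make that concrete, here is a minimal sketch of the locking this implies,
assuming the pre-allocated adev->uvd.ib_bo introduced by the patch below and
the amdgpu_bo_reserve()/amdgpu_bo_unreserve() wrappers suggested further down
in this thread (uvd_use_shared_ib_bo() is a hypothetical helper, not part of
the patch):

#include "amdgpu.h"
#include "amdgpu_object.h"

/* Sketch only: a debugfs-triggered IB test and the delayed free handler can
 * race on the single pre-allocated IB BO, so reserve it for the whole
 * write-message-and-submit sequence.
 */
static int uvd_use_shared_ib_bo(struct amdgpu_ring *ring)
{
	struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
	int r;

	r = amdgpu_bo_reserve(bo, false);	/* interruptible, blocking */
	if (r)
		return r;

	/* ... fill the message via amdgpu_bo_kptr(bo) and submit the IB ... */

	amdgpu_bo_unreserve(bo);
	return 0;
}
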
________________________________________
From: Koenig, Christian <Christian.Koenig at amd.com>
Sent: September 10, 2021 18:02
To: Pan, Xinhui; amd-gfx at lists.freedesktop.org
Cc: Deucher, Alexander
Subject: Re: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
*sigh* yeah, you are probably right. It wouldn't be that simple if it were
easy, would it?
In this case you can also skip taking the reservation lock for the
pre-allocated BO, since there should absolutely be only one user at a time.
Christian.
On 10.09.21 at 11:42, Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> Oh, god. The UVD free handler submits a delayed message which depends on the scheduler while the reservation lock is held.
> This is not allowed, as commit c8e42d57859d ("drm/amdgpu: implement more ib pools (v2)") says:
> any jobs which schedule IBs without dependence on the gpu scheduler should use the DIRECT pool.
>
> Looks like we should only use the reserved BO for direct IB submission.
> As for delayed IB submission, we could allocate a new one dynamically.
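
A rough, purely illustrative sketch of that split; uvd_get_msg_bo() is a
hypothetical helper, not something this patch adds:

/* Illustrative only: keep the pre-allocated BO for direct submission and
 * fall back to a freshly allocated, throw-away BO for delayed
 * (scheduler-dependent) submission, e.g. from the free handler.
 */
static int uvd_get_msg_bo(struct amdgpu_device *adev, bool direct,
			  struct amdgpu_bo **bo, void **cpu_addr)
{
	int r;

	if (direct) {
		*bo = adev->uvd.ib_bo;		/* pre-allocated in sw_init */
		r = amdgpu_bo_reserve(*bo, false);
		if (!r)
			*cpu_addr = amdgpu_bo_kptr(*bo);
		return r;
	}

	/* Delayed path: allocate, pin and map a fresh BO, reserved on return. */
	return amdgpu_bo_create_reserved(adev, PAGE_SIZE, PAGE_SIZE,
					 AMDGPU_GEM_DOMAIN_GTT,
					 bo, NULL, cpu_addr);
}
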
> ________________________________________
> From: Koenig, Christian <Christian.Koenig at amd.com>
> Sent: September 10, 2021 16:53
> To: Pan, Xinhui; amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander
> Subject: Re: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>
> It should not, unless we are OOM, which should not happen during driver
> initialization.
>
> But there is another problem I'm thinking about: Do we still use UVD IB
> submissions to forcefully tear down UVD sessions when a process crashes?
>
> If yes, that stuff could easily deadlock with an IB test executed during
> GPU reset.
>
> Christian.
>
> On 10.09.21 at 10:18, Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> I am wondering if amdgpu_bo_pin would change the BO's placement in the future.
>> For now, the new placement is calculated as new = old ∩ new.
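
To illustrate how I read that intersection (conceptual only, this is not the
actual amdgpu_bo_pin() code):

#include <linux/types.h>

/* Conceptual illustration of "new = old ∩ new" on domain masks; just the
 * reasoning above expressed as a bitwise AND, not driver code.
 */
static u32 effective_pin_domain(u32 current_domains, u32 requested_domain)
{
	return current_domains & requested_domain;
}
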
>>
>> ________________________________________
>> From: Koenig, Christian <Christian.Koenig at amd.com>
>> Sent: September 10, 2021 14:24
>> To: Pan, Xinhui; amd-gfx at lists.freedesktop.org
>> Cc: Deucher, Alexander
>> Subject: Re: [PATCH 2/4] drm/amdgpu: UVD avoid memory allocation during IB test
>>
>> On 10.09.21 at 02:38, xinhui pan wrote:
>>> Move the BO allocation into sw_init.
>>>
>>> Signed-off-by: xinhui pan <xinhui.pan at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 75 +++++++++++++++----------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 1 +
>>> drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 8 +--
>>> drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 8 +--
>>> 4 files changed, 49 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> index d451c359606a..e2eaac941d37 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
>>> @@ -141,6 +141,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>> const char *fw_name;
>>> const struct common_firmware_header *hdr;
>>> unsigned family_id;
>>> + struct amdgpu_bo *bo = NULL;
>>> + void *addr;
>>> int i, j, r;
>>>
>>> INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
>>> @@ -298,9 +300,34 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>>> adev->uvd.filp[i] = NULL;
>>> }
>>>
>>> + r = amdgpu_bo_create_reserved(adev, 128 << 10, PAGE_SIZE,
>>> + AMDGPU_GEM_DOMAIN_GTT,
>>> + &bo, NULL, &addr);
>>> + if (r)
>>> + return r;
>>> +
>>> /* from uvd v5.0 HW addressing capacity increased to 64 bits */
>>> - if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
>>> + if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) {
>>> adev->uvd.address_64_bit = true;
>>> + amdgpu_bo_kunmap(bo);
>>> + amdgpu_bo_unpin(bo);
>>> + r = amdgpu_bo_pin_restricted(bo, AMDGPU_GEM_DOMAIN_VRAM,
>>> + 0, 256 << 20);
>> Please keep using amdgpu_uvd_force_into_uvd_segment() and validate here,
>> because I want to remove amdgpu_bo_pin_restricted() sooner or later.
>>
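
For reference, roughly what that would look like, modeled on the code this
patch removes from amdgpu_uvd_send_msg() below (error handling trimmed, and
the error label is hypothetical):

/* Sketch of the suggested alternative to amdgpu_bo_pin_restricted():
 * request VRAM placement, force the BO into the segment UVD can address,
 * then validate it. Mirrors the code removed from amdgpu_uvd_send_msg().
 */
struct ttm_operation_ctx ctx = { true, false };

amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_uvd_force_into_uvd_segment(bo);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
	goto err_unreserve;
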
>>> + if (r) {
>>> + amdgpu_bo_unreserve(bo);
>>> + amdgpu_bo_unref(&bo);
>>> + return r;
>>> + }
>>> + r = amdgpu_bo_kmap(bo, &addr);
>>> + if (r) {
>>> + amdgpu_bo_unpin(bo);
>>> + amdgpu_bo_unreserve(bo);
>>> + amdgpu_bo_unref(&bo);
>>> + return r;
>>> + }
>>> + }
>>> + adev->uvd.ib_bo = bo;
>>> + amdgpu_bo_unreserve(bo);
>>>
>>> switch (adev->asic_type) {
>>> case CHIP_TONGA:
>>> @@ -342,6 +369,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
>>> for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
>>> amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
>>> }
>>> + amdgpu_bo_free_kernel(&adev->uvd.ib_bo, NULL, NULL);
>>> release_firmware(adev->uvd.fw);
>>>
>>> return 0;
>>> @@ -1080,23 +1108,10 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>> unsigned offset_idx = 0;
>>> unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
>>>
>>> - amdgpu_bo_kunmap(bo);
>>> - amdgpu_bo_unpin(bo);
>>> -
>>> - if (!ring->adev->uvd.address_64_bit) {
>>> - struct ttm_operation_ctx ctx = { true, false };
>>> -
>>> - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
>>> - amdgpu_uvd_force_into_uvd_segment(bo);
>>> - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>> - if (r)
>>> - goto err;
>>> - }
>>> -
>>> r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
>>> AMDGPU_IB_POOL_DELAYED, &job);
>>> if (r)
>>> - goto err;
>>> + return r;
>>>
>>> if (adev->asic_type >= CHIP_VEGA10) {
>>> offset_idx = 1 + ring->me;
>>> @@ -1148,8 +1163,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>> }
>>>
>>> amdgpu_bo_fence(bo, f, false);
>>> - amdgpu_bo_unreserve(bo);
>>> - amdgpu_bo_unref(&bo);
>>>
>>> if (fence)
>>> *fence = dma_fence_get(f);
>>> @@ -1159,10 +1172,6 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>>>
>>> err_free:
>>> amdgpu_job_free(job);
>>> -
>>> -err:
>>> - amdgpu_bo_unreserve(bo);
>>> - amdgpu_bo_unref(&bo);
>>> return r;
>>> }
>>>
>>> @@ -1173,16 +1182,15 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>> struct dma_fence **fence)
>>> {
>>> struct amdgpu_device *adev = ring->adev;
>>> - struct amdgpu_bo *bo = NULL;
>>> + struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>> uint32_t *msg;
>>> int r, i;
>>>
>>> - r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>> - AMDGPU_GEM_DOMAIN_GTT,
>>> - &bo, NULL, (void **)&msg);
>>> + r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>> if (r)
>>> return r;
>>>
>>> + msg = amdgpu_bo_kptr(bo);
>>> /* stitch together an UVD create msg */
>>> msg[0] = cpu_to_le32(0x00000de4);
>>> msg[1] = cpu_to_le32(0x00000000);
>>> @@ -1198,23 +1206,25 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
>>> for (i = 11; i < 1024; ++i)
>>> msg[i] = cpu_to_le32(0x0);
>>>
>>> - return amdgpu_uvd_send_msg(ring, bo, true, fence);
>>> + r = amdgpu_uvd_send_msg(ring, bo, true, fence);
>>> +
>>> + amdgpu_bo_unreserve(bo);
>>> + return r;
>>> }
>>>
>>> int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>> bool direct, struct dma_fence **fence)
>>> {
>>> struct amdgpu_device *adev = ring->adev;
>>> - struct amdgpu_bo *bo = NULL;
>>> + struct amdgpu_bo *bo = adev->uvd.ib_bo;
>>> uint32_t *msg;
>>> int r, i;
>>>
>>> - r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>> - AMDGPU_GEM_DOMAIN_GTT,
>>> - &bo, NULL, (void **)&msg);
>>> + r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>> Please use amdgpu_bo_reserve() here and elsewhere as well just to be on
>> the clean side.
>>
>> Lockdep will sooner or later complain that we reserve a BO in the reset
>> path, but that is mostly a theoretical problem which existed before as
>> well, so I'm fine with sticking with this for now.
>>
>> It's just that trylock might not work because the BO is busy with
>> indirect submission.
>>
>> Apart from those two minor issues the patch looks good to me.
>>
>> Thanks,
>> Christian.
>>
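
To make the difference concrete (surrounding code trimmed; amdgpu_bo_reserve()
is the existing wrapper around ttm_bo_reserve()):

/* As in the patch: a trylock, which fails immediately when the BO cannot
 * be reserved right away. */
r = ttm_bo_reserve(&bo->tbo, true, true, NULL);

/* As suggested: the amdgpu wrapper, blocking until the BO can be reserved;
 * no_intr = false keeps the wait interruptible. */
r = amdgpu_bo_reserve(bo, false);
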
>>> if (r)
>>> return r;
>>>
>>> + msg = amdgpu_bo_kptr(bo);
>>> /* stitch together an UVD destroy msg */
>>> msg[0] = cpu_to_le32(0x00000de4);
>>> msg[1] = cpu_to_le32(0x00000002);
>>> @@ -1223,7 +1233,10 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
>>> for (i = 4; i < 1024; ++i)
>>> msg[i] = cpu_to_le32(0x0);
>>>
>>> - return amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>> + r = amdgpu_uvd_send_msg(ring, bo, direct, fence);
>>> +
>>> + amdgpu_bo_unreserve(bo);
>>> + return r;
>>> }
>>>
>>> static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> index edbb8194ee81..76ac9699885d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
>>> @@ -68,6 +68,7 @@ struct amdgpu_uvd {
>>> /* store image width to adjust nb memory state */
>>> unsigned decode_image_width;
>>> uint32_t keyselect;
>>> + struct amdgpu_bo *ib_bo;
>>> };
>>>
>>> int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> index bc571833632e..301c0cea7164 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
>>> @@ -332,12 +332,10 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
>>> static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>> {
>>> struct dma_fence *fence = NULL;
>>> - struct amdgpu_bo *bo = NULL;
>>> + struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>> long r;
>>>
>>> - r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>> - AMDGPU_GEM_DOMAIN_VRAM,
>>> - &bo, NULL, NULL);
>>> + r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>> if (r)
>>> return r;
>>>
>>> @@ -357,9 +355,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>
>>> error:
>>> dma_fence_put(fence);
>>> - amdgpu_bo_unpin(bo);
>>> amdgpu_bo_unreserve(bo);
>>> - amdgpu_bo_unref(&bo);
>>> return r;
>>> }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> index b6e82d75561f..efa270288029 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
>>> @@ -338,12 +338,10 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
>>> static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>> {
>>> struct dma_fence *fence = NULL;
>>> - struct amdgpu_bo *bo = NULL;
>>> + struct amdgpu_bo *bo = ring->adev->uvd.ib_bo;
>>> long r;
>>>
>>> - r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
>>> - AMDGPU_GEM_DOMAIN_VRAM,
>>> - &bo, NULL, NULL);
>>> + r = ttm_bo_reserve(&bo->tbo, true, true, NULL);
>>> if (r)
>>> return r;
>>>
>>> @@ -363,9 +361,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
>>>
>>> error:
>>> dma_fence_put(fence);
>>> - amdgpu_bo_unpin(bo);
>>> amdgpu_bo_unreserve(bo);
>>> - amdgpu_bo_unref(&bo);
>>> return r;
>>> }
>>>