[PATCH] drm/amdgpu: once more fix the call oder in amdgpu_ttm_move()
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Apr 22 15:05:34 UTC 2024
Am 18.04.24 um 18:10 schrieb Alex Deucher:
> On Thu, Mar 21, 2024 at 10:37 AM Christian König
> <ckoenig.leichtzumerken at gmail.com> wrote:
>> Am 21.03.24 um 15:12 schrieb Tvrtko Ursulin:
>>> On 21/03/2024 12:43, Christian König wrote:
>>>> This reverts drm/amdgpu: fix ftrace event amdgpu_bo_move always move
>>>> on same heap. The basic problem here is that after the move the old
>>>> location is simply not available any more.
>>>>
>>>> Some fixes where suggested, but essentially we should call the move
>>>> notification before actually moving things because only this way we have
>>>> the correct order for DMA-buf and VM move notifications as well.
>>>>
>>>> Also rework the statistic handling so that we don't update the eviction
>>>> counter before the move.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>> Don't forget:
>>>
>>> Fixes: 94aeb4117343 ("drm/amdgpu: fix ftrace event amdgpu_bo_move
>>> always move on same heap")
>>> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3171
>> Ah, thanks. I already wanted to ask if there is any bug report about
>> that as well.
> Did this ever land? I don't see it anywhere.
No, I never found time to actually rebase and push it.
Just did so 10 minutes ago, should probably show up in
amd-staging-drm-next unless there isn't any CI hickup again.
Christian.
>
> Alex
>
>> Regards,
>> Christian.
>>
>>> ;)
>>>
>>> Regards,
>>>
>>> Tvrtko
>>>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 15 +++----
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4 +-
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 48 ++++++++++++----------
>>>> 3 files changed, 37 insertions(+), 30 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> index 425cebcc5cbf..eb7d824763b9 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> @@ -1245,19 +1245,20 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo
>>>> *bo, void *buffer,
>>>> * amdgpu_bo_move_notify - notification about a memory move
>>>> * @bo: pointer to a buffer object
>>>> * @evict: if this move is evicting the buffer from the graphics
>>>> address space
>>>> + * @new_mem: new resource for backing the BO
>>>> *
>>>> * Marks the corresponding &amdgpu_bo buffer object as invalid,
>>>> also performs
>>>> * bookkeeping.
>>>> * TTM driver callback which is called when ttm moves a buffer.
>>>> */
>>>> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
>>>> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
>>>> + bool evict,
>>>> + struct ttm_resource *new_mem)
>>>> {
>>>> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>>>> + struct ttm_resource *old_mem = bo->resource;
>>>> struct amdgpu_bo *abo;
>>>> - if (!amdgpu_bo_is_amdgpu_bo(bo))
>>>> - return;
>>>> -
>>>> abo = ttm_to_amdgpu_bo(bo);
>>>> amdgpu_vm_bo_invalidate(adev, abo, evict);
>>>> @@ -1267,9 +1268,9 @@ void amdgpu_bo_move_notify(struct
>>>> ttm_buffer_object *bo, bool evict)
>>>> bo->resource->mem_type != TTM_PL_SYSTEM)
>>>> dma_buf_move_notify(abo->tbo.base.dma_buf);
>>>> - /* remember the eviction */
>>>> - if (evict)
>>>> - atomic64_inc(&adev->num_evictions);
>>>> + /* move_notify is called before move happens */
>>>> + trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1,
>>>> + old_mem ? old_mem->mem_type : -1);
>>>> }
>>>> void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> index a3ea8a82db23..d28e21baef16 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> @@ -344,7 +344,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo,
>>>> void *metadata,
>>>> int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
>>>> size_t buffer_size, uint32_t *metadata_size,
>>>> uint64_t *flags);
>>>> -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict);
>>>> +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
>>>> + bool evict,
>>>> + struct ttm_resource *new_mem);
>>>> void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
>>>> vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object
>>>> *bo);
>>>> void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> index a5ceec7820cf..460b23918bfc 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> @@ -471,14 +471,16 @@ static int amdgpu_bo_move(struct
>>>> ttm_buffer_object *bo, bool evict,
>>>> if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
>>>> bo->ttm == NULL)) {
>>>> + amdgpu_bo_move_notify(bo, evict, new_mem);
>>>> ttm_bo_move_null(bo, new_mem);
>>>> - goto out;
>>>> + return 0;
>>>> }
>>>> if (old_mem->mem_type == TTM_PL_SYSTEM &&
>>>> (new_mem->mem_type == TTM_PL_TT ||
>>>> new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
>>>> + amdgpu_bo_move_notify(bo, evict, new_mem);
>>>> ttm_bo_move_null(bo, new_mem);
>>>> - goto out;
>>>> + return 0;
>>>> }
>>>> if ((old_mem->mem_type == TTM_PL_TT ||
>>>> old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
>>>> @@ -488,9 +490,10 @@ static int amdgpu_bo_move(struct
>>>> ttm_buffer_object *bo, bool evict,
>>>> return r;
>>>> amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
>>>> + amdgpu_bo_move_notify(bo, evict, new_mem);
>>>> ttm_resource_free(bo, &bo->resource);
>>>> ttm_bo_assign_mem(bo, new_mem);
>>>> - goto out;
>>>> + return 0;
>>>> }
>>>> if (old_mem->mem_type == AMDGPU_PL_GDS ||
>>>> @@ -502,8 +505,9 @@ static int amdgpu_bo_move(struct
>>>> ttm_buffer_object *bo, bool evict,
>>>> new_mem->mem_type == AMDGPU_PL_OA ||
>>>> new_mem->mem_type == AMDGPU_PL_DOORBELL) {
>>>> /* Nothing to save here */
>>>> + amdgpu_bo_move_notify(bo, evict, new_mem);
>>>> ttm_bo_move_null(bo, new_mem);
>>>> - goto out;
>>>> + return 0;
>>>> }
>>>> if (bo->type == ttm_bo_type_device &&
>>>> @@ -515,22 +519,23 @@ static int amdgpu_bo_move(struct
>>>> ttm_buffer_object *bo, bool evict,
>>>> abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>>>> }
>>>> - if (adev->mman.buffer_funcs_enabled) {
>>>> - if (((old_mem->mem_type == TTM_PL_SYSTEM &&
>>>> - new_mem->mem_type == TTM_PL_VRAM) ||
>>>> - (old_mem->mem_type == TTM_PL_VRAM &&
>>>> - new_mem->mem_type == TTM_PL_SYSTEM))) {
>>>> - hop->fpfn = 0;
>>>> - hop->lpfn = 0;
>>>> - hop->mem_type = TTM_PL_TT;
>>>> - hop->flags = TTM_PL_FLAG_TEMPORARY;
>>>> - return -EMULTIHOP;
>>>> - }
>>>> + if (adev->mman.buffer_funcs_enabled &&
>>>> + ((old_mem->mem_type == TTM_PL_SYSTEM &&
>>>> + new_mem->mem_type == TTM_PL_VRAM) ||
>>>> + (old_mem->mem_type == TTM_PL_VRAM &&
>>>> + new_mem->mem_type == TTM_PL_SYSTEM))) {
>>>> + hop->fpfn = 0;
>>>> + hop->lpfn = 0;
>>>> + hop->mem_type = TTM_PL_TT;
>>>> + hop->flags = TTM_PL_FLAG_TEMPORARY;
>>>> + return -EMULTIHOP;
>>>> + }
>>>> + amdgpu_bo_move_notify(bo, evict, new_mem);
>>>> + if (adev->mman.buffer_funcs_enabled)
>>>> r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
>>>> - } else {
>>>> + else
>>>> r = -ENODEV;
>>>> - }
>>>> if (r) {
>>>> /* Check that all memory is CPU accessible */
>>>> @@ -545,11 +550,10 @@ static int amdgpu_bo_move(struct
>>>> ttm_buffer_object *bo, bool evict,
>>>> return r;
>>>> }
>>>> - trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
>>>> -out:
>>>> - /* update statistics */
>>>> + /* update statistics after the move */
>>>> + if (evict)
>>>> + atomic64_inc(&adev->num_evictions);
>>>> atomic64_add(bo->base.size, &adev->num_bytes_moved);
>>>> - amdgpu_bo_move_notify(bo, evict);
>>>> return 0;
>>>> }
>>>> @@ -1551,7 +1555,7 @@ static int amdgpu_ttm_access_memory(struct
>>>> ttm_buffer_object *bo,
>>>> static void
>>>> amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
>>>> {
>>>> - amdgpu_bo_move_notify(bo, false);
>>>> + amdgpu_bo_move_notify(bo, false, NULL);
>>>> }
>>>> static struct ttm_device_funcs amdgpu_bo_driver = {
More information about the amd-gfx
mailing list