[PATCH 5/5] drm/amdgpu: fix shadow BO restoring
Christian König
ckoenig.leichtzumerken at gmail.com
Fri Sep 14 11:54:33 UTC 2018
Am 13.09.2018 um 11:29 schrieb Zhang, Jerry(Junwei):
> On 09/11/2018 05:56 PM, Christian König wrote:
>> Don't grab the reservation lock any more and simplify the handling quite
>> a bit.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 109
>> ++++++++---------------------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 46 ++++--------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 +--
>> 3 files changed, 43 insertions(+), 120 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 5eba66ecf668..20bb702f5c7f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -2940,54 +2940,6 @@ static int
>> amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
>> return 0;
>> }
>> -/**
>> - * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM
>> buffers
>> - *
>> - * @adev: amdgpu_device pointer
>> - * @ring: amdgpu_ring for the engine handling the buffer operations
>> - * @bo: amdgpu_bo buffer whose shadow is being restored
>> - * @fence: dma_fence associated with the operation
>> - *
>> - * Restores the VRAM buffer contents from the shadow in GTT. Used to
>> - * restore things like GPUVM page tables after a GPU reset where
>> - * the contents of VRAM might be lost.
>> - * Returns 0 on success, negative error code on failure.
>> - */
>> -static int amdgpu_device_recover_vram_from_shadow(struct
>> amdgpu_device *adev,
>> - struct amdgpu_ring *ring,
>> - struct amdgpu_bo *bo,
>> - struct dma_fence **fence)
>> -{
>> - uint32_t domain;
>> - int r;
>> -
>> - if (!bo->shadow)
>> - return 0;
>> -
>> - r = amdgpu_bo_reserve(bo, true);
>> - if (r)
>> - return r;
>> - domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
>> - /* if bo has been evicted, then no need to recover */
>> - if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
>> - r = amdgpu_bo_validate(bo->shadow);
>> - if (r) {
>> - DRM_ERROR("bo validate failed!\n");
>> - goto err;
>> - }
>> -
>> - r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
>> - NULL, fence, true);
>> - if (r) {
>> - DRM_ERROR("recover page table failed!\n");
>> - goto err;
>> - }
>> - }
>> -err:
>> - amdgpu_bo_unreserve(bo);
>> - return r;
>> -}
>> -
>> /**
>> * amdgpu_device_recover_vram - Recover some VRAM contents
>> *
>> @@ -2996,16 +2948,15 @@ static int
>> amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
>> * Restores the contents of VRAM buffers from the shadows in GTT.
>> Used to
>> * restore things like GPUVM page tables after a GPU reset where
>> * the contents of VRAM might be lost.
>> - * Returns 0 on success, 1 on failure.
>> + *
>> + * Returns:
>> + * 0 on success, negative error code on failure.
>> */
>> static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
>> {
>> - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>> - struct amdgpu_bo *bo, *tmp;
>> struct dma_fence *fence = NULL, *next = NULL;
>> - long r = 1;
>> - int i = 0;
>> - long tmo;
>> + struct amdgpu_bo *shadow;
>> + long r = 1, tmo;
>> if (amdgpu_sriov_runtime(adev))
>> tmo = msecs_to_jiffies(8000);
>> @@ -3014,44 +2965,40 @@ static int amdgpu_device_recover_vram(struct
>> amdgpu_device *adev)
>> DRM_INFO("recover vram bo from shadow start\n");
>> mutex_lock(&adev->shadow_list_lock);
>> - list_for_each_entry_safe(bo, tmp, &adev->shadow_list,
>> shadow_list) {
>> - next = NULL;
>> - amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
>> + list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
>> +
>> + /* No need to recover an evicted BO */
>> + if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
>> + shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
> is there a change that shadow bo evicted to other domain?
> like SYSTEM?
Yes, that's why I test "!= TTM_PL_TT" here.
What can happen is that either the shadow or the page table or page
directory is evicted.
But in this case we don't need to restore anything because of patch #1
in this series.
Regards,
Christian.
>
> Regards,
> Jerry
>> + continue;
>> +
>> + r = amdgpu_bo_restore_shadow(shadow, &next);
>> + if (r)
>> + break;
>> +
>> if (fence) {
>> r = dma_fence_wait_timeout(fence, false, tmo);
>> - if (r == 0)
>> - pr_err("wait fence %p[%d] timeout\n", fence, i);
>> - else if (r < 0)
>> - pr_err("wait fence %p[%d] interrupted\n", fence, i);
>> - if (r < 1) {
>> - dma_fence_put(fence);
>> - fence = next;
>> + dma_fence_put(fence);
>> + fence = next;
>> + if (r <= 0)
>> break;
>> - }
>> - i++;
>> + } else {
>> + fence = next;
>> }
>> -
>> - dma_fence_put(fence);
>> - fence = next;
>> }
>> mutex_unlock(&adev->shadow_list_lock);
>> - if (fence) {
>> - r = dma_fence_wait_timeout(fence, false, tmo);
>> - if (r == 0)
>> - pr_err("wait fence %p[%d] timeout\n", fence, i);
>> - else if (r < 0)
>> - pr_err("wait fence %p[%d] interrupted\n", fence, i);
>> -
>> - }
>> + if (fence)
>> + tmo = dma_fence_wait_timeout(fence, false, tmo);
>> dma_fence_put(fence);
>> - if (r > 0)
>> - DRM_INFO("recover vram bo from shadow done\n");
>> - else
>> + if (r <= 0 || tmo <= 0) {
>> DRM_ERROR("recover vram bo from shadow failed\n");
>> + return -EIO;
>> + }
>> - return (r > 0) ? 0 : 1;
>> + DRM_INFO("recover vram bo from shadow done\n");
>> + return 0;
>> }
>> /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index 3a6f92de5504..5b6d5fcc2151 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -537,7 +537,7 @@ static int amdgpu_bo_create_shadow(struct
>> amdgpu_device *adev,
>> if (!r) {
>> bo->shadow->parent = amdgpu_bo_ref(bo);
>> mutex_lock(&adev->shadow_list_lock);
>> - list_add_tail(&bo->shadow_list, &adev->shadow_list);
>> + list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
>> mutex_unlock(&adev->shadow_list_lock);
>> }
>> @@ -669,13 +669,10 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
>> }
>> /**
>> - * amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object
>> - * @adev: amdgpu device object
>> - * @ring: amdgpu_ring for the engine handling the buffer operations
>> - * @bo: &amdgpu_bo buffer to be restored
>> - * @resv: reservation object with embedded fence
>> + * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
>> + *
>> + * @shadow: &amdgpu_bo shadow to be restored
>> * @fence: dma_fence associated with the operation
>> - * @direct: whether to submit the job directly
>> *
>> * Copies a buffer object's shadow content back to the object.
>> * This is used for recovering a buffer from its shadow in case of
>> a gpu
>> @@ -684,36 +681,19 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
>> * Returns:
>> * 0 for success or a negative error code on failure.
>> */
>> -int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
>> - struct amdgpu_ring *ring,
>> - struct amdgpu_bo *bo,
>> - struct reservation_object *resv,
>> - struct dma_fence **fence,
>> - bool direct)
>> +int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct
>> dma_fence **fence)
>> {
>> - struct amdgpu_bo *shadow = bo->shadow;
>> - uint64_t bo_addr, shadow_addr;
>> - int r;
>> -
>> - if (!shadow)
>> - return -EINVAL;
>> -
>> - bo_addr = amdgpu_bo_gpu_offset(bo);
>> - shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
>> -
>> - r = reservation_object_reserve_shared(bo->tbo.resv);
>> - if (r)
>> - goto err;
>> + struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
>> + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>> + uint64_t shadow_addr, parent_addr;
>> - r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
>> - amdgpu_bo_size(bo), resv, fence,
>> - direct, false);
>> - if (!r)
>> - amdgpu_bo_fence(bo, *fence, true);
>> + shadow_addr = amdgpu_bo_gpu_offset(shadow);
>> + parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
>> -err:
>> - return r;
>> + return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
>> + amdgpu_bo_size(shadow), NULL, fence,
>> + true, false);
>> }
>> /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 907fdf46d895..363db417fb2e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -273,12 +273,8 @@ int amdgpu_bo_backup_to_shadow(struct
>> amdgpu_device *adev,
>> struct reservation_object *resv,
>> struct dma_fence **fence, bool direct);
>> int amdgpu_bo_validate(struct amdgpu_bo *bo);
>> -int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
>> - struct amdgpu_ring *ring,
>> - struct amdgpu_bo *bo,
>> - struct reservation_object *resv,
>> - struct dma_fence **fence,
>> - bool direct);
>> +int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
>> + struct dma_fence **fence);
>> uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device
>> *adev,
>> uint32_t domain);
>
More information about the amd-gfx
mailing list