[PATCH 1/1] drm/amdgpu: convert gtt_window_lock to a spinlock

Christian König christian.koenig at amd.com
Thu May 27 15:30:25 UTC 2021


That won't work, we need to be able to sleep while waiting for the TLB 
flush anyway.

On the other hand flushing the TLB after recovering each BO is nonsense 
to begin with.

So we should probably move that from amdgpu_gart_bind() into 
amdgpu_ttm_alloc_gart().

Regards,
Christian.

Am 27.05.21 um 16:54 schrieb Nirmoy Das:
> amdgpu_device_gpu_recover() will eventually call
> gmc_v10_0_flush_gpu_tlb() with holding a spinlock, which
> puts the process in atomic context. Sleeping in atomic context
> is not allowed so convert gtt_window_lock into a spinlock.
>
> BUG: sleeping function called from invalid context at kernel/locking/mutex.c:935
> in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2662, name: cat
> Call Trace:
>   dump_stack+0x8b/0xb0
>   ___might_sleep.cold+0xb6/0xc6
>   ? gmc_v10_0_flush_gpu_tlb+0x72/0x2c0 [amdgpu]
>   __mutex_lock+0x45/0x820
>   ? amdgpu_device_skip_hw_access+0x3e/0x70 [amdgpu]
>   gmc_v10_0_flush_gpu_tlb+0x72/0x2c0 [amdgpu]
>   ? amdgpu_device_skip_hw_access+0x3e/0x70 [amdgpu]
>   amdgpu_gart_bind+0x7a/0xc0 [amdgpu]
>   amdgpu_ttm_gart_bind+0x7d/0xd0 [amdgpu]
>   ? amdgpu_ttm_recover_gart+0x2e/0x70 [amdgpu]
>   amdgpu_gtt_mgr_recover+0x4e/0x70 [amdgpu]
>   amdgpu_do_asic_reset.cold+0x90/0x1c4 [amdgpu]
>   ? amdgpu_device_pre_asic_reset+0xa8/0x250 [amdgpu]
>   amdgpu_device_gpu_recover.cold+0x7bd/0xa23 [amdgpu]
>   ? lock_acquired+0x210/0x390
>   gpu_recover_get+0x29/0x60 [amdgpu]
>   simple_attr_read+0x69/0xf0
>   debugfs_attr_read+0x40/0x60
>   full_proxy_read+0x56/0x80
>   vfs_read+0xd1/0x1d0
>   ksys_read+0x68/0xe0
>   do_syscall_64+0x33/0x80
>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>
> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  |  6 +++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 10 +++++-----
>   drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  4 ++--
>   4 files changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c0aef327292a..4cb8fd193b6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -333,7 +333,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>   	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
>   	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
>   
> -	mutex_lock(&adev->mman.gtt_window_lock);
> +	spin_lock(&adev->mman.gtt_window_lock);
>   	while (src_mm.remaining) {
>   		uint32_t src_page_offset = src_mm.start & ~PAGE_MASK;
>   		uint32_t dst_page_offset = dst_mm.start & ~PAGE_MASK;
> @@ -373,7 +373,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>   		amdgpu_res_next(&dst_mm, cur_size);
>   	}
>   error:
> -	mutex_unlock(&adev->mman.gtt_window_lock);
> +	spin_unlock(&adev->mman.gtt_window_lock);
>   	if (f)
>   		*f = dma_fence_get(fence);
>   	dma_fence_put(fence);
> @@ -1661,7 +1661,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>   	int r;
>   	u64 vis_vram_limit;
>   
> -	mutex_init(&adev->mman.gtt_window_lock);
> +	spin_lock_init(&adev->mman.gtt_window_lock);
>   
>   	/* No others user of address space so set it to 0 */
>   	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 2877a924086f..afd905dc337b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -70,7 +70,7 @@ struct amdgpu_mman {
>   	struct amdgpu_ring			*buffer_funcs_ring;
>   	bool					buffer_funcs_enabled;
>   
> -	struct mutex				gtt_window_lock;
> +	spinlock_t				gtt_window_lock;
>   	/* Scheduler entity for buffer moves */
>   	struct drm_sched_entity			entity;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index ceab5ef50790..f4ce3eb4d7e4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -341,11 +341,11 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   		return;
>   	}
>   
> -	mutex_lock(&adev->mman.gtt_window_lock);
> +	spin_lock(&adev->mman.gtt_window_lock);
>   
>   	if (vmhub == AMDGPU_MMHUB_0) {
>   		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
> -		mutex_unlock(&adev->mman.gtt_window_lock);
> +		spin_unlock(&adev->mman.gtt_window_lock);
>   		return;
>   	}
>   
> @@ -356,7 +356,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	    amdgpu_in_reset(adev) ||
>   	    ring->sched.ready == false) {
>   		gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
> -		mutex_unlock(&adev->mman.gtt_window_lock);
> +		spin_unlock(&adev->mman.gtt_window_lock);
>   		return;
>   	}
>   
> @@ -379,7 +379,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	if (r)
>   		goto error_submit;
>   
> -	mutex_unlock(&adev->mman.gtt_window_lock);
> +	spin_unlock(&adev->mman.gtt_window_lock);
>   
>   	dma_fence_wait(fence, false);
>   	dma_fence_put(fence);
> @@ -390,7 +390,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>   	amdgpu_job_free(job);
>   
>   error_alloc:
> -	mutex_unlock(&adev->mman.gtt_window_lock);
> +	spin_unlock(&adev->mman.gtt_window_lock);
>   	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index fd8f544f0de2..e1dad02d400b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -135,7 +135,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
>   	uint64_t size;
>   	int r;
>   
> -	mutex_lock(&adev->mman.gtt_window_lock);
> +	spin_lock(&adev->mman.gtt_window_lock);
>   
>   	while (npages) {
>   		size = min(GTT_MAX_PAGES, npages);
> @@ -171,7 +171,7 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
>   	}
>   
>   out_unlock:
> -	mutex_unlock(&adev->mman.gtt_window_lock);
> +	spin_unlock(&adev->mman.gtt_window_lock);
>   
>   	return r;
>   }



More information about the amd-gfx mailing list