[PATCH] drm/amdgpu: improve debug VRAM access performance using sdma

Christian König ckoenig.leichtzumerken at gmail.com
Mon Mar 20 17:01:54 UTC 2023


I don't think so. Have we recently re-ordered something here?

Christian.

Am 20.03.23 um 08:05 schrieb Quan, Evan:
> [AMD Official Use Only - General]
>
> I happened to find that the sdma_access_bo allocation from GTT seems to be performed before GART is ready.
> That causes "amdgpu_gart_map" to be skipped, since adev->gart.ptr is still NULL.
> Is that done intentionally?
>
> Evan
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of
>> Jonathan Kim
>> Sent: Wednesday, January 5, 2022 3:12 AM
>> To: amd-gfx at lists.freedesktop.org
>> Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Kim, Jonathan
>> <Jonathan.Kim at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>
>> Subject: [PATCH] drm/amdgpu: improve debug VRAM access performance
>> using sdma
>>
>> For better performance during VRAM access for debugged processes, do
>> read/write copies over SDMA.
>>
>> In order to support post-mortem debugging on a broken device, fall back to
>> stable MMIO access when gpu recovery is disabled or when job submission
>> timeouts are set to max.  Failed SDMA access should automatically fall
>> back to MMIO access.
>>
>> Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid
>> page-table updates and TLB flushes on access.
>>
>> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78
>> +++++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  5 +-
>>   2 files changed, 82 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 367abed1d6e6..512df4c09772 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -48,6 +48,7 @@
>>   #include <drm/ttm/ttm_range_manager.h>
>>
>>   #include <drm/amdgpu_drm.h>
>> +#include <drm/drm_drv.h>
>>
>>   #include "amdgpu.h"
>>   #include "amdgpu_object.h"
>> @@ -1429,6 +1430,70 @@ static void amdgpu_ttm_vram_mm_access(struct
>> amdgpu_device *adev, loff_t pos,
>>   	}
>>   }
>>
>> +static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object
>> *bo,
>> +					unsigned long offset, void *buf, int
>> len, int write)
>> +{
>> +	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>> +	struct amdgpu_job *job;
>> +	struct dma_fence *fence;
>> +	uint64_t src_addr, dst_addr;
>> +	unsigned int num_dw;
>> +	int r, idx;
>> +
>> +	if (len != PAGE_SIZE)
>> +		return -EINVAL;
>> +
>> +	if (!adev->mman.sdma_access_ptr)
>> +		return -EACCES;
>> +
>> +	r = drm_dev_enter(adev_to_drm(adev), &idx);
>> +	if (r)
>> +		return r;
>> +
>> +	if (write)
>> +		memcpy(adev->mman.sdma_access_ptr, buf, len);
>> +
>> +	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
>> +	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4,
>> AMDGPU_IB_POOL_DELAYED, &job);
>> +	if (r)
>> +		goto out;
>> +
>> +	src_addr = write ? amdgpu_bo_gpu_offset(adev-
>>> mman.sdma_access_bo) :
>> +			amdgpu_bo_gpu_offset(abo);
>> +	dst_addr = write ? amdgpu_bo_gpu_offset(abo) :
>> +			amdgpu_bo_gpu_offset(adev-
>>> mman.sdma_access_bo);
>> +	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
>> PAGE_SIZE, false);
>> +
>> +	amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
>> +	WARN_ON(job->ibs[0].length_dw > num_dw);
>> +
>> +	r = amdgpu_job_submit(job, &adev->mman.entity,
>> AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
>> +	if (r) {
>> +		amdgpu_job_free(job);
>> +		goto out;
>> +	}
>> +
>> +	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
>> +		r = -ETIMEDOUT;
>> +	dma_fence_put(fence);
>> +
>> +	if (!(r || write))
>> +		memcpy(buf, adev->mman.sdma_access_ptr, len);
>> +out:
>> +	drm_dev_exit(idx);
>> +	return r;
>> +}
>> +
>> +static inline bool amdgpu_ttm_allow_post_mortem_debug(struct
>> amdgpu_device *adev)
>> +{
>> +	return amdgpu_gpu_recovery == 0 ||
>> +		adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
>> +		adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
>> +		adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
>> +		adev->video_timeout == MAX_SCHEDULE_TIMEOUT;
>> +}
>> +
>>   /**
>>    * amdgpu_ttm_access_memory - Read or Write memory that backs a
>> buffer object.
>>    *
>> @@ -1453,6 +1518,10 @@ static int amdgpu_ttm_access_memory(struct
>> ttm_buffer_object *bo,
>>   	if (bo->resource->mem_type != TTM_PL_VRAM)
>>   		return -EIO;
>>
>> +	if (!amdgpu_ttm_allow_post_mortem_debug(adev) &&
>> +			!amdgpu_ttm_access_memory_sdma(bo, offset, buf,
>> len, write))
>> +		return len;
>> +
>>   	amdgpu_res_first(bo->resource, offset, len, &cursor);
>>   	while (cursor.remaining) {
>>   		size_t count, size = cursor.size;
>> @@ -1793,6 +1862,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>   		return r;
>>   	}
>>
>> +	if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
>> +				AMDGPU_GEM_DOMAIN_GTT,
>> +				&adev->mman.sdma_access_bo, NULL,
>> +				adev->mman.sdma_access_ptr))
>> +		DRM_WARN("Debug VRAM access will use slowpath MM
>> access\n");
>> +
>>   	return 0;
>>   }
>>
>> @@ -1823,6 +1898,9 @@ void amdgpu_ttm_fini(struct amdgpu_device
>> *adev)
>>   	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
>>   	ttm_device_fini(&adev->mman.bdev);
>>   	adev->mman.initialized = false;
>> +	if (adev->mman.sdma_access_ptr)
>> +		amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo,
>> NULL,
>> +					&adev->mman.sdma_access_ptr);
>>   	DRM_INFO("amdgpu: ttm finalized\n");
>>   }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index 91a087f9dc7c..b0116c4a768f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -98,6 +98,10 @@ struct amdgpu_mman {
>>   	u64		fw_vram_usage_size;
>>   	struct amdgpu_bo	*fw_vram_usage_reserved_bo;
>>   	void		*fw_vram_usage_va;
>> +
>> +	/* PAGE_SIZE'd BO for process memory r/w over SDMA. */
>> +	struct amdgpu_bo	*sdma_access_bo;
>> +	void			*sdma_access_ptr;
>>   };
>>
>>   struct amdgpu_copy_mem {
>> @@ -193,5 +197,4 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct
>> amdgpu_device *adev, struct ttm_tt *ttm,
>>   int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int
>> mem_type);
>>
>>   void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
>> -
>>   #endif
>> --
>> 2.25.1



More information about the amd-gfx mailing list