[PATCH] drm/amdgpu: improve debug VRAM access performance using sdma
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Mar 20 17:01:54 UTC 2023
I don't think so. Have we recently re-ordered something here?
Christian.
Am 20.03.23 um 08:05 schrieb Quan, Evan:
> [AMD Official Use Only - General]
>
> I happened to find that the sdma_access_bo allocation from GTT seems to be performed before gart is ready.
> That makes the "amdgpu_gart_map" step get skipped since adev->gart.ptr is still NULL.
> Is that done intentionally ?
>
> Evan
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of
>> Jonathan Kim
>> Sent: Wednesday, January 5, 2022 3:12 AM
>> To: amd-gfx at lists.freedesktop.org
>> Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Kim, Jonathan
>> <Jonathan.Kim at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>
>> Subject: [PATCH] drm/amdgpu: improve debug VRAM access performance
>> using sdma
>>
>> For better performance during VRAM access for debugged processes, do
>> read/write copies over SDMA.
>>
>> In order to fulfill post-mortem debugging on a broken device, fall back to
>> stable MMIO access when gpu recovery is disabled or when job submission
>> timeouts are set to max. Failed SDMA access should automatically fall
>> back to MMIO access.
>>
>> Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid
>> page-table updates and TLB flushes on access.
>>
>> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78
>> +++++++++++++++++++++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 5 +-
>> 2 files changed, 82 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 367abed1d6e6..512df4c09772 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -48,6 +48,7 @@
>> #include <drm/ttm/ttm_range_manager.h>
>>
>> #include <drm/amdgpu_drm.h>
>> +#include <drm/drm_drv.h>
>>
>> #include "amdgpu.h"
>> #include "amdgpu_object.h"
>> @@ -1429,6 +1430,70 @@ static void amdgpu_ttm_vram_mm_access(struct
>> amdgpu_device *adev, loff_t pos,
>> }
>> }
>>
>> +static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object
>> *bo,
>> + unsigned long offset, void *buf, int
>> len, int write)
>> +{
>> + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>> + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>> + struct amdgpu_job *job;
>> + struct dma_fence *fence;
>> + uint64_t src_addr, dst_addr;
>> + unsigned int num_dw;
>> + int r, idx;
>> +
>> + if (len != PAGE_SIZE)
>> + return -EINVAL;
>> +
>> + if (!adev->mman.sdma_access_ptr)
>> + return -EACCES;
>> +
>> + r = drm_dev_enter(adev_to_drm(adev), &idx);
>> + if (r)
>> + return r;
>> +
>> + if (write)
>> + memcpy(adev->mman.sdma_access_ptr, buf, len);
>> +
>> + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
>> + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4,
>> AMDGPU_IB_POOL_DELAYED, &job);
>> + if (r)
>> + goto out;
>> +
>> + src_addr = write ? amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo) :
>> + amdgpu_bo_gpu_offset(abo);
>> + dst_addr = write ? amdgpu_bo_gpu_offset(abo) :
>> + amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
>> + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
>> PAGE_SIZE, false);
>> +
>> + amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
>> + WARN_ON(job->ibs[0].length_dw > num_dw);
>> +
>> + r = amdgpu_job_submit(job, &adev->mman.entity,
>> AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
>> + if (r) {
>> + amdgpu_job_free(job);
>> + goto out;
>> + }
>> +
>> + if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
>> + r = -ETIMEDOUT;
>> + dma_fence_put(fence);
>> +
>> + if (!(r || write))
>> + memcpy(buf, adev->mman.sdma_access_ptr, len);
>> +out:
>> + drm_dev_exit(idx);
>> + return r;
>> +}
>> +
>> +static inline bool amdgpu_ttm_allow_post_mortem_debug(struct
>> amdgpu_device *adev)
>> +{
>> + return amdgpu_gpu_recovery == 0 ||
>> + adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
>> + adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
>> + adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
>> + adev->video_timeout == MAX_SCHEDULE_TIMEOUT;
>> +}
>> +
>> /**
>> * amdgpu_ttm_access_memory - Read or Write memory that backs a
>> buffer object.
>> *
>> @@ -1453,6 +1518,10 @@ static int amdgpu_ttm_access_memory(struct
>> ttm_buffer_object *bo,
>> if (bo->resource->mem_type != TTM_PL_VRAM)
>> return -EIO;
>>
>> + if (!amdgpu_ttm_allow_post_mortem_debug(adev) &&
>> + !amdgpu_ttm_access_memory_sdma(bo, offset, buf,
>> len, write))
>> + return len;
>> +
>> amdgpu_res_first(bo->resource, offset, len, &cursor);
>> while (cursor.remaining) {
>> size_t count, size = cursor.size;
>> @@ -1793,6 +1862,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>> return r;
>> }
>>
>> + if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
>> + AMDGPU_GEM_DOMAIN_GTT,
>> + &adev->mman.sdma_access_bo, NULL,
>> + adev->mman.sdma_access_ptr))
>> + DRM_WARN("Debug VRAM access will use slowpath MM access\n");
>> +
>> return 0;
>> }
>>
>> @@ -1823,6 +1898,9 @@ void amdgpu_ttm_fini(struct amdgpu_device
>> *adev)
>> ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
>> ttm_device_fini(&adev->mman.bdev);
>> adev->mman.initialized = false;
>> + if (adev->mman.sdma_access_ptr)
>> + amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo,
>> NULL,
>> + &adev->mman.sdma_access_ptr);
>> DRM_INFO("amdgpu: ttm finalized\n");
>> }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index 91a087f9dc7c..b0116c4a768f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -98,6 +98,10 @@ struct amdgpu_mman {
>> u64 fw_vram_usage_size;
>> struct amdgpu_bo *fw_vram_usage_reserved_bo;
>> void *fw_vram_usage_va;
>> +
>> + /* PAGE_SIZE'd BO for process memory r/w over SDMA. */
>> + struct amdgpu_bo *sdma_access_bo;
>> + void *sdma_access_ptr;
>> };
>>
>> struct amdgpu_copy_mem {
>> @@ -193,5 +197,4 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct
>> amdgpu_device *adev, struct ttm_tt *ttm,
>> int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int
>> mem_type);
>>
>> void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
>> -
>> #endif
>> --
>> 2.25.1
More information about the amd-gfx
mailing list