[RFC PATCH v2 3/3] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()
Christian König
ckoenig.leichtzumerken at gmail.com
Fri Jul 16 07:43:18 UTC 2021
Am 16.07.21 um 05:10 schrieb Kevin Wang:
> 1. using vram aper to access vram if possible
> 2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
> enabled.
>
> Signed-off-by: Kevin Wang <kevin1.wang at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 91 +++++++++++++++----------
> 1 file changed, 54 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index f4ff3c9350b3..62ea5089b4f0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1407,6 +1407,56 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
> return ttm_bo_eviction_valuable(bo, place);
> }
>
> +static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
> + void *buf, size_t size, bool write)
> +{
> + while (size) {
> + uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
> + uint64_t bytes = 4 - (pos & 0x3);
> + uint32_t shift = (pos & 0x3) * 8;
> + uint32_t mask = 0xffffffff << shift;
> + uint32_t value = 0;
> +
> + if (size < bytes) {
> + mask &= 0xffffffff >> (bytes - size) * 8;
> + bytes = size;
> + }
> +
> + if (mask != 0xffffffff) {
> + amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
> + if (write) {
> + value &= ~mask;
> + value |= (*(uint32_t *)buf << shift) & mask;
> + amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
> + } else {
> + value = (value & mask) >> shift;
> + memcpy(buf, &value, bytes);
> + }
> + } else {
> + amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
> + }
> +
> + pos += bytes;
> + buf += bytes;
> + size -= bytes;
> + }
> +}
> +
> +static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos,
> + void *buf, size_t size, bool write)
> +{
> + size_t count;
> +
> + count = amdgpu_device_aper_access(adev, pos, buf, size, write);
> + size -= count;
> + if (size) {
> + /* using MM to access rest vram and handle un-aligned address */
> + pos += count;
> + buf += count;
> + amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
> + }
> +}
Just inline that function into the caller, don't really see the need to
have that separated.
Apart from that looks good to me.
Regards,
Christian.
> +
> /**
> * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
> *
> @@ -1426,8 +1476,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
> struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> struct amdgpu_res_cursor cursor;
> - unsigned long flags;
> - uint32_t value = 0;
> int ret = 0;
>
> if (bo->mem.mem_type != TTM_PL_VRAM)
> @@ -1435,41 +1483,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>
> amdgpu_res_first(&bo->mem, offset, len, &cursor);
> while (cursor.remaining) {
> - uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
> - uint64_t bytes = 4 - (cursor.start & 3);
> - uint32_t shift = (cursor.start & 3) * 8;
> - uint32_t mask = 0xffffffff << shift;
> -
> - if (cursor.size < bytes) {
> - mask &= 0xffffffff >> (bytes - cursor.size) * 8;
> - bytes = cursor.size;
> - }
> -
> - if (mask != 0xffffffff) {
> - spin_lock_irqsave(&adev->mmio_idx_lock, flags);
> - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
> - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> - value = RREG32_NO_KIQ(mmMM_DATA);
> - if (write) {
> - value &= ~mask;
> - value |= (*(uint32_t *)buf << shift) & mask;
> - WREG32_NO_KIQ(mmMM_DATA, value);
> - }
> - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
> - if (!write) {
> - value = (value & mask) >> shift;
> - memcpy(buf, &value, bytes);
> - }
> - } else {
> - bytes = cursor.size & ~0x3ULL;
> - amdgpu_device_vram_access(adev, cursor.start,
> - (uint32_t *)buf, bytes,
> - write);
> - }
> -
> - ret += bytes;
> - buf = (uint8_t *)buf + bytes;
> - amdgpu_res_next(&cursor, bytes);
> + amdgpu_ttm_vram_access(adev, cursor.start, buf, cursor.size, write);
> + ret += cursor.size;
> + buf += cursor.size;
> + amdgpu_res_next(&cursor, cursor.size);
> }
>
> return ret;
More information about the amd-gfx
mailing list