[PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()
Lazar, Lijo
lijo.lazar at amd.com
Mon Jun 28 04:57:34 UTC 2021
On 6/28/2021 8:14 AM, Wang, Kevin(Yang) wrote:
> [AMD Official Use Only]
>
>
>
>
> ------------------------------------------------------------------------
> *From:* Lazar, Lijo <Lijo.Lazar at amd.com>
> *Sent:* Friday, June 25, 2021 9:28 PM
> *To:* Wang, Kevin(Yang) <Kevin1.Wang at amd.com>;
> amd-gfx at lists.freedesktop.org <amd-gfx at lists.freedesktop.org>
> *Cc:* Deucher, Alexander <Alexander.Deucher at amd.com>; Koenig, Christian
> <Christian.Koenig at amd.com>
> *Subject:* Re: [PATCH] drm/amdgpu: add non-aligned address supported in
> amdgpu_device_vram_access()
>
>
> On 6/25/2021 8:54 AM, Kevin Wang wrote:
>> 1. add non-aligned address support in amdgpu_device_vram_access()
>> 2. reduce duplicate code in amdgpu_ttm_access_memory()
>>
>> because the MM_INDEX{HI}/DATA are protected register, it is not working
>> in sriov environment when mmio protect feature is enabled (in some asics),
>> and the old function amdgpu_ttm_access_memory() will force using MM_INDEX/DATA
>> to handle non-aligned address by default, it will cause the register(MM_DATA)
>> is always return 0.
>>
>> with the patch, the memory will be accessed in the aper way first.
>> (in visible memory or enable pcie large-bar feature), then using
>> MM_INDEX{HI}/MM_DATA to access rest vram memroy.
>>
>> Signed-off-by: Kevin Wang <kevin1.wang at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 42 ++-----------
>> 3 files changed, 58 insertions(+), 55 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index d14b4968a026..8095d9a9c857 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
>> int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
>>
>> void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>> - uint32_t *buf, size_t size, bool write);
>> + void *buf, size_t size, bool write);
>> uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>> uint32_t reg, uint32_t acc_flags);
>> void amdgpu_device_wreg(struct amdgpu_device *adev,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index e6702d136a6d..2047e3c2b365 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>> amdgpu_acpi_is_power_shift_control_supported());
>> }
>>
>> +static inline void amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device *adev, loff_t pos,
>> + uint32_t *value, bool write)
>> +{
>> + BUG_ON(!IS_ALIGNED(pos, 4));
>> +
>> + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
>> + WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
>> + if (write)
>> + WREG32_NO_KIQ(mmMM_DATA, *value);
>> + else
>> + *value = RREG32_NO_KIQ(mmMM_DATA);
>> +}
>> +
>> +static void amdgpu_device_vram_mmio_access_locked(struct amdgpu_device *adev, loff_t pos,
>> + void *buf, size_t size, bool write)
>> +{
>> + while (size) {
>> + uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
>> + uint64_t bytes = 4 - (pos & 0x3);
>> + uint32_t shift = (pos & 0x3) * 8;
>> + uint32_t mask = 0xffffffff << shift;
>> + uint32_t value = 0;
>> +
>> + if (size < bytes) {
>> + mask &= 0xffffffff >> (bytes - size) * 8;
>> + bytes = size;
>> + }
>> +
>> + if (mask != 0xffffffff) {
>> + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, false);
>> + if (write) {
>> + value &= ~mask;
>> + value |= (*(uint32_t *)buf << shift) & mask;
>> + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, true);
>> + } else {
>> + value = (value & mask) >> shift;
>> + memcpy(buf, &value, bytes);
>> + }
>> + } else {
>> + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, buf, write);
>> + }
>> +
>> + pos += bytes;
>> + buf += bytes;
>> + size -= bytes;
>> + }
>> +}
>> +
>> /*
>> * VRAM access helper functions
>> */
>> @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
>> * @write: true - write to vram, otherwise - read from vram
>> */
>> void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>> - uint32_t *buf, size_t size, bool write)
>> + void *buf, size_t size, bool write)
>> {
>> unsigned long flags;
>> - uint32_t hi = ~0;
>> uint64_t last;
>>
>> -
>> #ifdef CONFIG_64BIT
>> last = min(pos + size, adev->gmc.visible_vram_size);
>> if (last > pos) {
>> @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>> return;
>>
>> pos += count;
>> - buf += count / 4;
>> + buf += count;
>> size -= count;
>> }
>> #endif
>> -
>> spin_lock_irqsave(&adev->mmio_idx_lock, flags);
>> - for (last = pos + size; pos < last; pos += 4) {
>> - uint32_t tmp = pos >> 31;
>> -
>> - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
>> - if (tmp != hi) {
>> - WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
>> - hi = tmp;
>> - }
>> - if (write)
>> - WREG32_NO_KIQ(mmMM_DATA, *buf++);
>> - else
>> - *buf++ = RREG32_NO_KIQ(mmMM_DATA);
>> - }
>> + amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, write);
>> spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
>> }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 6297363ab740..cf5b8bdc2dd3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>> struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>> struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>> struct amdgpu_res_cursor cursor;
>> - unsigned long flags;
>> - uint32_t value = 0;
>> int ret = 0;
>>
>> if (bo->mem.mem_type != TTM_PL_VRAM)
>> @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>>
>> amdgpu_res_first(&bo->mem, offset, len, &cursor);
>> while (cursor.remaining) {
>> - uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
>> - uint64_t bytes = 4 - (cursor.start & 3);
>> - uint32_t shift = (cursor.start & 3) * 8;
>> - uint32_t mask = 0xffffffff << shift;
>> -
>> - if (cursor.size < bytes) {
>> - mask &= 0xffffffff >> (bytes - cursor.size) * 8;
>> - bytes = cursor.size;
>> - }
>> + amdgpu_device_vram_access(adev, cursor.start,
>> + buf, cursor.size,
>> + write);
>>
>> - if (mask != 0xffffffff) {
>> - spin_lock_irqsave(&adev->mmio_idx_lock, flags);
>
> The new logic seems to skip this mmio_idx_lock for accessing index/data
> pair ragisters. BTW, where is the visible memory aperture check?
>
> Thanks
> Lijo
>
> [Keivn]:
>
> Hi Lijo,
> the mmio_idx_lock lock is in amdgpu_device_vram_access() function.
> this patch is extending the amdgpu_device_vram_access() function to
> support un-aligned address support.
>
> Best Regards,
> Kevin
Ah, now I see..Thanks.
For consecutive mem accesses, this is a good optimization (as in earlier
code) as crossing this boundary will be rare. I suggest to keep this.
- uint32_t tmp = pos >> 31;
-
- WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
- if (tmp != hi) {
- WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
- hi = tmp;
- }
Thanks,
Lijo
>> - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
>> - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
>> - value = RREG32_NO_KIQ(mmMM_DATA);
>> - if (write) {
>> - value &= ~mask;
>> - value |= (*(uint32_t *)buf << shift) & mask;
>> - WREG32_NO_KIQ(mmMM_DATA, value);
>> - }
>> - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
>> - if (!write) {
>> - value = (value & mask) >> shift;
>> - memcpy(buf, &value, bytes);
>> - }
>> - } else {
>> - bytes = cursor.size & ~0x3ULL;
>> - amdgpu_device_vram_access(adev, cursor.start,
>> - (uint32_t *)buf, bytes,
>> - write);
>> - }
>> -
>> - ret += bytes;
>> - buf = (uint8_t *)buf + bytes;
>> - amdgpu_res_next(&cursor, bytes);
>> + ret += cursor.size;
>> + buf += cursor.size;
>> + amdgpu_res_next(&cursor, cursor.size);
>> }
>>
>> return ret;
>>
More information about the amd-gfx
mailing list