<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Arial;font-size:10pt;color:#0000FF;margin:15pt;" align="Left">
[AMD Official Use Only]<br>
</p>
<br>
<div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div id="appendonsend"></div>
<div style="font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
<br>
</div>
<hr tabindex="-1" style="display:inline-block; width:98%">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" color="#000000" style="font-size:11pt"><b>From:</b> Lazar, Lijo <Lijo.Lazar@amd.com><br>
<b>Sent:</b> Friday, June 25, 2021 9:28 PM<br>
<b>To:</b> Wang, Kevin(Yang) <Kevin1.Wang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org><br>
<b>Cc:</b> Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian <Christian.Koenig@amd.com><br>
<b>Subject:</b> Re: [PATCH] drm/amdgpu: add non-aligned address supported in amdgpu_device_vram_access()</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt">
<div class="PlainText"><br>
<br>
On 6/25/2021 8:54 AM, Kevin Wang wrote:<br>
> 1. add non-aligned address support in amdgpu_device_vram_access()<br>
> 2. reduce duplicate code in amdgpu_ttm_access_memory()<br>
> <br>
> because the MM_INDEX{HI}/DATA are protected register, it is not working<br>
> in sriov environment when mmio protect feature is enabled (in some asics),<br>
> and the old function amdgpu_ttm_access_memory() will force using MM_INDEX/DATA<br>
> to handle non-aligned address by default, it will cause the register(MM_DATA)<br>
> is always return 0.<br>
> <br>
> with the patch, the memory will be accessed in the aper way first.<br>
> (in visible memory or enable pcie large-bar feature), then using<br>
> MM_INDEX{HI}/MM_DATA to access rest vram memroy.<br>
> <br>
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com><br>
> ---<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 69 ++++++++++++++++------<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 42 ++-----------<br>
> 3 files changed, 58 insertions(+), 55 deletions(-)<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> index d14b4968a026..8095d9a9c857 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> @@ -1103,7 +1103,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev);<br>
> int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);<br>
> <br>
> void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,<br>
> - uint32_t *buf, size_t size, bool write);<br>
> + void *buf, size_t size, bool write);<br>
> uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,<br>
> uint32_t reg, uint32_t acc_flags);<br>
> void amdgpu_device_wreg(struct amdgpu_device *adev,<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> index e6702d136a6d..2047e3c2b365 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> @@ -280,6 +280,54 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)<br>
> amdgpu_acpi_is_power_shift_control_supported());<br>
> }<br>
> <br>
> +static inline void amdgpu_device_vram_mmio_align_access_locked(struct amdgpu_device *adev, loff_t pos,<br>
> + uint32_t *value, bool write)<br>
> +{<br>
> + BUG_ON(!IS_ALIGNED(pos, 4));<br>
> +<br>
> + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);<br>
> + WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);<br>
> + if (write)<br>
> + WREG32_NO_KIQ(mmMM_DATA, *value);<br>
> + else<br>
> + *value = RREG32_NO_KIQ(mmMM_DATA);<br>
> +}<br>
> +<br>
> +static void amdgpu_device_vram_mmio_access_locked(struct amdgpu_device *adev, loff_t pos,<br>
> + void *buf, size_t size, bool write)<br>
> +{<br>
> + while (size) {<br>
> + uint64_t aligned_pos = ALIGN_DOWN(pos, 4);<br>
> + uint64_t bytes = 4 - (pos & 0x3);<br>
> + uint32_t shift = (pos & 0x3) * 8;<br>
> + uint32_t mask = 0xffffffff << shift;<br>
> + uint32_t value = 0;<br>
> +<br>
> + if (size < bytes) {<br>
> + mask &= 0xffffffff >> (bytes - size) * 8;<br>
> + bytes = size;<br>
> + }<br>
> +<br>
> + if (mask != 0xffffffff) {<br>
> + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, false);<br>
> + if (write) {<br>
> + value &= ~mask;<br>
> + value |= (*(uint32_t *)buf << shift) & mask;<br>
> + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, &value, true);<br>
> + } else {<br>
> + value = (value & mask) >> shift;<br>
> + memcpy(buf, &value, bytes);<br>
> + }<br>
> + } else {<br>
> + amdgpu_device_vram_mmio_align_access_locked(adev, aligned_pos, buf, write);<br>
> + }<br>
> +<br>
> + pos += bytes;<br>
> + buf += bytes;<br>
> + size -= bytes;<br>
> + }<br>
> +}<br>
> +<br>
> /*<br>
> * VRAM access helper functions<br>
> */<br>
> @@ -294,13 +342,11 @@ bool amdgpu_device_supports_smart_shift(struct drm_device *dev)<br>
> * @write: true - write to vram, otherwise - read from vram<br>
> */<br>
> void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,<br>
> - uint32_t *buf, size_t size, bool write)<br>
> + void *buf, size_t size, bool write)<br>
> {<br>
> unsigned long flags;<br>
> - uint32_t hi = ~0;<br>
> uint64_t last;<br>
> <br>
> -<br>
> #ifdef CONFIG_64BIT<br>
> last = min(pos + size, adev->gmc.visible_vram_size);<br>
> if (last > pos) {<br>
> @@ -321,25 +367,12 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,<br>
> return;<br>
> <br>
> pos += count;<br>
> - buf += count / 4;<br>
> + buf += count;<br>
> size -= count;<br>
> }<br>
> #endif<br>
> -<br>
> spin_lock_irqsave(&adev->mmio_idx_lock, flags);<br>
> - for (last = pos + size; pos < last; pos += 4) {<br>
> - uint32_t tmp = pos >> 31;<br>
> -<br>
> - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);<br>
> - if (tmp != hi) {<br>
> - WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);<br>
> - hi = tmp;<br>
> - }<br>
> - if (write)<br>
> - WREG32_NO_KIQ(mmMM_DATA, *buf++);<br>
> - else<br>
> - *buf++ = RREG32_NO_KIQ(mmMM_DATA);<br>
> - }<br>
> + amdgpu_device_vram_mmio_access_locked(adev, pos, buf, size, write);<br>
> spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);<br>
> }<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c<br>
> index 6297363ab740..cf5b8bdc2dd3 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c<br>
> @@ -1430,8 +1430,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,<br>
> struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);<br>
> struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);<br>
> struct amdgpu_res_cursor cursor;<br>
> - unsigned long flags;<br>
> - uint32_t value = 0;<br>
> int ret = 0;<br>
> <br>
> if (bo->mem.mem_type != TTM_PL_VRAM)<br>
> @@ -1439,41 +1437,13 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,<br>
> <br>
> amdgpu_res_first(&bo->mem, offset, len, &cursor);<br>
> while (cursor.remaining) {<br>
> - uint64_t aligned_pos = cursor.start & ~(uint64_t)3;<br>
> - uint64_t bytes = 4 - (cursor.start & 3);<br>
> - uint32_t shift = (cursor.start & 3) * 8;<br>
> - uint32_t mask = 0xffffffff << shift;<br>
> -<br>
> - if (cursor.size < bytes) {<br>
> - mask &= 0xffffffff >> (bytes - cursor.size) * 8;<br>
> - bytes = cursor.size;<br>
> - }<br>
> + amdgpu_device_vram_access(adev, cursor.start,<br>
> + buf, cursor.size,<br>
> + write);<br>
> <br>
> - if (mask != 0xffffffff) {<br>
> - spin_lock_irqsave(&adev->mmio_idx_lock, flags);<br>
<br>
The new logic seems to skip this mmio_idx_lock for accessing index/data <br>
pair ragisters. BTW, where is the visible memory aperture check?<br>
<br>
Thanks<br>
Lijo</div>
<div class="PlainText"><br>
</div>
<div class="PlainText"><span style="font-family: "segoe ui", "segoe ui web (west european)", "segoe ui", -apple-system, blinkmacsystemfont, roboto, "helvetica neue", sans-serif; font-size: 11pt; color: rgb(0, 0, 0); background-color: rgba(0, 0, 0, 0);">[Keivn]:</span></div>
<div class="PlainText"><span style="font-family: "segoe ui", "segoe ui web (west european)", "segoe ui", -apple-system, blinkmacsystemfont, roboto, "helvetica neue", sans-serif; font-size: 11pt; color: rgb(0, 0, 0); background-color: rgba(0, 0, 0, 0);"><br>
</span></div>
<div class="PlainText"><span style="font-family: "segoe ui", "segoe ui web (west european)", "segoe ui", -apple-system, blinkmacsystemfont, roboto, "helvetica neue", sans-serif; font-size: 11pt; color: rgb(0, 0, 0); background-color: rgba(0, 0, 0, 0);">Hi Lijo,</span><br>
</div>
<div class="PlainText"><span style="font-family: "segoe ui", "segoe ui web (west european)", "segoe ui", -apple-system, blinkmacsystemfont, roboto, "helvetica neue", sans-serif; font-size: 11pt; color: rgb(0, 0, 0); background-color: rgba(0, 0, 0, 0);">the
mmio_idx_lock lock is in </span><span style="color: rgb(0, 0, 0); background-color: rgba(0, 0, 0, 0); display: inline !important; font-family: "segoe ui", "segoe ui web (west european)", "segoe ui", -apple-system, blinkmacsystemfont, roboto, "helvetica neue", sans-serif; font-size: 11pt;">amdgpu_device_vram_access</span><span style="color: rgb(0, 0, 0); background-color: rgba(0, 0, 0, 0); display: inline !important; font-family: "segoe ui", "segoe ui web (west european)", "segoe ui", -apple-system, blinkmacsystemfont, roboto, "helvetica neue", sans-serif; font-size: 11pt;">()
function.</span></div>
<div class="PlainText"><font color="#000000"><span style="font-family: "segoe ui", "segoe ui web (west european)", "segoe ui", -apple-system, blinkmacsystemfont, roboto, "helvetica neue", sans-serif; font-size: 11pt; color: rgb(0, 0, 0); background-color: rgba(0, 0, 0, 0);">this
patch is extending the </span><span style="background-color: rgba(0, 0, 0, 0); font-family: "segoe ui", "segoe ui web (west european)", "segoe ui", -apple-system, blinkmacsystemfont, roboto, "helvetica neue", sans-serif; font-size: 11pt; color: rgb(0, 0, 0); display: inline !important;">amdgpu_device_vram_access()
function to support un-aligned address support.</span></font></div>
<div class="PlainText"><br>
</div>
<div class="PlainText">Best Regards,</div>
<div class="PlainText">Kevin</div>
<div class="PlainText">> - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);<br>
> - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);<br>
> - value = RREG32_NO_KIQ(mmMM_DATA);<br>
> - if (write) {<br>
> - value &= ~mask;<br>
> - value |= (*(uint32_t *)buf << shift) & mask;<br>
> - WREG32_NO_KIQ(mmMM_DATA, value);<br>
> - }<br>
> - spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);<br>
> - if (!write) {<br>
> - value = (value & mask) >> shift;<br>
> - memcpy(buf, &value, bytes);<br>
> - }<br>
> - } else {<br>
> - bytes = cursor.size & ~0x3ULL;<br>
> - amdgpu_device_vram_access(adev, cursor.start,<br>
> - (uint32_t *)buf, bytes,<br>
> - write);<br>
> - }<br>
> -<br>
> - ret += bytes;<br>
> - buf = (uint8_t *)buf + bytes;<br>
> - amdgpu_res_next(&cursor, bytes);<br>
> + ret += cursor.size;<br>
> + buf += cursor.size;<br>
> + amdgpu_res_next(&cursor, cursor.size);<br>
> }<br>
> <br>
> return ret;<br>
> <br>
</div>
</span></font></div>
</div>
</body>
</html>