[PATCH 1/2] Fix incorrect calculations of the wptr of the doorbells

Christian König ckoenig.leichtzumerken at gmail.com
Mon Apr 25 12:33:27 UTC 2022


Am 25.04.22 um 14:19 schrieb Haohui Mai:
> Dropped the changes of older generations.
>
> Signed-off-by: Haohui Mai <ricetons at gmail.com>

Please update the commit messages to include all the background we just 
discussed.

With that done the series is Reviewed-by: Christian König 
<christian.koenig at amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
>   3 files changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index d7e8f7232364..ff86c43b63d1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> amdgpu_ring *ring)
>
>                  DRM_DEBUG("Using doorbell -- "
>                                  "wptr_offs == 0x%08x "
> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>                                  ring->wptr_offs,
>                                  lower_32_bits(ring->wptr << 2),
>                                  upper_32_bits(ring->wptr << 2));
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index a8d49c005f73..627eb1f147c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> amdgpu_ring *ring)
>          if (ring->use_doorbell) {
>                  DRM_DEBUG("Using doorbell -- "
>                                  "wptr_offs == 0x%08x "
> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>                                  ring->wptr_offs,
>                                  lower_32_bits(ring->wptr << 2),
>                                  upper_32_bits(ring->wptr << 2));
> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
>
>                  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> register write for wptr */
>                          WREG32(sdma_v5_0_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR),
> -                              lower_32_bits(ring->wptr) << 2);
> +                              lower_32_bits(ring->wptr << 2));
>                          WREG32(sdma_v5_0_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR_HI),
> -                              upper_32_bits(ring->wptr) << 2);
> +                              upper_32_bits(ring->wptr << 2));
>                  }
>
>                  doorbell = RREG32_SOC15_IP(GC,
> sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 824eace69884..a5eb82bfeaa8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> amdgpu_ring *ring)
>          if (ring->use_doorbell) {
>                  DRM_DEBUG("Using doorbell -- "
>                                  "wptr_offs == 0x%08x "
> -                               "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> -                               "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> +                               "lower_32_bits(ring->wptr << 2) == 0x%08x "
> +                               "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>                                  ring->wptr_offs,
>                                  lower_32_bits(ring->wptr << 2),
>                                  upper_32_bits(ring->wptr << 2));
> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>                  WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>
>                  if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use
> register write for wptr */
> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> -                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
> +                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
>                  }
>
>                  doorbell = RREG32_SOC15_IP(GC,
> sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> --
> 2.25.1
>
> On Mon, Apr 25, 2022 at 7:52 PM Christian König
> <ckoenig.leichtzumerken at gmail.com> wrote:
>> Am 25.04.22 um 13:47 schrieb Haohui Mai:
>>> Updated the commit messages based on the previous discussion.
>> Please drop all the changes for pre SDMA v4 hardware (e.g. the ones with
>> only a 32bit register), so that we only have the changes for the 64bit
>> hw versions in here.
>>
>> Apart from that looks good to me.
>>
>> Thanks,
>> Christian.
>>
>>> Signed-off-by: Haohui Mai <ricetons at gmail.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
>>>    drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
>>>    7 files changed, 20 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> index c8ebd108548d..df863d346995 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
>>>     struct amdgpu_device *adev = ring->adev;
>>>
>>>     WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
>>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
>>>    }
>>>
>>>    static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>>> @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
>>>     WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
>>>
>>>     ring->wptr = 0;
>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>>>
>>>     /* enable DMA RB */
>>>     WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> index 1d8bbcbd7a37..b83fd00466fe 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>    {
>>>     struct amdgpu_device *adev = ring->adev;
>>>
>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> lower_32_bits(ring->wptr) << 2);
>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> lower_32_bits(ring->wptr << 2));
>>>    }
>>>
>>>    static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>>> @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
>>>     WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
>>>
>>>     ring->wptr = 0;
>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>>>
>>>     /* enable DMA RB */
>>>     rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> index 4ef4feff5649..557a7d5174b0 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>     if (ring->use_doorbell) {
>>>     u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
>>>     /* XXX check if swapping is necessary on BE */
>>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>>> - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
>>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
>>> + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
>>>     } else if (ring->use_pollmem) {
>>>     u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
>>>
>>> - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
>>> + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
>>>     } else {
>>> - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> lower_32_bits(ring->wptr) << 2);
>>> + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
>>> lower_32_bits(ring->wptr << 2));
>>>     }
>>>    }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> index d7e8f7232364..ff86c43b63d1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>
>>>     DRM_DEBUG("Using doorbell -- "
>>>     "wptr_offs == 0x%08x "
>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>     ring->wptr_offs,
>>>     lower_32_bits(ring->wptr << 2),
>>>     upper_32_bits(ring->wptr << 2));
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> index a8d49c005f73..627eb1f147c2 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>     if (ring->use_doorbell) {
>>>     DRM_DEBUG("Using doorbell -- "
>>>     "wptr_offs == 0x%08x "
>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>     ring->wptr_offs,
>>>     lower_32_bits(ring->wptr << 2),
>>>     upper_32_bits(ring->wptr << 2));
>>> @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
>>>
>>>     if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
>>> for wptr */
>>>     WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
>>> -        lower_32_bits(ring->wptr) << 2);
>>> +        lower_32_bits(ring->wptr << 2));
>>>     WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
>>> -        upper_32_bits(ring->wptr) << 2);
>>> +        upper_32_bits(ring->wptr << 2));
>>>     }
>>>
>>>     doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
>>> mmSDMA0_GFX_DOORBELL));
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>> index 824eace69884..a5eb82bfeaa8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
>>> @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
>>> amdgpu_ring *ring)
>>>     if (ring->use_doorbell) {
>>>     DRM_DEBUG("Using doorbell -- "
>>>     "wptr_offs == 0x%08x "
>>> - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
>>> - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
>>> + "lower_32_bits(ring->wptr << 2) == 0x%08x "
>>> + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
>>>     ring->wptr_offs,
>>>     lower_32_bits(ring->wptr << 2),
>>>     upper_32_bits(ring->wptr << 2));
>>> @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>>>     WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
>>> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>>>
>>>     if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
>>> for wptr */
>>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
>>> lower_32_bits(ring->wptr) << 2);
>>> - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
>>> upper_32_bits(ring->wptr) << 2);
>>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
>>> lower_32_bits(ring->wptr << 2));
>>> + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
>>> upper_32_bits(ring->wptr << 2));
>>>     }
>>>
>>>     doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
>>> mmSDMA0_GFX_DOORBELL));
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>> b/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>> index 195b45bcb8ad..0af11d3b00e7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
>>> @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
>>>     u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
>>>
>>>     WREG32(DMA_RB_WPTR + sdma_offsets[me],
>>> -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
>>> +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
>>>    }
>>>
>>>    static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
>>> @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
>>>     WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
>>>
>>>     ring->wptr = 0;
>>> - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>>> + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
>>>     WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
>>>
>>>     ring->sched.ready = true;
>>> --
>>> 2.25.1



More information about the amd-gfx mailing list