[PATCH] Set the 64-bit address of the wptr of SDMA doorbell properly

Haohui Mai ricetons at gmail.com
Mon Apr 25 11:14:39 UTC 2022


Sorry for the confusion. I misread the code, but it still seems to me
it is a valid issue. What the patch tries to do is to fix the
following pattern:

-                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
-                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+                       WREG32(sdma_v5_2_get_reg_offset(adev, i,
mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));

I agree with you that ring->wptr is an offset into the ring. Just
looking at the lines above, they seem to be incorrect when ring->wptr
is 1G (1 << 30) or larger, because shifting the already-truncated
32-bit half left by 2 drops the top two bits.

As you pointed out, ring->wptr cannot be larger than (1 << 24), so
this can be resolved by either (1) fixing the patterns as in the provided
patch, or (2) clamping the results to (1 << 24) - 1 and getting rid of
lower_32_bits() / upper_32_bits() altogether.

What's your suggestion for moving forward?

Thanks,
Haohui

On Mon, Apr 25, 2022 at 7:02 PM Christian König
<ckoenig.leichtzumerken at gmail.com> wrote:
>
> Am 25.04.22 um 11:15 schrieb Haohui Mai:
> > Computing the address of the doorbell should be done before instead of after
> > separating the 64-bit address into the higher and lower half. The
> > current code sets the MMIO registers incorrectly if the address of the
> > doorbell is above 1G.
>
> That doesn't make any sense at all. The address of the doorbell is
> completely irrelevant to the value you write into it.
>
> What we could do is to stop using the lower_32_bits() function, since
> the WPTR can't handle more than 16, 20 or 24 bits (IIRC) depending on hw
> generation anyway.
>
> Regards,
> Christian.
>
> >
> > Signed-off-by: Haohui Mai <ricetons at gmail.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 8 ++++----
> >   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> >   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 8 ++++----
> >   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8 ++++----
> >   drivers/gpu/drm/amd/amdgpu/si_dma.c    | 4 ++--
> >   7 files changed, 20 insertions(+), 20 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > index c8ebd108548d..df863d346995 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
> >    struct amdgpu_device *adev = ring->adev;
> >
> >    WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> > +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
> >   }
> >
> >   static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> > @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
> >    WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
> >
> >    ring->wptr = 0;
> > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >
> >    /* enable DMA RB */
> >    WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > index 1d8bbcbd7a37..b83fd00466fe 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >   {
> >    struct amdgpu_device *adev = ring->adev;
> >
> > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > lower_32_bits(ring->wptr) << 2);
> > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > lower_32_bits(ring->wptr << 2));
> >   }
> >
> >   static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> > @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
> >    WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
> >
> >    ring->wptr = 0;
> > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >
> >    /* enable DMA RB */
> >    rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > index 4ef4feff5649..557a7d5174b0 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >    if (ring->use_doorbell) {
> >    u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
> >    /* XXX check if swapping is necessary on BE */
> > - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> > - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
> > + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> > + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr << 2));
> >    } else if (ring->use_pollmem) {
> >    u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
> >
> > - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
> > + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr << 2)));
> >    } else {
> > - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > lower_32_bits(ring->wptr) << 2);
> > + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
> > lower_32_bits(ring->wptr << 2));
> >    }
> >   }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > index d7e8f7232364..ff86c43b63d1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >
> >    DRM_DEBUG("Using doorbell -- "
> >    "wptr_offs == 0x%08x "
> > - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >    ring->wptr_offs,
> >    lower_32_bits(ring->wptr << 2),
> >    upper_32_bits(ring->wptr << 2));
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > index a8d49c005f73..627eb1f147c2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >    if (ring->use_doorbell) {
> >    DRM_DEBUG("Using doorbell -- "
> >    "wptr_offs == 0x%08x "
> > - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >    ring->wptr_offs,
> >    lower_32_bits(ring->wptr << 2),
> >    upper_32_bits(ring->wptr << 2));
> > @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
> >
> >    if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> > for wptr */
> >    WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> > -        lower_32_bits(ring->wptr) << 2);
> > +        lower_32_bits(ring->wptr << 2));
> >    WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> > -        upper_32_bits(ring->wptr) << 2);
> > +        upper_32_bits(ring->wptr << 2));
> >    }
> >
> >    doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i,
> > mmSDMA0_GFX_DOORBELL));
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > index 824eace69884..a5eb82bfeaa8 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> > @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct
> > amdgpu_ring *ring)
> >    if (ring->use_doorbell) {
> >    DRM_DEBUG("Using doorbell -- "
> >    "wptr_offs == 0x%08x "
> > - "lower_32_bits(ring->wptr) << 2 == 0x%08x "
> > - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
> > + "lower_32_bits(ring->wptr << 2) == 0x%08x "
> > + "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
> >    ring->wptr_offs,
> >    lower_32_bits(ring->wptr << 2),
> >    upper_32_bits(ring->wptr << 2));
> > @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
> >    WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> > mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> >
> >    if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write
> > for wptr */
> > - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> > lower_32_bits(ring->wptr) << 2);
> > - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> > upper_32_bits(ring->wptr) << 2);
> > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
> > lower_32_bits(ring->wptr << 2));
> > + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
> > upper_32_bits(ring->wptr << 2));
> >    }
> >
> >    doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i,
> > mmSDMA0_GFX_DOORBELL));
> > diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > index 195b45bcb8ad..0af11d3b00e7 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> > @@ -57,7 +57,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
> >    u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
> >
> >    WREG32(DMA_RB_WPTR + sdma_offsets[me],
> > -        (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
> > +        (lower_32_bits(ring->wptr << 2)) & 0x3fffc);
> >   }
> >
> >   static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
> > @@ -175,7 +175,7 @@ static int si_dma_start(struct amdgpu_device *adev)
> >    WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
> >
> >    ring->wptr = 0;
> > - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
> > + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr << 2));
> >    WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
> >
> >    ring->sched.ready = true;
> > --
> > 2.25.1
>


More information about the amd-gfx mailing list