[PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
Huang Rui
ray.huang at amd.com
Tue Oct 9 09:17:09 UTC 2018
On Mon, Oct 08, 2018 at 03:35:15PM +0200, Christian König wrote:
> Under SRIOV we were enabling the ring buffer before it was initialized.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 234 ++++++++++++++++-----------------
> 1 file changed, 116 insertions(+), 118 deletions(-)
>
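(For context, a rough sketch of the SRIOV start sequence after this patch,
based on the hunks below. This is illustrative pseudo-driver code, not the
literal source; the bare-metal firmware-load branch and most error handling
are trimmed.)

	/* Program every instance while the engine is halted, enable the
	 * engine once, then run the ring tests against live hardware. */
	static int sdma_v4_0_start_sriov_sketch(struct amdgpu_device *adev)
	{
		int i, r;

		/* keep the engine halted while RB/IB registers are set up */
		sdma_v4_0_ctx_switch_enable(adev, false);
		sdma_v4_0_enable(adev, false);

		/* per-instance ring buffer programming; no longer enables
		 * the engine or returns early between instances */
		for (i = 0; i < adev->sdma.num_instances; i++)
			sdma_v4_0_gfx_resume(adev, i);

		/* only now switch the engine back on */
		sdma_v4_0_ctx_switch_enable(adev, true);
		sdma_v4_0_enable(adev, true);

		/* rings are tested only after full initialization */
		for (i = 0; i < adev->sdma.num_instances; i++) {
			r = amdgpu_ring_test_ring(&adev->sdma.instance[i].ring);
			if (r)
				return r;
		}
		return 0;
	}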
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index c20d413f277c..5ecf6c9252c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -673,13 +673,14 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
> * sdma_v4_0_gfx_resume - setup and start the async dma engines
> *
> * @adev: amdgpu_device pointer
> + * @i: instance to resume
> *
> * Set up the gfx DMA ring buffers and enable them (VEGA10).
> * Returns 0 for success, error for failure.
> */
> -static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
> +static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
> {
> - struct amdgpu_ring *ring;
> + struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
> u32 rb_cntl, ib_cntl, wptr_poll_cntl;
> u32 rb_bufsz;
> u32 wb_offset;
> @@ -687,129 +688,108 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
> u32 doorbell_offset;
> u32 temp;
> u64 wptr_gpu_addr;
> - int i, r;
>
> - for (i = 0; i < adev->sdma.num_instances; i++) {
> - ring = &adev->sdma.instance[i].ring;
> - wb_offset = (ring->rptr_offs * 4);
> + wb_offset = (ring->rptr_offs * 4);
>
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
>
> - /* Set ring buffer size in dwords */
> - rb_bufsz = order_base_2(ring->ring_size / 4);
> - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
> - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
> + /* Set ring buffer size in dwords */
> + rb_bufsz = order_base_2(ring->ring_size / 4);
> + rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
> + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
> #ifdef __BIG_ENDIAN
> - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
> - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
> - RPTR_WRITEBACK_SWAP_ENABLE, 1);
> + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
> + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
> + RPTR_WRITEBACK_SWAP_ENABLE, 1);
> #endif
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
>
> - /* Initialize the ring buffer's read and write pointers */
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
> + /* Initialize the ring buffer's read and write pointers */
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
>
> - /* set the wb address whether it's enabled or not */
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
> - upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
> - lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
> + /* set the wb address whether it's enabled or not */
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
> + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
> + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
>
> - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
> + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
>
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
>
> - ring->wptr = 0;
> + ring->wptr = 0;
>
> - /* before programing wptr to a less value, need set minor_ptr_update first */
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> +	/* before programming wptr to a lesser value, need to set minor_ptr_update first */
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>
> - if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> - }
> +	if (!amdgpu_sriov_vf(adev)) { /* only bare metal uses register writes for wptr */
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> + }
>
> - doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> - doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
> + doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> + doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
>
> - if (ring->use_doorbell) {
> - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
> - doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
> - OFFSET, ring->doorbell_index);
> - } else {
> - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
> - }
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
> - adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
> - ring->doorbell_index);
> -
> - if (amdgpu_sriov_vf(adev))
> - sdma_v4_0_ring_set_wptr(ring);
> -
> - /* set minor_ptr_update to 0 after wptr programed */
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
> -
> - /* set utc l1 enable flag always to 1 */
> - temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
> - temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
> -
> - if (!amdgpu_sriov_vf(adev)) {
> - /* unhalt engine */
> - temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
> - temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
> - }
> + if (ring->use_doorbell) {
> + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
> + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
> + OFFSET, ring->doorbell_index);
> + } else {
> + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
> + }
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
> + adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
> + ring->doorbell_index);
>
> - /* setup the wptr shadow polling */
> - wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
> - lower_32_bits(wptr_gpu_addr));
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
> - upper_32_bits(wptr_gpu_addr));
> - wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
> - if (amdgpu_sriov_vf(adev))
> - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
> - else
> - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
> + if (amdgpu_sriov_vf(adev))
> + sdma_v4_0_ring_set_wptr(ring);
>
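(Side note: under SRIOV the wptr is committed through
sdma_v4_0_ring_set_wptr() instead of the direct MMIO writes used on bare
metal above. A rough sketch of what such a doorbell-based helper looks
like; the real function differs in detail:)

	static void set_wptr_sketch(struct amdgpu_ring *ring)
	{
		struct amdgpu_device *adev = ring->adev;

		if (ring->use_doorbell) {
			/* publish the byte-based wptr (dwords << 2) in the
			 * write-back slot the engine polls ... */
			adev->wb.wb[ring->wptr_offs] =
				lower_32_bits(ring->wptr << 2);
			adev->wb.wb[ring->wptr_offs + 1] =
				upper_32_bits(ring->wptr << 2);
			/* ... then kick the doorbell to notify the engine */
			WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
		}
	}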
> - /* enable DMA RB */
> - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
> +	/* set minor_ptr_update to 0 after wptr is programmed */
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
>
> - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
> - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
> -#ifdef __BIG_ENDIAN
> - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
> -#endif
> - /* enable DMA IBs */
> - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
> + /* set utc l1 enable flag always to 1 */
> + temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
> + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
>
> - ring->ready = true;
> + if (!amdgpu_sriov_vf(adev)) {
> + /* unhalt engine */
> + temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
> + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
> + }
>
> - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
> - sdma_v4_0_ctx_switch_enable(adev, true);
> - sdma_v4_0_enable(adev, true);
> - }
> + /* setup the wptr shadow polling */
> + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
> + lower_32_bits(wptr_gpu_addr));
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
> + upper_32_bits(wptr_gpu_addr));
> + wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
> + if (amdgpu_sriov_vf(adev))
> + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
> + else
> + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
>
> - r = amdgpu_ring_test_ring(ring);
> - if (r) {
> - ring->ready = false;
> - return r;
> - }
> + /* enable DMA RB */
> + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
>
> - if (adev->mman.buffer_funcs_ring == ring)
> - amdgpu_ttm_set_buffer_funcs_status(adev, true);
> -
> - }
> + ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
> + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
> +#ifdef __BIG_ENDIAN
> + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
> +#endif
> + /* enable DMA IBs */
> + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
>
> - return 0;
> + ring->ready = true;
> }
>
> static void
> @@ -941,33 +921,51 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
> */
> static int sdma_v4_0_start(struct amdgpu_device *adev)
> {
> - int r = 0;
> + struct amdgpu_ring *ring;
> + int i, r;
>
> if (amdgpu_sriov_vf(adev)) {
> sdma_v4_0_ctx_switch_enable(adev, false);
> sdma_v4_0_enable(adev, false);
> + } else {
>
> - /* set RB registers */
> - r = sdma_v4_0_gfx_resume(adev);
> - return r;
> + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
> + r = sdma_v4_0_load_microcode(adev);
> + if (r)
> + return r;
> + }
> +
> + /* unhalt the MEs */
> + sdma_v4_0_enable(adev, true);
> + /* enable sdma ring preemption */
> + sdma_v4_0_ctx_switch_enable(adev, true);
> }
>
> - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
> - r = sdma_v4_0_load_microcode(adev);
> + /* start the gfx rings and rlc compute queues */
> + for (i = 0; i < adev->sdma.num_instances; i++)
> + sdma_v4_0_gfx_resume(adev, i);
> +
> + if (amdgpu_sriov_vf(adev)) {
> + sdma_v4_0_ctx_switch_enable(adev, true);
> + sdma_v4_0_enable(adev, true);
> + } else {
> + r = sdma_v4_0_rlc_resume(adev);
> if (r)
> return r;
> }
Monk, Frank,
I probably cannot judge this part myself: under SRIOV, I saw that you
disabled ctx switch before. Do you have any concerns about enabling it
here?
Otherwise, this looks good to me. Christian, may we know which kinds of
jobs will use the sdma page queue (ring)? So far we have only used the
sdma gfx queue (ring).
Thanks,
Ray
>
> - /* unhalt the MEs */
> - sdma_v4_0_enable(adev, true);
> - /* enable sdma ring preemption */
> - sdma_v4_0_ctx_switch_enable(adev, true);
> + for (i = 0; i < adev->sdma.num_instances; i++) {
> + ring = &adev->sdma.instance[i].ring;
>
> - /* start the gfx rings and rlc compute queues */
> - r = sdma_v4_0_gfx_resume(adev);
> - if (r)
> - return r;
> - r = sdma_v4_0_rlc_resume(adev);
> + r = amdgpu_ring_test_ring(ring);
> + if (r) {
> + ring->ready = false;
> + return r;
> + }
> +
> + if (adev->mman.buffer_funcs_ring == ring)
> + amdgpu_ttm_set_buffer_funcs_status(adev, true);
> + }
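(The per-ring test that used to live in sdma_v4_0_gfx_resume() now runs
here, after the engines are enabled. Conceptually, amdgpu_ring_test_ring()
dispatches to the IP-specific test, which submits a trivial write packet
and polls the destination. A heavily simplified sketch of that pattern,
with the packet emission elided:)

	static int ring_test_sketch(struct amdgpu_ring *ring)
	{
		struct amdgpu_device *adev = ring->adev;
		unsigned int index, i;
		int r;

		/* reserve a write-back dword and seed it with a marker */
		r = amdgpu_device_wb_get(adev, &index);
		if (r)
			return r;
		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);

		/* emit one SDMA write of a different marker to that dword;
		 * a live engine will overwrite the seed value */
		r = amdgpu_ring_alloc(ring, 8);
		if (r)
			goto out;
		/* (packet emission elided; see sdma_v4_0_ring_test_ring) */
		amdgpu_ring_commit(ring);

		/* poll until the marker flips or we time out */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (le32_to_cpu(adev->wb.wb[index]) == 0xDEADBEEF)
				break;
			udelay(1);
		}
		r = (i < adev->usec_timeout) ? 0 : -ETIMEDOUT;
	out:
		amdgpu_device_wb_free(adev, index);
		return r;
	}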
>
> return r;
> }
> --
> 2.14.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx