[PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Oct 8 13:35:15 UTC 2018
Under SRIOV we were enabling the ring buffer before it was initialized.
Signed-off-by: Christian König <christian.koenig at amd.com>
---
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 234 ++++++++++++++++-----------------
1 file changed, 116 insertions(+), 118 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index c20d413f277c..5ecf6c9252c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -673,13 +673,14 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
* sdma_v4_0_gfx_resume - setup and start the async dma engines
*
* @adev: amdgpu_device pointer
+ * @i: instance to resume
*
* Set up the gfx DMA ring buffers and enable them (VEGA10).
* Returns 0 for success, error for failure.
*/
-static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
+static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
{
- struct amdgpu_ring *ring;
+ struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
u32 rb_bufsz;
u32 wb_offset;
@@ -687,129 +688,108 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
u32 doorbell_offset;
u32 temp;
u64 wptr_gpu_addr;
- int i, r;
- for (i = 0; i < adev->sdma.num_instances; i++) {
- ring = &adev->sdma.instance[i].ring;
- wb_offset = (ring->rptr_offs * 4);
+ wb_offset = (ring->rptr_offs * 4);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
- /* Set ring buffer size in dwords */
- rb_bufsz = order_base_2(ring->ring_size / 4);
- rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+ /* Set ring buffer size in dwords */
+ rb_bufsz = order_base_2(ring->ring_size / 4);
+ rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
- RPTR_WRITEBACK_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+ RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- /* Initialize the ring buffer's read and write pointers */
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
- /* set the wb address whether it's enabled or not */
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
- upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
- lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+ /* set the wb address whether it's enabled or not */
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+ upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+ lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
- ring->wptr = 0;
+ ring->wptr = 0;
- /* before programing wptr to a less value, need set minor_ptr_update first */
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+ /* before programing wptr to a less value, need set minor_ptr_update first */
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
- if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
- }
+ if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+ }
- doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
- doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+ doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+ doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
- if (ring->use_doorbell) {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
- doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
- OFFSET, ring->doorbell_index);
- } else {
- doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
- }
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
- adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
- ring->doorbell_index);
-
- if (amdgpu_sriov_vf(adev))
- sdma_v4_0_ring_set_wptr(ring);
-
- /* set minor_ptr_update to 0 after wptr programed */
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
-
- /* set utc l1 enable flag always to 1 */
- temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
-
- if (!amdgpu_sriov_vf(adev)) {
- /* unhalt engine */
- temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
- }
+ if (ring->use_doorbell) {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+ doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+ OFFSET, ring->doorbell_index);
+ } else {
+ doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+ }
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+ adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+ ring->doorbell_index);
- /* setup the wptr shadow polling */
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
- lower_32_bits(wptr_gpu_addr));
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
- upper_32_bits(wptr_gpu_addr));
- wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
- if (amdgpu_sriov_vf(adev))
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
- else
- wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
+ if (amdgpu_sriov_vf(adev))
+ sdma_v4_0_ring_set_wptr(ring);
- /* enable DMA RB */
- rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+ /* set minor_ptr_update to 0 after wptr programed */
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
- ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
-#ifdef __BIG_ENDIAN
- ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
-#endif
- /* enable DMA IBs */
- WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+ /* set utc l1 enable flag always to 1 */
+ temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
- ring->ready = true;
+ if (!amdgpu_sriov_vf(adev)) {
+ /* unhalt engine */
+ temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+ temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+ }
- if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
- sdma_v4_0_ctx_switch_enable(adev, true);
- sdma_v4_0_enable(adev, true);
- }
+ /* setup the wptr shadow polling */
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+ lower_32_bits(wptr_gpu_addr));
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+ upper_32_bits(wptr_gpu_addr));
+ wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+ if (amdgpu_sriov_vf(adev))
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
+ else
+ wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->ready = false;
- return r;
- }
+ /* enable DMA RB */
+ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
- if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_buffer_funcs_status(adev, true);
-
- }
+ ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+ /* enable DMA IBs */
+ WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
- return 0;
+ ring->ready = true;
}
static void
@@ -941,33 +921,51 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
*/
static int sdma_v4_0_start(struct amdgpu_device *adev)
{
- int r = 0;
+ struct amdgpu_ring *ring;
+ int i, r;
if (amdgpu_sriov_vf(adev)) {
sdma_v4_0_ctx_switch_enable(adev, false);
sdma_v4_0_enable(adev, false);
+ } else {
- /* set RB registers */
- r = sdma_v4_0_gfx_resume(adev);
- return r;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ r = sdma_v4_0_load_microcode(adev);
+ if (r)
+ return r;
+ }
+
+ /* unhalt the MEs */
+ sdma_v4_0_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v4_0_ctx_switch_enable(adev, true);
}
- if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
- r = sdma_v4_0_load_microcode(adev);
+ /* start the gfx rings and rlc compute queues */
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ sdma_v4_0_gfx_resume(adev, i);
+
+ if (amdgpu_sriov_vf(adev)) {
+ sdma_v4_0_ctx_switch_enable(adev, true);
+ sdma_v4_0_enable(adev, true);
+ } else {
+ r = sdma_v4_0_rlc_resume(adev);
if (r)
return r;
}
- /* unhalt the MEs */
- sdma_v4_0_enable(adev, true);
- /* enable sdma ring preemption */
- sdma_v4_0_ctx_switch_enable(adev, true);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
- /* start the gfx rings and rlc compute queues */
- r = sdma_v4_0_gfx_resume(adev);
- if (r)
- return r;
- r = sdma_v4_0_rlc_resume(adev);
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->ready = false;
+ return r;
+ }
+
+ if (adev->mman.buffer_funcs_ring == ring)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
return r;
}
--
2.14.1
More information about the amd-gfx
mailing list