[PATCH 7/8] drm/amdgpu: activate paging queue on SDMA v4
Huang Rui
ray.huang@amd.com
Tue Oct 9 09:40:14 UTC 2018
On Mon, Oct 08, 2018 at 03:35:20PM +0200, Christian König wrote:
> Implement all the necessary stuff to get those extra rings working.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Each SDMA instance has a four-queue architecture, but even counting the
page queue we currently only use two of them. Is there any use case
where we would also need to activate rlc0/rlc1?
Thanks,
Ray
> ---
> drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 324 ++++++++++++++++++++++++++++-----
> 1 file changed, 274 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 55384bad7a70..a362904d73f7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -427,6 +427,57 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
> }
> }
>
> +/**
> + * sdma_v4_0_page_ring_get_wptr - get the current write pointer
> + *
> + * @ring: amdgpu ring pointer
> + *
> + * Get the current wptr from the hardware (VEGA10+).
> + */
> +static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
> +{
> + struct amdgpu_device *adev = ring->adev;
> + u64 wptr;
> +
> + if (ring->use_doorbell) {
> + /* XXX check if swapping is necessary on BE */
> + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
> + } else {
> + wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
> + wptr = wptr << 32;
> + wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR);
> + }
> +
> + return wptr >> 2;
> +}
> +
> +/**
> + * sdma_v4_0_page_ring_set_wptr - commit the write pointer
> + *
> + * @ring: amdgpu ring pointer
> + *
> + * Write the wptr back to the hardware (VEGA10+).
> + */
> +static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
> +{
> + struct amdgpu_device *adev = ring->adev;
> +
> + if (ring->use_doorbell) {
> + u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
> +
> + /* XXX check if swapping is necessary on BE */
> + WRITE_ONCE(*wb, (ring->wptr << 2));
> + WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
> + } else {
> + uint64_t wptr = ring->wptr << 2;
> +
> + WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR,
> + lower_32_bits(wptr));
> + WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI,
> + upper_32_bits(wptr));
> + }
> +}
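One note for readers rather than a problem with the patch: ring->wptr
counts dwords, while the RB_WPTR registers and the doorbell carry byte
offsets, hence the << 2 / >> 2 pair in the two functions above. A
standalone sketch of the same conversion (my own illustration, not
driver code):

    #include <stdint.h>

    /* dword index -> byte offset, as written to RB_WPTR / doorbell */
    static uint64_t wptr_to_bytes(uint64_t wptr_dw)
    {
            return wptr_dw << 2;
    }

    /* byte offset from RB_WPTR_HI:RB_WPTR -> dword index again */
    static uint64_t bytes_to_wptr(uint64_t byte_off)
    {
            return byte_off >> 2;
    }
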
> +
> static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> {
> struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
> @@ -597,6 +648,35 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
> /* XXX todo */
> }
>
> +/**
> + * sdma_v4_0_page_stop - stop the page async dma engines
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Stop the page async dma ring buffers (VEGA10).
> + */
> +static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
> +{
> + struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page;
> + struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page;
> + u32 rb_cntl, ib_cntl;
> + int i;
> +
> + for (i = 0; i < adev->sdma.num_instances; i++) {
> + rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
> + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
> + RB_ENABLE, 0);
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
> + ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
> + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
> + IB_ENABLE, 0);
> + WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
> + }
> +
> + sdma0->ready = false;
> + sdma1->ready = false;
> +}
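Minor observation: the loop runs over num_instances, but the two locals
then clear ready on exactly instance[0] and instance[1]. That mirrors
what sdma_v4_0_gfx_stop() already does, so it is consistent; a loop
would just also cover a hypothetical single-instance part. Untested
sketch of what I mean:

    for (i = 0; i < adev->sdma.num_instances; i++) {
            /* ... RB_CNTL / IB_CNTL disabling as above ... */
            adev->sdma.instance[i].page.ready = false;
    }
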
> +
> /**
> * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch
> *
> @@ -664,6 +744,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
> if (enable == false) {
> sdma_v4_0_gfx_stop(adev);
> sdma_v4_0_rlc_stop(adev);
> + sdma_v4_0_page_stop(adev);
> }
>
> for (i = 0; i < adev->sdma.num_instances; i++) {
> @@ -673,6 +754,23 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
> }
> }
>
> +/**
> + * sdma_v4_0_rb_cntl - get parameters for rb_cntl
> + */
> +static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
> +{
> + /* Set ring buffer size in dwords */
> + uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
> +
> + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
> +#ifdef __BIG_ENDIAN
> + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
> + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
> + RPTR_WRITEBACK_SWAP_ENABLE, 1);
> +#endif
> + return rb_cntl;
> +}
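Nice to see this factored out; note it uses the SDMA0_GFX_RB_CNTL field
definitions even when the value comes from the PAGE register, so the
patch relies on the two field layouts matching. For readers, the size
math: ring_size is in bytes, so ring_size / 4 is the ring length in
dwords, and RB_SIZE takes its log2 - e.g. a 4 KiB ring is 1024 dwords
and order_base_2(1024) == 10. Equivalent plain-C sketch (illustrative
only; assumes a power-of-two ring_size, which the ring allocation
rounds up to anyway):

    #include <stdint.h>

    /* log2 of the ring length in dwords, i.e. the RB_SIZE field */
    static uint32_t rb_size_field(uint32_t ring_size_bytes)
    {
            uint32_t dwords = ring_size_bytes / 4;
            uint32_t log2 = 0;

            while (dwords >>= 1)
                    log2++;
            return log2; /* 4096 bytes -> 1024 dwords -> 10 */
    }
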
> +
> /**
> * sdma_v4_0_gfx_resume - setup and start the async dma engines
> *
> @@ -686,7 +784,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
> {
> struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
> u32 rb_cntl, ib_cntl, wptr_poll_cntl;
> - u32 rb_bufsz;
> u32 wb_offset;
> u32 doorbell;
> u32 doorbell_offset;
> @@ -694,15 +791,8 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>
> wb_offset = (ring->rptr_offs * 4);
>
> - /* Set ring buffer size in dwords */
> - rb_bufsz = order_base_2(ring->ring_size / 4);
> rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
> - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
> -#ifdef __BIG_ENDIAN
> - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
> - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
> - RPTR_WRITEBACK_SWAP_ENABLE, 1);
> -#endif
> + rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
> WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
>
> /* Initialize the ring buffer's read and write pointers */
> @@ -717,7 +807,8 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
> WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
> lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
>
> - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
> + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
> + RPTR_WRITEBACK_ENABLE, 1);
>
> WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8);
> WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
> @@ -730,13 +821,11 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
> doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL);
> doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET);
>
> - if (ring->use_doorbell) {
> - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
> - doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
> - OFFSET, ring->doorbell_index);
> - } else {
> - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
> - }
> + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE,
> + ring->use_doorbell);
> + doorbell_offset = REG_SET_FIELD(doorbell_offset,
> + SDMA0_GFX_DOORBELL_OFFSET,
> + OFFSET, ring->doorbell_index);
> WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
> WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
> adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
> @@ -754,10 +843,9 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
> WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
> upper_32_bits(wptr_gpu_addr));
> wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
> - if (amdgpu_sriov_vf(adev))
> - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
> - else
> - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
> + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> + SDMA0_GFX_RB_WPTR_POLL_CNTL,
> + F32_POLL_ENABLE, amdgpu_sriov_vf(adev));
> WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
>
> /* enable DMA RB */
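One behavioral nit in the doorbell rework above: DOORBELL_OFFSET used
to be programmed only when use_doorbell was set, now it is written
unconditionally. Looks harmless since the ENABLE bit still gates it,
just pointing it out. The new style also leans on REG_SET_FIELD taking
the bool directly; a minimal model of what that macro expands to, for
anyone unfamiliar (illustrative, not the real macro):

    #include <stdint.h>

    /* illustrative: insert a value into a register bit-field */
    static uint32_t set_field(uint32_t reg, uint32_t shift,
                              uint32_t mask, uint32_t val)
    {
            return (reg & ~mask) | ((val << shift) & mask);
    }
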
> @@ -775,6 +863,99 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
> ring->ready = true;
> }
>
> +/**
> + * sdma_v4_0_page_resume - setup and start the async dma engines
> + *
> + * @adev: amdgpu_device pointer
> + * @i: instance to resume
> + *
> + * Set up the page DMA ring buffers and enable them (VEGA10).
> + */
> +static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
> +{
> + struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
> + u32 rb_cntl, ib_cntl, wptr_poll_cntl;
> + u32 wb_offset;
> + u32 doorbell;
> + u32 doorbell_offset;
> + u64 wptr_gpu_addr;
> +
> + wb_offset = (ring->rptr_offs * 4);
> +
> + rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
> + rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
> +
> + /* Initialize the ring buffer's read and write pointers */
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0);
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0);
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0);
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0);
> +
> + /* set the wb address whether it's enabled or not */
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
> + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
> + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
> +
> + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
> + RPTR_WRITEBACK_ENABLE, 1);
> +
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8);
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
> +
> + ring->wptr = 0;
> +
> + /* before programming wptr to a smaller value, need to set minor_ptr_update first */
> + WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1);
> +
> + doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL);
> + doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET);
> +
> + doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE,
> + ring->use_doorbell);
> + doorbell_offset = REG_SET_FIELD(doorbell_offset,
> + SDMA0_PAGE_DOORBELL_OFFSET,
> + OFFSET, ring->doorbell_index);
> + WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell);
> + WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset);
> + /* TODO: enable doorbell support */
> + /*adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
> + ring->doorbell_index);*/
> +
> + sdma_v4_0_page_ring_set_wptr(ring);
> +
> + /* set minor_ptr_update to 0 after wptr programmed */
> + WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
> +
> + /* setup the wptr shadow polling */
> + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
> + lower_32_bits(wptr_gpu_addr));
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,
> + upper_32_bits(wptr_gpu_addr));
> + wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL);
> + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> + SDMA0_PAGE_RB_WPTR_POLL_CNTL,
> + F32_POLL_ENABLE, amdgpu_sriov_vf(adev));
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
> +
> + /* enable DMA RB */
> + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1);
> + WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
> +
> + ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
> + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1);
> +#ifdef __BIG_ENDIAN
> + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
> +#endif
> + /* enable DMA IBs */
> + WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
> +
> + ring->ready = true;
> +}
> +
> static void
> sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable)
> {
> @@ -932,6 +1113,7 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
>
> WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0);
> sdma_v4_0_gfx_resume(adev, i);
> + sdma_v4_0_page_resume(adev, i);
>
> /* set utc l1 enable flag always to 1 */
> temp = RREG32_SDMA(i, mmSDMA0_CNTL);
> @@ -1337,6 +1519,19 @@ static int sdma_v4_0_sw_init(void *handle)
> AMDGPU_SDMA_IRQ_TRAP1);
> if (r)
> return r;
> +
> + ring = &adev->sdma.instance[i].page;
> + ring->ring_obj = NULL;
> + ring->use_doorbell = false;
> +
> + sprintf(ring->name, "page%d", i);
> + r = amdgpu_ring_init(adev, ring, 1024,
> + &adev->sdma.trap_irq,
> + (i == 0) ?
> + AMDGPU_SDMA_IRQ_TRAP0 :
> + AMDGPU_SDMA_IRQ_TRAP1);
> + if (r)
> + return r;
> }
>
> return r;
> @@ -1347,8 +1542,10 @@ static int sdma_v4_0_sw_fini(void *handle)
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> int i;
>
> - for (i = 0; i < adev->sdma.num_instances; i++)
> + for (i = 0; i < adev->sdma.num_instances; i++) {
> amdgpu_ring_fini(&adev->sdma.instance[i].ring);
> + amdgpu_ring_fini(&adev->sdma.instance[i].page);
> + }
>
> for (i = 0; i < adev->sdma.num_instances; i++) {
> release_firmware(adev->sdma.instance[i].fw);
> @@ -1462,39 +1659,32 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
> struct amdgpu_irq_src *source,
> struct amdgpu_iv_entry *entry)
> {
> + uint32_t instance;
> +
> DRM_DEBUG("IH: SDMA trap\n");
> switch (entry->client_id) {
> case SOC15_IH_CLIENTID_SDMA0:
> - switch (entry->ring_id) {
> - case 0:
> - amdgpu_fence_process(&adev->sdma.instance[0].ring);
> - break;
> - case 1:
> - /* XXX compute */
> - break;
> - case 2:
> - /* XXX compute */
> - break;
> - case 3:
> - /* XXX page queue*/
> - break;
> - }
> + instance = 0;
> break;
> case SOC15_IH_CLIENTID_SDMA1:
> - switch (entry->ring_id) {
> - case 0:
> - amdgpu_fence_process(&adev->sdma.instance[1].ring);
> - break;
> - case 1:
> - /* XXX compute */
> - break;
> - case 2:
> - /* XXX compute */
> - break;
> - case 3:
> - /* XXX page queue*/
> - break;
> - }
> + instance = 1;
> + break;
> + default:
> + return 0;
> + }
> +
> + switch (entry->ring_id) {
> + case 0:
> + amdgpu_fence_process(&adev->sdma.instance[instance].ring);
> + break;
> + case 1:
> + /* XXX compute */
> + break;
> + case 2:
> + /* XXX compute */
> + break;
> + case 3:
> + amdgpu_fence_process(&adev->sdma.instance[instance].page);
> break;
> }
> return 0;
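The demux rework reads much better than the two nested switches:
client_id picks the instance and ring_id picks the queue, so e.g.
client_id == SOC15_IH_CLIENTID_SDMA1 with ring_id == 3 now lands in
instance[1].page. If other handlers end up needing the same mapping,
it could become a small helper - hypothetical sketch, name made up:

    /* hypothetical: map an IH client_id to an SDMA instance index */
    static int sdma_v4_0_client_to_instance(unsigned int client_id)
    {
            switch (client_id) {
            case SOC15_IH_CLIENTID_SDMA0:
                    return 0;
            case SOC15_IH_CLIENTID_SDMA1:
                    return 1;
            default:
                    return -EINVAL;
            }
    }
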
> @@ -1722,6 +1912,38 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
> .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
> };
>
> +static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
> + .type = AMDGPU_RING_TYPE_SDMA,
> + .align_mask = 0xf,
> + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
> + .support_64bit_ptrs = true,
> + .vmhub = AMDGPU_MMHUB,
> + .get_rptr = sdma_v4_0_ring_get_rptr,
> + .get_wptr = sdma_v4_0_page_ring_get_wptr,
> + .set_wptr = sdma_v4_0_page_ring_set_wptr,
> + .emit_frame_size =
> + 6 + /* sdma_v4_0_ring_emit_hdp_flush */
> + 3 + /* hdp invalidate */
> + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */
> + /* sdma_v4_0_ring_emit_vm_flush */
> + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
> + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
> + .emit_ib = sdma_v4_0_ring_emit_ib,
> + .emit_fence = sdma_v4_0_ring_emit_fence,
> + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
> + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
> + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
> + .test_ring = sdma_v4_0_ring_test_ring,
> + .test_ib = sdma_v4_0_ring_test_ib,
> + .insert_nop = sdma_v4_0_ring_insert_nop,
> + .pad_ib = sdma_v4_0_ring_pad_ib,
> + .emit_wreg = sdma_v4_0_ring_emit_wreg,
> + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
> + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
> +};
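Makes sense that everything except get_wptr/set_wptr is shared with
sdma_v4_0_ring_funcs - the packet format is identical, only the
ring-buffer registers differ. For readers, the ring core dispatches
through these pointers, roughly like this (illustrative shape, not the
exact amdgpu code):

    /* illustrative: how a commit reaches the page ring callback */
    static inline void ring_commit(struct amdgpu_ring *ring)
    {
            /* -> sdma_v4_0_page_ring_set_wptr() for the page ring */
            ring->funcs->set_wptr(ring);
    }
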
> +
> static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
> {
> int i;
> @@ -1729,6 +1951,8 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
> for (i = 0; i < adev->sdma.num_instances; i++) {
> adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
> adev->sdma.instance[i].ring.me = i;
> + adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs;
> + adev->sdma.instance[i].page.me = i;
> }
> }
>
> --
> 2.14.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx