[PATCH 02/17] drm/amdgpu: refactor MQD/HQD initialization v3
Andres Rodriguez
andresx7 at gmail.com
Mon Apr 17 20:13:23 UTC 2017
On 2017-04-13 05:35 PM, Andres Rodriguez wrote:
> The MQD programming sequence currently exists in 3 different places.
> Refactor it to absorb all the duplicates.
>
> The success path remains mostly identical except for a slightly
> different order in the non-kiq case. This shouldn't matter if the HQD
> is disabled.
>
> The error handling paths have been updated to deal with the new code
> structure.
>
> v2: the non-kiq path for gfxv8 was dropped in the rebase
> v3: split MEC_HPD_SIZE rename, dropped doorbell changes
>
> Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net>
> Acked-by: Christian König <christian.koenig at amd.com>
> Acked-by: Felix Kuehling <Felix.Kuehling at amd.com>
> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 439 ++++++++++++++++++----------------
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 78 +++---
> 2 files changed, 271 insertions(+), 246 deletions(-)
>
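To make the reordering concrete: on the non-kiq side, the per-queue path
after this patch boils down to the following sequence (a sketch distilled
from gfx_v7_0_compute_queue_init() below, not new code):

	mutex_lock(&adev->srbm_mutex);
	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring); /* fill shadow MQD */
	gfx_v7_0_mqd_deactivate(adev);                    /* drain live HQD, if any */
	gfx_v7_0_mqd_commit(adev, mqd);                   /* write MQD to HQD registers */

	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

i.e. the shadow MQD is now built before the queue is drained, where the old
code drained first; as the commit message says, that only matters if the
HQD is already active.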
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 3b98162..4e6a60c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -2927,281 +2927,316 @@ struct bonaire_mqd
> u32 perf_counter_enable;
> u32 pgm[2];
> u32 tba[2];
> u32 tma[2];
> u32 pgm_rsrc[2];
> u32 vmid;
> u32 resource_limits;
> u32 static_thread_mgmt01[2];
> u32 tmp_ring_size;
> u32 static_thread_mgmt23[2];
> u32 restart[3];
> u32 thread_trace_enable;
> u32 reserved1;
> u32 user_data[16];
> u32 vgtcs_invoke_count[2];
> struct hqd_registers queue_state;
> u32 dequeue_cntr;
> u32 interrupt_queue[64];
> };
>
> -/**
> - * gfx_v7_0_cp_compute_resume - setup the compute queue registers
> - *
> - * @adev: amdgpu_device pointer
> - *
> - * Program the compute queues and test them to make sure they
> - * are working.
> - * Returns 0 for success, error for failure.
> - */
> -static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
> +static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int pipe)
> {
> - int r, i, j;
> - u32 tmp;
> - bool use_doorbell = true;
> - u64 hqd_gpu_addr;
> - u64 mqd_gpu_addr;
> u64 eop_gpu_addr;
> - u64 wb_gpu_addr;
> - u32 *buf;
> - struct bonaire_mqd *mqd;
> - struct amdgpu_ring *ring;
> -
> - /* fix up chicken bits */
> - tmp = RREG32(mmCP_CPF_DEBUG);
> - tmp |= (1 << 23);
> - WREG32(mmCP_CPF_DEBUG, tmp);
> + u32 tmp;
> + size_t eop_offset = me * pipe * GFX7_MEC_HPD_SIZE * 2;
The offset here is incorrect: multiplying me by pipe collapses distinct
(me, pipe) pairs (pipe 0 of every MEC lands at offset 0). It should be a
linear index:

(mec * num_pipe_per_mec + pipe) * GFX7_MEC_HPD_SIZE * 2
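A sketch of the fix, with the parameter renamed to a 0-based mec index (the
resume loop below already passes 0-based values; cik_srbm_select() would
then take mec + 1, since the old loop selected ME 1 and 2):

	/* one EOP region of GFX7_MEC_HPD_SIZE * 2 bytes per (mec, pipe) pair */
	size_t eop_offset = (mec * adev->gfx.mec.num_pipe + pipe)
			    * GFX7_MEC_HPD_SIZE * 2;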
>
> - /* init the pipes */
> mutex_lock(&adev->srbm_mutex);
> - for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
> - int me = (i < 4) ? 1 : 2;
> - int pipe = (i < 4) ? i : (i - 4);
> + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
>
> - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX7_MEC_HPD_SIZE * 2);
> + cik_srbm_select(adev, me, pipe, 0, 0);
>
> - cik_srbm_select(adev, me, pipe, 0, 0);
> + /* write the EOP addr */
> + WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
> + WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
>
> - /* write the EOP addr */
> - WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
> - WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
> + /* set the VMID assigned */
> + WREG32(mmCP_HPD_EOP_VMID, 0);
>
> - /* set the VMID assigned */
> - WREG32(mmCP_HPD_EOP_VMID, 0);
> + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
> + tmp = RREG32(mmCP_HPD_EOP_CONTROL);
> + tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
> + tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
> + WREG32(mmCP_HPD_EOP_CONTROL, tmp);
>
> - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
> - tmp = RREG32(mmCP_HPD_EOP_CONTROL);
> - tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
> - tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
> - WREG32(mmCP_HPD_EOP_CONTROL, tmp);
> - }
> cik_srbm_select(adev, 0, 0, 0, 0);
> mutex_unlock(&adev->srbm_mutex);
> +}
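As a sanity check on the EOP_SIZE encoding above (register value is
2^(EOP_SIZE+1) dwords), assuming GFX7_MEC_HPD_SIZE is still 2048 after the
rename in the previous patch:

	order_base_2(GFX7_MEC_HPD_SIZE / 8) = order_base_2(256) = 8
	2^(8+1) dwords = 512 dwords = 2048 bytes = GFX7_MEC_HPD_SIZE

so the programmed size still matches the per-pipe HPD buffer.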
>
> - /* init the queues. Just two for now. */
> - for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> - ring = &adev->gfx.compute_ring[i];
> +static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
> +{
> + int i;
>
> - if (ring->mqd_obj == NULL) {
> - r = amdgpu_bo_create(adev,
> - sizeof(struct bonaire_mqd),
> - PAGE_SIZE, true,
> - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
> - &ring->mqd_obj);
> - if (r) {
> - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
> - return r;
> - }
> + /* disable the queue if it's active */
> + if (RREG32(mmCP_HQD_ACTIVE) & 1) {
> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
> + for (i = 0; i < adev->usec_timeout; i++) {
> + if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
> + break;
> + udelay(1);
> }
>
> - r = amdgpu_bo_reserve(ring->mqd_obj, false);
> - if (unlikely(r != 0)) {
> - gfx_v7_0_cp_compute_fini(adev);
> - return r;
> - }
> - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
> - &mqd_gpu_addr);
> - if (r) {
> - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
> - gfx_v7_0_cp_compute_fini(adev);
> - return r;
> - }
> - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
> - if (r) {
> - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
> - gfx_v7_0_cp_compute_fini(adev);
> - return r;
> - }
> + if (i == adev->usec_timeout)
> + return -ETIMEDOUT;
>
> - /* init the mqd struct */
> - memset(buf, 0, sizeof(struct bonaire_mqd));
> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
> + WREG32(mmCP_HQD_PQ_RPTR, 0);
> + WREG32(mmCP_HQD_PQ_WPTR, 0);
> + }
>
> - mqd = (struct bonaire_mqd *)buf;
> - mqd->header = 0xC0310800;
> - mqd->static_thread_mgmt01[0] = 0xffffffff;
> - mqd->static_thread_mgmt01[1] = 0xffffffff;
> - mqd->static_thread_mgmt23[0] = 0xffffffff;
> - mqd->static_thread_mgmt23[1] = 0xffffffff;
> + return 0;
> +}
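For anyone adding callers later: gfx_v7_0_mqd_deactivate() pokes the
CP_HQD_* registers directly, so it has to run under srbm_mutex with the
target queue selected, as gfx_v7_0_compute_queue_init() does below. A
minimal usage sketch:

	mutex_lock(&adev->srbm_mutex);
	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	r = gfx_v7_0_mqd_deactivate(adev);
	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	if (r)
		dev_warn(adev->dev, "(%d) HQD deactivate timed out\n", r);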
>
> - mutex_lock(&adev->srbm_mutex);
> - cik_srbm_select(adev, ring->me,
> - ring->pipe,
> - ring->queue, 0);
> +static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
> + struct bonaire_mqd *mqd,
> + uint64_t mqd_gpu_addr,
> + struct amdgpu_ring *ring)
> +{
> + u64 hqd_gpu_addr;
> + u64 wb_gpu_addr;
>
> - /* disable wptr polling */
> - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
> - tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK;
> - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
> + /* init the mqd struct */
> + memset(mqd, 0, sizeof(struct bonaire_mqd));
>
> - /* enable doorbell? */
> - mqd->queue_state.cp_hqd_pq_doorbell_control =
> - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
> - if (use_doorbell)
> - mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
> - else
> - mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
> - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
> - mqd->queue_state.cp_hqd_pq_doorbell_control);
> -
> - /* disable the queue if it's active */
> - mqd->queue_state.cp_hqd_dequeue_request = 0;
> - mqd->queue_state.cp_hqd_pq_rptr = 0;
> - mqd->queue_state.cp_hqd_pq_wptr= 0;
> - if (RREG32(mmCP_HQD_ACTIVE) & 1) {
> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
> - for (j = 0; j < adev->usec_timeout; j++) {
> - if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
> - break;
> - udelay(1);
> - }
> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
> - WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
> - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
> - }
> + mqd->header = 0xC0310800;
> + mqd->static_thread_mgmt01[0] = 0xffffffff;
> + mqd->static_thread_mgmt01[1] = 0xffffffff;
> + mqd->static_thread_mgmt23[0] = 0xffffffff;
> + mqd->static_thread_mgmt23[1] = 0xffffffff;
>
> - /* set the pointer to the MQD */
> - mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
> - mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
> - WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
> - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
> - /* set MQD vmid to 0 */
> - mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
> - mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
> - WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
> -
> - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
> - hqd_gpu_addr = ring->gpu_addr >> 8;
> - mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
> - mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
> - WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
> - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
> -
> - /* set up the HQD, this is similar to CP_RB0_CNTL */
> - mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
> - mqd->queue_state.cp_hqd_pq_control &=
> - ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
> - CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
> -
> - mqd->queue_state.cp_hqd_pq_control |=
> - order_base_2(ring->ring_size / 8);
> - mqd->queue_state.cp_hqd_pq_control |=
> - (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
> + /* enable doorbell? */
> + mqd->queue_state.cp_hqd_pq_doorbell_control =
> + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
> + if (ring->use_doorbell)
> + mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
> + else
> + mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
> +
> + /* set the pointer to the MQD */
> + mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
> + mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
> +
> + /* set MQD vmid to 0 */
> + mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
> + mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
> +
> + /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
> + hqd_gpu_addr = ring->gpu_addr >> 8;
> + mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
> + mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
> +
> + /* set up the HQD, this is similar to CP_RB0_CNTL */
> + mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
> + mqd->queue_state.cp_hqd_pq_control &=
> + ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
> + CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
> +
> + mqd->queue_state.cp_hqd_pq_control |=
> + order_base_2(ring->ring_size / 8);
> + mqd->queue_state.cp_hqd_pq_control |=
> + (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
> #ifdef __BIG_ENDIAN
> - mqd->queue_state.cp_hqd_pq_control |=
> - 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
> + mqd->queue_state.cp_hqd_pq_control |=
> + 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
> #endif
> - mqd->queue_state.cp_hqd_pq_control &=
> - ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
> + mqd->queue_state.cp_hqd_pq_control &=
> + ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
> CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
> CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
> - mqd->queue_state.cp_hqd_pq_control |=
> - CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
> - CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
> - WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
> -
> - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
> - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> - mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
> - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
> - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
> - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
> - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
> -
> - /* set the wb address wether it's enabled or not */
> - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
> - mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
> - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
> - upper_32_bits(wb_gpu_addr) & 0xffff;
> - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
> - mqd->queue_state.cp_hqd_pq_rptr_report_addr);
> - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
> - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
> -
> - /* enable the doorbell if requested */
> - if (use_doorbell) {
> - mqd->queue_state.cp_hqd_pq_doorbell_control =
> - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
> - mqd->queue_state.cp_hqd_pq_doorbell_control &=
> - ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
> - mqd->queue_state.cp_hqd_pq_doorbell_control |=
> - (ring->doorbell_index <<
> - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
> - mqd->queue_state.cp_hqd_pq_doorbell_control |=
> - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
> - mqd->queue_state.cp_hqd_pq_doorbell_control &=
> - ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
> - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
> + mqd->queue_state.cp_hqd_pq_control |=
> + CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
> + CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
>
> - } else {
> - mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
> + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
> + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> + mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
> + mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
> +
> + /* set the wb address whether it's enabled or not */
> + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
> + mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
> + mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
> + upper_32_bits(wb_gpu_addr) & 0xffff;
> +
> + /* enable the doorbell if requested */
> + if (ring->use_doorbell) {
> + mqd->queue_state.cp_hqd_pq_doorbell_control =
> + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
> + mqd->queue_state.cp_hqd_pq_doorbell_control &=
> + ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
> + mqd->queue_state.cp_hqd_pq_doorbell_control |=
> + (ring->doorbell_index <<
> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
> + mqd->queue_state.cp_hqd_pq_doorbell_control |=
> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
> + mqd->queue_state.cp_hqd_pq_doorbell_control &=
> + ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
> +
> + } else {
> + mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
> + }
> +
> + /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
> + ring->wptr = 0;
> + mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
> + mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
> +
> + /* set the vmid for the queue */
> + mqd->queue_state.cp_hqd_vmid = 0;
> +
> + /* activate the queue */
> + mqd->queue_state.cp_hqd_active = 1;
> +}
> +
> +static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
> + struct bonaire_mqd *mqd)
> +{
> + u32 tmp;
> +
> + /* disable wptr polling */
> + tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
> + tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
> + WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
> +
> + /* program MQD fields to HW */
> + WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
> + WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
> + WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
> + WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
> + WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
> + WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
> + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
> + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
> + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->queue_state.cp_hqd_pq_rptr_report_addr);
> + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
> + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->queue_state.cp_hqd_pq_doorbell_control);
> + WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
> + WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
> +
> + /* activate the HQD */
> + WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
> +
> + return 0;
> +}
> +
> +static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
> +{
> + int r;
> + u64 mqd_gpu_addr;
> + struct bonaire_mqd *mqd;
> + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
> +
> + if (ring->mqd_obj == NULL) {
> + r = amdgpu_bo_create(adev,
> + sizeof(struct bonaire_mqd),
> + PAGE_SIZE, true,
> + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
> + &ring->mqd_obj);
> + if (r) {
> + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
> + return r;
> }
> - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
> - mqd->queue_state.cp_hqd_pq_doorbell_control);
> + }
>
> - /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
> - ring->wptr = 0;
> - mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
> - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
> - mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
> + r = amdgpu_bo_reserve(ring->mqd_obj, false);
> + if (unlikely(r != 0))
> + goto out;
>
> - /* set the vmid for the queue */
> - mqd->queue_state.cp_hqd_vmid = 0;
> - WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
> + r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
> + &mqd_gpu_addr);
> + if (r) {
> + dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
> + goto out_unreserve;
> + }
> + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
> + if (r) {
> + dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
> + goto out_unreserve;
> + }
>
> - /* activate the queue */
> - mqd->queue_state.cp_hqd_active = 1;
> - WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
> + mutex_lock(&adev->srbm_mutex);
> + cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
>
> - cik_srbm_select(adev, 0, 0, 0, 0);
> - mutex_unlock(&adev->srbm_mutex);
> + gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
> + gfx_v7_0_mqd_deactivate(adev);
> + gfx_v7_0_mqd_commit(adev, mqd);
>
> - amdgpu_bo_kunmap(ring->mqd_obj);
> - amdgpu_bo_unreserve(ring->mqd_obj);
> + cik_srbm_select(adev, 0, 0, 0, 0);
> + mutex_unlock(&adev->srbm_mutex);
>
> - ring->ready = true;
> + amdgpu_bo_kunmap(ring->mqd_obj);
> +out_unreserve:
> + amdgpu_bo_unreserve(ring->mqd_obj);
> +out:
> + return 0;
> +}
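Error-handling nit against my own patch: both labels above return 0, and
the gfx_v7_0_mqd_deactivate() return value is ignored, so reserve/pin/kmap
or deactivate failures never reach gfx_v7_0_cp_compute_resume(). A sketch
of what the tail of this function should look like:

	r = gfx_v7_0_mqd_deactivate(adev);
	if (r)
		dev_warn(adev->dev, "(%d) HQD deactivate failed\n", r);
	...
out_unreserve:
	amdgpu_bo_unreserve(ring->mqd_obj);
out:
	return r;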
> +
> +/**
> + * gfx_v7_0_cp_compute_resume - setup the compute queue registers
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Program the compute queues and test them to make sure they
> + * are working.
> + * Returns 0 for success, error for failure.
> + */
> +static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
> +{
> + int r, i, j;
> + u32 tmp;
> + struct amdgpu_ring *ring;
> +
> + /* fix up chicken bits */
> + tmp = RREG32(mmCP_CPF_DEBUG);
> + tmp |= (1 << 23);
> + WREG32(mmCP_CPF_DEBUG, tmp);
> +
> + /* init the pipes */
> + for (i = 0; i < adev->gfx.mec.num_mec; i++)
> + for (j = 0; j < adev->gfx.mec.num_pipe; j++)
> + gfx_v7_0_compute_pipe_init(adev, i, j);
> +
> + /* init the queues */
> + for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> + r = gfx_v7_0_compute_queue_init(adev, i);
> + if (r) {
> + gfx_v7_0_cp_compute_fini(adev);
> + return r;
> + }
> }
>
> gfx_v7_0_cp_compute_enable(adev, true);
>
> for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> ring = &adev->gfx.compute_ring[i];
> -
> + ring->ready = true;
> r = amdgpu_ring_test_ring(ring);
> if (r)
> ring->ready = false;
> }
>
> return 0;
> }
>
> static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
> {
> gfx_v7_0_cp_gfx_enable(adev, enable);
> gfx_v7_0_cp_compute_enable(adev, enable);
> }
>
> static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
> {
> int r;
>
> r = gfx_v7_0_cp_gfx_load_microcode(adev);
> if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index fc94c3a..b670302 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -4735,79 +4735,98 @@ static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev)
> amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
> amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
> amdgpu_ring_commit(kiq_ring);
>
> for (i = 0; i < adev->usec_timeout; i++) {
> tmp = RREG32(scratch);
> if (tmp == 0xDEADBEEF)
> break;
> DRM_UDELAY(1);
> }
> if (i >= adev->usec_timeout) {
> DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
> scratch, tmp);
> r = -EINVAL;
> }
> amdgpu_gfx_scratch_free(adev, scratch);
>
> return r;
> }
>
> +static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
> +{
> + int i, r = 0;
> +
> + if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
> + WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
> + for (i = 0; i < adev->usec_timeout; i++) {
> + if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
> + break;
> + udelay(1);
> + }
> + if (i == adev->usec_timeout)
> + r = -ETIMEDOUT;
> + }
> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
> + WREG32(mmCP_HQD_PQ_RPTR, 0);
> + WREG32(mmCP_HQD_PQ_WPTR, 0);
> +
> + return r;
> +}
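This hunk is pure code motion; the same function is deleted further down in
this patch. One thing the move exposes: both call sites in
gfx_v8_0_kiq_init_queue() pass req = 1 and drop the -ETIMEDOUT return.
Checking it would look something like:

	r = gfx_v8_0_deactivate_hqd(adev, 1);
	if (r)
		DRM_ERROR("failed to deactivate HQD\n");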
> +
> static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
> {
> struct amdgpu_device *adev = ring->adev;
> struct vi_mqd *mqd = ring->mqd_ptr;
> uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
> uint32_t tmp;
>
> + /* init the mqd struct */
> + memset(mqd, 0, sizeof(struct vi_mqd));
> +
> mqd->header = 0xC0310800;
> mqd->compute_pipelinestat_enable = 0x00000001;
> mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
> mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
> mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
> mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
> mqd->compute_misc_reserved = 0x00000003;
>
> eop_base_addr = ring->eop_gpu_addr >> 8;
> mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
> mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
>
> /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
> tmp = RREG32(mmCP_HQD_EOP_CONTROL);
> tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
> (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
>
> mqd->cp_hqd_eop_control = tmp;
>
> /* enable doorbell? */
> tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
> CP_HQD_PQ_DOORBELL_CONTROL,
> DOORBELL_EN,
> ring->use_doorbell ? 1 : 0);
>
> mqd->cp_hqd_pq_doorbell_control = tmp;
>
> - /* disable the queue if it's active */
> - mqd->cp_hqd_dequeue_request = 0;
> - mqd->cp_hqd_pq_rptr = 0;
> - mqd->cp_hqd_pq_wptr = 0;
> -
> /* set the pointer to the MQD */
> mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
> mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
>
> /* set MQD vmid to 0 */
> tmp = RREG32(mmCP_MQD_CONTROL);
> tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
> mqd->cp_mqd_control = tmp;
>
> /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
> hqd_gpu_addr = ring->gpu_addr >> 8;
> mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
> mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
>
> /* set up the HQD, this is similar to CP_RB0_CNTL */
> tmp = RREG32(mmCP_HQD_PQ_CONTROL);
> tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
> (order_base_2(ring->ring_size / 4) - 1));
> tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
> ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
> @@ -4863,157 +4882,149 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
> /* set MTYPE */
> tmp = RREG32(mmCP_HQD_IB_CONTROL);
> tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
> tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
> mqd->cp_hqd_ib_control = tmp;
>
> tmp = RREG32(mmCP_HQD_IQ_TIMER);
> tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
> mqd->cp_hqd_iq_timer = tmp;
>
> tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
> tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
> mqd->cp_hqd_ctx_save_control = tmp;
>
> /* activate the queue */
> mqd->cp_hqd_active = 1;
>
> return 0;
> }
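Side note: the memset added at the top of gfx_v8_0_mqd_init() is what lets
the memset((void *)mqd, 0, sizeof(*mqd)) calls be dropped from
gfx_v8_0_kiq_init_queue() and gfx_v8_0_kcq_init_queue() below.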
>
> -static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
> +static int gfx_v8_0_mqd_commit(struct amdgpu_ring *ring)
> {
> struct amdgpu_device *adev = ring->adev;
> struct vi_mqd *mqd = ring->mqd_ptr;
> - int j;
>
> /* disable wptr polling */
> WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
>
> WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
> WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
>
> /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
> WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
>
> /* enable doorbell? */
> WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
>
> - /* disable the queue if it's active */
> - if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
> - for (j = 0; j < adev->usec_timeout; j++) {
> - if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
> - break;
> - udelay(1);
> - }
> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
> - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
> - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
> - }
> + /* set pq read/write pointers */
> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
> + WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
> + WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
>
> /* set the pointer to the MQD */
> WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
> WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
>
> /* set MQD vmid to 0 */
> WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
>
> /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
> WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
> WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
>
> /* set up the HQD, this is similar to CP_RB0_CNTL */
> WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
>
> /* set the wb address whether it's enabled or not */
> WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
> mqd->cp_hqd_pq_rptr_report_addr_lo);
> WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
> mqd->cp_hqd_pq_rptr_report_addr_hi);
>
> /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
> WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
> WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
>
> + /* enable the doorbell if requested */
> WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
>
> /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
> WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
>
> /* set the vmid for the queue */
> WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
>
> WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
>
> /* activate the queue */
> WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
>
> return 0;
> }
>
> static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
> {
> struct amdgpu_device *adev = ring->adev;
> struct vi_mqd *mqd = ring->mqd_ptr;
> int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
>
> gfx_v8_0_kiq_setting(ring);
>
> if (adev->gfx.in_reset) { /* for GPU_RESET case */
> /* reset MQD to a clean status */
> if (adev->gfx.mec.mqd_backup[mqd_idx])
> memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
>
> /* reset ring buffer */
> ring->wptr = 0;
> amdgpu_ring_clear_ring(ring);
>
> mutex_lock(&adev->srbm_mutex);
> vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> - gfx_v8_0_kiq_init_register(ring);
> + gfx_v8_0_deactivate_hqd(adev, 1);
> + gfx_v8_0_mqd_commit(ring);
> vi_srbm_select(adev, 0, 0, 0, 0);
> mutex_unlock(&adev->srbm_mutex);
> } else {
> - memset((void *)mqd, 0, sizeof(*mqd));
> mutex_lock(&adev->srbm_mutex);
> vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> gfx_v8_0_mqd_init(ring);
> - gfx_v8_0_kiq_init_register(ring);
> + gfx_v8_0_deactivate_hqd(adev, 1);
> + gfx_v8_0_mqd_commit(ring);
> vi_srbm_select(adev, 0, 0, 0, 0);
> mutex_unlock(&adev->srbm_mutex);
>
> if (adev->gfx.mec.mqd_backup[mqd_idx])
> memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
> }
>
> return 0;
> }
>
> static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
> {
> struct amdgpu_device *adev = ring->adev;
> struct vi_mqd *mqd = ring->mqd_ptr;
> int mqd_idx = ring - &adev->gfx.compute_ring[0];
>
> if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
> - memset((void *)mqd, 0, sizeof(*mqd));
> mutex_lock(&adev->srbm_mutex);
> vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> gfx_v8_0_mqd_init(ring);
> vi_srbm_select(adev, 0, 0, 0, 0);
> mutex_unlock(&adev->srbm_mutex);
>
> if (adev->gfx.mec.mqd_backup[mqd_idx])
> memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
> } else if (adev->gfx.in_reset) { /* for GPU_RESET case */
> /* reset MQD to a clean status */
> if (adev->gfx.mec.mqd_backup[mqd_idx])
> memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
>
> /* reset ring buffer */
> ring->wptr = 0;
> amdgpu_ring_clear_ring(ring);
> }
>
> return 0;
> }
> @@ -5284,61 +5295,40 @@ static bool gfx_v8_0_check_soft_reset(void *handle)
> /* SRBM_STATUS */
> tmp = RREG32(mmSRBM_STATUS);
> if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
> srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
> SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
> if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
> srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
> SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
>
> if (grbm_soft_reset || srbm_soft_reset) {
> adev->gfx.grbm_soft_reset = grbm_soft_reset;
> adev->gfx.srbm_soft_reset = srbm_soft_reset;
> return true;
> } else {
> adev->gfx.grbm_soft_reset = 0;
> adev->gfx.srbm_soft_reset = 0;
> return false;
> }
> }
>
> -static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
> -{
> - int i, r = 0;
> -
> - if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
> - WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
> - for (i = 0; i < adev->usec_timeout; i++) {
> - if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
> - break;
> - udelay(1);
> - }
> - if (i == adev->usec_timeout)
> - r = -ETIMEDOUT;
> - }
> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
> - WREG32(mmCP_HQD_PQ_RPTR, 0);
> - WREG32(mmCP_HQD_PQ_WPTR, 0);
> -
> - return r;
> -}
> -
> static int gfx_v8_0_pre_soft_reset(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
>
> if ((!adev->gfx.grbm_soft_reset) &&
> (!adev->gfx.srbm_soft_reset))
> return 0;
>
> grbm_soft_reset = adev->gfx.grbm_soft_reset;
> srbm_soft_reset = adev->gfx.srbm_soft_reset;
>
> /* stop the rlc */
> gfx_v8_0_rlc_stop(adev);
>
> if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
> REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
> /* Disable GFX parsing/prefetching */
> gfx_v8_0_cp_gfx_enable(adev, false);
>
>