[PATCH 01/26] drm/amdgpu: refactor MQD/HQD initialization v2
Andres Rodriguez
andresx7 at gmail.com
Wed Apr 12 21:06:55 UTC 2017
On 2017-04-11 06:08 PM, Alex Deucher wrote:
> On Thu, Apr 6, 2017 at 2:21 AM, Andres Rodriguez <andresx7 at gmail.com> wrote:
>> The MQD programming sequence currently exists in 3 different places.
>> Refactor it to absorb all the duplicates.
>>
>> The success path remains mostly identical except for a slightly
>> different order in the non-kiq case. This shouldn't matter if the HQD
>> is disabled.
>>
>> The error handling paths have been updated to deal with the new code
>> structure.
>>
>> v2: the non-kiq path for gfxv8 was dropped in the rebase
>>
>> Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net>
>> Acked-by: Christian König <christian.koenig at amd.com>
>> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 447 ++++++++++++++++++----------------
>> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 110 +++++----
>> 2 files changed, 309 insertions(+), 248 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> index 185cb31..f67ef58 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> @@ -42,20 +42,22 @@
>> #include "gca/gfx_7_2_sh_mask.h"
>>
>> #include "gmc/gmc_7_0_d.h"
>> #include "gmc/gmc_7_0_sh_mask.h"
>>
>> #include "oss/oss_2_0_d.h"
>> #include "oss/oss_2_0_sh_mask.h"
>>
>> #define GFX7_NUM_GFX_RINGS 1
>> #define GFX7_NUM_COMPUTE_RINGS 8
>> +#define GFX7_MEC_HPD_SIZE 2048
>> +
>
> Might want to split out the rename of this define into a separate
> patch so it can be applied early. Could probably also split the gfx7
> and gfx8 changes into two patches so they can be applied separately,
> so gfx7 doesn't have to be beholden to the flux in gfx8 at the
> moment.
>
Done
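
On the ordering note in the commit message: each non-kiq compute queue is
now programmed through a single helper sequence (helper names taken from
the gfx7 hunks below), roughly:

        mutex_lock(&adev->srbm_mutex);
        cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

        gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);  /* build the MQD image in memory */
        gfx_v7_0_mqd_deactivate(adev);                      /* dequeue the HQD if it is still active */
        gfx_v7_0_mqd_commit(adev, mqd);                     /* program the MQD fields into the HQD registers */

        cik_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);

So the deactivate step now runs after the MQD image is built instead of in
the middle of the register writes, which is harmless as long as the HQD
starts out disabled.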
>>
>> static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
>> static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
>> static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
>>
>> MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
>> MODULE_FIRMWARE("radeon/bonaire_me.bin");
>> MODULE_FIRMWARE("radeon/bonaire_ce.bin");
>> MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
>> MODULE_FIRMWARE("radeon/bonaire_mec.bin");
>> @@ -2814,40 +2816,38 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
>> if (unlikely(r != 0))
>> dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
>> amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
>> amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>>
>> amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
>> adev->gfx.mec.hpd_eop_obj = NULL;
>> }
>> }
>>
>> -#define MEC_HPD_SIZE 2048
>> -
>> static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
>> {
>> int r;
>> u32 *hpd;
>>
>> /*
>> * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
>> * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
>> * Nonetheless, we assign only 1 pipe because all other pipes will
>> * be handled by KFD
>> */
>> adev->gfx.mec.num_mec = 1;
>> adev->gfx.mec.num_pipe = 1;
>> adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
>>
>> if (adev->gfx.mec.hpd_eop_obj == NULL) {
>> r = amdgpu_bo_create(adev,
>> - adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
>> + adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2,
>> PAGE_SIZE, true,
>> AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
>> &adev->gfx.mec.hpd_eop_obj);
>> if (r) {
>> dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> return r;
>> }
>> }
>>
>> r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
>> @@ -2863,21 +2863,21 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
>> return r;
>> }
>> r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
>> if (r) {
>> dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
>> gfx_v7_0_mec_fini(adev);
>> return r;
>> }
>>
>> /* clear memory. Not sure if this is required or not */
>> - memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
>> + memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2);
>>
>> amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>>
>> return 0;
>> }
>>
>> struct hqd_registers
>> {
>> u32 cp_mqd_base_addr;
>> @@ -2938,261 +2938,296 @@ struct bonaire_mqd
>> u32 restart[3];
>> u32 thread_trace_enable;
>> u32 reserved1;
>> u32 user_data[16];
>> u32 vgtcs_invoke_count[2];
>> struct hqd_registers queue_state;
>> u32 dequeue_cntr;
>> u32 interrupt_queue[64];
>> };
>>
>> -/**
>> - * gfx_v7_0_cp_compute_resume - setup the compute queue registers
>> - *
>> - * @adev: amdgpu_device pointer
>> - *
>> - * Program the compute queues and test them to make sure they
>> - * are working.
>> - * Returns 0 for success, error for failure.
>> - */
>> -static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
>> +static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int pipe)
>> {
>> - int r, i, j;
>> - u32 tmp;
>> - bool use_doorbell = true;
>> - u64 hqd_gpu_addr;
>> - u64 mqd_gpu_addr;
>> u64 eop_gpu_addr;
>> - u64 wb_gpu_addr;
>> - u32 *buf;
>> - struct bonaire_mqd *mqd;
>> - struct amdgpu_ring *ring;
>> -
>> - /* fix up chicken bits */
>> - tmp = RREG32(mmCP_CPF_DEBUG);
>> - tmp |= (1 << 23);
>> - WREG32(mmCP_CPF_DEBUG, tmp);
>> + u32 tmp;
>> + size_t eop_offset = me * pipe * GFX7_MEC_HPD_SIZE * 2;
>>
>> - /* init the pipes */
>> mutex_lock(&adev->srbm_mutex);
>> - for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
>> - int me = (i < 4) ? 1 : 2;
>> - int pipe = (i < 4) ? i : (i - 4);
>> + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
>>
>> - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
>> + cik_srbm_select(adev, me, pipe, 0, 0);
>>
>> - cik_srbm_select(adev, me, pipe, 0, 0);
>> + /* write the EOP addr */
>> + WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
>> + WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
>>
>> - /* write the EOP addr */
>> - WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
>> - WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
>> + /* set the VMID assigned */
>> + WREG32(mmCP_HPD_EOP_VMID, 0);
>>
>> - /* set the VMID assigned */
>> - WREG32(mmCP_HPD_EOP_VMID, 0);
>> + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
>> + tmp = RREG32(mmCP_HPD_EOP_CONTROL);
>> + tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
>> + tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
>> + WREG32(mmCP_HPD_EOP_CONTROL, tmp);
>>
>> - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
>> - tmp = RREG32(mmCP_HPD_EOP_CONTROL);
>> - tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
>> - tmp |= order_base_2(MEC_HPD_SIZE / 8);
>> - WREG32(mmCP_HPD_EOP_CONTROL, tmp);
>> - }
>> cik_srbm_select(adev, 0, 0, 0, 0);
>> mutex_unlock(&adev->srbm_mutex);
>> +}
>>
>> - /* init the queues. Just two for now. */
>> - for (i = 0; i < adev->gfx.num_compute_rings; i++) {
>> - ring = &adev->gfx.compute_ring[i];
>> +static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
>> +{
>> + int i;
>>
>> - if (ring->mqd_obj == NULL) {
>> - r = amdgpu_bo_create(adev,
>> - sizeof(struct bonaire_mqd),
>> - PAGE_SIZE, true,
>> - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
>> - &ring->mqd_obj);
>> - if (r) {
>> - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
>> - return r;
>> - }
>> + /* disable the queue if it's active */
>> + if (RREG32(mmCP_HQD_ACTIVE) & 1) {
>> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
>> + for (i = 0; i < adev->usec_timeout; i++) {
>> + if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
>> + break;
>> + udelay(1);
>> }
>>
>> - r = amdgpu_bo_reserve(ring->mqd_obj, false);
>> - if (unlikely(r != 0)) {
>> - gfx_v7_0_cp_compute_fini(adev);
>> - return r;
>> - }
>> - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
>> - &mqd_gpu_addr);
>> - if (r) {
>> - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
>> - gfx_v7_0_cp_compute_fini(adev);
>> - return r;
>> - }
>> - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
>> - if (r) {
>> - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
>> - gfx_v7_0_cp_compute_fini(adev);
>> - return r;
>> - }
>> + if (i == adev->usec_timeout)
>> + return -ETIMEDOUT;
>>
>> - /* init the mqd struct */
>> - memset(buf, 0, sizeof(struct bonaire_mqd));
>> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
>> + WREG32(mmCP_HQD_PQ_RPTR, 0);
>> + WREG32(mmCP_HQD_PQ_WPTR, 0);
>> + }
>>
>> - mqd = (struct bonaire_mqd *)buf;
>> - mqd->header = 0xC0310800;
>> - mqd->static_thread_mgmt01[0] = 0xffffffff;
>> - mqd->static_thread_mgmt01[1] = 0xffffffff;
>> - mqd->static_thread_mgmt23[0] = 0xffffffff;
>> - mqd->static_thread_mgmt23[1] = 0xffffffff;
>> + return 0;
>> +}
>>
>> - mutex_lock(&adev->srbm_mutex);
>> - cik_srbm_select(adev, ring->me,
>> - ring->pipe,
>> - ring->queue, 0);
>> +static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
>> + struct bonaire_mqd *mqd,
>> + uint64_t mqd_gpu_addr,
>> + struct amdgpu_ring *ring)
>> +{
>> + u64 hqd_gpu_addr;
>> + u64 wb_gpu_addr;
>>
>> - /* disable wptr polling */
>> - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
>> - tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK;
>> - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
>> + /* init the mqd struct */
>> + memset(mqd, 0, sizeof(struct bonaire_mqd));
>>
>> - /* enable doorbell? */
>> - mqd->queue_state.cp_hqd_pq_doorbell_control =
>> - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
>> - if (use_doorbell)
>> - mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
>> - else
>> - mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
>> - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
>> - mqd->queue_state.cp_hqd_pq_doorbell_control);
>> -
>> - /* disable the queue if it's active */
>> - mqd->queue_state.cp_hqd_dequeue_request = 0;
>> - mqd->queue_state.cp_hqd_pq_rptr = 0;
>> - mqd->queue_state.cp_hqd_pq_wptr= 0;
>> - if (RREG32(mmCP_HQD_ACTIVE) & 1) {
>> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
>> - for (j = 0; j < adev->usec_timeout; j++) {
>> - if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
>> - break;
>> - udelay(1);
>> - }
>> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
>> - WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
>> - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
>> - }
>> + mqd->header = 0xC0310800;
>> + mqd->static_thread_mgmt01[0] = 0xffffffff;
>> + mqd->static_thread_mgmt01[1] = 0xffffffff;
>> + mqd->static_thread_mgmt23[0] = 0xffffffff;
>> + mqd->static_thread_mgmt23[1] = 0xffffffff;
>>
>> - /* set the pointer to the MQD */
>> - mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
>> - mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
>> - WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
>> - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
>> - /* set MQD vmid to 0 */
>> - mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
>> - mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
>> - WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
>> -
>> - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
>> - hqd_gpu_addr = ring->gpu_addr >> 8;
>> - mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
>> - mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
>> - WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
>> - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
>> -
>> - /* set up the HQD, this is similar to CP_RB0_CNTL */
>> - mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
>> - mqd->queue_state.cp_hqd_pq_control &=
>> - ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
>> - CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
>> -
>> - mqd->queue_state.cp_hqd_pq_control |=
>> - order_base_2(ring->ring_size / 8);
>> - mqd->queue_state.cp_hqd_pq_control |=
>> - (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
>> + /* enable doorbell? */
>> + mqd->queue_state.cp_hqd_pq_doorbell_control =
>> + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
>> + if (ring->use_doorbell)
>> + mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
>> + else
>> + mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
>> +
>> + /* set the pointer to the MQD */
>> + mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
>> + mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
>> +
>> + /* set MQD vmid to 0 */
>> + mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
>> + mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
>> +
>> + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
>> + hqd_gpu_addr = ring->gpu_addr >> 8;
>> + mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
>> + mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
>> +
>> + /* set up the HQD, this is similar to CP_RB0_CNTL */
>> + mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
>> + mqd->queue_state.cp_hqd_pq_control &=
>> + ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
>> + CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
>> +
>> + mqd->queue_state.cp_hqd_pq_control |=
>> + order_base_2(ring->ring_size / 8);
>> + mqd->queue_state.cp_hqd_pq_control |=
>> + (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
>> #ifdef __BIG_ENDIAN
>> - mqd->queue_state.cp_hqd_pq_control |=
>> - 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
>> + mqd->queue_state.cp_hqd_pq_control |=
>> + 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
>> #endif
>> - mqd->queue_state.cp_hqd_pq_control &=
>> - ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
>> + mqd->queue_state.cp_hqd_pq_control &=
>> + ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
>> CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
>> CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
>> - mqd->queue_state.cp_hqd_pq_control |=
>> - CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
>> - CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
>> - WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
>> -
>> - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
>> - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
>> - mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
>> - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
>> - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
>> - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
>> - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
>> -
>> - /* set the wb address wether it's enabled or not */
>> - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
>> - mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
>> - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
>> - upper_32_bits(wb_gpu_addr) & 0xffff;
>> - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
>> - mqd->queue_state.cp_hqd_pq_rptr_report_addr);
>> - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
>> - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
>> -
>> - /* enable the doorbell if requested */
>> - if (use_doorbell) {
>> - mqd->queue_state.cp_hqd_pq_doorbell_control =
>> - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
>> - mqd->queue_state.cp_hqd_pq_doorbell_control &=
>> - ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
>> - mqd->queue_state.cp_hqd_pq_doorbell_control |=
>> - (ring->doorbell_index <<
>> - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
>> - mqd->queue_state.cp_hqd_pq_doorbell_control |=
>> - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
>> - mqd->queue_state.cp_hqd_pq_doorbell_control &=
>> - ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
>> - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
>> + mqd->queue_state.cp_hqd_pq_control |=
>> + CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
>> + CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
>>
>> - } else {
>> - mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
>> + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
>> + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
>> + mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
>> + mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
>> +
>> + /* set the wb address wether it's enabled or not */
>> + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
>> + mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
>> + mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
>> + upper_32_bits(wb_gpu_addr) & 0xffff;
>> +
>> + /* enable the doorbell if requested */
>> + if (ring->use_doorbell) {
>> + mqd->queue_state.cp_hqd_pq_doorbell_control =
>> + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
>> + mqd->queue_state.cp_hqd_pq_doorbell_control &=
>> + ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
>> + mqd->queue_state.cp_hqd_pq_doorbell_control |=
>> + (ring->doorbell_index <<
>> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
>> + mqd->queue_state.cp_hqd_pq_doorbell_control |=
>> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
>> + mqd->queue_state.cp_hqd_pq_doorbell_control &=
>> + ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
>> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
>> +
>> + } else {
>> + mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
>> + }
>> +
>> + /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
>> + ring->wptr = 0;
>> + mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
>> + mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
>> +
>> + /* set the vmid for the queue */
>> + mqd->queue_state.cp_hqd_vmid = 0;
>> +
>> + /* activate the queue */
>> + mqd->queue_state.cp_hqd_active = 1;
>> +}
>> +
>> +static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
>> + struct bonaire_mqd *mqd)
>> +{
>> + u32 tmp;
>> +
>> + /* disable wptr polling */
>> + tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
>> + tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
>> + WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
>> +
>> + /* program MQD field to HW */
>> + WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
>> + WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
>> + WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
>> + WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
>> + WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
>> + WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
>> + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
>> + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
>> + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->queue_state.cp_hqd_pq_rptr_report_addr);
>> + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
>> + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->queue_state.cp_hqd_pq_doorbell_control);
>> + WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
>> + WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
>> +
>> + /* activate the HQD */
>> + WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
>> +
>> + return 0;
>> +}
>> +
>> +static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
>> +{
>> + int r;
>> + u64 mqd_gpu_addr;
>> + struct bonaire_mqd *mqd;
>> + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
>> +
>> + if (ring->mqd_obj == NULL) {
>> + r = amdgpu_bo_create(adev,
>> + sizeof(struct bonaire_mqd),
>> + PAGE_SIZE, true,
>> + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
>> + &ring->mqd_obj);
>> + if (r) {
>> + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
>> + return r;
>> }
>> - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
>> - mqd->queue_state.cp_hqd_pq_doorbell_control);
>> + }
>> +
>> + r = amdgpu_bo_reserve(ring->mqd_obj, false);
>> + if (unlikely(r != 0))
>> + goto out;
>> +
>> + r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
>> + &mqd_gpu_addr);
>> + if (r) {
>> + dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
>> + goto out_unreserve;
>> + }
>> + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
>> + if (r) {
>> + dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
>> + goto out_unreserve;
>> + }
>> +
>> + mutex_lock(&adev->srbm_mutex);
>> + cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
>>
>> - /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
>> - ring->wptr = 0;
>> - mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
>> - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
>> - mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
>> + gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
>> + gfx_v7_0_mqd_deactivate(adev);
>> + gfx_v7_0_mqd_commit(adev, mqd);
>>
>> - /* set the vmid for the queue */
>> - mqd->queue_state.cp_hqd_vmid = 0;
>> - WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
>> + cik_srbm_select(adev, 0, 0, 0, 0);
>> + mutex_unlock(&adev->srbm_mutex);
>>
>> - /* activate the queue */
>> - mqd->queue_state.cp_hqd_active = 1;
>> - WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
>> + amdgpu_bo_kunmap(ring->mqd_obj);
>> +out_unreserve:
>> + amdgpu_bo_unreserve(ring->mqd_obj);
>> +out:
>> + return 0;
>> +}
>> +
>> +/**
>> + * gfx_v7_0_cp_compute_resume - setup the compute queue registers
>> + *
>> + * @adev: amdgpu_device pointer
>> + *
>> + * Program the compute queues and test them to make sure they
>> + * are working.
>> + * Returns 0 for success, error for failure.
>> + */
>> +static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
>> +{
>> + int r, i, j;
>> + u32 tmp;
>> + struct amdgpu_ring *ring;
>>
>> - cik_srbm_select(adev, 0, 0, 0, 0);
>> - mutex_unlock(&adev->srbm_mutex);
>> + /* fix up chicken bits */
>> + tmp = RREG32(mmCP_CPF_DEBUG);
>> + tmp |= (1 << 23);
>> + WREG32(mmCP_CPF_DEBUG, tmp);
>>
>> - amdgpu_bo_kunmap(ring->mqd_obj);
>> - amdgpu_bo_unreserve(ring->mqd_obj);
>> + /* init the pipes */
>> + for (i = 0; i < adev->gfx.mec.num_mec; i++)
>> + for (j = 0; j < adev->gfx.mec.num_pipe; j++)
>> + gfx_v7_0_compute_pipe_init(adev, i, j);
>>
>> - ring->ready = true;
>> + /* init the queues */
>> + for (i = 0; i < adev->gfx.num_compute_rings; i++) {
>> + r = gfx_v7_0_compute_queue_init(adev, i);
>> + if (r) {
>> + gfx_v7_0_cp_compute_fini(adev);
>> + return r;
>> + }
>> }
>>
>> gfx_v7_0_cp_compute_enable(adev, true);
>>
>> for (i = 0; i < adev->gfx.num_compute_rings; i++) {
>> ring = &adev->gfx.compute_ring[i];
>> -
>> + ring->ready = true;
>> r = amdgpu_ring_test_ring(ring);
>> if (r)
>> ring->ready = false;
>> }
>>
>> return 0;
>> }
>>
>> static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
>> {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index 4e63e52..88b85f7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -46,20 +46,22 @@
>> #include "gca/gfx_8_0_sh_mask.h"
>> #include "gca/gfx_8_0_enum.h"
>>
>> #include "dce/dce_10_0_d.h"
>> #include "dce/dce_10_0_sh_mask.h"
>>
>> #include "smu/smu_7_1_3_d.h"
>>
>> #define GFX8_NUM_GFX_RINGS 1
>> #define GFX8_NUM_COMPUTE_RINGS 8
>> +#define GFX8_MEC_HPD_SIZE 2048
>> +
>>
>> #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
>> #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
>> #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
>> #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
>>
>> #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
>> #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
>> #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
>> #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
>> @@ -1406,38 +1408,38 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
>>
>> return r;
>> }
>> static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
>> struct amdgpu_irq_src *irq)
>> {
>> amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
>> amdgpu_ring_fini(ring);
>> }
>>
>> -#define MEC_HPD_SIZE 2048
>> +#define GFX8_MEC_HPD_SIZE 2048
>
> Same comment about the rename of this.
>
Done
>>
>> static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
>> {
>> int r;
>> u32 *hpd;
>>
>> /*
>> * we assign only 1 pipe because all other pipes will
>> * be handled by KFD
>> */
>> adev->gfx.mec.num_mec = 1;
>> adev->gfx.mec.num_pipe = 1;
>> adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
>>
>> if (adev->gfx.mec.hpd_eop_obj == NULL) {
>> r = amdgpu_bo_create(adev,
>> - adev->gfx.mec.num_queue * MEC_HPD_SIZE,
>> + adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE,
>> PAGE_SIZE, true,
>> AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
>> &adev->gfx.mec.hpd_eop_obj);
>> if (r) {
>> dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
>> return r;
>> }
>> }
>>
>> r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
>> @@ -1452,50 +1454,50 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
>> gfx_v8_0_mec_fini(adev);
>> return r;
>> }
>> r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
>> if (r) {
>> dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
>> gfx_v8_0_mec_fini(adev);
>> return r;
>> }
>>
>> - memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
>> + memset(hpd, 0, adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE);
>>
>> amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
>> amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
>>
>> return 0;
>> }
>>
>> static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
>> {
>> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>
>> amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
>> }
>>
>> static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
>> {
>> int r;
>> u32 *hpd;
>> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>>
>> - r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
>> + r = amdgpu_bo_create_kernel(adev, GFX8_MEC_HPD_SIZE, PAGE_SIZE,
>> AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
>> &kiq->eop_gpu_addr, (void **)&hpd);
>> if (r) {
>> dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
>> return r;
>> }
>>
>> - memset(hpd, 0, MEC_HPD_SIZE);
>> + memset(hpd, 0, GFX8_MEC_HPD_SIZE);
>>
>> r = amdgpu_bo_reserve(kiq->eop_obj, false);
>> if (unlikely(r != 0))
>> dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
>> amdgpu_bo_kunmap(kiq->eop_obj);
>> amdgpu_bo_unreserve(kiq->eop_obj);
>>
>> return 0;
>> }
>>
>> @@ -2148,21 +2150,21 @@ static int gfx_v8_0_sw_init(void *handle)
>> DRM_ERROR("Too many (%d) compute rings!\n", i);
>> break;
>> }
>> ring = &adev->gfx.compute_ring[i];
>> ring->ring_obj = NULL;
>> ring->use_doorbell = true;
>> ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
>> ring->me = 1; /* first MEC */
>> ring->pipe = i / 8;
>> ring->queue = i % 8;
>> - ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
>> + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX8_MEC_HPD_SIZE);
>> sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
>> irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
>> /* type-2 packets are deprecated on MEC, use type-3 instead */
>> r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
>> irq_type);
>> if (r)
>> return r;
>> }
>>
>> r = gfx_v8_0_kiq_init(adev);
>> @@ -4747,52 +4749,50 @@ static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev)
>> return r;
>> }
>>
>> static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
>> {
>> struct amdgpu_device *adev = ring->adev;
>> struct vi_mqd *mqd = ring->mqd_ptr;
>> uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
>> uint32_t tmp;
>>
>> + /* init the mqd struct */
>> + memset(mqd, 0, sizeof(struct vi_mqd));
>> +
>> mqd->header = 0xC0310800;
>> mqd->compute_pipelinestat_enable = 0x00000001;
>> mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
>> mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
>> mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
>> mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
>> mqd->compute_misc_reserved = 0x00000003;
>>
>> eop_base_addr = ring->eop_gpu_addr >> 8;
>> mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
>> mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
>>
>> /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
>> tmp = RREG32(mmCP_HQD_EOP_CONTROL);
>> tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
>> - (order_base_2(MEC_HPD_SIZE / 4) - 1));
>> + (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
>>
>> mqd->cp_hqd_eop_control = tmp;
>>
>> /* enable doorbell? */
>> tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
>> CP_HQD_PQ_DOORBELL_CONTROL,
>> DOORBELL_EN,
>> ring->use_doorbell ? 1 : 0);
>>
>> mqd->cp_hqd_pq_doorbell_control = tmp;
>>
>> - /* disable the queue if it's active */
>> - mqd->cp_hqd_dequeue_request = 0;
>> - mqd->cp_hqd_pq_rptr = 0;
>> - mqd->cp_hqd_pq_wptr = 0;
>> -
>> /* set the pointer to the MQD */
>> mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
>> mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
>>
>> /* set MQD vmid to 0 */
>> tmp = RREG32(mmCP_MQD_CONTROL);
>> tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
>> mqd->cp_mqd_control = tmp;
>>
>> /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
>> @@ -4854,50 +4854,86 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
>> tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
>> tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
>> mqd->cp_hqd_persistent_state = tmp;
>>
>> /* activate the queue */
>> mqd->cp_hqd_active = 1;
>>
>> return 0;
>> }
>>
>> -static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
>> +static int gfx_v8_0_mqd_deactivate(struct amdgpu_device *adev)
>> +{
>> + int i;
>> +
>> + /* disable the queue if it's active */
>> + if (RREG32(mmCP_HQD_ACTIVE) & 1) {
>> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
>> + for (i = 0; i < adev->usec_timeout; i++) {
>> + if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
>> + break;
>> + udelay(1);
>> + }
>> +
>> + if (i == adev->usec_timeout)
>> + return -ETIMEDOUT;
>> +
>> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
>> + WREG32(mmCP_HQD_PQ_RPTR, 0);
>> + WREG32(mmCP_HQD_PQ_WPTR, 0);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable)
>> +{
>> + uint32_t tmp;
>> +
>> + if (!enable)
>> + return;
>> +
>> + if ((adev->asic_type == CHIP_CARRIZO) ||
>> + (adev->asic_type == CHIP_FIJI) ||
>> + (adev->asic_type == CHIP_STONEY) ||
>> + (adev->asic_type == CHIP_POLARIS11) ||
>> + (adev->asic_type == CHIP_POLARIS10)) {
>> + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
>> + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
>> + }
>> +
>> + tmp = RREG32(mmCP_PQ_STATUS);
>> + tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
>> + WREG32(mmCP_PQ_STATUS, tmp);
>> +}
>
> This can be dropped since we've already split out the doorbell range
> setting and with KIQ enabled, these registers are only set once
> anyway. No need to split it out.
>
Done. From my understanding of your comment + the code, this just needs
to be removed.
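
With that helper gone, the compute queue branch in gfx_v8_0_kiq_init_queue()
should end up as just (sketch of what v3 will look like):

        mutex_lock(&adev->srbm_mutex);
        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
        gfx_v8_0_mqd_init(ring);
        gfx_v8_0_mqd_deactivate(adev);
        gfx_v8_0_mqd_commit(ring);
        vi_srbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);

leaving the doorbell range/enable to the one-time setup you mentioned.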
>> +
>> +static int gfx_v8_0_mqd_commit(struct amdgpu_ring *ring)
>> {
>> struct amdgpu_device *adev = ring->adev;
>> struct vi_mqd *mqd = ring->mqd_ptr;
>> - int j;
>>
>> /* disable wptr polling */
>> WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
>>
>> WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
>> WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
>>
>> /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
>> WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
>>
>> /* enable doorbell? */
>> WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
>>
>> - /* disable the queue if it's active */
>> - if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
>> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
>> - for (j = 0; j < adev->usec_timeout; j++) {
>> - if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
>> - break;
>> - udelay(1);
>> - }
>> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
>> - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
>> - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
>> - }
>> + /* set pq read/write pointers */
>> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
>> + WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
>> + WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
>>
>> /* set the pointer to the MQD */
>> WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
>> WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
>>
>> /* set MQD vmid to 0 */
>> WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
>>
>> /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
>> WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
>> @@ -4910,46 +4946,33 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
>> WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
>> mqd->cp_hqd_pq_rptr_report_addr_lo);
>> WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
>> mqd->cp_hqd_pq_rptr_report_addr_hi);
>>
>> /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
>> WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
>> WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
>>
>> /* enable the doorbell if requested */
>> - if (ring->use_doorbell) {
>> - if ((adev->asic_type == CHIP_CARRIZO) ||
>> - (adev->asic_type == CHIP_FIJI) ||
>> - (adev->asic_type == CHIP_STONEY)) {
>> - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
>> - AMDGPU_DOORBELL_KIQ << 2);
>> - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
>> - AMDGPU_DOORBELL_MEC_RING7 << 2);
>> - }
>> - }
>> WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
>>
>> /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
>> WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
>>
>> /* set the vmid for the queue */
>> WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
>>
>> WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
>>
>> /* activate the queue */
>> WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
>>
>> - if (ring->use_doorbell)
>> - WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
>> -
>> return 0;
>> }
>>
>> static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
>> {
>> struct amdgpu_device *adev = ring->adev;
>> struct vi_mqd *mqd = ring->mqd_ptr;
>> int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
>>
>> gfx_v8_0_kiq_setting(ring);
>> @@ -4958,29 +4981,32 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
>> /* reset MQD to a clean status */
>> if (adev->gfx.mec.mqd_backup[mqd_idx])
>> memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
>>
>> /* reset ring buffer */
>> ring->wptr = 0;
>> amdgpu_ring_clear_ring(ring);
>>
>> mutex_lock(&adev->srbm_mutex);
>> vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
>> - gfx_v8_0_kiq_init_register(ring);
>> + gfx_v8_0_mqd_deactivate(adev);
>> + gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
>> + gfx_v8_0_mqd_commit(ring);
>> vi_srbm_select(adev, 0, 0, 0, 0);
>> mutex_unlock(&adev->srbm_mutex);
>> } else {
>> - memset((void *)mqd, 0, sizeof(*mqd));
>
> Since you memset the mqd in gfx_v8_0_mqd_init(), you can also drop the
> memset in gfx_v8_0_kcq_init_queue().
>
Done
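
The clear already happens once at the top of gfx_v8_0_mqd_init() in this
patch:

        /* init the mqd struct */
        memset(mqd, 0, sizeof(struct vi_mqd));

so the extra memset in the caller is redundant and is dropped for v3.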
>> mutex_lock(&adev->srbm_mutex);
>> vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
>> gfx_v8_0_mqd_init(ring);
>> - gfx_v8_0_kiq_init_register(ring);
>> + gfx_v8_0_mqd_deactivate(adev);
>> + gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
>> + gfx_v8_0_mqd_commit(ring);
>> vi_srbm_select(adev, 0, 0, 0, 0);
>> mutex_unlock(&adev->srbm_mutex);
>>
>> if (adev->gfx.mec.mqd_backup[mqd_idx])
>> memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
>> }
>>
>> return 0;
>> }
>>
>> --
>> 2.9.3
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx