[PATCH 04/21] drm/amdgpu: Add GFXv9 kfd2kgd interface functions

Felix Kuehling felix.kuehling at amd.com
Tue May 15 22:08:36 UTC 2018


On 2018-05-15 05:41 AM, Dave Airlie wrote:
>> +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
>> +                       uint32_t queue_id, uint32_t __user *wptr,
>> +                       uint32_t wptr_shift, uint32_t wptr_mask,
>> +                       struct mm_struct *mm)
>> +{
>> +       struct amdgpu_device *adev = get_amdgpu_device(kgd);
>> +       struct v9_mqd *m;
>> +       uint32_t *mqd_hqd;
>> +       uint32_t reg, hqd_base, data;
>> +
>> +       m = get_mqd(mqd);
>> +
>> +       acquire_queue(kgd, pipe_id, queue_id);
>> +
>> +       /* HIQ is set during driver init period with vmid set to 0*/
>> +       if (m->cp_hqd_vmid == 0) {
>> +               uint32_t value, mec, pipe;
>> +
>> +               mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
>> +               pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
>> +
>> +               pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
>> +                       mec, pipe, queue_id);
>> +               value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
>> +               value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
>> +                       ((mec << 5) | (pipe << 3) | queue_id | 0x80));
>> +               WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
>> +       }
>> +
>> +       /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
>> +       mqd_hqd = &m->cp_mqd_base_addr_lo;
>> +       hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
>> +
>> +       for (reg = hqd_base;
>> +            reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
>> +               WREG32(reg, mqd_hqd[reg - hqd_base]);
>> +
>> +
>> +       /* Activate doorbell logic before triggering WPTR poll. */
>> +       data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
>> +                            CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
>> +       WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
>> +
>> +       if (wptr) {
>> +               /* Don't read wptr with get_user because the user
>> +                * context may not be accessible (if this function
>> +                * runs in a work queue). Instead trigger a one-shot
>> +                * polling read from memory in the CP. This assumes
>> +                * that wptr is GPU-accessible in the queue's VMID via
>> +                * ATC or SVM. WPTR==RPTR before starting the poll so
>> +                * the CP starts fetching new commands from the right
>> +                * place.
>> +                *
>> +                * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
>> +                * tricky. Assume that the queue didn't overflow. The
>> +                * number of valid bits in the 32-bit RPTR depends on
>> +                * the queue size. The remaining bits are taken from
>> +                * the saved 64-bit WPTR. If the WPTR wrapped, add the
>> +                * queue size.
>> +                */
>> +               uint32_t queue_size =
>> +                       2 << REG_GET_FIELD(m->cp_hqd_pq_control,
>> +                                          CP_HQD_PQ_CONTROL, QUEUE_SIZE);
>> +               uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
>> +
>> +               if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
>> +                       guessed_wptr += queue_size;
>> +               guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
>> +               guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
>> +
>> +               WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
>> +                      lower_32_bits(guessed_wptr));
>> +               WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
>> +                      upper_32_bits(guessed_wptr));
>> +               WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
>> +                      lower_32_bits((uint64_t)wptr));
>> +               WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
>> +                      upper_32_bits((uint64_t)wptr));
>  CC [M]  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.o
> In file included from
> /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:30:0:
> /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:
> In function ‘kgd_hqd_load’:
> /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:473:24:
> warning: cast from pointer to integer of different size
> [-Wpointer-to-int-cast]
>           lower_32_bits((uint64_t)wptr));
>                         ^
> /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu.h:1666:53:
> note: in definition of macro ‘WREG32’
>  #define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0)
>                                                      ^
> /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:473:10:
> note: in expansion of macro ‘lower_32_bits’
>           lower_32_bits((uint64_t)wptr));
>           ^~~~~~~~~~~~~
> /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:475:24:
> warning: cast from pointer to integer of different size
> [-Wpointer-to-int-cast]
>           upper_32_bits((uint64_t)wptr));
>                         ^
> /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu.h:1666:53:
> note: in definition of macro ‘WREG32’
>  #define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0)
>                                                      ^
> /home/airlied/devel/kernel/dim/src/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c:475:10:
> note: in expansion of macro ‘upper_32_bits’
>           upper_32_bits((uint64_t)wptr));
>           ^~~~~~~~~~~~~
>
> On a 32-bit build.

Hmm, I guess we should cast to uintptr_t instead.

That said, I should fix the build system to not compile the
amdgpu_amdkfd_* files when CONFIG_HSA_AMD is not enabled. That would
prevent this type of 32-bit or architecture-specific build problems I've
been seeing from KFD-changes.

>
> Wow that function and the comment make me feel happy nothing will ever
> break here.

Exactly. The comment is there because this will never break and no one
will ever need to read it. :P The assumptions are sane, though. If they
were violated, either the GPU doesn't have access to the queue or the
queue overflowed and the owner of the queue would be in trouble anyway.

Regards,
  Felix


>
> Dave.



More information about the amd-gfx mailing list