[PATCH 19/19] drm/amd: Update MEC HQD loading code for KFD
Oded Gabbay
oded.gabbay at gmail.com
Sun Aug 13 08:33:58 UTC 2017
On Sat, Aug 12, 2017 at 12:56 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> Various bug fixes and improvements that accumulated over the last two
> years.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 16 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 130 +++++++++++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 165 ++++++++++++++++++---
> .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 +-
> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 3 +-
> drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 3 +-
> drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 23 +--
> drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 16 +-
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 -
> drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 11 +-
> drivers/gpu/drm/radeon/radeon_kfd.c | 12 +-
> 11 files changed, 322 insertions(+), 69 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index b8802a5..8d689ab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -26,6 +26,7 @@
> #define AMDGPU_AMDKFD_H_INCLUDED
>
> #include <linux/types.h>
> +#include <linux/mmu_context.h>
> #include <kgd_kfd_interface.h>
>
> struct amdgpu_device;
> @@ -60,4 +61,19 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
>
> uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
>
> +#define read_user_wptr(mmptr, wptr, dst) \
> + ({ \
> + bool valid = false; \
> + if ((mmptr) && (wptr)) { \
> + if ((mmptr) == current->mm) { \
> + valid = !get_user((dst), (wptr)); \
> + } else if (current->mm == NULL) { \
> + use_mm(mmptr); \
> + valid = !get_user((dst), (wptr)); \
> + unuse_mm(mmptr); \
> + } \
> + } \
> + valid; \
> + })
> +
> #endif /* AMDGPU_AMDKFD_H_INCLUDED */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> index dfb8c74..994d262 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> @@ -39,6 +39,12 @@
> #include "gmc/gmc_7_1_sh_mask.h"
> #include "cik_structs.h"
>
> +enum hqd_dequeue_request_type {
> + NO_ACTION = 0,
> + DRAIN_PIPE,
> + RESET_WAVES
> +};
> +
> enum {
> MAX_TRAPID = 8, /* 3 bits in the bitfield. */
> MAX_WATCH_ADDRESSES = 4
> @@ -96,12 +102,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
> uint32_t hpd_size, uint64_t hpd_gpu_addr);
> static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
> static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
> - uint32_t queue_id, uint32_t __user *wptr);
> + uint32_t queue_id, uint32_t __user *wptr,
> + uint32_t wptr_shift, uint32_t wptr_mask,
> + struct mm_struct *mm);
> static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
> static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
> uint32_t pipe_id, uint32_t queue_id);
>
> -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
> +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
> + enum kfd_preempt_type reset_type,
> unsigned int utimeout, uint32_t pipe_id,
> uint32_t queue_id);
> static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
> @@ -290,20 +299,38 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
> }
>
> static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
> - uint32_t queue_id, uint32_t __user *wptr)
> + uint32_t queue_id, uint32_t __user *wptr,
> + uint32_t wptr_shift, uint32_t wptr_mask,
> + struct mm_struct *mm)
> {
> struct amdgpu_device *adev = get_amdgpu_device(kgd);
> - uint32_t wptr_shadow, is_wptr_shadow_valid;
> struct cik_mqd *m;
> + uint32_t *mqd_hqd;
> + uint32_t reg, wptr_val, data;
>
> m = get_mqd(mqd);
>
> - is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
> - if (is_wptr_shadow_valid)
> - m->cp_hqd_pq_wptr = wptr_shadow;
> -
> acquire_queue(kgd, pipe_id, queue_id);
> - gfx_v7_0_mqd_commit(adev, m);
> +
> + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
> + mqd_hqd = &m->cp_mqd_base_addr_lo;
> +
> + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
> + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
> +
> + /* Copy userspace write pointer value to register.
> + * Activate doorbell logic to monitor subsequent changes.
> + */
> + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
> + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
> + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
> +
> + if (read_user_wptr(mm, wptr, wptr_val))
> + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
> +
> + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
> + WREG32(mmCP_HQD_ACTIVE, data);
> +
> release_queue(kgd);
>
> return 0;
> @@ -382,30 +409,99 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
> return false;
> }
>
> -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
> +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
> + enum kfd_preempt_type reset_type,
> unsigned int utimeout, uint32_t pipe_id,
> uint32_t queue_id)
> {
> struct amdgpu_device *adev = get_amdgpu_device(kgd);
> uint32_t temp;
> - int timeout = utimeout;
> + enum hqd_dequeue_request_type type;
> + unsigned long flags, end_jiffies;
> + int retry;
>
> acquire_queue(kgd, pipe_id, queue_id);
> WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
>
> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
> + switch (reset_type) {
> + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
> + type = DRAIN_PIPE;
> + break;
> + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
> + type = RESET_WAVES;
> + break;
> + default:
> + type = DRAIN_PIPE;
> + break;
> + }
>
> + /* Workaround: If IQ timer is active and the wait time is close to or
> + * equal to 0, dequeueing is not safe. Wait until either the wait time
> + * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
> + * cleared before continuing. Also, ensure wait times are set to at
> + * least 0x3.
> + */
> + local_irq_save(flags);
> + preempt_disable();
> + retry = 5000; /* wait for 500 usecs at maximum */
> + while (true) {
> + temp = RREG32(mmCP_HQD_IQ_TIMER);
> + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
> + pr_debug("HW is processing IQ\n");
> + goto loop;
> + }
> + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
> + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
> + == 3) /* SEM-rearm is safe */
> + break;
> + /* Wait time 3 is safe for CP, but our MMIO read/write
> + * time is close to 1 microsecond, so check for 10 to
> + * leave more buffer room
> + */
> + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
> + >= 10)
> + break;
> + pr_debug("IQ timer is active\n");
> + } else
> + break;
> +loop:
> + if (!retry) {
> + pr_err("CP HQD IQ timer status time out\n");
> + break;
> + }
> + ndelay(100);
> + --retry;
> + }
> + retry = 1000;
> + while (true) {
> + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
> + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
> + break;
> + pr_debug("Dequeue request is pending\n");
> +
> + if (!retry) {
> + pr_err("CP HQD dequeue request time out\n");
> + break;
> + }
> + ndelay(100);
> + --retry;
> + }
> + local_irq_restore(flags);
> + preempt_enable();
> +
> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
> +
> + end_jiffies = (utimeout * HZ / 1000) + jiffies;
> while (true) {
> temp = RREG32(mmCP_HQD_ACTIVE);
> - if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
> + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
> break;
> - if (timeout <= 0) {
> - pr_err("kfd: cp queue preemption time out.\n");
> + if (time_after(jiffies, end_jiffies)) {
> + pr_err("cp queue preemption time out\n");
> release_queue(kgd);
> return -ETIME;
> }
> - msleep(20);
> - timeout -= 20;
> + usleep_range(500, 1000);
> }
>
> release_queue(kgd);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index 0fccd30..29a6f5d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -39,6 +39,12 @@
> #include "vi_structs.h"
> #include "vid.h"
>
> +enum hqd_dequeue_request_type {
> + NO_ACTION = 0,
> + DRAIN_PIPE,
> + RESET_WAVES
> +};
> +
> struct cik_sdma_rlc_registers;
>
> /*
> @@ -55,12 +61,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
> uint32_t hpd_size, uint64_t hpd_gpu_addr);
> static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
> static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
> - uint32_t queue_id, uint32_t __user *wptr);
> + uint32_t queue_id, uint32_t __user *wptr,
> + uint32_t wptr_shift, uint32_t wptr_mask,
> + struct mm_struct *mm);
> static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
> static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
> uint32_t pipe_id, uint32_t queue_id);
> static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
> -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
> +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
> + enum kfd_preempt_type reset_type,
> unsigned int utimeout, uint32_t pipe_id,
> uint32_t queue_id);
> static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
> @@ -244,20 +253,67 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
> }
>
> static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
> - uint32_t queue_id, uint32_t __user *wptr)
> + uint32_t queue_id, uint32_t __user *wptr,
> + uint32_t wptr_shift, uint32_t wptr_mask,
> + struct mm_struct *mm)
> {
> - struct vi_mqd *m;
> - uint32_t shadow_wptr, valid_wptr;
> struct amdgpu_device *adev = get_amdgpu_device(kgd);
> + struct vi_mqd *m;
> + uint32_t *mqd_hqd;
> + uint32_t reg, wptr_val, data;
>
> m = get_mqd(mqd);
>
> - valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
> - if (valid_wptr == 0)
> - m->cp_hqd_pq_wptr = shadow_wptr;
> -
> acquire_queue(kgd, pipe_id, queue_id);
> - gfx_v8_0_mqd_commit(adev, mqd);
> +
> + /* HIQ is set during driver init period with vmid set to 0*/
> + if (m->cp_hqd_vmid == 0) {
> + uint32_t value, mec, pipe;
> +
> + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
> + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
> +
> + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
> + mec, pipe, queue_id);
> + value = RREG32(mmRLC_CP_SCHEDULERS);
> + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
> + ((mec << 5) | (pipe << 3) | queue_id | 0x80));
> + WREG32(mmRLC_CP_SCHEDULERS, value);
> + }
> +
> + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
> + mqd_hqd = &m->cp_mqd_base_addr_lo;
> +
> + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
> + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
> +
> + /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
> + * This is safe since EOP RPTR==WPTR for any inactive HQD
> + * on ASICs that do not support context-save.
> + * EOP writes/reads can start anywhere in the ring.
> + */
> + if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
> + WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
> + WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
> + WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
> + }
> +
> + for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
> + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
> +
> + /* Copy userspace write pointer value to register.
> + * Activate doorbell logic to monitor subsequent changes.
> + */
> + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
> + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
> + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
> +
> + if (read_user_wptr(mm, wptr, wptr_val))
> + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
> +
> + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
> + WREG32(mmCP_HQD_ACTIVE, data);
> +
> release_queue(kgd);
>
> return 0;
> @@ -308,29 +364,102 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
> return false;
> }
>
> -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
> +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
> + enum kfd_preempt_type reset_type,
> unsigned int utimeout, uint32_t pipe_id,
> uint32_t queue_id)
> {
> struct amdgpu_device *adev = get_amdgpu_device(kgd);
> uint32_t temp;
> - int timeout = utimeout;
> + enum hqd_dequeue_request_type type;
> + unsigned long flags, end_jiffies;
> + int retry;
> + struct vi_mqd *m = get_mqd(mqd);
>
> acquire_queue(kgd, pipe_id, queue_id);
>
> - WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
> + if (m->cp_hqd_vmid == 0)
> + WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);
> +
> + switch (reset_type) {
> + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
> + type = DRAIN_PIPE;
> + break;
> + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
> + type = RESET_WAVES;
> + break;
> + default:
> + type = DRAIN_PIPE;
> + break;
> + }
> +
> + /* Workaround: If IQ timer is active and the wait time is close to or
> + * equal to 0, dequeueing is not safe. Wait until either the wait time
> + * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
> + * cleared before continuing. Also, ensure wait times are set to at
> + * least 0x3.
> + */
> + local_irq_save(flags);
> + preempt_disable();
> + retry = 5000; /* wait for 500 usecs at maximum */
> + while (true) {
> + temp = RREG32(mmCP_HQD_IQ_TIMER);
> + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
> + pr_debug("HW is processing IQ\n");
> + goto loop;
> + }
> + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
> + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
> + == 3) /* SEM-rearm is safe */
> + break;
> + /* Wait time 3 is safe for CP, but our MMIO read/write
> + * time is close to 1 microsecond, so check for 10 to
> + * leave more buffer room
> + */
> + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
> + >= 10)
> + break;
> + pr_debug("IQ timer is active\n");
> + } else
> + break;
> +loop:
> + if (!retry) {
> + pr_err("CP HQD IQ timer status time out\n");
> + break;
> + }
> + ndelay(100);
> + --retry;
> + }
> + retry = 1000;
> + while (true) {
> + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
> + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
> + break;
> + pr_debug("Dequeue request is pending\n");
>
> + if (!retry) {
> + pr_err("CP HQD dequeue request time out\n");
> + break;
> + }
> + ndelay(100);
> + --retry;
> + }
> + local_irq_restore(flags);
> + preempt_enable();
> +
> + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
> +
> + end_jiffies = (utimeout * HZ / 1000) + jiffies;
> while (true) {
> temp = RREG32(mmCP_HQD_ACTIVE);
> - if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
> + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
> break;
> - if (timeout <= 0) {
> - pr_err("kfd: cp queue preemption time out.\n");
> + if (time_after(jiffies, end_jiffies)) {
> + pr_err("cp queue preemption time out.\n");
> release_queue(kgd);
> return -ETIME;
> }
> - msleep(20);
> - timeout -= 20;
> + usleep_range(500, 1000);
> }
>
> release_queue(kgd);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 5dac29d..3891fe5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -268,8 +268,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
> pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
> q->pipe, q->queue);
>
> - retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
> - q->queue, (uint32_t __user *) q->properties.write_ptr);
> + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
> + q->process->mm);
> if (retval)
> goto out_uninit_mqd;
>
> @@ -585,8 +585,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
> if (retval)
> goto out_deallocate_sdma_queue;
>
> - retval = mqd->load_mqd(mqd, q->mqd, 0,
> - 0, NULL);
> + retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
> if (retval)
> goto out_uninit_mqd;
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> index 0e4d4a9..681b639 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> @@ -143,7 +143,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
> kq->queue->pipe = KFD_CIK_HIQ_PIPE;
> kq->queue->queue = KFD_CIK_HIQ_QUEUE;
> kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe,
> - kq->queue->queue, NULL);
> + kq->queue->queue, &kq->queue->properties,
> + NULL);
> } else {
> /* allocate fence for DIQ */
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
> index 213a71e..1f3a6ba 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
> @@ -67,7 +67,8 @@ struct mqd_manager {
>
> int (*load_mqd)(struct mqd_manager *mm, void *mqd,
> uint32_t pipe_id, uint32_t queue_id,
> - uint32_t __user *wptr);
> + struct queue_properties *p,
> + struct mm_struct *mms);
>
> int (*update_mqd)(struct mqd_manager *mm, void *mqd,
> struct queue_properties *q);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> index 7e0ec6b..44ffd23 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> @@ -144,15 +144,21 @@ static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
> }
>
> static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
> - uint32_t queue_id, uint32_t __user *wptr)
> + uint32_t queue_id, struct queue_properties *p,
> + struct mm_struct *mms)
> {
> - return mm->dev->kfd2kgd->hqd_load
> - (mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
> + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
> + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
> + uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
> +
> + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
> + (uint32_t __user *)p->write_ptr,
> + wptr_shift, wptr_mask, mms);
> }
>
> static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
> - uint32_t pipe_id, uint32_t queue_id,
> - uint32_t __user *wptr)
> + uint32_t pipe_id, uint32_t queue_id,
> + struct queue_properties *p, struct mm_struct *mms)
> {
> return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
> }
> @@ -176,20 +182,17 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
> m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
> m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
> m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
> - m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
> - DOORBELL_OFFSET(q->doorbell_off);
> + m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off);
>
> m->cp_hqd_vmid = q->vmid;
>
> if (q->format == KFD_QUEUE_FORMAT_AQL)
> m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
>
> - m->cp_hqd_active = 0;
> q->is_active = false;
> if (q->queue_size > 0 &&
> q->queue_address != 0 &&
> q->queue_percent > 0) {
> - m->cp_hqd_active = 1;
> q->is_active = true;
> }
>
> @@ -239,7 +242,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
> unsigned int timeout, uint32_t pipe_id,
> uint32_t queue_id)
> {
> - return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
> + return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout,
> pipe_id, queue_id);
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> index 98a930e..73cbfe1 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> @@ -94,10 +94,15 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
>
> static int load_mqd(struct mqd_manager *mm, void *mqd,
> uint32_t pipe_id, uint32_t queue_id,
> - uint32_t __user *wptr)
> + struct queue_properties *p, struct mm_struct *mms)
> {
> - return mm->dev->kfd2kgd->hqd_load
> - (mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
> + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
> + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
> + uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
> +
> + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
> + (uint32_t __user *)p->write_ptr,
> + wptr_shift, wptr_mask, mms);
> }
>
> static int __update_mqd(struct mqd_manager *mm, void *mqd,
> @@ -122,7 +127,6 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
> m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
>
> m->cp_hqd_pq_doorbell_control =
> - 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT |
> q->doorbell_off <<
> CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
> pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
> @@ -159,12 +163,10 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
> 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
> }
>
> - m->cp_hqd_active = 0;
> q->is_active = false;
> if (q->queue_size > 0 &&
> q->queue_address != 0 &&
> q->queue_percent > 0) {
> - m->cp_hqd_active = 1;
> q->is_active = true;
> }
>
> @@ -184,7 +186,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
> uint32_t queue_id)
> {
> return mm->dev->kfd2kgd->hqd_destroy
> - (mm->dev->kgd, type, timeout,
> + (mm->dev->kgd, mqd, type, timeout,
> pipe_id, queue_id);
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index f0d55cc0..30ce92c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -239,11 +239,6 @@ enum kfd_preempt_type_filter {
> KFD_PREEMPT_TYPE_FILTER_BY_PASID
> };
>
> -enum kfd_preempt_type {
> - KFD_PREEMPT_TYPE_WAVEFRONT,
> - KFD_PREEMPT_TYPE_WAVEFRONT_RESET
> -};
> -
> /**
> * enum kfd_queue_type
> *
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index 36f3766..ffafda0 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -41,6 +41,11 @@ struct kgd_dev;
>
> struct kgd_mem;
>
> +enum kfd_preempt_type {
> + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,
> + KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
> +};
> +
> enum kgd_memory_pool {
> KGD_POOL_SYSTEM_CACHEABLE = 1,
> KGD_POOL_SYSTEM_WRITECOMBINE = 2,
> @@ -153,14 +158,16 @@ struct kfd2kgd_calls {
> int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);
>
> int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
> - uint32_t queue_id, uint32_t __user *wptr);
> + uint32_t queue_id, uint32_t __user *wptr,
> + uint32_t wptr_shift, uint32_t wptr_mask,
> + struct mm_struct *mm);
>
> int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd);
>
> bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address,
> uint32_t pipe_id, uint32_t queue_id);
>
> - int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type,
> + int (*hqd_destroy)(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
> unsigned int timeout, uint32_t pipe_id,
> uint32_t queue_id);
>
> diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
> index a2ab6dc..695117a 100644
> --- a/drivers/gpu/drm/radeon/radeon_kfd.c
> +++ b/drivers/gpu/drm/radeon/radeon_kfd.c
> @@ -75,12 +75,14 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
> uint32_t hpd_size, uint64_t hpd_gpu_addr);
> static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
> static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
> - uint32_t queue_id, uint32_t __user *wptr);
> + uint32_t queue_id, uint32_t __user *wptr,
> + uint32_t wptr_shift, uint32_t wptr_mask,
> + struct mm_struct *mm);
> static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
> static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
> uint32_t pipe_id, uint32_t queue_id);
>
> -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
> +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
> unsigned int timeout, uint32_t pipe_id,
> uint32_t queue_id);
> static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
> @@ -482,7 +484,9 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
> }
>
> static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
> - uint32_t queue_id, uint32_t __user *wptr)
> + uint32_t queue_id, uint32_t __user *wptr,
> + uint32_t wptr_shift, uint32_t wptr_mask,
> + struct mm_struct *mm)
> {
> uint32_t wptr_shadow, is_wptr_shadow_valid;
> struct cik_mqd *m;
> @@ -636,7 +640,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
> return false;
> }
>
> -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
> +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
> unsigned int timeout, uint32_t pipe_id,
> uint32_t queue_id)
> {
> --
> 2.7.4
>
This patch is:
Acked-by: Oded Gabbay <oded.gabbay at gmail.com>
More information about the amd-gfx mailing list