[PATCH 05/26] drm/amdgpu: unify MQD programming sequence for kfd and amdgpu

Felix Kuehling felix.kuehling at amd.com
Tue Apr 11 20:35:47 UTC 2017


The amdgpu_amdkfd code you're modifying has changed a lot on the KFD
branch. I'm not sure it's worth fixing on the amdgpu branch. We'll
largely replace it when we upstream KFD changes.

I spotted an obvious bug in your patch, but it was also broken before.

See inline [FK].

On 17-04-06 02:21 AM, Andres Rodriguez wrote:
> Use the same gfx_*_mqd_commit function for kfd and amdgpu codepaths.
>
> This removes the last duplicates of this programming sequence.
>
> Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net>
> Acked-by: Christian König <christian.koenig at amd.com>
> Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 51 ++---------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 49 ++--------------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c             | 38 ++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h             |  5 +++
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c             | 51 ++++++++++++++++++++---
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h             |  5 +++
>  6 files changed, 99 insertions(+), 100 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> index 1a0a5f7..038b7ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> @@ -22,20 +22,21 @@
>  
>  #include <linux/fdtable.h>
>  #include <linux/uaccess.h>
>  #include <linux/firmware.h>
>  #include <drm/drmP.h>
>  #include "amdgpu.h"
>  #include "amdgpu_amdkfd.h"
>  #include "cikd.h"
>  #include "cik_sdma.h"
>  #include "amdgpu_ucode.h"
> +#include "gfx_v7_0.h"
>  #include "gca/gfx_7_2_d.h"
>  #include "gca/gfx_7_2_enum.h"
>  #include "gca/gfx_7_2_sh_mask.h"
>  #include "oss/oss_2_0_d.h"
>  #include "oss/oss_2_0_sh_mask.h"
>  #include "gmc/gmc_7_1_d.h"
>  #include "gmc/gmc_7_1_sh_mask.h"
>  #include "cik_structs.h"
>  
>  #define CIK_PIPE_PER_MEC	(4)
> @@ -302,69 +303,25 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
>  static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
>  			uint32_t queue_id, uint32_t __user *wptr)
>  {
>  	struct amdgpu_device *adev = get_amdgpu_device(kgd);
>  	uint32_t wptr_shadow, is_wptr_shadow_valid;
>  	struct cik_mqd *m;
>  
>  	m = get_mqd(mqd);
>  
>  	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
> -
> -	acquire_queue(kgd, pipe_id, queue_id);
> -	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
> -	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
> -	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
> -
> -	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
> -	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
> -	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
> -
> -	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
> -	WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
> -	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
> -
> -	WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
> -
> -	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
> -	WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
> -	WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
> -
> -	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
> -	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
> -	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
> -	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
> -
> -	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
> -	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
> -			m->cp_hqd_pq_rptr_report_addr_hi);
> -
> -	WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
> -
> -	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
> -	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
> -
> -	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
> -
> -	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
> -
> -	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
> -
> -	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
> -	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
> -
> -	WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
> -
>  	if (is_wptr_shadow_valid)
> -		WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
> +		m->cp_hqd_pq_wptr = wptr_shadow;
>  
> -	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
> +	acquire_queue(kgd, pipe_id, queue_id);
> +	gfx_v7_0_mqd_commit(adev, m);
>  	release_queue(kgd);
>  
>  	return 0;
>  }
>  
>  static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
>  {
>  	struct amdgpu_device *adev = get_amdgpu_device(kgd);
>  	struct cik_sdma_rlc_registers *m;
>  	uint32_t sdma_base_addr;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index 6697612..2ecef3d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -21,20 +21,21 @@
>   */
>  
>  #include <linux/module.h>
>  #include <linux/fdtable.h>
>  #include <linux/uaccess.h>
>  #include <linux/firmware.h>
>  #include <drm/drmP.h>
>  #include "amdgpu.h"
>  #include "amdgpu_amdkfd.h"
>  #include "amdgpu_ucode.h"
> +#include "gfx_v8_0.h"
>  #include "gca/gfx_8_0_sh_mask.h"
>  #include "gca/gfx_8_0_d.h"
>  #include "gca/gfx_8_0_enum.h"
>  #include "oss/oss_3_0_sh_mask.h"
>  #include "oss/oss_3_0_d.h"
>  #include "gmc/gmc_8_1_sh_mask.h"
>  #include "gmc/gmc_8_1_d.h"
>  #include "vi_structs.h"
>  #include "vid.h"
>  
> @@ -244,67 +245,25 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
>  static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
>  			uint32_t queue_id, uint32_t __user *wptr)
>  {
>  	struct vi_mqd *m;
>  	uint32_t shadow_wptr, valid_wptr;
>  	struct amdgpu_device *adev = get_amdgpu_device(kgd);
>  
>  	m = get_mqd(mqd);
>  
>  	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
> -	acquire_queue(kgd, pipe_id, queue_id);
> -
> -	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
> -	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
> -	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
> -
> -	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
> -	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
> -	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
> -	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
> -	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
> -	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
> -	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
> -	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
> -	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
> -			m->cp_hqd_pq_rptr_report_addr_hi);
> -
>  	if (valid_wptr > 0)

[FK] copy_from_user returns the number of bytes that could not be
copied, i.e. 0 on success. So this condition looks inverted: it is
true only when the copy failed.

> -		WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr);
> -
> -	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
> -	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
> -
> -	WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
> -	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
> -	WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
> -	WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
> -	WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
> -	WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
> -
> -	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
> -	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
> -	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
> -	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
> -	WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
> -	WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
> -	WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
> -
> -	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
> -
> -	WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
> -	WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
> -	WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
> -	WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
> -
> -	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
> +		m->cp_hqd_pq_wptr = valid_wptr;

[FK] This should be shadow_wptr. valid_wptr is the return value from
copy_from_user (the count of bytes not copied), not the write pointer
value read from user space.

>  
> +	acquire_queue(kgd, pipe_id, queue_id);
> +	gfx_v8_0_mqd_commit(adev, mqd);
>  	release_queue(kgd);
>  
>  	return 0;
>  }
>  
>  static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
>  {
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index c5041ab..4c89ac2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -3061,26 +3061,43 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
>  	}
>  
>  	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
>  	ring->wptr = 0;
>  	mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
>  	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
>  
>  	/* set the vmid for the queue */
>  	mqd->cp_hqd_vmid = 0;
>  
> +	/* defaults */
> +	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
> +	mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
> +	mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
> +	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
> +	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
> +	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
> +	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
> +	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
> +	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
> +	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
> +	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
> +	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
> +	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
> +	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
> +	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
> +	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
> +
>  	/* activate the queue */
>  	mqd->cp_hqd_active = 1;
>  }
>  
> -static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
> -			       struct cik_mqd *mqd)
> +int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
>  {
>  	u32 tmp;
>  
>  	/* disable wptr polling */
>  	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
>  	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
>  	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
>  
>  	/* program MQD field to HW */
>  	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
> @@ -3090,20 +3107,37 @@ static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
>  	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
>  	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
>  	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
>  	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
>  	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo);
>  	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi);
>  	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
>  	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
>  	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
>  
> +	WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
> +	WREG32(mmCP_HQD_IB_BASE_ADDR, mqd->cp_hqd_ib_base_addr_lo);
> +	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, mqd->cp_hqd_ib_base_addr_hi);
> +	WREG32(mmCP_HQD_IB_RPTR, mqd->cp_hqd_ib_rptr);
> +	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
> +	WREG32(mmCP_HQD_SEMA_CMD, mqd->cp_hqd_sema_cmd);
> +	WREG32(mmCP_HQD_MSG_TYPE, mqd->cp_hqd_msg_type);
> +	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, mqd->cp_hqd_atomic0_preop_lo);
> +	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, mqd->cp_hqd_atomic0_preop_hi);
> +	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, mqd->cp_hqd_atomic1_preop_lo);
> +	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, mqd->cp_hqd_atomic1_preop_hi);
> +	WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
> +	WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
> +	WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
> +	WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
> +	WREG32(mmCP_HQD_IQ_RPTR, mqd->cp_hqd_iq_rptr);
> +
>  	/* activate the HQD */
>  	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
>  
>  	return 0;
>  }
>  
>  static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
>  {
>  	int r;
>  	u64 mqd_gpu_addr;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
> index 2f5164c..6fb9c15 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
> @@ -22,11 +22,16 @@
>   */
>  
>  #ifndef __GFX_V7_0_H__
>  #define __GFX_V7_0_H__
>  
>  extern const struct amdgpu_ip_block_version gfx_v7_0_ip_block;
>  extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block;
>  extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block;
>  extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block;
>  
> +struct amdgpu_device;
> +struct cik_mqd;
> +
> +int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd);
> +
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 5fb4100..35690e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -4848,20 +4848,40 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
>  	mqd->cp_hqd_pq_wptr = ring->wptr;
>  	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
>  
>  	/* set the vmid for the queue */
>  	mqd->cp_hqd_vmid = 0;
>  
>  	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
>  	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
>  	mqd->cp_hqd_persistent_state = tmp;
>  
> +	/* defaults */
> +	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
> +	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
> +	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
> +	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
> +	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
> +	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
> +	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
> +	mqd->cp_hqd_ctx_save_control = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
> +	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
> +	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
> +	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
> +	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
> +	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
> +	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
> +	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
> +	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
> +	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
> +
> +
>  	/* activate the queue */
>  	mqd->cp_hqd_active = 1;
>  
>  	return 0;
>  }
>  
>  static int gfx_v8_0_mqd_deactivate(struct amdgpu_device *adev)
>  {
>  	int i;
>  
> @@ -4901,25 +4921,22 @@ static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable)
>  		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
>  	}
>  
>  	tmp = RREG32(mmCP_PQ_STATUS);
>  	tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
>  	WREG32(mmCP_PQ_STATUS, tmp);
>  
>  	adev->gfx.doorbell_enabled = true;
>  }
>  
> -static int gfx_v8_0_mqd_commit(struct amdgpu_ring *ring)
> +int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
>  {
> -	struct amdgpu_device *adev = ring->adev;
> -	struct vi_mqd *mqd = ring->mqd_ptr;
> -
>  	/* disable wptr polling */
>  	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
>  
>  	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
>  	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
>  
>  	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
>  	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
>  
>  	/* enable doorbell? */
> @@ -4952,20 +4969,42 @@ static int gfx_v8_0_mqd_commit(struct amdgpu_ring *ring)
>  
>  	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
>  	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
>  	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
>  
>  	/* enable the doorbell if requested */
>  	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
>  
>  	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
>  	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
> +	WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
> +	WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
> +
> +	/* set the HQD priority */
> +	WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
> +	WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
> +	WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
> +
> +	/* set cwsr save area */
> +	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, mqd->cp_hqd_ctx_save_base_addr_lo);
> +	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, mqd->cp_hqd_ctx_save_base_addr_hi);
> +	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, mqd->cp_hqd_ctx_save_control);
> +	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, mqd->cp_hqd_cntl_stack_offset);
> +	WREG32(mmCP_HQD_CNTL_STACK_SIZE, mqd->cp_hqd_cntl_stack_size);
> +	WREG32(mmCP_HQD_WG_STATE_OFFSET, mqd->cp_hqd_wg_state_offset);
> +	WREG32(mmCP_HQD_CTX_SAVE_SIZE, mqd->cp_hqd_ctx_save_size);
> +
> +	WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
> +	WREG32(mmCP_HQD_EOP_EVENTS, mqd->cp_hqd_eop_done_events);
> +	WREG32(mmCP_HQD_ERROR, mqd->cp_hqd_error);
> +	WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
> +	WREG32(mmCP_HQD_EOP_DONES, mqd->cp_hqd_eop_dones);
>  
>  	/* set the vmid for the queue */
>  	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
>  
>  	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
>  
>  	/* activate the queue */
>  	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
>  
>  	return 0;
> @@ -4990,34 +5029,34 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
>  		amdgpu_ring_clear_ring(ring);
>  
>  		mutex_lock(&adev->srbm_mutex);
>  		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
>  		r = gfx_v8_0_mqd_deactivate(adev);
>  		if (r) {
>  			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
>  			goto out_unlock;
>  		}
>  		gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
> -		gfx_v8_0_mqd_commit(ring);
> +		gfx_v8_0_mqd_commit(adev, mqd);
>  		vi_srbm_select(adev, 0, 0, 0, 0);
>  		mutex_unlock(&adev->srbm_mutex);
>  	} else {
>  		mutex_lock(&adev->srbm_mutex);
>  		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
>  		gfx_v8_0_mqd_init(ring);
>  		r = gfx_v8_0_mqd_deactivate(adev);
>  		if (r) {
>  			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
>  			goto out_unlock;
>  		}
>  		gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
> -		gfx_v8_0_mqd_commit(ring);
> +		gfx_v8_0_mqd_commit(adev, mqd);
>  		vi_srbm_select(adev, 0, 0, 0, 0);
>  		mutex_unlock(&adev->srbm_mutex);
>  
>  		if (adev->gfx.mec.mqd_backup[mqd_idx])
>  			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
>  	}
>  
>  	return r;
>  
>  out_unlock:
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
> index 788cc3a..ec3f11f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
> @@ -20,11 +20,16 @@
>   * OTHER DEALINGS IN THE SOFTWARE.
>   *
>   */
>  
>  #ifndef __GFX_V8_0_H__
>  #define __GFX_V8_0_H__
>  
>  extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block;
>  extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block;
>  
> +struct amdgpu_device;
> +struct vi_mqd;
> +
> +int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd);
> +
>  #endif



More information about the amd-gfx mailing list