[PATCH 3/4] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v4)

Lazar, Lijo lijo.lazar at amd.com
Fri Nov 3 06:39:55 UTC 2023



On 11/2/2023 8:34 PM, Victor Lu wrote:
> amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0.
> 
> Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC
> and amdgpu_device_xcc_wreg/rreg to to use the new xcc_id parameter.
> 
> Using amdgpu_sriov_runtime to determine whether to access via kiq or
> RLC is sufficient for now.
> 
> v4: avoid using amdgpu_sriov_w/rreg
> 
> v3: use W/RREG32_XCC to handle non-kiq case
> 
> v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters
>      of amdgpu_device_wreg/rreg
> 
> Signed-off-by: Victor Lu <victorchengchi.lu at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           | 13 ++-
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |  2 +-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 91 ++++++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       |  8 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h       |  4 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      |  4 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h      |  4 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c       |  8 +-
>   9 files changed, 118 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 43c579f5a95e..e8dc75a3ff44 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1162,11 +1162,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>   			    uint32_t reg, uint32_t acc_flags);
>   u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
>   				    u64 reg_addr);
> +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
> +				uint32_t reg, uint32_t acc_flags,
> +				uint32_t xcc_id);
>   void amdgpu_device_wreg(struct amdgpu_device *adev,
>   			uint32_t reg, uint32_t v,
>   			uint32_t acc_flags);
>   void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
>   				     u64 reg_addr, u32 reg_data);
> +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
> +			    uint32_t reg, uint32_t v,
> +			    uint32_t acc_flags,
> +			    uint32_t xcc_id);
>   void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
>   			     uint32_t reg, uint32_t v, uint32_t xcc_id);
>   void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
> @@ -1207,8 +1214,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
>   #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
>   #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
>   
> -#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
> -#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
> +#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
> +#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)
>   
>   #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
>   #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
> @@ -1218,6 +1225,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
>   #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
>   #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
>   #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
> +#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst)
> +#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst)
>   #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
>   #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
>   #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> index 490c8f5ddb60..80309d39737a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> @@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
>   	hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI);
>   
>   	for (reg = hqd_base; reg <= hqd_end; reg++)
> -		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
> +		WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
>   
>   
>   	/* Activate doorbell logic before triggering WPTR poll. */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 51011e8ee90d..9285789b3a42 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
>   
>   	for (reg = hqd_base;
>   	     reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
> -		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
> +		WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst);
>   
>   
>   	/* Activate doorbell logic before triggering WPTR poll. */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 9a11dd1b686e..c1a439bfde14 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -74,6 +74,7 @@
>   #include "amdgpu_pmu.h"
>   #include "amdgpu_fru_eeprom.h"
>   #include "amdgpu_reset.h"
> +#include "amdgpu_virt.h"
>   
>   #include <linux/suspend.h>
>   #include <drm/task_barrier.h>
> @@ -473,7 +474,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>   		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
>   		    amdgpu_sriov_runtime(adev) &&
>   		    down_read_trylock(&adev->reset_domain->sem)) {
> -			ret = amdgpu_kiq_rreg(adev, reg);
> +			ret = amdgpu_kiq_rreg(adev, reg, 0);
>   			up_read(&adev->reset_domain->sem);
>   		} else {
>   			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
> @@ -510,6 +511,50 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
>   	BUG();
>   }
>   
> +
> +/**
> + * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
> + *
> + * @adev: amdgpu_device pointer
> + * @reg: dword aligned register offset
> + * @acc_flags: access flags which require special behavior
> + * @xcc_id: xcc accelerated compute core id
> + *
> + * Returns the 32 bit value from the offset specified.
> + */
> +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
> +				uint32_t reg, uint32_t acc_flags,
> +				uint32_t xcc_id)
> +{
> +	uint32_t ret, rlcg_flag;
> +
> +	if (amdgpu_device_skip_hw_access(adev))
> +		return 0;
> +
> +	if ((reg * 4) < adev->rmmio_size) {
> +		if (!amdgpu_sriov_runtime(adev) &&
> +		    adev->gfx.rlc.rlcg_reg_access_supported &&

Missed this in earlier review -

rlc.rlcg_reg_access_supported flag is set unconditionally in patch 1. 
Then baremetal path for WREG32_XCC will take this route which shouldn't 
be the case. I think original usage was through amdgpu_sriov_wreg() 
which has the below guard.

         ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && 
adev->gfx.rlc.rlcg_reg_access_supported) ? \
          amdgpu_sriov_wreg(adev, reg, value, flag, hwip, inst) : \

One way is to set the rlcg_reg_access_supported only for 
amdgpu_sriov_vf(adev) case. Baremetal doesn't need to go through this.

> +		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
> +							 GC_HWIP, false,
> +							 &rlcg_flag)) {
> +			amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
> +		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
> +		    amdgpu_sriov_runtime(adev) &&
> +		    down_read_trylock(&adev->reset_domain->sem)) {
> +			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
> +			up_read(&adev->reset_domain->sem);
> +		} else {
> +			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
> +		}
> +	} else {
> +		ret = adev->pcie_rreg(adev, reg * 4);
> +	}
> +
> +	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

This trace is for the original device_rreg. Please remove this here, and 
also from wreg.

Thanks,
Lijo

> +
> +	return ret;
> +}
> +
>   /*
>    * MMIO register write with bytes helper functions
>    * @offset:bytes offset from MMIO start
> @@ -557,7 +602,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
>   		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
>   		    amdgpu_sriov_runtime(adev) &&
>   		    down_read_trylock(&adev->reset_domain->sem)) {
> -			amdgpu_kiq_wreg(adev, reg, v);
> +			amdgpu_kiq_wreg(adev, reg, v, 0);
>   			up_read(&adev->reset_domain->sem);
>   		} else {
>   			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
> @@ -598,6 +643,48 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
>   	}
>   }
>   
> +/**
> + * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
> + *
> + * @adev: amdgpu_device pointer
> + * @reg: dword aligned register offset
> + * @v: 32 bit value to write to the register
> + * @acc_flags: access flags which require special behavior
> + * @xcc_id: xcc accelerated compute core id
> + *
> + * Writes the value specified to the offset specified.
> + */
> +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
> +			uint32_t reg, uint32_t v,
> +			uint32_t acc_flags, uint32_t xcc_id)
> +{
> +	uint32_t rlcg_flag;
> +
> +	if (amdgpu_device_skip_hw_access(adev))
> +		return;
> +
> +	if ((reg * 4) < adev->rmmio_size) {
> +		if (!amdgpu_sriov_runtime(adev) &&
> +		    adev->gfx.rlc.rlcg_reg_access_supported &&
> +		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
> +							 GC_HWIP, true,
> +							 &rlcg_flag)) {
> +			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
> +		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
> +		    amdgpu_sriov_runtime(adev) &&
> +		    down_read_trylock(&adev->reset_domain->sem)) {
> +			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
> +			up_read(&adev->reset_domain->sem);
> +		} else {
> +			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
> +		}
> +	} else {
> +		adev->pcie_wreg(adev, reg * 4, v);
> +	}
> +
> +	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
> +}
> +
>   /**
>    * amdgpu_device_indirect_rreg - read an indirect register
>    *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index c92e0aba69e1..60ae4bfdc7f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -929,12 +929,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
>   		func(adev, ras_error_status, i);
>   }
>   
> -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
> +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
>   {
>   	signed long r, cnt = 0;
>   	unsigned long flags;
>   	uint32_t seq, reg_val_offs = 0, value = 0;
> -	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
>   	struct amdgpu_ring *ring = &kiq->ring;
>   
>   	if (amdgpu_device_skip_hw_access(adev))
> @@ -997,12 +997,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>   	return ~0;
>   }
>   
> -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
> +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
>   {
>   	signed long r, cnt = 0;
>   	unsigned long flags;
>   	uint32_t seq;
> -	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
>   	struct amdgpu_ring *ring = &kiq->ring;
>   
>   	BUG_ON(!ring->funcs->emit_wreg);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 7088c5015675..f23bafec71c5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -521,8 +521,8 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
>   int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
>   				  struct amdgpu_irq_src *source,
>   				  struct amdgpu_iv_entry *entry);
> -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
> -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
> +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);
> +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);
>   int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
>   void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index e179f022c428..82762c61d3ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -943,7 +943,7 @@ void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
>   	}
>   }
>   
> -static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
> +bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
>   						 u32 acc_flags, u32 hwip,
>   						 bool write, u32 *rlcg_flag)
>   {
> @@ -976,7 +976,7 @@ static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
>   	return ret;
>   }
>   
> -static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
> +u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
>   {
>   	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
>   	uint32_t timeout = 50000;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index bb436d41b4ca..ca13ae70e87d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -366,4 +366,8 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
>   bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev,
>   			uint32_t ucode_id);
>   void amdgpu_virt_post_reset(struct amdgpu_device *adev);
> +bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
> +					  u32 acc_flags, u32 hwip,
> +					  bool write, u32 *rlcg_flag);
> +u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id);
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index ce2a9876369e..9afd43276ab2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -2738,16 +2738,16 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
>   
>   	switch (state) {
>   	case AMDGPU_IRQ_STATE_DISABLE:
> -		mec_int_cntl = RREG32(mec_int_cntl_reg);
> +		mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id);
>   		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
>   					     TIME_STAMP_INT_ENABLE, 0);
> -		WREG32(mec_int_cntl_reg, mec_int_cntl);
> +		WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
>   		break;
>   	case AMDGPU_IRQ_STATE_ENABLE:
> -		mec_int_cntl = RREG32(mec_int_cntl_reg);
> +		mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id);
>   		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
>   					     TIME_STAMP_INT_ENABLE, 1);
> -		WREG32(mec_int_cntl_reg, mec_int_cntl);
> +		WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
>   		break;
>   	default:
>   		break;


More information about the amd-gfx mailing list