[PATCH 3/5] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v3)

Lazar, Lijo lijo.lazar at amd.com
Fri Oct 27 09:55:48 UTC 2023



On 10/26/2023 2:22 AM, Victor Lu wrote:
> amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0.
> 
> Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC
> and amdgpu_device_xcc_wreg/rreg to to use the new xcc_id parameter.
> 
> v3: use W/RREG32_XCC to handle non-kiq case
> 
> v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters
>      of amdgpu_device_wreg/rreg
> 
> Signed-off-by: Victor Lu <victorchengchi.lu at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h           | 13 ++-
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |  2 +-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 84 ++++++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       |  8 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h       |  4 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      |  4 +-
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c       |  8 +-
>   8 files changed, 107 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index a2e8c2b60857..09989ebb5da3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1168,11 +1168,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>   			    uint32_t reg, uint32_t acc_flags);
>   u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
>   				    u64 reg_addr);
> +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
> +				uint32_t reg, uint32_t acc_flags,
> +				uint32_t xcc_id);
>   void amdgpu_device_wreg(struct amdgpu_device *adev,
>   			uint32_t reg, uint32_t v,
>   			uint32_t acc_flags);
>   void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
>   				     u64 reg_addr, u32 reg_data);
> +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
> +			    uint32_t reg, uint32_t v,
> +			    uint32_t acc_flags,
> +			    uint32_t xcc_id);
>   void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
>   			     uint32_t reg, uint32_t v, uint32_t xcc_id);
>   void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
> @@ -1213,8 +1220,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
>   #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
>   #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
>   
> -#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))
> -#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))
> +#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)
> +#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)
>   
>   #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
>   #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
> @@ -1224,6 +1231,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
>   #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)
>   #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
>   #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
> +#define RREG32_XCC(reg, flag, inst) amdgpu_device_xcc_rreg(adev, (reg), flag, inst)
> +#define WREG32_XCC(reg, v, flag, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), flag, inst)
>   #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
>   #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))
>   #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> index 490c8f5ddb60..c94df54e2657 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
> @@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
>   	hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI);
>   
>   	for (reg = hqd_base; reg <= hqd_end; reg++)
> -		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
> +		WREG32_XCC(reg, mqd_hqd[reg - hqd_base], AMDGPU_REGS_RLC, inst);
>   
>   
>   	/* Activate doorbell logic before triggering WPTR poll. */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 51011e8ee90d..47c8c334c779 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,
>   
>   	for (reg = hqd_base;
>   	     reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)
> -		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
> +		WREG32_XCC(reg, mqd_hqd[reg - hqd_base], AMDGPU_REGS_RLC, inst);
>   
>   
>   	/* Activate doorbell logic before triggering WPTR poll. */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 7ec32b44df05..9a35088b990a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -471,7 +471,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>   		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
>   		    amdgpu_sriov_runtime(adev) &&
>   		    down_read_trylock(&adev->reset_domain->sem)) {
> -			ret = amdgpu_kiq_rreg(adev, reg);
> +			ret = amdgpu_kiq_rreg(adev, reg, 0);
>   			up_read(&adev->reset_domain->sem);
>   		} else {
>   			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
> @@ -508,6 +508,48 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
>   	BUG();
>   }
>   
> +
> +/**
> + * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register
> + *
> + * @adev: amdgpu_device pointer
> + * @reg: dword aligned register offset
> + * @acc_flags: access flags which require special behavior
> + * @xcc_id: xcc accelerated compute core id
> + *
> + * Returns the 32 bit value from the offset specified.
> + */
> +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
> +				uint32_t reg, uint32_t acc_flags,
> +				uint32_t xcc_id)
> +{
> +	uint32_t ret;
> +
> +	if (amdgpu_device_skip_hw_access(adev))
> +		return 0;
> +
> +	if ((reg * 4) < adev->rmmio_size) {
> +		if ((acc_flags & AMDGPU_REGS_RLC) &&
> +		    (!amdgpu_sriov_runtime(adev)) &&
> +		    adev->gfx.rlc.rlcg_reg_access_supported) {
> +			amdgpu_sriov_rreg(adev, reg, acc_flags, GC_HWIP, xcc_id);

Since amdgpu_device_xcc_rreg is a new interface and used specifically 
for GFX v9.4.3, suggest to merge all cases of access inside this itself 
rather than going to sriov_rreg. You may be able to simplify it then 
since there is no need to consider any legacy style accesses.

List the specific cases for GFX v9.4.3 like KIQ access, RLC access, 
direct etc. and handle only those. Any legacy logic can be dropped then 
since WREG32_XCC is not used for any common/legacy paths.

> +		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
> +		    amdgpu_sriov_runtime(adev) &&
> +		    down_read_trylock(&adev->reset_domain->sem)) {
> +			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
> +			up_read(&adev->reset_domain->sem);
> +		} else {
> +			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
> +		}
> +	} else {
> +		ret = adev->pcie_rreg(adev, reg * 4);
> +	}
> +
> +	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
> +
> +	return ret;
> +}
> +
>   /*
>    * MMIO register write with bytes helper functions
>    * @offset:bytes offset from MMIO start
> @@ -555,7 +597,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
>   		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
>   		    amdgpu_sriov_runtime(adev) &&
>   		    down_read_trylock(&adev->reset_domain->sem)) {
> -			amdgpu_kiq_wreg(adev, reg, v);
> +			amdgpu_kiq_wreg(adev, reg, v, 0);
>   			up_read(&adev->reset_domain->sem);
>   		} else {
>   			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
> @@ -596,6 +638,44 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
>   	}
>   }
>   
> +/**
> + * amdgpu_device_wreg - write to a memory mapped IO or indirect register with specific XCC
> + *
> + * @adev: amdgpu_device pointer
> + * @reg: dword aligned register offset
> + * @v: 32 bit value to write to the register
> + * @acc_flags: access flags which require special behavior
> + * @xcc_id: xcc accelerated compute core id
> + *
> + * Writes the value specified to the offset specified.
> + */
> +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
> +			uint32_t reg, uint32_t v,
> +			uint32_t acc_flags, uint32_t xcc_id)
> +{
> +	if (amdgpu_device_skip_hw_access(adev))
> +		return;
> +
> +	if ((reg * 4) < adev->rmmio_size) {
> +		if ((acc_flags & AMDGPU_REGS_RLC) &&
> +		    (!amdgpu_sriov_runtime(adev)) &&
> +		    adev->gfx.rlc.rlcg_reg_access_supported) {
> +			amdgpu_sriov_wreg(adev, reg, v, acc_flags, GC_HWIP, xcc_id);
> +		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
> +		    amdgpu_sriov_runtime(adev) &&
> +		    down_read_trylock(&adev->reset_domain->sem)) {
> +			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
> +			up_read(&adev->reset_domain->sem);
> +		} else {
> +			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
> +		}
> +	} else {
> +		adev->pcie_wreg(adev, reg * 4, v);
> +	}
> +
> +	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
> +}
> +
>   /**
>    * amdgpu_device_indirect_rreg - read an indirect register
>    *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index c92e0aba69e1..60ae4bfdc7f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -929,12 +929,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
>   		func(adev, ras_error_status, i);
>   }
>   
> -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
> +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
>   {
>   	signed long r, cnt = 0;
>   	unsigned long flags;
>   	uint32_t seq, reg_val_offs = 0, value = 0;
> -	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
>   	struct amdgpu_ring *ring = &kiq->ring;
>   
>   	if (amdgpu_device_skip_hw_access(adev))
> @@ -997,12 +997,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>   	return ~0;
>   }
>   
> -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
> +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
>   {
>   	signed long r, cnt = 0;
>   	unsigned long flags;
>   	uint32_t seq;
> -	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
> +	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
>   	struct amdgpu_ring *ring = &kiq->ring;
>   
>   	BUG_ON(!ring->funcs->emit_wreg);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 7088c5015675..f23bafec71c5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -521,8 +521,8 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
>   int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
>   				  struct amdgpu_irq_src *source,
>   				  struct amdgpu_iv_entry *entry);
> -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
> -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
> +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);
> +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);
>   int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
>   void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index a0aa624f5a92..c6c8f4fed0c1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -1076,7 +1076,7 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev,
>   	if (acc_flags & AMDGPU_REGS_NO_KIQ)
>   		WREG32_NO_KIQ(offset, value);
>   	else
> -		WREG32(offset, value);
> +		WREG32_XCC(offset, value, acc_flags, xcc_id);
#define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)

In this definition access flags are explicitly set to 0. I guess same 
should be applied here.
>   }
>   
>   u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
> @@ -1091,5 +1091,5 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
>   	if (acc_flags & AMDGPU_REGS_NO_KIQ)
>   		return RREG32_NO_KIQ(offset);
>   	else
> -		return RREG32(offset);
> +		return RREG32_XCC(offset, acc_flags, xcc_id);

Same case as with WREG32.
#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0)

Flags value needs to be 0 then.

If you are merging the valid accesses inside amdgpu_device_xcc_wreg, 
then both of these modifications can be dropped.

>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index 386804f2e95c..b24db7974311 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -2739,16 +2739,16 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(
>   
>   	switch (state) {
>   	case AMDGPU_IRQ_STATE_DISABLE:
> -		mec_int_cntl = RREG32(mec_int_cntl_reg);
> +		mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, AMDGPU_REGS_RLC, xcc_id);
Below is the definition of RREG32. It passes 0 as access flags.
#define RREG32(reg) amdgpu_device_rreg(adev, (reg), 0)

With the change, it's passing AMDGPU_REGS_RLC. Is this specific for GFX 
9.4.3? If so, it should be a separate patch. Same goes for other changes 
below.

Thanks,
Lijo
>   		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
>   					     TIME_STAMP_INT_ENABLE, 0);
> -		WREG32(mec_int_cntl_reg, mec_int_cntl);
> +		WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, AMDGPU_REGS_RLC, xcc_id);
>   		break;
>   	case AMDGPU_IRQ_STATE_ENABLE:
> -		mec_int_cntl = RREG32(mec_int_cntl_reg);
> +		mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, AMDGPU_REGS_RLC, xcc_id);
>   		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
>   					     TIME_STAMP_INT_ENABLE, 1);
> -		WREG32(mec_int_cntl_reg, mec_int_cntl);
> +		WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, AMDGPU_REGS_RLC, xcc_id);
>   		break;
>   	default:
>   		break;


More information about the amd-gfx mailing list