<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Arial;font-size:10pt;color:#0000FF;margin:5pt;font-style:normal;font-weight:normal;text-decoration:none;" align="Left">
[AMD Official Use Only - General]<br>
</p>
<br>
<div>
<div style="font-family: Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);" class="elementToProof">
Yes, I have tested this.</div>
<div style="font-family: Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);" class="elementToProof">
<br>
</div>
<div style="font-family: Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);" class="elementToProof ContentPasted0 ContentPasted1 ContentPasted2 ContentPasted3 ContentPasted4 ContentPasted5 ContentPasted6">
The (!amdgpu_sriov_runtime(adev) && adev->gfx.rlc.rlcg_reg_access_supported) conditions in amdgpu_device_xcc_wreg will guarantee it will go into amdgpu_virt_rlcg_reg_rw instead of looping back to WREG32_XCC.</div>
<div style="font-family: Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);" class="elementToProof ContentPasted0 ContentPasted1 ContentPasted2 ContentPasted3">
<br>
</div>
<div style="font-family: Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);" class="elementToProof ContentPasted0 ContentPasted1 ContentPasted2">
Thanks,</div>
<div style="font-family: Calibri, Helvetica, sans-serif; font-size: 11pt; color: rgb(0, 0, 0);" class="elementToProof ContentPasted0 ContentPasted1 ContentPasted2">
Victor</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> Lazar, Lijo <Lijo.Lazar@amd.com><br>
<b>Sent:</b> Thursday, October 26, 2023 4:16 AM<br>
<b>To:</b> Lu, Victor Cheng Chi (Victor) <VictorChengChi.Lu@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org><br>
<b>Cc:</b> Ming, Davis <Davis.Ming@amd.com><br>
<b>Subject:</b> Re: [PATCH 3/5] drm/amdgpu: Use correct KIQ MEC engine for gfx9.4.3 (v3)</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText"><br>
<br>
On 10/26/2023 2:22 AM, Victor Lu wrote:<br>
> amdgpu_kiq_wreg/rreg is hardcoded to use MEC engine 0.<br>
> <br>
> Add an xcc_id parameter to amdgpu_kiq_wreg/rreg, define W/RREG32_XCC<br>
> and amdgpu_device_xcc_wreg/rreg to to use the new xcc_id parameter.<br>
> <br>
> v3: use W/RREG32_XCC to handle non-kiq case<br>
> <br>
> v2: define amdgpu_device_xcc_wreg/rreg instead of changing parameters<br>
> of amdgpu_device_wreg/rreg<br>
> <br>
> Signed-off-by: Victor Lu <victorchengchi.lu@amd.com><br>
> ---<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 13 ++-<br>
> .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 2 +-<br>
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 2 +-<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 84 ++++++++++++++++++-<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +-<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +-<br>
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 +-<br>
> drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 8 +-<br>
> 8 files changed, 107 insertions(+), 18 deletions(-)<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> index a2e8c2b60857..09989ebb5da3 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> @@ -1168,11 +1168,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,<br>
> uint32_t reg, uint32_t acc_flags);<br>
> u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,<br>
> u64 reg_addr);<br>
> +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,<br>
> + uint32_t reg, uint32_t acc_flags,<br>
> + uint32_t xcc_id);<br>
> void amdgpu_device_wreg(struct amdgpu_device *adev,<br>
> uint32_t reg, uint32_t v,<br>
> uint32_t acc_flags);<br>
> void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,<br>
> u64 reg_addr, u32 reg_data);<br>
> +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,<br>
> + uint32_t reg, uint32_t v,<br>
> + uint32_t acc_flags,<br>
> + uint32_t xcc_id);<br>
> void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,<br>
> uint32_t reg, uint32_t v, uint32_t xcc_id);<br>
> void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);<br>
> @@ -1213,8 +1220,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);<br>
> #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)<br>
> #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)<br>
> <br>
> -#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg))<br>
> -#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v))<br>
> +#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0)<br>
> +#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0)<br>
> <br>
> #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))<br>
> #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))<br>
> @@ -1224,6 +1231,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);<br>
> #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0)<br>
> #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)<br>
> #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)<br>
> +#define RREG32_XCC(reg, flag, inst) amdgpu_device_xcc_rreg(adev, (reg), flag, inst)<br>
> +#define WREG32_XCC(reg, v, flag, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), flag, inst)<br>
> #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))<br>
> #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v))<br>
> #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg))<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c<br>
> index 490c8f5ddb60..c94df54e2657 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c<br>
> @@ -300,7 +300,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,<br>
> hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI);<br>
> <br>
> for (reg = hqd_base; reg <= hqd_end; reg++)<br>
> - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);<br>
> + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], AMDGPU_REGS_RLC, inst);<br>
> <br>
> <br>
> /* Activate doorbell logic before triggering WPTR poll. */<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c<br>
> index 51011e8ee90d..47c8c334c779 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c<br>
> @@ -239,7 +239,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd,<br>
> <br>
> for (reg = hqd_base;<br>
> reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++)<br>
> - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);<br>
> + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], AMDGPU_REGS_RLC, inst);<br>
> <br>
> <br>
> /* Activate doorbell logic before triggering WPTR poll. */<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> index 7ec32b44df05..9a35088b990a 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c<br>
> @@ -471,7 +471,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,<br>
> if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&<br>
> amdgpu_sriov_runtime(adev) &&<br>
> down_read_trylock(&adev->reset_domain->sem)) {<br>
> - ret = amdgpu_kiq_rreg(adev, reg);<br>
> + ret = amdgpu_kiq_rreg(adev, reg, 0);<br>
> up_read(&adev->reset_domain->sem);<br>
> } else {<br>
> ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));<br>
> @@ -508,6 +508,48 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)<br>
> BUG();<br>
> }<br>
> <br>
> +<br>
> +/**<br>
> + * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register<br>
> + *<br>
> + * @adev: amdgpu_device pointer<br>
> + * @reg: dword aligned register offset<br>
> + * @acc_flags: access flags which require special behavior<br>
> + * @xcc_id: xcc accelerated compute core id<br>
> + *<br>
> + * Returns the 32 bit value from the offset specified.<br>
> + */<br>
> +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,<br>
> + uint32_t reg, uint32_t acc_flags,<br>
> + uint32_t xcc_id)<br>
> +{<br>
> + uint32_t ret;<br>
> +<br>
> + if (amdgpu_device_skip_hw_access(adev))<br>
> + return 0;<br>
> +<br>
> + if ((reg * 4) < adev->rmmio_size) {<br>
> + if ((acc_flags & AMDGPU_REGS_RLC) &&<br>
> + (!amdgpu_sriov_runtime(adev)) &&<br>
> + adev->gfx.rlc.rlcg_reg_access_supported) {<br>
> + amdgpu_sriov_rreg(adev, reg, acc_flags, GC_HWIP, xcc_id);<br>
> + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&<br>
> + amdgpu_sriov_runtime(adev) &&<br>
> + down_read_trylock(&adev->reset_domain->sem)) {<br>
> + ret = amdgpu_kiq_rreg(adev, reg, xcc_id);<br>
> + up_read(&adev->reset_domain->sem);<br>
> + } else {<br>
> + ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));<br>
> + }<br>
> + } else {<br>
> + ret = adev->pcie_rreg(adev, reg * 4);<br>
> + }<br>
> +<br>
> + trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);<br>
> +<br>
> + return ret;<br>
> +}<br>
> +<br>
> /*<br>
> * MMIO register write with bytes helper functions<br>
> * @offset:bytes offset from MMIO start<br>
> @@ -555,7 +597,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,<br>
> if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&<br>
> amdgpu_sriov_runtime(adev) &&<br>
> down_read_trylock(&adev->reset_domain->sem)) {<br>
> - amdgpu_kiq_wreg(adev, reg, v);<br>
> + amdgpu_kiq_wreg(adev, reg, v, 0);<br>
> up_read(&adev->reset_domain->sem);<br>
> } else {<br>
> writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));<br>
> @@ -596,6 +638,44 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,<br>
> }<br>
> }<br>
> <br>
> +/**<br>
> + * amdgpu_device_wreg - write to a memory mapped IO or indirect register with specific XCC<br>
> + *<br>
> + * @adev: amdgpu_device pointer<br>
> + * @reg: dword aligned register offset<br>
> + * @v: 32 bit value to write to the register<br>
> + * @acc_flags: access flags which require special behavior<br>
> + * @xcc_id: xcc accelerated compute core id<br>
> + *<br>
> + * Writes the value specified to the offset specified.<br>
> + */<br>
> +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,<br>
> + uint32_t reg, uint32_t v,<br>
> + uint32_t acc_flags, uint32_t xcc_id)<br>
> +{<br>
> + if (amdgpu_device_skip_hw_access(adev))<br>
> + return;<br>
> +<br>
> + if ((reg * 4) < adev->rmmio_size) {<br>
> + if ((acc_flags & AMDGPU_REGS_RLC) &&<br>
> + (!amdgpu_sriov_runtime(adev)) &&<br>
> + adev->gfx.rlc.rlcg_reg_access_supported) {<br>
> + amdgpu_sriov_wreg(adev, reg, v, acc_flags, GC_HWIP, xcc_id);<br>
<br>
<br>
I see this path<br>
<br>
WREG32_XCC -> amdgpu_device_xcc_wreg -> amdgpu_sriov_wreg -> WREG32_XCC<br>
<br>
Similar for rreg. I'm not able to work out the RLC/sriov runtime <br>
conditions. Is this tested before submitting?<br>
<br>
Thanks,<br>
Lijo<br>
> + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&<br>
> + amdgpu_sriov_runtime(adev) &&<br>
> + down_read_trylock(&adev->reset_domain->sem)) {<br>
> + amdgpu_kiq_wreg(adev, reg, v, xcc_id);<br>
> + up_read(&adev->reset_domain->sem);<br>
> + } else {<br>
> + writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));<br>
> + }<br>
> + } else {<br>
> + adev->pcie_wreg(adev, reg * 4, v);<br>
> + }<br>
> +<br>
> + trace_amdgpu_device_wreg(adev->pdev->device, reg, v);<br>
> +}<br>
> +<br>
> /**<br>
> * amdgpu_device_indirect_rreg - read an indirect register<br>
> *<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c<br>
> index c92e0aba69e1..60ae4bfdc7f5 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c<br>
> @@ -929,12 +929,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,<br>
> func(adev, ras_error_status, i);<br>
> }<br>
> <br>
> -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)<br>
> +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)<br>
> {<br>
> signed long r, cnt = 0;<br>
> unsigned long flags;<br>
> uint32_t seq, reg_val_offs = 0, value = 0;<br>
> - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];<br>
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];<br>
> struct amdgpu_ring *ring = &kiq->ring;<br>
> <br>
> if (amdgpu_device_skip_hw_access(adev))<br>
> @@ -997,12 +997,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)<br>
> return ~0;<br>
> }<br>
> <br>
> -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)<br>
> +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)<br>
> {<br>
> signed long r, cnt = 0;<br>
> unsigned long flags;<br>
> uint32_t seq;<br>
> - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];<br>
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];<br>
> struct amdgpu_ring *ring = &kiq->ring;<br>
> <br>
> BUG_ON(!ring->funcs->emit_wreg);<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h<br>
> index 7088c5015675..f23bafec71c5 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h<br>
> @@ -521,8 +521,8 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,<br>
> int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,<br>
> struct amdgpu_irq_src *source,<br>
> struct amdgpu_iv_entry *entry);<br>
> -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);<br>
> -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);<br>
> +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id);<br>
> +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id);<br>
> int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);<br>
> void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c<br>
> index a0aa624f5a92..c6c8f4fed0c1 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c<br>
> @@ -1076,7 +1076,7 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev,<br>
> if (acc_flags & AMDGPU_REGS_NO_KIQ)<br>
> WREG32_NO_KIQ(offset, value);<br>
> else<br>
> - WREG32(offset, value);<br>
> + WREG32_XCC(offset, value, acc_flags, xcc_id);<br>
> }<br>
> <br>
> u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,<br>
> @@ -1091,5 +1091,5 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,<br>
> if (acc_flags & AMDGPU_REGS_NO_KIQ)<br>
> return RREG32_NO_KIQ(offset);<br>
> else<br>
> - return RREG32(offset);<br>
> + return RREG32_XCC(offset, acc_flags, xcc_id);<br>
> }<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c<br>
> index 386804f2e95c..b24db7974311 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c<br>
> @@ -2739,16 +2739,16 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state(<br>
> <br>
> switch (state) {<br>
> case AMDGPU_IRQ_STATE_DISABLE:<br>
> - mec_int_cntl = RREG32(mec_int_cntl_reg);<br>
> + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, AMDGPU_REGS_RLC, xcc_id);<br>
> mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,<br>
> TIME_STAMP_INT_ENABLE, 0);<br>
> - WREG32(mec_int_cntl_reg, mec_int_cntl);<br>
> + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, AMDGPU_REGS_RLC, xcc_id);<br>
> break;<br>
> case AMDGPU_IRQ_STATE_ENABLE:<br>
> - mec_int_cntl = RREG32(mec_int_cntl_reg);<br>
> + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, AMDGPU_REGS_RLC, xcc_id);<br>
> mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,<br>
> TIME_STAMP_INT_ENABLE, 1);<br>
> - WREG32(mec_int_cntl_reg, mec_int_cntl);<br>
> + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, AMDGPU_REGS_RLC, xcc_id);<br>
> break;<br>
> default:<br>
> break;<br>
</div>
</span></font></div>
</div>
</body>
</html>