[PATCH] drm/amd/amdgpu: fix the inst passed to amdgpu_virt_rlcg_reg_rw

Wed May 22 08:05:40 UTC 2024

On 5/22/2024 1:11 PM, Zhao, Victor wrote:
> [AMD Official Use Only - AMD Internal Distribution Only]
> 
> Hi Lijo,
> 
> This patch alone does not work.
> Since amdgpu_virt_rlcg_reg_rw takes the logical xcc id in your approach, all the read/write calls need to be fixed to match.
> For example, WREG32_SOC15_OFFSET. There will be a bunch of places that need to be fixed.
> 

That definitely looks complicated. Using the physical index and passing
it to amdgpu_virt_rlcg_reg_rw is better. The patch below is -

	Reviewed-by: Lijo Lazar <lijo.lazar at amd.com>

Thanks,
Lijo

> Thanks,
> Victor
> 
> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar at amd.com>
> Sent: Wednesday, May 22, 2024 2:14 PM
> To: Zhao, Victor <Victor.Zhao at amd.com>; amd-gfx at lists.freedesktop.org
> Subject: RE: [PATCH] drm/amd/amdgpu: fix the inst passed to amdgpu_virt_rlcg_reg_rw
> 
> [AMD Official Use Only - AMD Internal Distribution Only]
> 
> Hi Victor,
> 
> Could you check if an approach like the attached one helps?
> 
> Thanks,
> Lijo
> -----Original Message-----
> From: Zhao, Victor <Victor.Zhao at amd.com>
> Sent: Wednesday, May 22, 2024 11:13 AM
> To: Zhao, Victor <Victor.Zhao at amd.com>; amd-gfx at lists.freedesktop.org; Lazar, Lijo <Lijo.Lazar at amd.com>
> Subject: RE: [PATCH] drm/amd/amdgpu: fix the inst passed to amdgpu_virt_rlcg_reg_rw
> 
> [AMD Official Use Only - AMD Internal Distribution Only]
> 
> Hi @Lazar, Lijo,
> 
> Can you help review this?
> 
> Thanks,
> Victor
> 
> -----Original Message-----
> From: Victor Zhao <Victor.Zhao at amd.com>
> Sent: Tuesday, May 21, 2024 12:08 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Lazar, Lijo <Lijo.Lazar at amd.com>; Zhao, Victor <Victor.Zhao at amd.com>
> Subject: [PATCH] drm/amd/amdgpu: fix the inst passed to amdgpu_virt_rlcg_reg_rw
> 
> The inst passed to amdgpu_virt_rlcg_reg_rw should be the physical instance.
> Fix the mismatched code.
> 
> Signed-off-by: Victor Zhao <Victor.Zhao at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 ++--
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      | 18 +++++++++---------
>  2 files changed, 11 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index e72e774d17e6..e74789691070 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -679,7 +679,7 @@ uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
>                     amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
>                                                          GC_HWIP, false,
>                                                          &rlcg_flag)) {
> -                       ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
> +                       ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
>                 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
>                     amdgpu_sriov_runtime(adev) &&
>                     down_read_trylock(&adev->reset_domain->sem)) {
> @@ -810,7 +810,7 @@ void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
>                     amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
>                                                          GC_HWIP, true,
>                                                          &rlcg_flag)) {
> -                       amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
> +                       amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
>                 } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
>                     amdgpu_sriov_runtime(adev) &&
>                     down_read_trylock(&adev->reset_domain->sem)) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 094c08cb98e7..350f6b6676f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -844,7 +844,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>         ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> 
>         if (vmhub >= AMDGPU_MMHUB0(0))
> -               inst = GET_INST(GC, 0);
> +               inst = 0;
>         else
>                 inst = vmhub;
> 
> @@ -876,9 +876,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                 for (j = 0; j < adev->usec_timeout; j++) {
>                         /* a read return value of 1 means semaphore acquire */
>                         if (vmhub >= AMDGPU_MMHUB0(0))
> -                               tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, inst);
> +                               tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, GET_INST(GC, inst));
>                         else
> -                               tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, inst);
> +                               tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, GET_INST(GC, inst));
>                         if (tmp & 0x1)
>                                 break;
>                         udelay(1);
> @@ -889,9 +889,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>         }
> 
>         if (vmhub >= AMDGPU_MMHUB0(0))
> -               WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, inst);
> +               WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, GET_INST(GC, inst));
>         else
> -               WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, inst);
> +               WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, GET_INST(GC, inst));
> 
>         /*
>          * Issue a dummy read to wait for the ACK register to
> @@ -904,9 +904,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> 
>         for (j = 0; j < adev->usec_timeout; j++) {
>                 if (vmhub >= AMDGPU_MMHUB0(0))
> -                       tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, inst);
> +                       tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, GET_INST(GC, inst));
>                 else
> -                       tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, inst);
> +                       tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, GET_INST(GC, inst));
>                 if (tmp & (1 << vmid))
>                         break;
>                 udelay(1);
> @@ -919,9 +919,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>                  * write with 0 means semaphore release
>                  */
>                 if (vmhub >= AMDGPU_MMHUB0(0))
> -                       WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, inst);
> +                       WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, GET_INST(GC, inst));
>                 else
> -                       WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, inst);
> +                       WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, GET_INST(GC, inst));
>         }
> 
>         spin_unlock(&adev->gmc.invalidate_lock);
> --
> 2.34.1
> 
> 
>