[PATCH] drm/amdgpu: drop kiq access while in reset

Christian König ckoenig.leichtzumerken at gmail.com
Mon Jun 24 11:49:22 UTC 2024


Am 24.06.24 um 11:52 schrieb Lazar, Lijo:
>
> On 6/24/2024 3:08 PM, Christian König wrote:
>> Am 24.06.24 um 08:34 schrieb Lazar, Lijo:
>>> On 6/24/2024 12:01 PM, Vignesh Chander wrote:
>>>> correctly handle the case when trylock fails when gpu is
>>>> about to be reset by dropping the request instead of using mmio
>>>>
>>>> Signed-off-by: Vignesh Chander <Vignesh.Chander at amd.com>
>>> Reviewed-by: Lijo Lazar <lijo.lazar at amd.com>
>>>
>>> Thanks,
>>> Lijo
>>>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 38 ++++++++++++----------
>>>>    1 file changed, 21 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index a7ce8280b17ce7..3cfd24699d691d 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -613,10 +613,11 @@ uint32_t amdgpu_device_rreg(struct
>>>> amdgpu_device *adev,
>>>>          if ((reg * 4) < adev->rmmio_size) {
>>>>            if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
>>>> -            amdgpu_sriov_runtime(adev) &&
>>>> -            down_read_trylock(&adev->reset_domain->sem)) {
>>>> -            ret = amdgpu_kiq_rreg(adev, reg, 0);
>>>> -            up_read(&adev->reset_domain->sem);
>>>> +            amdgpu_sriov_runtime(adev) {
>>>> +            if (down_read_trylock(&adev->reset_domain->sem)) {
>>>> +                ret = amdgpu_kiq_rreg(adev, reg, 0);
>>>> +                up_read(&adev->reset_domain->sem);
>>>> +            }
>> What has actually changed here? As far as I can see that isn't
>> functionally different to the existing code.
>>
> In earlier logic, if it fails to get the lock, it takes the 'else' path.
> The 'else' path is not meant for sriov/vf.

Yeah, but the read or write is then just silently dropped.

That can't be correct either.

Regards,
Christian.

>
> Thanks,
> Lijo
>
>> Regards,
>> Christian.
>>
>>>>            } else {
>>>>                ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
>>>>            }
>>>> @@ -681,10 +682,11 @@ uint32_t amdgpu_device_xcc_rreg(struct
>>>> amdgpu_device *adev,
>>>>                                 &rlcg_flag)) {
>>>>                ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag,
>>>> GET_INST(GC, xcc_id));
>>>>            } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
>>>> -            amdgpu_sriov_runtime(adev) &&
>>>> -            down_read_trylock(&adev->reset_domain->sem)) {
>>>> -            ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
>>>> -            up_read(&adev->reset_domain->sem);
>>>> +            amdgpu_sriov_runtime(adev) {
>>>> +            if (down_read_trylock(&adev->reset_domain->sem)) {
>>>> +                ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
>>>> +                up_read(&adev->reset_domain->sem);
>>>> +            }
>>>>            } else {
>>>>                ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
>>>>            }
>>>> @@ -740,10 +742,11 @@ void amdgpu_device_wreg(struct amdgpu_device
>>>> *adev,
>>>>          if ((reg * 4) < adev->rmmio_size) {
>>>>            if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
>>>> -            amdgpu_sriov_runtime(adev) &&
>>>> -            down_read_trylock(&adev->reset_domain->sem)) {
>>>> -            amdgpu_kiq_wreg(adev, reg, v, 0);
>>>> -            up_read(&adev->reset_domain->sem);
>>>> +            amdgpu_sriov_runtime(adev) {
>>>> +            if (down_read_trylock(&adev->reset_domain->sem)) {
>>>> +                amdgpu_kiq_wreg(adev, reg, v, 0);
>>>> +                up_read(&adev->reset_domain->sem);
>>>> +            }
>>>>            } else {
>>>>                writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
>>>>            }
>>>> @@ -812,11 +815,12 @@ void amdgpu_device_xcc_wreg(struct
>>>> amdgpu_device *adev,
>>>>                                 &rlcg_flag)) {
>>>>                amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag,
>>>> GET_INST(GC, xcc_id));
>>>>            } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
>>>> -            amdgpu_sriov_runtime(adev) &&
>>>> -            down_read_trylock(&adev->reset_domain->sem)) {
>>>> -            amdgpu_kiq_wreg(adev, reg, v, xcc_id);
>>>> -            up_read(&adev->reset_domain->sem);
>>>> -        } else {
>>>> +            amdgpu_sriov_runtime(adev) {
>>>> +            if (down_read_trylock(&adev->reset_domain->sem)) {
>>>> +                amdgpu_kiq_wreg(adev, reg, v, xcc_id);
>>>> +                up_read(&adev->reset_domain->sem);
>>>> +            }
>>>> +            } else {
>>>>                writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
>>>>            }
>>>>        } else {



More information about the amd-gfx mailing list