[PATCH v1 1/2] drm/amdgpu: save the reset dump register value for devcoredump

Somalapuram, Amaranath asomalap at amd.com
Tue May 24 11:57:25 UTC 2022


On 5/24/2022 3:25 PM, Sharma, Shashank wrote:
>
>
> On 5/24/2022 8:12 AM, Somalapuram, Amaranath wrote:
>>
>> On 5/20/2022 7:36 PM, Sharma, Shashank wrote:
>>> Hey Amar,
>>>
>>> On 5/20/2022 3:49 PM, Somalapuram Amaranath wrote:
>>>> Allocate memory for register value and use the same values for 
>>>> devcoredump.
>>>> Remove dump_stack reset register dumps.
>>>>
>>>> Signed-off-by: Somalapuram Amaranath <Amaranath.Somalapuram at amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 1 +
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 9 ++++++++-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 7 +++----
>>>>   3 files changed, 12 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index 76df583663c7..c79d9992b113 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -1042,6 +1042,7 @@ struct amdgpu_device {
>>>>         /* reset dump register */
>>>>       uint32_t                        *reset_dump_reg_list;
>>>> +    uint32_t            *reset_dump_reg_value;
>>>>       int                             num_regs;
>>>>         struct amdgpu_reset_domain    *reset_domain;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>>> index eedb12f6b8a3..942fdbd316f4 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>>> @@ -1683,7 +1683,7 @@ static ssize_t 
>>>> amdgpu_reset_dump_register_list_write(struct file *f,
>>>>   {
>>>>       struct amdgpu_device *adev = (struct amdgpu_device 
>>>> *)file_inode(f)->i_private;
>>>>       char reg_offset[11];
>>>> -    uint32_t *new, *tmp = NULL;
>>>> +    uint32_t *new, *tmp = NULL, *tmp_value = NULL;
>>>>       int ret, i = 0, len = 0;
>>>>         do {
>>>> @@ -1709,17 +1709,24 @@ static ssize_t 
>>>> amdgpu_reset_dump_register_list_write(struct file *f,
>>>>           i++;
>>>>       } while (len < size);
>>>>   +    new = krealloc_array(tmp_value, i, sizeof(uint32_t), 
>>>> GFP_KERNEL);
>>>
>>> tmp_value is initialized to NULL, which means krealloc_array() will 
>>> behave like kmalloc_array(), is there any particular reason we are 
>>> adding this variable at all just to use krealloc_array(), and why 
>>> not use kmalloc_array() directly ?
>>
>> I thought of using kmalloc_array() (got little confused on next write 
>> cycle), I agree to use kmalloc_array().
>>
>> Regards,
>> S.Amarnath
>>>
>>>> +    if (!new) {
>>>> +        ret = -ENOMEM;
>>>> +        goto error_free;
>>>> +    }
>>>>       ret = down_write_killable(&adev->reset_domain->sem);
>>>>       if (ret)
>>>>           goto error_free;
>>>>         swap(adev->reset_dump_reg_list, tmp);
>>>> +    swap(adev->reset_dump_reg_value, new);
>>>>       adev->num_regs = i;
>>>>       up_write(&adev->reset_domain->sem);
>>>>       ret = size;
>>>>     error_free:
>>>>       kfree(tmp);
>>>> +    kfree(new);
>>>>       return ret;
>>>>   }
>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index 4daa0e893965..963c897a76e6 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -4720,15 +4720,14 @@ int amdgpu_device_pre_asic_reset(struct 
>>>> amdgpu_device *adev,
>>>>     static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
>>>>   {
>>>> -    uint32_t reg_value;
>>>>       int i;
>>>> lockdep_assert_held(&adev->reset_domain->sem);
>>>> -    dump_stack();
>>> This should be a part of different patch, where you can give some 
>>> background on why are we removing this.
>
Will make different patch for this.
> You missed this comment.
> - Shashank
>
>>>>         for (i = 0; i < adev->num_regs; i++) {
>>>> -        reg_value = RREG32(adev->reset_dump_reg_list[i]);
>>>> - trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], 
>>>> reg_value);
>>>> +        adev->reset_dump_reg_value[i] = 
>>>> RREG32(adev->reset_dump_reg_list[i]);
>>>> + trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
>>>> +                adev->reset_dump_reg_value[i]);
>>>>       }
>>>>         return 0;
>>>
>>> - Shashank


More information about the amd-gfx mailing list