[PATCH v1 1/2] drm/amdgpu: save the reset dump register value for devcoredump
Sharma, Shashank
shashank.sharma at amd.com
Fri May 20 14:06:36 UTC 2022
Hey Amar,
On 5/20/2022 3:49 PM, Somalapuram Amaranath wrote:
> Allocate memory for register value and use the same values for devcoredump.
> Remove dump_stack reset register dumps.
>
> Signed-off-by: Somalapuram Amaranath <Amaranath.Somalapuram at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 9 ++++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++----
> 3 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 76df583663c7..c79d9992b113 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1042,6 +1042,7 @@ struct amdgpu_device {
>
> /* reset dump register */
> uint32_t *reset_dump_reg_list;
> + uint32_t *reset_dump_reg_value;
> int num_regs;
>
> struct amdgpu_reset_domain *reset_domain;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index eedb12f6b8a3..942fdbd316f4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1683,7 +1683,7 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
> char reg_offset[11];
> - uint32_t *new, *tmp = NULL;
> + uint32_t *new, *tmp = NULL, *tmp_value = NULL;
> int ret, i = 0, len = 0;
>
> do {
> @@ -1709,17 +1709,24 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
> i++;
> } while (len < size);
>
> + new = krealloc_array(tmp_value, i, sizeof(uint32_t), GFP_KERNEL);
tmp_value is initialized to NULL, which means krealloc_array() will
behave like kmalloc_array() here. Is there any particular reason we are
adding this variable at all, just to use krealloc_array()? Why not
use kmalloc_array() directly?
> + if (!new) {
> + ret = -ENOMEM;
> + goto error_free;
> + }
> ret = down_write_killable(&adev->reset_domain->sem);
> if (ret)
> goto error_free;
>
> swap(adev->reset_dump_reg_list, tmp);
> + swap(adev->reset_dump_reg_value, new);
> adev->num_regs = i;
> up_write(&adev->reset_domain->sem);
> ret = size;
>
> error_free:
> kfree(tmp);
> + kfree(new);
> return ret;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 4daa0e893965..963c897a76e6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4720,15 +4720,14 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>
> static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
> {
> - uint32_t reg_value;
> int i;
>
> lockdep_assert_held(&adev->reset_domain->sem);
> - dump_stack();
This should be part of a different patch, where you can give some
background on why we are removing this.
>
> for (i = 0; i < adev->num_regs; i++) {
> - reg_value = RREG32(adev->reset_dump_reg_list[i]);
> - trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], reg_value);
> + adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
> + trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
> + adev->reset_dump_reg_value[i]);
> }
>
> return 0;
- Shashank
More information about the amd-gfx
mailing list