[PATCH] drm/amdgpu: revert "Add autodump debugfs node for gpu reset v8"
Alex Deucher
alexdeucher at gmail.com
Mon Oct 4 13:32:03 UTC 2021
Acked-by: Alex Deucher <alexander.deucher at amd.com>
On Mon, Oct 4, 2021 at 4:31 AM Christian König
<ckoenig.leichtzumerken at gmail.com> wrote:
>
> Ping? Alex, any objections to this?
>
> Otherwise I'm going to push it with Nirmoy's acked-by.
>
> Christian.
>
> Am 30.09.21 um 11:26 schrieb Christian König:
> > This reverts commit 728e7e0cd61899208e924472b9e641dbeb0775c4.
> >
> > Further discussion reveals that this feature is severely broken
> > and needs to be reverted ASAP.
> >
> > GPU reset can never be delayed by userspace, even for debugging;
> > otherwise we can run into in-kernel deadlocks.
> >
> > Signed-off-by: Christian König <christian.koenig at amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -
> > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 80 ---------------------
> > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 5 --
> > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 --
> > 4 files changed, 91 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index dc3c6b3a00e5..6a1928a720a6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -1078,8 +1078,6 @@ struct amdgpu_device {
> > char product_name[32];
> > char serial[20];
> >
> > - struct amdgpu_autodump autodump;
> > -
> > atomic_t throttling_logging_enabled;
> > struct ratelimit_state throttling_logging_rs;
> > uint32_t ras_hw_enabled;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > index 277128846dd1..0b89ba142a59 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > @@ -27,7 +27,6 @@
> > #include <linux/pci.h>
> > #include <linux/uaccess.h>
> > #include <linux/pm_runtime.h>
> > -#include <linux/poll.h>
> >
> > #include "amdgpu.h"
> > #include "amdgpu_pm.h"
> > @@ -37,85 +36,7 @@
> > #include "amdgpu_securedisplay.h"
> > #include "amdgpu_fw_attestation.h"
> >
> > -int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
> > -{
> > #if defined(CONFIG_DEBUG_FS)
> > - unsigned long timeout = 600 * HZ;
> > - int ret;
> > -
> > - wake_up_interruptible(&adev->autodump.gpu_hang);
> > -
> > - ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout);
> > - if (ret == 0) {
> > - pr_err("autodump: timeout, move on to gpu recovery\n");
> > - return -ETIMEDOUT;
> > - }
> > -#endif
> > - return 0;
> > -}
> > -
> > -#if defined(CONFIG_DEBUG_FS)
> > -
> > -static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
> > -{
> > - struct amdgpu_device *adev = inode->i_private;
> > - int ret;
> > -
> > - file->private_data = adev;
> > -
> > - ret = down_read_killable(&adev->reset_sem);
> > - if (ret)
> > - return ret;
> > -
> > - if (adev->autodump.dumping.done) {
> > - reinit_completion(&adev->autodump.dumping);
> > - ret = 0;
> > - } else {
> > - ret = -EBUSY;
> > - }
> > -
> > - up_read(&adev->reset_sem);
> > -
> > - return ret;
> > -}
> > -
> > -static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file)
> > -{
> > - struct amdgpu_device *adev = file->private_data;
> > -
> > - complete_all(&adev->autodump.dumping);
> > - return 0;
> > -}
> > -
> > -static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table)
> > -{
> > - struct amdgpu_device *adev = file->private_data;
> > -
> > - poll_wait(file, &adev->autodump.gpu_hang, poll_table);
> > -
> > - if (amdgpu_in_reset(adev))
> > - return POLLIN | POLLRDNORM | POLLWRNORM;
> > -
> > - return 0;
> > -}
> > -
> > -static const struct file_operations autodump_debug_fops = {
> > - .owner = THIS_MODULE,
> > - .open = amdgpu_debugfs_autodump_open,
> > - .poll = amdgpu_debugfs_autodump_poll,
> > - .release = amdgpu_debugfs_autodump_release,
> > -};
> > -
> > -static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
> > -{
> > - init_completion(&adev->autodump.dumping);
> > - complete_all(&adev->autodump.dumping);
> > - init_waitqueue_head(&adev->autodump.gpu_hang);
> > -
> > - debugfs_create_file("amdgpu_autodump", 0600,
> > - adev_to_drm(adev)->primary->debugfs_root,
> > - adev, &autodump_debug_fops);
> > -}
> >
> > /**
> > * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
> > @@ -1590,7 +1511,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
> > }
> >
> > amdgpu_ras_debugfs_create_all(adev);
> > - amdgpu_debugfs_autodump_init(adev);
> > amdgpu_rap_debugfs_init(adev);
> > amdgpu_securedisplay_debugfs_init(adev);
> > amdgpu_fw_attestation_debugfs_init(adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
> > index 141a8474e24f..8b641f40fdf6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
> > @@ -26,10 +26,6 @@
> > /*
> > * Debugfs
> > */
> > -struct amdgpu_autodump {
> > - struct completion dumping;
> > - struct wait_queue_head gpu_hang;
> > -};
> >
> > int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
> > int amdgpu_debugfs_init(struct amdgpu_device *adev);
> > @@ -37,4 +33,3 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
> > void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
> > void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
> > void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
> > -int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 41c6b3aacd37..4d34b2da8582 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -4458,10 +4458,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
> > if (reset_context->reset_req_dev == adev)
> > job = reset_context->job;
> >
> > - /* no need to dump if device is not in good state during probe period */
> > - if (!adev->gmc.xgmi.pending_reset)
> > - amdgpu_debugfs_wait_dump(adev);
> > -
> > if (amdgpu_sriov_vf(adev)) {
> > /* stop the data exchange thread */
> > amdgpu_virt_fini_data_exchange(adev);
>
More information about the amd-gfx
mailing list