[PATCH] drm/amdgpu: add amdgpu_timeout_ring_* file to debugfs
Nicolai Hähnle
nicolai.haehnle at amd.com
Wed Jun 14 11:27:58 UTC 2023
Report the per-ring timeout in milliseconds and allow users to adjust
the timeout dynamically. This can be useful for debugging, e.g. to more
easily test whether a submission genuinely hangs or is just taking very
long, and to temporarily disable GPU recovery so that shader problems
can be examined in detail, including single-stepping through shader
code.
It feels a bit questionable to access ring->sched.timeout without any
locking -- under a C++ memory model it would technically be undefined
behavior. But it's not like a lot can go wrong here in practice, and
it's not clear to me what locking or atomics, if any, should be used.
Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 32 +++++++++++++++++++++++-
1 file changed, 31 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index dc474b809604..32d223daa789 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -471,35 +471,65 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
return result;
}
static const struct file_operations amdgpu_debugfs_ring_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_ring_read,
.llseek = default_llseek
};
+/**
+ * amdgpu_debugfs_timeout_ring_get - report a ring's scheduler timeout
+ *
+ * @data: the struct amdgpu_ring registered with the debugfs file
+ * @val: receives the timeout in milliseconds, or 0 when the timeout is
+ *       MAX_SCHEDULE_TIMEOUT
+ *
+ * Returns 0.
+ */
+static int amdgpu_debugfs_timeout_ring_get(void *data, u64 *val)
+{
+	struct amdgpu_ring *ring = data;
+	/*
+	 * Take a single snapshot: the field is accessed without locking, and a
+	 * concurrent write through the set handler could otherwise change it
+	 * between the comparison and the conversion below.
+	 */
+	long timeout = READ_ONCE(ring->sched.timeout);
+
+	if (timeout == MAX_SCHEDULE_TIMEOUT)
+		*val = 0;
+	else
+		*val = jiffies_to_msecs(timeout);
+
+	return 0;
+}
+
+/**
+ * amdgpu_debugfs_timeout_ring_set - change a ring's scheduler timeout
+ *
+ * @data: the struct amdgpu_ring registered with the debugfs file
+ * @val: new timeout in milliseconds; 0 selects MAX_SCHEDULE_TIMEOUT
+ *
+ * Returns 0 on success, -EINVAL if @val is too large to convert.
+ */
+static int amdgpu_debugfs_timeout_ring_set(void *data, u64 val)
+{
+	struct amdgpu_ring *ring = data;
+	long timeout;
+
+	/*
+	 * msecs_to_jiffies() takes an unsigned int, so a wider u64 would be
+	 * silently truncated (2^32 ms would become a 0-jiffy timeout). Values
+	 * above INT_MAX are also not converted literally by that helper, so
+	 * reject them instead of installing a surprising timeout.
+	 */
+	if (val > INT_MAX)
+		return -EINVAL;
+
+	timeout = val ? msecs_to_jiffies(val) : MAX_SCHEDULE_TIMEOUT;
+
+	/* Lockless update; pair with a single-snapshot read on the get side. */
+	WRITE_ONCE(ring->sched.timeout, timeout);
+
+	return 0;
+}
+
+/*
+ * File operations for the per-ring timeout file: the value is read and
+ * written as an unsigned decimal through the get/set handlers.
+ */
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_timeout_ring_fops,
+			 amdgpu_debugfs_timeout_ring_get,
+			 amdgpu_debugfs_timeout_ring_set, "%llu\n");
+
#endif
+/**
+ * amdgpu_debugfs_ring_init - create the debugfs files for one ring
+ *
+ * @adev: amdgpu device; its primary DRM minor supplies the debugfs root
+ * @ring: ring to expose, passed as private data to both files
+ *
+ * Creates amdgpu_ring_<name> (read-only, sized ring_size + 12) and
+ * amdgpu_timeout_ring_<name> (readable by all, writable by the owner).
+ * Compiles to an empty function when CONFIG_DEBUG_FS is not defined.
+ */
void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
#if defined(CONFIG_DEBUG_FS)
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct dentry *root = minor->debugfs_root;
- char name[32];
+	char name[40];
sprintf(name, "amdgpu_ring_%s", ring->name);
debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, ring,
&amdgpu_debugfs_ring_fops,
ring->ring_size + 12);
+	/* Bounded write: never run past name[] even if ring->name grows. */
+	snprintf(name, sizeof(name), "amdgpu_timeout_ring_%s", ring->name);
+	debugfs_create_file(name, S_IFREG | S_IRUGO | S_IWUSR, root, ring,
+			    &amdgpu_debugfs_timeout_ring_fops);
#endif
}
/**
* amdgpu_ring_test_helper - tests ring and set sched readiness status
*
* @ring: ring to try the recovery on
*
* Tests ring and set sched readiness status
*
--
2.40.0
More information about the amd-gfx
mailing list