[PATCH 10/20] drm/amdgpu: add debugfs ctrl node
Alex Deucher
alexdeucher at gmail.com
Tue Mar 5 20:41:26 UTC 2019
From: xinhui pan <xinhui.pan at amd.com>
allow userspace enable/disable ras
Signed-off-by: xinhui pan <xinhui.pan at amd.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 122 ++++++++++++++++++++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 9 ++
2 files changed, 121 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index d7c7090fade9..ca9c7d1ede2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -200,6 +200,90 @@ static const struct file_operations amdgpu_ras_debugfs_ops = {
.llseek = default_llseek
};
+/*
+ * DOC: ras debugfs control interface
+ *
+ * It accepts struct ras_debug_if who has two members.
+ *
+ * First member: ras_debug_if::head or ras_debug_if::inject.
+ * It is used to indicate which IP block will be under control.
+ * Its contents are not human readable, IOW, write it by your programs.
+ *
+ * head has four members, they are block, type, sub_block_index, name.
+ * block: which IP will be under control.
+ * type: what kind of error will be enabled/disabled/injected.
+ * sub_block_index: some IPs have subcomponets. say, GFX, sDMA.
+ * name: the name of IP.
+ *
+ * inject has two more members than head, they are address, value.
+ * As their names indicate, inject operation will write the
+ * value to the address.
+ *
+ * Second member: struct ras_debug_if::op.
+ * It has three kinds of operations.
+ * 0: disable RAS on the block. Take ::head as its data.
+ * 1: enable RAS on the block. Take ::head as its data.
+ * 2: inject errors on the block. Take ::inject as its data.
+ *
+ * How to check the result?
+ *
+ * For disable/enable, please check ras features at
+ * /sys/class/drm/card[0/1/2...]/device/ras/features
+ *
+ * For inject, please check corresponding err count at
+ * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
+ *
+ * NOTE: operation is only allowed on blocks which are supported.
+ * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
+ */
+static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ struct ras_debug_if data;
+ int ret = 0;
+
+ if (size < sizeof(data))
+ return -EINVAL;
+
+ memset(&data, 0, sizeof(data));
+
+ if (*pos)
+ return -EINVAL;
+
+ if (copy_from_user(&data, buf, sizeof(data)))
+ return -EINVAL;
+
+ *pos = size;
+
+ if (!amdgpu_ras_is_supported(adev, data.head.block))
+ return -EINVAL;
+
+ switch (data.op) {
+ case 0:
+ ret = amdgpu_ras_feature_enable(adev, &data.head, 0);
+ break;
+ case 1:
+ ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
+ break;
+ case 2:
+ ret = amdgpu_ras_error_inject(adev, &data.inject);
+ break;
+ };
+
+ if (ret)
+ return -EINVAL;
+
+ return size;
+}
+
+static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
+ .owner = THIS_MODULE,
+ .read = NULL,
+ .write = amdgpu_ras_debugfs_ctrl_write,
+ .llseek = default_llseek
+};
+
static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -657,6 +741,31 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
/* sysfs end */
/* debugfs begin */
+static int amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct drm_minor *minor = adev->ddev->primary;
+ struct dentry *root = minor->debugfs_root, *dir;
+ struct dentry *ent;
+
+ dir = debugfs_create_dir("ras", root);
+ if (IS_ERR(dir))
+ return -EINVAL;
+
+ con->dir = dir;
+
+ ent = debugfs_create_file("ras_ctrl",
+ S_IWUGO | S_IRUGO, con->dir,
+ adev, &amdgpu_ras_debugfs_ctrl_ops);
+ if (IS_ERR(ent)) {
+ debugfs_remove(con->dir);
+ return -EINVAL;
+ }
+
+ con->ent = ent;
+ return 0;
+}
+
int amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
struct ras_fs_if *head)
{
@@ -709,8 +818,10 @@ static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
amdgpu_ras_debugfs_remove(adev, &obj->head);
}
+ debugfs_remove(con->ent);
debugfs_remove(con->dir);
con->dir = NULL;
+ con->ent = NULL;
return 0;
}
@@ -720,17 +831,8 @@ static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
{
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- struct drm_minor *minor = adev->ddev->primary;
- struct dentry *root = minor->debugfs_root, *dir;
-
- dir = debugfs_create_dir("ras", root);
- if (IS_ERR(dir))
- return -EINVAL;
-
- con->dir = dir;
-
amdgpu_ras_sysfs_create_feature_node(adev);
+ amdgpu_ras_debugfs_create_ctrl_node(adev);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index d6d4bbb92a42..599dcd01aec7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -86,6 +86,8 @@ struct amdgpu_ras {
struct list_head head;
/* debugfs */
struct dentry *dir;
+ /* debugfs ctrl */
+ struct dentry *ent;
/* sysfs */
struct device_attribute features_attr;
/* block array */
@@ -135,6 +137,13 @@ struct ras_dispatch_if {
struct amdgpu_iv_entry *entry;
};
+struct ras_debug_if {
+ union {
+ struct ras_common_if head;
+ struct ras_inject_if inject;
+ };
+ int op;
+};
/* work flow
* vbios
* 1: ras feature enable (enabled by default)
--
2.20.1
More information about the amd-gfx
mailing list