[PATCH 10/20] drm/amdgpu: add debugfs ctrl node

Alex Deucher alexdeucher at gmail.com
Tue Mar 5 20:41:26 UTC 2019


From: xinhui pan <xinhui.pan at amd.com>

allow userspace enable/disable ras

Signed-off-by: xinhui pan <xinhui.pan at amd.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 122 ++++++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |   9 ++
 2 files changed, 121 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index d7c7090fade9..ca9c7d1ede2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -200,6 +200,90 @@ static const struct file_operations amdgpu_ras_debugfs_ops = {
 	.llseek = default_llseek
 };
 
+/*
+ * DOC: ras debugfs control interface
+ *
+ * It accepts struct ras_debug_if who has two members.
+ *
+ * First member: ras_debug_if::head or ras_debug_if::inject.
+ * It is used to indicate which IP block will be under control.
+ * Its contents are not human readable, IOW, write it by your programs.
+ *
+ * head has four members, they are block, type, sub_block_index, name.
+ * block: which IP will be under control.
+ * type: what kind of error will be enabled/disabled/injected.
+ * sub_block_index: some IPs have subcomponets. say, GFX, sDMA.
+ * name: the name of IP.
+ *
+ * inject has two more members than head, they are address, value.
+ * As their names indicate, inject operation will write the
+ * value to the address.
+ *
+ * Second member: struct ras_debug_if::op.
+ * It has three kinds of operations.
+ *  0: disable RAS on the block. Take ::head as its data.
+ *  1: enable RAS on the block. Take ::head as its data.
+ *  2: inject errors on the block. Take ::inject as its data.
+ *
+ * How to check the result?
+ *
+ * For disable/enable, please check ras features at
+ * /sys/class/drm/card[0/1/2...]/device/ras/features
+ *
+ * For inject, please check corresponding err count at
+ * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
+ *
+ * NOTE: operation is only allowed on blocks which are supported.
+ * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
+ */
+static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf,
+		size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+	struct ras_debug_if data;
+	int ret = 0;
+
+	if (size < sizeof(data))
+		return -EINVAL;
+
+	memset(&data, 0, sizeof(data));
+
+	if (*pos)
+		return -EINVAL;
+
+	if (copy_from_user(&data, buf, sizeof(data)))
+		return -EINVAL;
+
+	*pos = size;
+
+	if (!amdgpu_ras_is_supported(adev, data.head.block))
+		return -EINVAL;
+
+	switch (data.op) {
+	case 0:
+		ret = amdgpu_ras_feature_enable(adev, &data.head, 0);
+		break;
+	case 1:
+		ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
+		break;
+	case 2:
+		ret = amdgpu_ras_error_inject(adev, &data.inject);
+		break;
+	};
+
+	if (ret)
+		return -EINVAL;
+
+	return size;
+}
+
+static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
+	.owner = THIS_MODULE,
+	.read = NULL,
+	.write = amdgpu_ras_debugfs_ctrl_write,
+	.llseek = default_llseek
+};
+
 static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -657,6 +741,31 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
 /* sysfs end */
 
 /* debugfs begin */
+static int amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct drm_minor *minor = adev->ddev->primary;
+	struct dentry *root = minor->debugfs_root, *dir;
+	struct dentry *ent;
+
+	dir = debugfs_create_dir("ras", root);
+	if (IS_ERR(dir))
+		return -EINVAL;
+
+	con->dir = dir;
+
+	ent = debugfs_create_file("ras_ctrl",
+			S_IWUGO | S_IRUGO, con->dir,
+			adev, &amdgpu_ras_debugfs_ctrl_ops);
+	if (IS_ERR(ent)) {
+		debugfs_remove(con->dir);
+		return -EINVAL;
+	}
+
+	con->ent = ent;
+	return 0;
+}
+
 int amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
 		struct ras_fs_if *head)
 {
@@ -709,8 +818,10 @@ static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
 		amdgpu_ras_debugfs_remove(adev, &obj->head);
 	}
 
+	debugfs_remove(con->ent);
 	debugfs_remove(con->dir);
 	con->dir = NULL;
+	con->ent = NULL;
 
 	return 0;
 }
@@ -720,17 +831,8 @@ static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
 
 static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
 {
-	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	struct drm_minor *minor = adev->ddev->primary;
-	struct dentry *root = minor->debugfs_root, *dir;
-
-	dir = debugfs_create_dir("ras", root);
-	if (IS_ERR(dir))
-		return -EINVAL;
-
-	con->dir = dir;
-
 	amdgpu_ras_sysfs_create_feature_node(adev);
+	amdgpu_ras_debugfs_create_ctrl_node(adev);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index d6d4bbb92a42..599dcd01aec7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -86,6 +86,8 @@ struct amdgpu_ras {
 	struct list_head head;
 	/* debugfs */
 	struct dentry *dir;
+	/* debugfs ctrl */
+	struct dentry *ent;
 	/* sysfs */
 	struct device_attribute features_attr;
 	/* block array */
@@ -135,6 +137,13 @@ struct ras_dispatch_if {
 	struct amdgpu_iv_entry *entry;
 };
 
+struct ras_debug_if {
+	union {
+		struct ras_common_if head;
+		struct ras_inject_if inject;
+	};
+	int op;
+};
 /* work flow
  * vbios
  * 1: ras feature enable (enabled by default)
-- 
2.20.1



More information about the amd-gfx mailing list