[PATCH 6/6] drm/amdgpu/ras: fix and update the documentation for RAS
Alex Deucher
alexdeucher at gmail.com
Thu Sep 19 21:45:27 UTC 2019
Add new sections to amdgpu.rst, fix up formatting issues,
add additional documentation to each section.
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
Documentation/gpu/amdgpu.rst | 24 ++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 53 +++++++++++++++++++++----
2 files changed, 68 insertions(+), 9 deletions(-)
diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index 80db5d89cd49..5b9eaf23558e 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -79,12 +79,32 @@ AMDGPU XGMI Support
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
:internal:
-AMDGPU RAS debugfs control interface
-====================================
+AMDGPU RAS Support
+==================
+
+RAS debugfs/sysfs Control and Error Injection Interfaces
+--------------------------------------------------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
:doc: AMDGPU RAS debugfs control interface
+RAS Error Count sysfs Interface
+-------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+ :doc: AMDGPU RAS sysfs Error Count Interface
+
+RAS EEPROM debugfs Interface
+----------------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+ :doc: AMDGPU RAS debugfs EEPROM table reset interface
+
+RAS VRAM Bad Pages sysfs Interface
+----------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+ :doc: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
:internal:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 83b681a16e56..14f3f8d831d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -310,7 +310,18 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
/**
* DOC: AMDGPU RAS debugfs EEPROM table reset interface
*
- * Usage: echo 1 > ../ras/ras_eeprom_reset will reset EEPROM table to 0 entries.
+ * Some boards contain an EEPROM which is used to persistently store a list of
+ * bad pages containing ECC errors detected in vram. This interface provides
+ * a way to reset the EEPROM, e.g., after testing error injection.
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ * echo 1 > ../ras/ras_eeprom_reset
+ *
+ * will reset EEPROM table to 0 entries.
+ *
*/
static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf,
size_t size, loff_t *pos)
@@ -337,6 +348,27 @@ static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
.llseek = default_llseek
};
+/**
+ * DOC: AMDGPU RAS sysfs Error Count Interface
+ *
+ * It allows user to read the error count for each IP block on the gpu through
+ * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
+ *
+ * It outputs the multiple lines which report the uncorrected (ue) and corrected
+ * (ce) error counts.
+ *
+ * The format of one line is below,
+ *
+ * [ce|ue]: count
+ *
+ * Example:
+ *
+ * .. code-block:: bash
+ *
+ * ue: 0
+ * ce: 1
+ *
+ */
static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -781,8 +813,8 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
};
}
-/*
- * DOC: ras sysfs gpu_vram_bad_pages interface
+/**
+ * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
*
* It allows user to read the bad pages of vram on the gpu through
* /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
@@ -794,14 +826,21 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
*
* gpu pfn and gpu page size are printed in hex format.
* flags can be one of below character,
+ *
* R: reserved, this gpu page is reserved and not able to use.
+ *
* P: pending for reserve, this gpu page is marked as bad, will be reserved
- * in next window of page_reserve.
+ * in next window of page_reserve.
+ *
* F: unable to reserve. this gpu page can't be reserved due to some reasons.
*
- * examples:
- * 0x00000001 : 0x00001000 : R
- * 0x00000002 : 0x00001000 : P
+ * Examples:
+ *
+ * .. code-block:: bash
+ *
+ * 0x00000001 : 0x00001000 : R
+ * 0x00000002 : 0x00001000 : P
+ *
*/
static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
--
2.20.1
More information about the amd-gfx
mailing list