[PATCH] drm/amdgpu: Expose ras version & schema info
Asad Kamal
asad.kamal at amd.com
Fri Oct 6 16:34:41 UTC 2023
Expose ras table version & schema info to sysfs
Signed-off-by: Asad Kamal <asad.kamal at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 58 +++++++++++++++++++++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 11 +++++
2 files changed, 66 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 9c8203e87859..362e32350af7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1370,6 +1370,22 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
}
+static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, version_attr);
+ return sysfs_emit(buf, "table version: 0x%x\n", con->eeprom_control.tbl_hdr.version);
+}
+
+static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct amdgpu_ras *con =
+ container_of(attr, struct amdgpu_ras, schema_attr);
+ return sysfs_emit(buf, "schema: 0x%x\n", con->schema);
+}
+
static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1379,11 +1395,13 @@ static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
RAS_FS_NAME);
}
-static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
+static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct attribute *attrs[] = {
&con->features_attr.attr,
+ &con->version_attr.attr,
+ &con->schema_attr.attr,
NULL
};
struct attribute_group group = {
@@ -1459,7 +1477,7 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
if (amdgpu_bad_page_threshold != 0)
amdgpu_ras_sysfs_remove_bad_page_node(adev);
- amdgpu_ras_sysfs_remove_feature_node(adev);
+ amdgpu_ras_sysfs_remove_dev_attr_node(adev);
return 0;
}
@@ -1582,6 +1600,10 @@ static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
amdgpu_ras_sysfs_badpages_read, NULL, 0);
static DEVICE_ATTR(features, S_IRUGO,
amdgpu_ras_sysfs_features_read, NULL);
+static DEVICE_ATTR(version, 0444,
+ amdgpu_ras_sysfs_version_show, NULL);
+static DEVICE_ATTR(schema, 0444,
+ amdgpu_ras_sysfs_schema_show, NULL);
static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1590,6 +1612,8 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
};
struct attribute *attrs[] = {
&con->features_attr.attr,
+ &con->version_attr.attr,
+ &con->schema_attr.attr,
NULL
};
struct bin_attribute *bin_attrs[] = {
@@ -1598,11 +1622,20 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
};
int r;
+ group.attrs = attrs;
+
/* add features entry */
con->features_attr = dev_attr_features;
- group.attrs = attrs;
sysfs_attr_init(attrs[0]);
+ /* add version entry */
+ con->version_attr = dev_attr_version;
+ sysfs_attr_init(attrs[1]);
+
+ /* add schema entry */
+ con->schema_attr = dev_attr_schema;
+ sysfs_attr_init(attrs[2]);
+
if (amdgpu_bad_page_threshold != 0) {
/* add bad_page_features entry */
bin_attr_gpu_vram_bad_pages.private = NULL;
@@ -2594,6 +2627,21 @@ static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
}
}
+static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_asic_supported(adev)) {
+ switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
+ case IP_VERSION(13, 0, 6):
+ return AMDGPU_RAS_POISON_SUPPORTED | AMDGPU_RAS_SBC_SUPPORTED |
+ AMDGPU_RAS_DED_SUPPORTED | AMDGPU_RAS_PARITY_SUPPORTED;
+ default:
+ return 0;
+ }
+
+ } else
+ return 0;
+}
+
int amdgpu_ras_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -2636,10 +2684,14 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
con->update_channel_flag = false;
con->features = 0;
+ con->schema = 0;
INIT_LIST_HEAD(&con->head);
/* Might need get this flag from vbios. */
con->flags = RAS_DEFAULT_FLAGS;
+ /* Get RAS schema for particular SOC */
+ con->schema = amdgpu_get_ras_schema(adev);
+
/* initialize nbio ras function ahead of any other
* ras functions so hardware fatal error interrupt
* can be enabled as early as possible */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 7999d202c9bc..ebd53736606c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -38,6 +38,14 @@ struct amdgpu_iv_entry;
#define AMDGPU_RAS_INST_MASK 0xfffff000
#define AMDGPU_RAS_INST_SHIFT 0xc
+/* Bit mask for RAS error types, SBC=Single bit correctable error
+ * DED = Double Bit error detection
+ */
+#define AMDGPU_RAS_PARITY_SUPPORTED BIT(0)
+#define AMDGPU_RAS_SBC_SUPPORTED BIT(1)
+#define AMDGPU_RAS_DED_SUPPORTED BIT(2)
+#define AMDGPU_RAS_POISON_SUPPORTED BIT(3)
+
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
AMDGPU_RAS_BLOCK__SDMA,
@@ -389,9 +397,12 @@ struct amdgpu_ras {
/* ras infrastructure */
/* for ras itself. */
uint32_t features;
+ uint32_t schema;
struct list_head head;
/* sysfs */
struct device_attribute features_attr;
+ struct device_attribute version_attr;
+ struct device_attribute schema_attr;
struct bin_attribute badpages_attr;
struct dentry *de_ras_eeprom_table;
/* block array */
--
2.42.0
More information about the amd-gfx
mailing list