[PATCH 3/3] drm/amdgpu: create mmhub ras framework
Tao Zhou
tao.zhou1 at amd.com
Thu Aug 8 06:53:51 UTC 2019
enable mmhub ras feature and create sysfs/debugfs node for mmhub
Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 3 +-
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 112 +++++++++++++++++-------
2 files changed, 82 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 071145ac67b5..cac2ef84a1a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -177,7 +177,8 @@ struct amdgpu_gmc {
struct amdgpu_xgmi xgmi;
struct amdgpu_irq_src ecc_irq;
- struct ras_common_if *ras_if;
+ struct ras_common_if *umc_ras_if;
+ struct ras_common_if *mmhub_ras_if;
};
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index ce8ff7172883..017a3c113705 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -262,7 +262,7 @@ static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- struct ras_common_if *ras_if = adev->gmc.ras_if;
+ struct ras_common_if *ras_if = adev->gmc.umc_ras_if;
struct ras_dispatch_if ih_data = {
.entry = entry,
};
@@ -736,27 +736,25 @@ static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
return 0;
}
-static int gmc_v9_0_ecc_late_init(void *handle)
+static int gmc_v9_0_ecc_ras_block_late_init(void *handle,
+ struct ras_fs_if *fs_info, struct ras_common_if *ras_block)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ras_common_if **ras_if = &adev->gmc.ras_if;
+ struct ras_common_if **ras_if = NULL;
struct ras_ih_if ih_info = {
.cb = gmc_v9_0_process_ras_data_cb,
};
- struct ras_fs_if fs_info = {
- .sysfs_name = "umc_err_count",
- .debugfs_name = "umc_err_inject",
- };
- struct ras_common_if ras_block = {
- .block = AMDGPU_RAS_BLOCK__UMC,
- .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
- .sub_block_index = 0,
- .name = "umc",
- };
int r;
- if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
- amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
+ if (ras_block->block == AMDGPU_RAS_BLOCK__UMC)
+ ras_if = &adev->gmc.umc_ras_if;
+ else if (ras_block->block == AMDGPU_RAS_BLOCK__MMHUB)
+ ras_if = &adev->gmc.mmhub_ras_if;
+ else
+ BUG();
+
+ if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
+ amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
return 0;
}
@@ -771,7 +769,7 @@ static int gmc_v9_0_ecc_late_init(void *handle)
if (r == -EAGAIN) {
/* request a gpu reset. will run again. */
amdgpu_ras_request_reset_on_boot(adev,
- AMDGPU_RAS_BLOCK__UMC);
+ ras_block->block);
return 0;
}
/* fail to enable ras, cleanup all. */
@@ -785,41 +783,46 @@ static int gmc_v9_0_ecc_late_init(void *handle)
if (!*ras_if)
return -ENOMEM;
- **ras_if = ras_block;
+ **ras_if = *ras_block;
r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
if (r) {
if (r == -EAGAIN) {
amdgpu_ras_request_reset_on_boot(adev,
- AMDGPU_RAS_BLOCK__UMC);
+ ras_block->block);
r = 0;
}
goto feature;
}
ih_info.head = **ras_if;
- fs_info.head = **ras_if;
+ fs_info->head = **ras_if;
- r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
- if (r)
- goto interrupt;
+ if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) {
+ r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
+ if (r)
+ goto interrupt;
+ }
- amdgpu_ras_debugfs_create(adev, &fs_info);
+ amdgpu_ras_debugfs_create(adev, fs_info);
- r = amdgpu_ras_sysfs_create(adev, &fs_info);
+ r = amdgpu_ras_sysfs_create(adev, fs_info);
if (r)
goto sysfs;
resume:
- r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
- if (r)
- goto irq;
+ if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) {
+ r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
+ if (r)
+ goto irq;
+ }
return 0;
irq:
amdgpu_ras_sysfs_remove(adev, *ras_if);
sysfs:
amdgpu_ras_debugfs_remove(adev, *ras_if);
- amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
+ if (ras_block->block == AMDGPU_RAS_BLOCK__UMC)
+ amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
interrupt:
amdgpu_ras_feature_enable(adev, *ras_if, 0);
feature:
@@ -828,6 +831,40 @@ static int gmc_v9_0_ecc_late_init(void *handle)
return r;
}
+static int gmc_v9_0_ecc_late_init(void *handle)
+{
+ int r;
+
+ struct ras_fs_if umc_fs_info = {
+ .sysfs_name = "umc_err_count",
+ .debugfs_name = "umc_err_inject",
+ };
+ struct ras_common_if umc_ras_block = {
+ .block = AMDGPU_RAS_BLOCK__UMC,
+ .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ .sub_block_index = 0,
+ .name = "umc",
+ };
+ struct ras_fs_if mmhub_fs_info = {
+ .sysfs_name = "mmhub_err_count",
+ .debugfs_name = "mmhub_err_inject",
+ };
+ struct ras_common_if mmhub_ras_block = {
+ .block = AMDGPU_RAS_BLOCK__MMHUB,
+ .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
+ .sub_block_index = 0,
+ .name = "mmhub",
+ };
+
+ r = gmc_v9_0_ecc_ras_block_late_init(handle,
+ &umc_fs_info, &umc_ras_block);
+ if (r)
+ return r;
+
+ r = gmc_v9_0_ecc_ras_block_late_init(handle,
+ &mmhub_fs_info, &mmhub_ras_block);
+ return r;
+}
static int gmc_v9_0_late_init(void *handle)
{
@@ -1188,21 +1225,32 @@ static int gmc_v9_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
- adev->gmc.ras_if) {
- struct ras_common_if *ras_if = adev->gmc.ras_if;
+ adev->gmc.umc_ras_if) {
+ struct ras_common_if *ras_if = adev->gmc.umc_ras_if;
struct ras_ih_if ih_info = {
.head = *ras_if,
};
- /*remove fs first*/
+ /* remove fs first */
amdgpu_ras_debugfs_remove(adev, ras_if);
amdgpu_ras_sysfs_remove(adev, ras_if);
- /*remove the IH*/
+ /* remove the IH */
amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
amdgpu_ras_feature_enable(adev, ras_if, 0);
kfree(ras_if);
}
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
+ adev->gmc.mmhub_ras_if) {
+ struct ras_common_if *ras_if = adev->gmc.mmhub_ras_if;
+
+ /* remove fs and disable ras feature */
+ amdgpu_ras_debugfs_remove(adev, ras_if);
+ amdgpu_ras_sysfs_remove(adev, ras_if);
+ amdgpu_ras_feature_enable(adev, ras_if, 0);
+ kfree(ras_if);
+ }
+
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
--
2.17.1
More information about the amd-gfx
mailing list