[PATCH 08/11] drm/amdgpu: Rework mca ras sw_init

Hawking Zhang Hawking.Zhang at amd.com
Mon Mar 6 02:31:57 UTC 2023


To align with other IP blocks

Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 21 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 72 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h |  9 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 15 +++---
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c   | 44 ++-------------
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.h   |  4 +-
 6 files changed, 111 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 087a75374610..524e2c9b3012 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -477,6 +477,27 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
 		}
 	}
 
+	/* mca.x ras block */
+	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MCA)) {
+		r = amdgpu_mca_mp0_ras_sw_init(adev);
+		if (r) {
+			dev_err(adev->dev, "Failed to initialize mca.mp0 ras block!\n");
+			return r;
+		}
+
+		r = amdgpu_mca_mp1_ras_sw_init(adev);
+		if (r) {
+			dev_err(adev->dev, "Failed to initialize mca.mp1 ras block!\n");
+			return r;
+		}
+
+		r = amdgpu_mca_mpio_ras_sw_init(adev);
+		if (r) {
+			dev_err(adev->dev, "Failed to initialize mca.mpio ras block!\n");
+			return r;
+		}
+	}
+
 	/* xgmi ras block */
 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL)) {
 		adev->gmc.xgmi.ras = &xgmi_ras;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 51c2a82e2fa4..0b545bdcd636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -70,3 +70,75 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
 
 	amdgpu_mca_reset_error_count(adev, mc_status_addr);
 }
+
+int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev)
+{
+	int err;
+	struct amdgpu_mca_ras_block *ras;
+
+	if (!adev->mca.mp0.ras)
+		return 0;
+
+	ras = adev->mca.mp0.ras;
+
+	err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+	if (err) {
+		dev_err(adev->dev, "Failed to register mca.mp0 ras block!\n");
+		return err;
+	}
+
+	strcpy(ras->ras_block.ras_comm.name, "mca.mp0");
+	ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+	ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+	adev->mca.mp0.ras_if = &ras->ras_block.ras_comm;
+
+	return 0;
+}
+
+int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev)
+{
+        int err;
+        struct amdgpu_mca_ras_block *ras;
+
+        if (!adev->mca.mp1.ras)
+                return 0;
+
+        ras = adev->mca.mp1.ras;
+
+        err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+        if (err) {
+                dev_err(adev->dev, "Failed to register mca.mp1 ras block!\n");
+                return err;
+        }
+
+        strcpy(ras->ras_block.ras_comm.name, "mca.mp1");
+        ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+        ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+        adev->mca.mp1.ras_if = &ras->ras_block.ras_comm;
+
+        return 0;
+}
+
+int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev)
+{
+        int err;
+        struct amdgpu_mca_ras_block *ras;
+
+        if (!adev->mca.mpio.ras)
+                return 0;
+
+        ras = adev->mca.mpio.ras;
+
+        err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+        if (err) {
+                dev_err(adev->dev, "Failed to register mca.mpio ras block!\n");
+                return err;
+        }
+
+        strcpy(ras->ras_block.ras_comm.name, "mca.mpio");
+        ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA;
+        ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+        adev->mca.mpio.ras_if = &ras->ras_block.ras_comm;
+
+        return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index 7ce16d16e34b..997a073e2409 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -30,12 +30,7 @@ struct amdgpu_mca_ras {
 	struct amdgpu_mca_ras_block *ras;
 };
 
-struct amdgpu_mca_funcs {
-	void (*init)(struct amdgpu_device *adev);
-};
-
 struct amdgpu_mca {
-	const struct amdgpu_mca_funcs *funcs;
 	struct amdgpu_mca_ras mp0;
 	struct amdgpu_mca_ras mp1;
 	struct amdgpu_mca_ras mpio;
@@ -55,5 +50,7 @@ void amdgpu_mca_reset_error_count(struct amdgpu_device *adev,
 void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev,
 				      uint64_t mc_status_addr,
 				      void *ras_error_status);
-
+int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev);
+int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 9a333f9744bf..67c2a5186b8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1363,13 +1363,18 @@ static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
 	adev->hdp.ras = &hdp_v4_0_ras;
 }
 
-static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev)
+static void gmc_v9_0_set_mca_ras_funcs(struct amdgpu_device *adev)
 {
+	struct amdgpu_mca *mca = &adev->mca;
+
 	/* is UMC the right IP to check for MCA?  Maybe DF? */
 	switch (adev->ip_versions[UMC_HWIP][0]) {
 	case IP_VERSION(6, 7, 0):
-		if (!adev->gmc.xgmi.connected_to_cpu)
-			adev->mca.funcs = &mca_v3_0_funcs;
+		if (!adev->gmc.xgmi.connected_to_cpu) {
+			mca->mp0.ras = &mca_v3_0_mp0_ras;
+			mca->mp1.ras = &mca_v3_0_mp1_ras;
+			mca->mpio.ras = &mca_v3_0_mpio_ras;
+		}
 		break;
 	default:
 		break;
@@ -1398,7 +1403,7 @@ static int gmc_v9_0_early_init(void *handle)
 	gmc_v9_0_set_mmhub_ras_funcs(adev);
 	gmc_v9_0_set_gfxhub_funcs(adev);
 	gmc_v9_0_set_hdp_ras_funcs(adev);
-	gmc_v9_0_set_mca_funcs(adev);
+	gmc_v9_0_set_mca_ras_funcs(adev);
 
 	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
 	adev->gmc.shared_aperture_end =
@@ -1611,8 +1616,6 @@ static int gmc_v9_0_sw_init(void *handle)
 	adev->gfxhub.funcs->init(adev);
 
 	adev->mmhub.funcs->init(adev);
-	if (adev->mca.funcs)
-		adev->mca.funcs->init(adev);
 
 	spin_lock_init(&adev->gmc.invalidate_lock);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index d4bd7d1d2649..6dae4a2e2767 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -51,19 +51,13 @@ static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object *block_obj,
 	return -EINVAL;
 }
 
-const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = {
+static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = {
 	.query_ras_error_count = mca_v3_0_mp0_query_ras_error_count,
 	.query_ras_error_address = NULL,
 };
 
 struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
 	.ras_block = {
-		.ras_comm = {
-			.block = AMDGPU_RAS_BLOCK__MCA,
-			.sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP0,
-			.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
-			.name = "mp0",
-		},
 		.hw_ops = &mca_v3_0_mp0_hw_ops,
 		.ras_block_match = mca_v3_0_ras_block_match,
 	},
@@ -77,19 +71,13 @@ static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev,
 				         ras_error_status);
 }
 
-const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = {
+static const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = {
 	.query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
 	.query_ras_error_address = NULL,
 };
 
 struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
 	.ras_block = {
-		.ras_comm = {
-			.block = AMDGPU_RAS_BLOCK__MCA,
-			.sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP1,
-			.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
-			.name = "mp1",
-		},
 		.hw_ops = &mca_v3_0_mp1_hw_ops,
 		.ras_block_match = mca_v3_0_ras_block_match,
 	},
@@ -103,40 +91,14 @@ static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev,
 				         ras_error_status);
 }
 
-const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = {
+static const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = {
 	.query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
 	.query_ras_error_address = NULL,
 };
 
 struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
 	.ras_block = {
-		.ras_comm = {
-			.block = AMDGPU_RAS_BLOCK__MCA,
-			.sub_block_index = AMDGPU_RAS_MCA_BLOCK__MPIO,
-			.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
-			.name = "mpio",
-		},
 		.hw_ops = &mca_v3_0_mpio_hw_ops,
 		.ras_block_match = mca_v3_0_ras_block_match,
 	},
 };
-
-
-static void mca_v3_0_init(struct amdgpu_device *adev)
-{
-	struct amdgpu_mca *mca = &adev->mca;
-
-	mca->mp0.ras = &mca_v3_0_mp0_ras;
-	mca->mp1.ras = &mca_v3_0_mp1_ras;
-	mca->mpio.ras = &mca_v3_0_mpio_ras;
-	amdgpu_ras_register_ras_block(adev, &mca->mp0.ras->ras_block);
-	amdgpu_ras_register_ras_block(adev, &mca->mp1.ras->ras_block);
-	amdgpu_ras_register_ras_block(adev, &mca->mpio.ras->ras_block);
-	mca->mp0.ras_if = &mca->mp0.ras->ras_block.ras_comm;
-	mca->mp1.ras_if = &mca->mp1.ras->ras_block.ras_comm;
-	mca->mpio.ras_if = &mca->mpio.ras->ras_block.ras_comm;
-}
-
-const struct amdgpu_mca_funcs mca_v3_0_funcs = {
-	.init = mca_v3_0_init,
-};
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
index b899b86194c2..d3eaef0d7f2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.h
@@ -21,6 +21,8 @@
 #ifndef __MCA_V3_0_H__
 #define __MCA_V3_0_H__
 
-extern const struct amdgpu_mca_funcs mca_v3_0_funcs;
+extern struct amdgpu_mca_ras_block mca_v3_0_mp0_ras;
+extern struct amdgpu_mca_ras_block mca_v3_0_mp1_ras;
+extern struct amdgpu_mca_ras_block mca_v3_0_mpio_ras;
 
 #endif
-- 
2.17.1



More information about the amd-gfx mailing list