[PATCH 05/11] drm/amdgpu: Move umc ras block init to gmc ras sw_init
Hawking Zhang
Hawking.Zhang at amd.com
Mon Mar 6 02:31:54 UTC 2023
Initialize umc ras block only when umc ip block
supports ras. Driver queries ras capabilities after
early_init, ras block init needs to be moved to
sw_init.
Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 16 +++++++++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 30 +++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 1 +
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 26 ++++-----------------
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 21 ++++-------------
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 ++++-----------------
7 files changed, 58 insertions(+), 64 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 6830f671cde7..97a12d8d786a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -446,9 +446,21 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
} while (fault->timestamp < tmp);
}
-int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev)
+int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
{
- if (!adev->gmc.xgmi.connected_to_cpu) {
+ int r;
+
+ /* umc ras block */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
+ r = amdgpu_umc_ras_sw_init(adev);
+ if (r) {
+ dev_err(adev->dev, "Failed to initialize umc ras block!\n");
+ return r;
+ }
+ }
+
+ /* xgmi ras block */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL)) {
adev->gmc.xgmi.ras = &xgmi_ras;
amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block);
adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 0305b660cd17..f1773abd5e1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -351,7 +351,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
uint16_t pasid, uint64_t timestamp);
void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid);
-int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev);
+int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 1b8574bc4463..da68ceaa024c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -208,6 +208,36 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true);
}
+int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev)
+{
+ int err;
+ struct amdgpu_umc_ras *ras;
+
+ if (!adev->umc.ras)
+ return 0;
+
+ ras = adev->umc.ras;
+
+ err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
+ if (err) {
+ dev_err(adev->dev, "Failed to register umc ras block!\n");
+ return err;
+ }
+
+ strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
+ ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
+ ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->umc.ras_if = &ras->ras_block.ras_comm;
+
+ if (!ras->ras_block.ras_late_init)
+ ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
+
+ if (ras->ras_block.ras_cb)
+ ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
+
+ return 0;
+}
+
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 36e19336f3b3..d7f1229ff11f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -87,6 +87,7 @@ struct amdgpu_umc {
unsigned long active_mask;
};
+int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset);
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index c59c2332d191..924f6f38fae6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -703,25 +703,8 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
default:
break;
}
- if (adev->umc.ras) {
- amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
-
- strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
- adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
- adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm;
-
- /* If don't define special ras_late_init function, use default ras_late_init */
- if (!adev->umc.ras->ras_block.ras_late_init)
- adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
-
- /* If not defined special ras_cb function, use default ras_cb */
- if (!adev->umc.ras->ras_block.ras_cb)
- adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
- }
}
-
static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
switch (adev->ip_versions[MMHUB_HWIP][0]) {
@@ -758,7 +741,6 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
static int gmc_v10_0_early_init(void *handle)
{
- int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gmc_v10_0_set_mmhub_funcs(adev);
@@ -774,10 +756,6 @@ static int gmc_v10_0_early_init(void *handle)
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
- r = amdgpu_gmc_ras_early_init(adev);
- if (r)
- return r;
-
return 0;
}
@@ -1028,6 +1006,10 @@ static int gmc_v10_0_sw_init(void *handle)
amdgpu_vm_manager_init(adev);
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index af7b3ba1ca00..1c585cc24857 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -581,23 +581,6 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
default:
break;
}
-
- if (adev->umc.ras) {
- amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
-
- strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
- adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
- adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm;
-
- /* If don't define special ras_late_init function, use default ras_late_init */
- if (!adev->umc.ras->ras_block.ras_late_init)
- adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
-
- /* If not define special ras_cb function, use default ras_cb */
- if (!adev->umc.ras->ras_block.ras_cb)
- adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
- }
}
@@ -846,6 +829,10 @@ static int gmc_v11_0_sw_init(void *handle)
amdgpu_vm_manager_init(adev);
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b06170c00dfc..e9b6599e790c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1318,23 +1318,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
default:
break;
}
-
- if (adev->umc.ras) {
- amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
-
- strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
- adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
- adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm;
-
- /* If don't define special ras_late_init function, use default ras_late_init */
- if (!adev->umc.ras->ras_block.ras_late_init)
- adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
-
- /* If not defined special ras_cb function, use default ras_cb */
- if (!adev->umc.ras->ras_block.ras_cb)
- adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
- }
}
static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
@@ -1406,7 +1389,6 @@ static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev)
static int gmc_v9_0_early_init(void *handle)
{
- int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables */
@@ -1436,10 +1418,6 @@ static int gmc_v9_0_early_init(void *handle)
adev->gmc.private_aperture_end =
adev->gmc.private_aperture_start + (4ULL << 30) - 1;
- r = amdgpu_gmc_ras_early_init(adev);
- if (r)
- return r;
-
return 0;
}
@@ -1798,6 +1776,10 @@ static int gmc_v9_0_sw_init(void *handle)
gmc_v9_0_save_registers(adev);
+ r = amdgpu_gmc_ras_sw_init(adev);
+ if (r)
+ return r;
+
return 0;
}
--
2.17.1
More information about the amd-gfx
mailing list