[PATCH 2/2] drm/amdgpu: Enable gfx v11_0_3 ras if poison mode is supported
Hawking Zhang
Hawking.Zhang at amd.com
Sun Jun 11 10:45:46 UTC 2023
GFX v11_0_3 ras needs to be enabled if poison mode
is supported. The driver doesn't need to issue a feature
enable call in the gfx_v11_0 late init phase. The ras
late init call is already centralized in
amdgpu_ras_late_init.
In addition, move the poison_mode check out of common
helpers like amdgpu_ras_is_supported and
amdgpu_ras_is_feature_allowed to ensure only GFX RAS
is enabled when poison mode is supported.
Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 49 ++++++++-----------------
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 26 -------------
2 files changed, 16 insertions(+), 59 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index dd7cdc234d7e..35e70860d628 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -126,6 +126,7 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
uint64_t addr);
static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
uint64_t addr);
+static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev);
#ifdef CONFIG_X86_MCE_AMD
static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
struct mce_notifier_adev_list {
@@ -757,16 +758,6 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
return 0;
}
-static int amdgpu_ras_check_feature_allowed(struct amdgpu_device *adev,
- struct ras_common_if *head)
-{
- if (amdgpu_ras_is_feature_allowed(adev, head) ||
- amdgpu_ras_is_poison_mode_supported(adev))
- return 1;
- else
- return 0;
-}
-
/* wrapper of psp_ras_enable_features */
int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
struct ras_common_if *head, bool enable)
@@ -797,7 +788,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
}
/* Do not enable if it is not allowed. */
- if (enable && !amdgpu_ras_check_feature_allowed(adev, head))
+ if (enable && !amdgpu_ras_is_feature_allowed(adev, head))
goto out;
/* Only enable ras feature operation handle on host side */
@@ -2420,9 +2411,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
}
/*
- * this is workaround for vega20 workstation sku,
- * force enable gfx ras, ignore vbios gfx ras flag
- * due to GC EDC can not write
+ * Common helpers for device or IP specific RAS quirks including
+ * a). Enable gfx ras on D16406 or D36002 board
+ * b). Enable gfx ras in gfx_v11_0_3 if poison mode is supported
*/
static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
{
@@ -2431,10 +2422,16 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
if (!ctx)
return;
+ /* Enable gfx ras on specific board */
if (strnstr(ctx->vbios_version, "D16406",
sizeof(ctx->vbios_version)) ||
- strnstr(ctx->vbios_version, "D36002",
- sizeof(ctx->vbios_version)))
+ strnstr(ctx->vbios_version, "D36002",
+ sizeof(ctx->vbios_version)))
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
+
+ /* Enable gfx ras on gfx_v11_0_3 if poison mode is supported */
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) &&
+ amdgpu_ras_is_poison_mode_supported(adev))
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
}
@@ -2502,6 +2499,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
1 << AMDGPU_RAS_BLOCK__MMHUB);
}
+ amdgpu_ras_query_poison_mode(adev);
+
amdgpu_ras_get_quirks(adev);
/* hw_supported needs to be aligned with RAS block mask. */
@@ -2659,8 +2658,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
goto release_con;
}
- amdgpu_ras_query_poison_mode(adev);
-
if (amdgpu_ras_fs_init(adev)) {
r = -EINVAL;
goto release_con;
@@ -3115,26 +3112,12 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co
int amdgpu_ras_is_supported(struct amdgpu_device *adev,
unsigned int block)
{
- int ret = 0;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
if (block >= AMDGPU_RAS_BLOCK_COUNT)
return 0;
- ret = ras && (adev->ras_enabled & (1 << block));
-
- /* For the special asic with mem ecc enabled but sram ecc
- * not enabled, even if the ras block is not supported on
- * .ras_enabled, if the asic supports poison mode and the
- * ras block has ras configuration, it can be considered
- * that the ras block supports ras function.
- */
- if (!ret &&
- amdgpu_ras_is_poison_mode_supported(adev) &&
- amdgpu_ras_get_ras_block(adev, block, 0))
- ret = 1;
-
- return ret;
+ return (ras && (adev->ras_enabled & (1 << block)));
}
int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 690e121d9dda..11e0c574b9f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4650,26 +4650,6 @@ static int gfx_v11_0_early_init(void *handle)
return gfx_v11_0_init_microcode(adev);
}
-static int gfx_v11_0_ras_late_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ras_common_if *gfx_common_if;
- int ret;
-
- gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!gfx_common_if)
- return -ENOMEM;
-
- gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX;
-
- ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true);
- if (ret)
- dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n");
-
- kfree(gfx_common_if);
- return 0;
-}
-
static int gfx_v11_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -4683,12 +4663,6 @@ static int gfx_v11_0_late_init(void *handle)
if (r)
return r;
- if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) {
- r = gfx_v11_0_ras_late_init(handle);
- if (r)
- return r;
- }
-
return 0;
}
--
2.17.1
More information about the amd-gfx
mailing list