[PATCH v4] drm/amdgpu: drop gfx_v11_0_cp_ecc_error_irq_funcs
Zhang, Horatio
Hongkun.Zhang at amd.com
Thu May 4 05:44:33 UTC 2023
[AMD Official Use Only - General]
Hi Hawking,
Thank you for your review.
I will change the judgment criteria to if (adev->gfx.cp_error_irq.funcs), and submit this patch to amd-staging-drm-next.
Regards,
Horatio
-----Original Message-----
From: Zhang, Hawking <Hawking.Zhang at amd.com>
Sent: Friday, April 28, 2023 1:31 PM
To: Zhang, Horatio <Hongkun.Zhang at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Yao, Longlong <Longlong.Yao at amd.com>; Chai, Thomas <YiPeng.Chai at amd.com>; Zhang, Horatio <Hongkun.Zhang at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>; Chen, Guchun <Guchun.Chen at amd.com>; Xu, Feifei <Feifei.Xu at amd.com>
Subject: RE: [PATCH v4] drm/amdgpu: drop gfx_v11_0_cp_ecc_error_irq_funcs
[AMD Official Use Only - General]
+ if (!(adev->gfx.cp_ecc_error_irq.funcs == NULL)) {
Just check if (adev->gfx.cp_error_irq.funcs) {
+ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (r)
+ goto late_fini;
+ }
Or you can use
If (!adev->gfx.cp_ecc_error_irq.funcs)
Return 0;
Either way works for me. With that addressed,
The change is
Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
Regards,
Hawking
-----Original Message-----
From: Horatio Zhang <Hongkun.Zhang at amd.com>
Sent: Friday, April 28, 2023 11:48
To: Zhang, Hawking <Hawking.Zhang at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Yao, Longlong <Longlong.Yao at amd.com>; Chai, Thomas <YiPeng.Chai at amd.com>; Zhang, Horatio <Hongkun.Zhang at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>; Chen, Guchun <Guchun.Chen at amd.com>; Xu, Feifei <Feifei.Xu at amd.com>
Subject: [PATCH v4] drm/amdgpu: drop gfx_v11_0_cp_ecc_error_irq_funcs
The gfx.cp_ecc_error_irq is retired in gfx11. In gfx_v11_0_hw_fini still use amdgpu_irq_put to disable this interrupt, which caused the call trace in this function.
[ 102.873958] Call Trace:
[ 102.873959] <TASK>
[ 102.873961] gfx_v11_0_hw_fini+0x23/0x1e0 [amdgpu] [ 102.874019] gfx_v11_0_suspend+0xe/0x20 [amdgpu] [ 102.874072] amdgpu_device_ip_suspend_phase2+0x240/0x460 [amdgpu] [ 102.874122] amdgpu_device_ip_suspend+0x3d/0x80 [amdgpu] [ 102.874172] amdgpu_device_pre_asic_reset+0xd9/0x490 [amdgpu] [ 102.874223] amdgpu_device_gpu_recover.cold+0x548/0xce6 [amdgpu] [ 102.874321] amdgpu_debugfs_reset_work+0x4c/0x70 [amdgpu] [ 102.874375] process_one_work+0x21f/0x3f0 [ 102.874377] worker_thread+0x200/0x3e0 [ 102.874378] ? process_one_work+0x3f0/0x3f0 [ 102.874379] kthread+0xfd/0x130 [ 102.874380] ? kthread_complete_and_exit+0x20/0x20
[ 102.874381] ret_from_fork+0x22/0x30
v2:
- Handle umc and gfx ras cases in separated patch
- Retired the gfx_v11_0_cp_ecc_error_irq_funcs in gfx11
v3:
- Improve the subject and code comments
- Add judgment on gfx11 in the function of amdgpu_gfx_ras_late_init
v4:
- Drop the define of CP_ME1_PIPE_INST_ADDR_INTERVAL and SET_ECC_ME_PIPE_STATE which using in gfx_v11_0_set_cp_ecc_error_state
- Check cp_ecc_error_irq.funcs rather than ip version for a more sustainable life
Signed-off-by: Horatio Zhang <Hongkun.Zhang at amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
Acked-by: Christian König <christian.koenig at amd.com>
Reviewed-by: Guchun Chen <guchun.chen at amd.com>
Reviewed-by: Feifei Xu <Feifei.Xu at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 46 -------------------------
2 files changed, 5 insertions(+), 49 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 66b9740ec376..042f69e1ef91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -720,9 +720,11 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r
if (r)
return r;
- r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
- if (r)
- goto late_fini;
+ if (!(adev->gfx.cp_ecc_error_irq.funcs == NULL)) {
+ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (r)
+ goto late_fini;
+ }
} else {
amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 8a4c4769e607..4ba21f798c72 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1355,13 +1355,6 @@ static int gfx_v11_0_sw_init(void *handle)
if (r)
return r;
- /* ECC error */
- r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
- GFX_11_0_0__SRCID__CP_ECC_ERROR,
- &adev->gfx.cp_ecc_error_irq);
- if (r)
- return r;
-
/* FED error */
r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
@@ -4483,7 +4476,6 @@ static int gfx_v11_0_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int r;
- amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -5962,36 +5954,6 @@ static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev
}
}
-#define CP_ME1_PIPE_INST_ADDR_INTERVAL 0x1 -#define SET_ECC_ME_PIPE_STATE(reg_addr, state) \
- do { \
- uint32_t tmp = RREG32_SOC15_IP(GC, reg_addr); \
- tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, state); \
- WREG32_SOC15_IP(GC, reg_addr, tmp); \
- } while (0)
-
-static int gfx_v11_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- uint32_t ecc_irq_state = 0;
- uint32_t pipe0_int_cntl_addr = 0;
- int i = 0;
-
- ecc_irq_state = (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0;
-
- pipe0_int_cntl_addr = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
-
- WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, ecc_irq_state);
-
- for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++)
- SET_ECC_ME_PIPE_STATE(pipe0_int_cntl_addr + i * CP_ME1_PIPE_INST_ADDR_INTERVAL,
- ecc_irq_state);
-
- return 0;
-}
-
static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type, @@ -6408,11 +6370,6 @@ static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
.process = gfx_v11_0_priv_inst_irq, };
-static const struct amdgpu_irq_src_funcs gfx_v11_0_cp_ecc_error_irq_funcs = {
- .set = gfx_v11_0_set_cp_ecc_error_state,
- .process = amdgpu_gfx_cp_ecc_error_irq,
-};
-
static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
.process = gfx_v11_0_rlc_gc_fed_irq, }; @@ -6428,9 +6385,6 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
- adev->gfx.cp_ecc_error_irq.num_types = 1; /* CP ECC error */
- adev->gfx.cp_ecc_error_irq.funcs = &gfx_v11_0_cp_ecc_error_irq_funcs;
-
adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
--
2.34.1
More information about the amd-gfx
mailing list