[PATCH v3] drm/amdgpu: dequeue mes scheduler during fini
Xiao, Jack
Jack.Xiao at amd.com
Tue Oct 18 03:37:14 UTC 2022
[AMD Official Use Only - General]
>> + /* dequeue sched pipe via kiq */
>> + void (*kiq_dequeue_sched)(struct amdgpu_device *adev);
>> +
It's unnecessary to expose a new callback function proto to perform dequeuing scheduler queue,
for kiq_fini or mes_hw_fini callback had got the propriate sequence to do this during fini procedure.
In addition, it seems that the issue is caused by that unclear CP_HQD_PQ_WPTR/CONTROL registers would trigger scheduler reading from unexpected address once CP_MES_CNTL is enabled:
a. for dequeue operation uses MMIO rather than kiq, can the dequeue operation move to mes_v11_hw_fini?
b. if something went wrong, it's better move the dequeue operation inside mes_v11_0_kiq_hw_fini before disabling mes controller.
Regards,
Jack
________________________________
From: YuBiao Wang <YuBiao.Wang at amd.com>
Sent: Friday, 14 October 2022 11:34
To: amd-gfx at lists.freedesktop.org <amd-gfx at lists.freedesktop.org>
Cc: Grodzovsky, Andrey <Andrey.Grodzovsky at amd.com>; Quan, Evan <Evan.Quan at amd.com>; Chen, Horace <Horace.Chen at amd.com>; Tuikov, Luben <Luben.Tuikov at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>; Deucher, Alexander <Alexander.Deucher at amd.com>; Xiao, Jack <Jack.Xiao at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>; Liu, Monk <Monk.Liu at amd.com>; Xu, Feifei <Feifei.Xu at amd.com>; Wang, Yang(Kevin) <KevinYang.Wang at amd.com>; Wang, YuBiao <YuBiao.Wang at amd.com>
Subject: [PATCH v3] drm/amdgpu: dequeue mes scheduler during fini
Update: Remove redundant comments as Christian suggests.
[Why]
If mes is not dequeued during fini, mes will be in an uncleaned state
during reload, then mes couldn't receive some commands which leads to
reload failure.
[How]
Perform MES dequeue via MMIO after all the unmap jobs are done by mes
and before kiq fini.
Signed-off-by: YuBiao Wang <YuBiao.Wang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 3 ++
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 3 ++
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 41 +++++++++++++++++++++++--
3 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index ad980f4b66e1..ea8efa52503b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -130,6 +130,9 @@ struct amdgpu_mes {
int (*kiq_hw_init)(struct amdgpu_device *adev);
int (*kiq_hw_fini)(struct amdgpu_device *adev);
+ /* dequeue sched pipe via kiq */
+ void (*kiq_dequeue_sched)(struct amdgpu_device *adev);
+
/* ip specific functions */
const struct amdgpu_mes_funcs *funcs;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 257b2e4de600..7c75758f58e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4406,6 +4406,9 @@ static int gfx_v11_0_hw_fini(void *handle)
if (amdgpu_gfx_disable_kcq(adev))
DRM_ERROR("KCQ disable failed\n");
+ if (adev->mes.ring.sched.ready && adev->mes.kiq_dequeue_sched)
+ adev->mes.kiq_dequeue_sched(adev);
+
amdgpu_mes_kiq_hw_fini(adev);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index b48684db2832..eef29646b074 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -44,6 +44,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
static int mes_v11_0_hw_fini(void *handle);
static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);
+static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev);
#define MES_EOP_SIZE 2048
@@ -1078,6 +1079,7 @@ static int mes_v11_0_sw_init(void *handle)
adev->mes.funcs = &mes_v11_0_funcs;
adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini;
+ adev->mes.kiq_dequeue_sched = &mes_v11_0_kiq_dequeue_sched;
r = amdgpu_mes_init(adev);
if (r)
@@ -1151,6 +1153,42 @@ static int mes_v11_0_sw_fini(void *handle)
return 0;
}
+static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev)
+{
+ uint32_t data;
+ int i;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0);
+
+ /* disable the queue if it's active */
+ if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+ break;
+ udelay(1);
+ }
+ }
+ data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_EN, 0);
+ data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+ DOORBELL_HIT, 1);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0);
+ WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
+ adev->mes.ring.sched.ready = false;
+}
+
static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring)
{
uint32_t tmp;
@@ -1257,9 +1295,6 @@ static int mes_v11_0_hw_init(void *handle)
static int mes_v11_0_hw_fini(void *handle)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- adev->mes.ring.sched.ready = false;
return 0;
}
--
2.25.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20221018/6f15fa57/attachment-0001.htm>
More information about the amd-gfx
mailing list