<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>
Thx, but in that case this patch here then won't help either it just
mitigates the problem.<br>
<br>
Can you try to reduce num_hw_submission for the MES ring?<br>
<br>
Thanks,<br>
Christian.<br>
<br>
<div class="moz-cite-prefix">Am 22.07.24 um 05:27 schrieb Xiao,
Jack:<br>
</div>
<blockquote type="cite"
cite="mid:PH8PR12MB68417B9F856C92BD02912A08EFA82@PH8PR12MB6841.namprd12.prod.outlook.com">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<style type="text/css" style="display:none;">P {margin-top:0;margin-bottom:0;}</style>
<p
style="font-family:Calibri;font-size:10pt;color:#0000FF;margin:5pt;font-style:normal;font-weight:normal;text-decoration:none;"
align="Left">
[AMD Official Use Only - AMD Internal Distribution Only]<br>
</p>
<br>
<div>
<div class="elementToProof"
style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<i>>> I think we rather need to increase the MES ring
size instead.</i></div>
<div class="elementToProof"
style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div class="elementToProof"
style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Unfortunately, it doesn't work. I guess mes firmware has
limitation.</div>
<div class="elementToProof"
style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div class="elementToProof"
style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Regards,</div>
<div class="elementToProof"
style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Jack</div>
<div class="elementToProof"
style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font style="font-size:11pt"
face="Calibri, sans-serif" color="#000000"><b>From:</b>
Christian König <a class="moz-txt-link-rfc2396E" href="mailto:ckoenig.leichtzumerken@gmail.com"><ckoenig.leichtzumerken@gmail.com></a><br>
<b>Sent:</b> Friday, 19 July 2024 23:44<br>
<b>To:</b> Xiao, Jack <a class="moz-txt-link-rfc2396E" href="mailto:Jack.Xiao@amd.com"><Jack.Xiao@amd.com></a>;
<a class="moz-txt-link-abbreviated" href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a>
<a class="moz-txt-link-rfc2396E" href="mailto:amd-gfx@lists.freedesktop.org"><amd-gfx@lists.freedesktop.org></a>; Deucher, Alexander
<a class="moz-txt-link-rfc2396E" href="mailto:Alexander.Deucher@amd.com"><Alexander.Deucher@amd.com></a><br>
<b>Subject:</b> Re: [PATCH] drm/amdgpu/mes: fix mes ring
buffer overflow</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span
style="font-size:11pt;">
<div class="PlainText">Am 19.07.24 um 11:16 schrieb Jack
Xiao:<br>
> wait memory room until enough before writing mes
packets<br>
> to avoid ring buffer overflow.<br>
><br>
> Signed-off-by: Jack Xiao <a class="moz-txt-link-rfc2396E" href="mailto:Jack.Xiao@amd.com"><Jack.Xiao@amd.com></a><br>
> ---<br>
> drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 18
++++++++++++++----<br>
> drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 18
++++++++++++++----<br>
> 2 files changed, 28 insertions(+), 8 deletions(-)<br>
><br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c<br>
> index 8ce51b9236c1..68c74adf79f1 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c<br>
> @@ -168,7 +168,7 @@ static int
mes_v11_0_submit_pkt_and_poll_completion(struct
amdgpu_mes *mes,<br>
> const char *op_str, *misc_op_str;<br>
> unsigned long flags;<br>
> u64 status_gpu_addr;<br>
> - u32 status_offset;<br>
> + u32 seq, status_offset;<br>
> u64 *status_ptr;<br>
> signed long r;<br>
> int ret;<br>
> @@ -196,6 +196,13 @@ static int
mes_v11_0_submit_pkt_and_poll_completion(struct
amdgpu_mes *mes,<br>
> if (r)<br>
> goto error_unlock_free;<br>
> <br>
> + seq = ++ring->fence_drv.sync_seq;<br>
> + r = amdgpu_fence_wait_polling(ring,<br>
> + seq -
ring->fence_drv.num_fences_mask,<br>
> + timeout);<br>
> + if (r < 1)<br>
> + goto error_undo;<br>
> +<br>
> api_status = (struct MES_API_STATUS *)((char
*)pkt + api_status_off);<br>
> api_status->api_completion_fence_addr =
status_gpu_addr;<br>
> api_status->api_completion_fence_value =
1;<br>
> @@ -208,8 +215,7 @@ static int
mes_v11_0_submit_pkt_and_poll_completion(struct
amdgpu_mes *mes,<br>
> mes_status_pkt.header.dwsize =
API_FRAME_SIZE_IN_DWORDS;<br>
>
mes_status_pkt.api_status.api_completion_fence_addr =<br>
> ring->fence_drv.gpu_addr;<br>
> -
mes_status_pkt.api_status.api_completion_fence_value =<br>
> - ++ring->fence_drv.sync_seq;<br>
> +
mes_status_pkt.api_status.api_completion_fence_value =
seq;<br>
> <br>
> amdgpu_ring_write_multiple(ring,
&mes_status_pkt,<br>
>
sizeof(mes_status_pkt) / 4);<br>
> @@ -229,7 +235,7 @@ static int
mes_v11_0_submit_pkt_and_poll_completion(struct
amdgpu_mes *mes,<br>
> dev_dbg(adev->dev, "MES msg=%d
was emitted\n",<br>
> x_pkt->header.opcode);<br>
> <br>
> - r = amdgpu_fence_wait_polling(ring,
ring->fence_drv.sync_seq, timeout);<br>
> + r = amdgpu_fence_wait_polling(ring, seq,
timeout);<br>
> if (r < 1 || !*status_ptr) {<br>
> <br>
> if (misc_op_str)<br>
> @@ -252,6 +258,10 @@ static int
mes_v11_0_submit_pkt_and_poll_completion(struct
amdgpu_mes *mes,<br>
> amdgpu_device_wb_free(adev, status_offset);<br>
> return 0;<br>
> <br>
> +error_undo:<br>
> + dev_err(adev->dev, "MES ring buffer is
full.\n");<br>
> + amdgpu_ring_undo(ring);<br>
> +<br>
> error_unlock_free:<br>
>
spin_unlock_irqrestore(&mes->ring_lock, flags);<br>
> <br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c<br>
> index c9f74231ad59..48e01206bcc4 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c<br>
> @@ -154,7 +154,7 @@ static int
mes_v12_0_submit_pkt_and_poll_completion(struct
amdgpu_mes *mes,<br>
> const char *op_str, *misc_op_str;<br>
> unsigned long flags;<br>
> u64 status_gpu_addr;<br>
> - u32 status_offset;<br>
> + u32 seq, status_offset;<br>
> u64 *status_ptr;<br>
> signed long r;<br>
> int ret;<br>
> @@ -182,6 +182,13 @@ static int
mes_v12_0_submit_pkt_and_poll_completion(struct
amdgpu_mes *mes,<br>
> if (r)<br>
> goto error_unlock_free;<br>
> <br>
> + seq = ++ring->fence_drv.sync_seq;<br>
> + r = amdgpu_fence_wait_polling(ring,<br>
> + seq -
ring->fence_drv.num_fences_mask,<br>
<br>
That's what's amdgpu_fence_emit_polling() does anyway.<br>
<br>
So this here just moves the polling a bit earlier.<br>
<br>
I think we rather need to increase the MES ring size
instead.<br>
<br>
Regards,<br>
Christian.<br>
<br>
<br>
> + timeout);<br>
> + if (r < 1)<br>
> + goto error_undo;<br>
> +<br>
> api_status = (struct MES_API_STATUS *)((char
*)pkt + api_status_off);<br>
> api_status->api_completion_fence_addr =
status_gpu_addr;<br>
> api_status->api_completion_fence_value =
1;<br>
> @@ -194,8 +201,7 @@ static int
mes_v12_0_submit_pkt_and_poll_completion(struct
amdgpu_mes *mes,<br>
> mes_status_pkt.header.dwsize =
API_FRAME_SIZE_IN_DWORDS;<br>
>
mes_status_pkt.api_status.api_completion_fence_addr =<br>
> ring->fence_drv.gpu_addr;<br>
> -
mes_status_pkt.api_status.api_completion_fence_value =<br>
> - ++ring->fence_drv.sync_seq;<br>
> +
mes_status_pkt.api_status.api_completion_fence_value =
seq;<br>
> <br>
> amdgpu_ring_write_multiple(ring,
&mes_status_pkt,<br>
>
sizeof(mes_status_pkt) / 4);<br>
> @@ -215,7 +221,7 @@ static int
mes_v12_0_submit_pkt_and_poll_completion(struct
amdgpu_mes *mes,<br>
> dev_dbg(adev->dev, "MES msg=%d
was emitted\n",<br>
> x_pkt->header.opcode);<br>
> <br>
> - r = amdgpu_fence_wait_polling(ring,
ring->fence_drv.sync_seq, timeout);<br>
> + r = amdgpu_fence_wait_polling(ring, seq,
timeout);<br>
> if (r < 1 || !*status_ptr) {<br>
> <br>
> if (misc_op_str)<br>
> @@ -238,6 +244,10 @@ static int
mes_v12_0_submit_pkt_and_poll_completion(struct
amdgpu_mes *mes,<br>
> amdgpu_device_wb_free(adev, status_offset);<br>
> return 0;<br>
> <br>
> +error_undo:<br>
> + dev_err(adev->dev, "MES ring buffer is
full.\n");<br>
> + amdgpu_ring_undo(ring);<br>
> +<br>
> error_unlock_free:<br>
>
spin_unlock_irqrestore(&mes->ring_lock, flags);<br>
> <br>
<br>
</div>
</span></font></div>
</div>
</blockquote>
<br>
</body>
</html>