<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body>
<p><br>
</p>
<div class="moz-cite-prefix">On 2023-07-10 06:54, Christian König
wrote:<br>
</div>
<blockquote type="cite" cite="mid:6ca6ce34-df6a-884a-0ab7-8fe9e3033f3a@gmail.com">Am
07.07.23 um 17:49 schrieb Philip Yang:
<br>
<blockquote type="cite">Retry faults are delegated to soft IH ring
and then processed by
<br>
deferred worker. Current soft IH ring size PAGE_SIZE can store
128
<br>
entries, which may overflow and drop retry faults, causes HW
stucks
<br>
because the retry fault is not recovered.
<br>
<br>
Increase soft IH ring size to 8KB, enough to store 256 CAM
entries
<br>
because we clear the CAM entry after handling the retry fault
from soft
<br>
ring.
<br>
<br>
Define macro IH_RING_SIZE and IH_SW_RING_SIZE to remove
duplicate
<br>
constant.
<br>
<br>
Show warning message if soft IH ring overflows because this
should not
<br>
happen.
<br>
<br>
Signed-off-by: Philip Yang <a class="moz-txt-link-rfc2396E" href="mailto:Philip.Yang@amd.com"><Philip.Yang@amd.com></a>
<br>
---
<br>
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 8 ++++++--
<br>
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 7 +++++--
<br>
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +-
<br>
drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 4 ++--
<br>
drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 4 ++--
<br>
drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 4 ++--
<br>
drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 4 ++--
<br>
7 files changed, 20 insertions(+), 13 deletions(-)
<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
<br>
index fceb3b384955..51a0dbd2358a 100644
<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
<br>
@@ -138,6 +138,7 @@ void amdgpu_ih_ring_fini(struct
amdgpu_device *adev, struct amdgpu_ih_ring *ih)
<br>
/**
<br>
* amdgpu_ih_ring_write - write IV to the ring buffer
<br>
*
<br>
+ * @adev: amdgpu_device pointer
<br>
* @ih: ih ring to write to
<br>
* @iv: the iv to write
<br>
* @num_dw: size of the iv in dw
<br>
@@ -145,8 +146,8 @@ void amdgpu_ih_ring_fini(struct
amdgpu_device *adev, struct amdgpu_ih_ring *ih)
<br>
* Writes an IV to the ring buffer using the CPU and increment
the wptr.
<br>
* Used for testing and delegating IVs to a software ring.
<br>
*/
<br>
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const
uint32_t *iv,
<br>
- unsigned int num_dw)
<br>
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct
amdgpu_ih_ring *ih,
<br>
+ const uint32_t *iv, unsigned int num_dw)
<br>
{
<br>
uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
<br>
unsigned int i;
<br>
@@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct
amdgpu_ih_ring *ih, const uint32_t *iv,
<br>
if (wptr != READ_ONCE(ih->rptr)) {
<br>
wmb();
<br>
WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
<br>
+ } else {
<br>
+ dev_warn(adev->dev, "IH soft ring buffer overflow
0x%X, 0x%X\n",
<br>
+ wptr, ih->rptr);
<br>
}
<br>
}
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
<br>
index dd1c2eded6b9..6c6184f0dbc1 100644
<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
<br>
@@ -27,6 +27,9 @@
<br>
/* Maximum number of IVs processed at once */
<br>
#define AMDGPU_IH_MAX_NUM_IVS 32
<br>
+#define IH_RING_SIZE (256 * 1024)
<br>
+#define IH_SW_RING_SIZE (8 * 1024) /* enough for 256 CAM
entries */
<br>
+
<br>
</blockquote>
<br>
Please add an AMDGPU_ prefix to the macro name and don't put
comments on the same line as the macro.
<br>
</blockquote>
<p>I have pushed this, will wait longer for comment in future.
Thanks.<br>
</p>
<blockquote type="cite" cite="mid:6ca6ce34-df6a-884a-0ab7-8fe9e3033f3a@gmail.com">
<br>
Apart from that looks good to me,
<br>
Christian.
<br>
<br>
<blockquote type="cite"> struct amdgpu_device;
<br>
struct amdgpu_iv_entry;
<br>
@@ -97,8 +100,8 @@ struct amdgpu_ih_funcs {
<br>
int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct
amdgpu_ih_ring *ih,
<br>
unsigned ring_size, bool use_bus_addr);
<br>
void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct
amdgpu_ih_ring *ih);
<br>
-void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const
uint32_t *iv,
<br>
- unsigned int num_dw);
<br>
+void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct
amdgpu_ih_ring *ih,
<br>
+ const uint32_t *iv, unsigned int num_dw);
<br>
int amdgpu_ih_wait_on_checkpoint_process_ts(struct
amdgpu_device *adev,
<br>
struct amdgpu_ih_ring *ih);
<br>
int amdgpu_ih_process(struct amdgpu_device *adev, struct
amdgpu_ih_ring *ih);
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
<br>
index 5273decc5753..fa6d0adcec20 100644
<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
<br>
@@ -493,7 +493,7 @@ void amdgpu_irq_delegate(struct
amdgpu_device *adev,
<br>
struct amdgpu_iv_entry *entry,
<br>
unsigned int num_dw)
<br>
{
<br>
- amdgpu_ih_ring_write(&adev->irq.ih_soft,
entry->iv_entry, num_dw);
<br>
+ amdgpu_ih_ring_write(adev, &adev->irq.ih_soft,
entry->iv_entry, num_dw);
<br>
schedule_work(&adev->irq.ih_soft_work);
<br>
}
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
<br>
index b02e1cef78a7..980b24120080 100644
<br>
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
<br>
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
<br>
@@ -535,7 +535,7 @@ static int ih_v6_0_sw_init(void *handle)
<br>
* use bus address for ih ring by psp bl */
<br>
use_bus_addr =
<br>
(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ?
false : true;
<br>
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 *
1024, use_bus_addr);
<br>
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih,
IH_RING_SIZE, use_bus_addr);
<br>
if (r)
<br>
return r;
<br>
@@ -548,7 +548,7 @@ static int ih_v6_0_sw_init(void *handle)
<br>
/* initialize ih control register offset */
<br>
ih_v6_0_init_register_offset(adev);
<br>
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft,
PAGE_SIZE, true);
<br>
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft,
IH_SW_RING_SIZE, true);
<br>
if (r)
<br>
return r;
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
<br>
index eec13cb5bf75..b6a8478dabf4 100644
<br>
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
<br>
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
<br>
@@ -565,7 +565,7 @@ static int navi10_ih_sw_init(void *handle)
<br>
use_bus_addr = false;
<br>
else
<br>
use_bus_addr = true;
<br>
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 *
1024, use_bus_addr);
<br>
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih,
IH_RING_SIZE, use_bus_addr);
<br>
if (r)
<br>
return r;
<br>
@@ -578,7 +578,7 @@ static int navi10_ih_sw_init(void *handle)
<br>
/* initialize ih control registers offset */
<br>
navi10_ih_init_register_offset(adev);
<br>
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft,
PAGE_SIZE, true);
<br>
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft,
IH_SW_RING_SIZE, true);
<br>
if (r)
<br>
return r;
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
<br>
index 1e83db0c5438..d364c6dd152c 100644
<br>
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
<br>
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
<br>
@@ -485,7 +485,7 @@ static int vega10_ih_sw_init(void *handle)
<br>
if (r)
<br>
return r;
<br>
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 *
1024, true);
<br>
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih,
IH_RING_SIZE, true);
<br>
if (r)
<br>
return r;
<br>
@@ -510,7 +510,7 @@ static int vega10_ih_sw_init(void *handle)
<br>
/* initialize ih control registers offset */
<br>
vega10_ih_init_register_offset(adev);
<br>
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft,
PAGE_SIZE, true);
<br>
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft,
IH_SW_RING_SIZE, true);
<br>
if (r)
<br>
return r;
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
<br>
index 4d719df376a7..544ee55a22da 100644
<br>
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
<br>
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
<br>
@@ -539,7 +539,7 @@ static int vega20_ih_sw_init(void *handle)
<br>
(adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4,
4, 2)))
<br>
use_bus_addr = false;
<br>
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 *
1024, use_bus_addr);
<br>
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih,
IH_RING_SIZE, use_bus_addr);
<br>
if (r)
<br>
return r;
<br>
@@ -565,7 +565,7 @@ static int vega20_ih_sw_init(void *handle)
<br>
/* initialize ih control registers offset */
<br>
vega20_ih_init_register_offset(adev);
<br>
- r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft,
PAGE_SIZE, use_bus_addr);
<br>
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft,
IH_SW_RING_SIZE, use_bus_addr);
<br>
if (r)
<br>
return r;
<br>
</blockquote>
<br>
</blockquote>
</body>
</html>