[PATCH] drm/amdgpu: protect kiq overrun
Yintian Tao
yttao at amd.com
Wed Apr 22 14:37:47 UTC 2020
Wait for the oldest outstanding KIQ fence to be signaled before queuing
a new submission, to make sure there will be no KIQ ring overrun.
Signed-off-by: Yintian Tao <yttao at amd.com>
---
.../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 8 ++++-
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 8 ++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 ++++
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 ++++
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 7 +++++
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 +++++
8 files changed, 73 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 691c89705bcd..72a5d7e15494 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -311,7 +311,7 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
struct v10_compute_mqd *m;
- uint32_t mec, pipe;
+ uint32_t mec, pipe, seq = 0;
int r;
m = get_mqd(mqd);
@@ -325,6 +325,12 @@ static int kgd_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
mec, pipe, queue_id);
spin_lock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+ if (r) {
+ pr_err("critical bug! too many kiq submission\n");
+ goto out_unlock;
+ }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index df841c2ac5e7..332f72b2d334 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -309,7 +309,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
struct v9_mqd *m;
- uint32_t mec, pipe;
+ uint32_t mec, pipe, seq = 0;
int r;
m = get_mqd(mqd);
@@ -323,6 +323,12 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd,
mec, pipe, queue_id);
spin_lock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+ if (r) {
+ pr_err("critical bug! too many kiq submissions\n");
+ goto out_unlock;
+ }
+
r = amdgpu_ring_alloc(kiq_ring, 7);
if (r) {
pr_err("Failed to alloc KIQ (%d).\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..387b1a8ed4df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -321,6 +321,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
AMDGPU_RING_PRIO_DEFAULT);
if (r)
dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
+ else
+ kiq->max_sub_num = (ring->ring_size / 4) /
+ ring->funcs->align_mask;
return r;
}
@@ -663,6 +666,21 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
return 0;
}
+/* Wait (polling, up to MAX_KIQ_REG_WAIT) for fence sync_seq - max_sub_num to signal
+ * before another KIQ submission is queued. Returns 0, or -ETIME on timeout. */
+int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq)
+{
+	uint32_t sync_seq = kiq->ring.fence_drv.sync_seq;
+	signed long r;
+
+	if (sync_seq <= kiq->max_sub_num) /* nothing that old has been emitted yet */
+		return 0;
+
+	r = amdgpu_fence_wait_polling(&kiq->ring, sync_seq - kiq->max_sub_num,
+				      MAX_KIQ_REG_WAIT);
+	return r < 1 ? -ETIME : 0;
+}
+
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
signed long r, cnt = 0;
@@ -674,6 +692,12 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
BUG_ON(!ring->funcs->emit_rreg);
spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+ if (r) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ goto failed_kiq_read;
+ }
+
if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
spin_unlock_irqrestore(&kiq->ring_lock, flags);
pr_err("critical bug! too many kiq readers\n");
@@ -728,6 +752,12 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
BUG_ON(!ring->funcs->emit_wreg);
spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+ if (r) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ goto failed_kiq_write;
+ }
+
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
amdgpu_fence_emit_polling(ring, &seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index ee698f0246d8..1ee59a927bd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -103,6 +103,7 @@ struct amdgpu_kiq {
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
const struct kiq_pm4_funcs *pmf;
+ uint32_t max_sub_num;
};
/*
@@ -387,4 +388,6 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
+
+int amdgpu_gfx_kiq_is_avail(struct amdgpu_kiq *kiq);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 8c10084f44ef..4b027006d072 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -57,6 +57,12 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
uint32_t seq;
spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+ if (r) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ goto failed_kiq;
+ }
+
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5b1549f167b0..a136e2229f7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4051,6 +4051,12 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
BUG_ON(!ring->funcs->emit_rreg);
spin_lock_irqsave(&kiq->ring_lock, flags);
+ r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+ if (r) {
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ goto failed_kiq_read;
+ }
+
if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
spin_unlock_irqrestore(&kiq->ring_lock, flags);
pr_err("critical bug! too many kiq readers\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 30b75d79efdb..77d8bc9c0111 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -423,6 +423,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
if (amdgpu_emu_mode == 0 && ring->sched.ready) {
spin_lock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+ if (r) {
+ spin_unlock(&kiq->ring_lock);
+ DRM_ERROR("too many kiq submissions\n");
+ return -ETIME;
+ }
+
/* 2 dwords flush + 8 dwords fence */
amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
kiq->pmf->kiq_invalidate_tlbs(ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index fecdbc471983..c429a2a5fe3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -614,6 +614,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
ndw += kiq->pmf->invalidate_tlbs_size;
spin_lock(&adev->gfx.kiq.ring_lock);
+ r = amdgpu_gfx_kiq_is_avail(&adev->gfx.kiq);
+ if (r) {
+ spin_unlock(&kiq->ring_lock);
+ DRM_ERROR("critical bug! too many kiq submissions\n");
+ return -ETIME;
+ }
+
/* 2 dwords flush + 8 dwords fence */
amdgpu_ring_alloc(ring, ndw);
if (vega20_xgmi_wa)
--
2.17.1
More information about the amd-gfx
mailing list