[PATCH] drm/radeon: Always flush VM again on < CIK

Michel Dänzer michel at daenzer.net
Mon Aug 18 02:10:52 PDT 2014


On 15.08.2014 23:52, Christian König wrote:
> 
> I think I've figured out what the cause of the remaining issues is while
> working on the implicit sync stuff.
> 
> The issue happens when the flush is done because of a CS to the DMA ring
> and a CS to the GFX ring directly after that which depends on the DMA
> submission to be finished.
> 
> In this situation we insert semaphore command so that the GFX ring wait
> for the DMA ring to finish execution and normally don't flush on the GFX
> ring a second time (the flush should be done on the DMA ring and we
> waited for that to finish).
> 
> The problem here is that semaphores can't be executed on the PFP, so the
> PFP doesn't wait for the semaphore to be completed and happily starts
> fetching commands while the flush on the DMA ring isn't completed.
> 
> @Michel: can you give this branch a try and see if it now works for you:
> http://cgit.freedesktop.org/~deathsimple/linux/log/?h=vm-flushing

Unfortunately not; in fact, it seems to make the problem occur even faster,
after just hundreds of piglit tests instead of after thousands.

However, based on your description above, I came up with the patch below,
which fixes the problem for me, with or without your 'drop
RADEON_FENCE_SIGNALED_SEQ' patch.


From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
Date: Mon, 18 Aug 2014 17:29:17 +0900
Subject: [PATCH] drm/radeon: Sync ME and PFP after CP semaphore waits on >=
 Cayman
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes lockups due to CP read GPUVM faults when running piglit on Cape
Verde.

Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
---

If the PACKET3_PFP_SYNC_ME packet was already supported before Cayman,
it might be a good idea to do this wherever possible, to avoid any
other issues the PFP running ahead of semaphore waits might cause.

 drivers/gpu/drm/radeon/cik.c         | 17 +++++++++++++++++
 drivers/gpu/drm/radeon/ni.c          | 33 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/nid.h         |  2 ++
 drivers/gpu/drm/radeon/radeon_asic.c |  4 ++--
 drivers/gpu/drm/radeon/radeon_asic.h |  4 ++++
 5 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 81d07e6..49707ac 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3920,6 +3920,17 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, 0);
 }
 
+/**
+ * cik_semaphore_ring_emit - emit a semaphore on the CP ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon ring buffer object
+ * @semaphore: radeon semaphore object
+ * @emit_wait: Is this a sempahore wait?
+ *
+ * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
+ * from running ahead of semaphore waits.
+ */
 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
 			     struct radeon_ring *ring,
 			     struct radeon_semaphore *semaphore,
@@ -3932,6 +3943,12 @@ bool cik_semaphore_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, lower_32_bits(addr));
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
 
+	if (emit_wait) {
+		/* Prevent the PFP from running ahead of the semaphore wait */
+		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+		radeon_ring_write(ring, 0x0);
+	}
+
 	return true;
 }
 
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index ba89375..4e586c7 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1363,6 +1363,39 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, 0);
 }
 
+
+/**
+ * cayman_semaphore_ring_emit - emit a semaphore on the CP ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon ring buffer object
+ * @semaphore: radeon semaphore object
+ * @emit_wait: Is this a sempahore wait?
+ *
+ * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
+ * from running ahead of semaphore waits.
+ */
+bool cayman_semaphore_ring_emit(struct radeon_device *rdev,
+				struct radeon_ring *ring,
+				struct radeon_semaphore *semaphore,
+				bool emit_wait)
+{
+	uint64_t addr = semaphore->gpu_addr;
+	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
+
+	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
+	radeon_ring_write(ring, lower_32_bits(addr));
+	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
+
+	if (emit_wait) {
+		/* Prevent the PFP from running ahead of the semaphore wait */
+		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+		radeon_ring_write(ring, 0x0);
+	}
+
+	return true;
+}
+
 void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index 2e12e4d..dba6d37 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -1131,6 +1131,8 @@
 #define	PACKET3_DRAW_INDEX_MULTI_ELEMENT		0x36
 #define	PACKET3_WRITE_DATA				0x37
 #define	PACKET3_MEM_SEMAPHORE				0x39
+#              define PACKET3_SEM_SEL_SIGNAL	    (0x6 << 29)
+#              define PACKET3_SEM_SEL_WAIT	    (0x7 << 29)
 #define	PACKET3_MPEG_INDEX				0x3A
 #define	PACKET3_WAIT_REG_MEM				0x3C
 #define	PACKET3_MEM_WRITE				0x3D
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index eeeeabe..67eb267 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1555,7 +1555,7 @@ static struct radeon_asic_ring cayman_gfx_ring = {
 	.ib_execute = &cayman_ring_ib_execute,
 	.ib_parse = &evergreen_ib_parse,
 	.emit_fence = &cayman_fence_ring_emit,
-	.emit_semaphore = &r600_semaphore_ring_emit,
+	.emit_semaphore = &cayman_semaphore_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
 	.ring_test = &r600_ring_test,
 	.ib_test = &r600_ib_test,
@@ -1804,7 +1804,7 @@ static struct radeon_asic_ring si_gfx_ring = {
 	.ib_execute = &si_ring_ib_execute,
 	.ib_parse = &si_ib_parse,
 	.emit_fence = &si_fence_ring_emit,
-	.emit_semaphore = &r600_semaphore_ring_emit,
+	.emit_semaphore = &cayman_semaphore_ring_emit,
 	.cs_parse = NULL,
 	.ring_test = &r600_ring_test,
 	.ib_test = &r600_ib_test,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 275a5dc..6b4c757 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -590,6 +590,10 @@ int sumo_dpm_force_performance_level(struct radeon_device *rdev,
  */
 void cayman_fence_ring_emit(struct radeon_device *rdev,
 			    struct radeon_fence *fence);
+bool cayman_semaphore_ring_emit(struct radeon_device *rdev,
+				struct radeon_ring *cp,
+				struct radeon_semaphore *semaphore,
+				bool emit_wait);
 void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev);
 int cayman_init(struct radeon_device *rdev);
 void cayman_fini(struct radeon_device *rdev);
-- 
2.1.0


-- 
Earthling Michel Dänzer            |                  http://www.amd.com
Libre software enthusiast          |                Mesa and X developer


More information about the dri-devel mailing list