[PATCH] drm/radeon: Always flush VM again on < CIK

Christian König deathsimple at vodafone.de
Mon Aug 18 02:44:27 PDT 2014


Am 18.08.2014 um 11:10 schrieb Michel Dänzer:
> On 15.08.2014 23:52, Christian König wrote:
>> I think I've figured out what the cause of the remaining issues is while
>> working on the implicit sync stuff.
>>
>> The issue happens when the flush is done because of a CS to the DMA ring
>> and a CS to the GFX ring directly after that which depends on the DMA
>> submission to be finished.
>>
>> In this situation we insert semaphore command so that the GFX ring wait
>> for the DMA ring to finish execution and normally don't flush on the GFX
>> ring a second time (the flush should be done on the DMA ring and we
>> waited for that to finish).
>>
>> The problem here is that semaphores can't be executed on the PFP, so the
>> PFP doesn't wait for the semaphore to be completed and happily starts
>> fetching commands while the flush on the DMA ring isn't completed.
>>
>> @Michel: can you give this branch a try and see if it now works for you:
>> http://cgit.freedesktop.org/~deathsimple/linux/log/?h=vm-flushing
> Unfortunately not; in fact, it seems to make the problem occur even faster,
> after just hundreds of piglit tests instead of after thousands.
>
> However, based on your description above, I came up with the patch below,
> which fixes the problem for me, with or without your 'drop
> RADEON_FENCE_SIGNALED_SEQ' patch.

Oh, yes of course! That's indeed much simpler and the PFP_SYNC_ME packet 
should be available even on R600. I'm going to take care of this and 
supplying patches for all hardware generations we have.

Thanks for pointing me to the right solution,
Christian.

>
>
> From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer at amd.com>
> Date: Mon, 18 Aug 2014 17:29:17 +0900
> Subject: [PATCH] drm/radeon: Sync ME and PFP after CP semaphore waits on >=
>   Cayman
> MIME-Version: 1.0
> Content-Type: text/plain; charset=UTF-8
> Content-Transfer-Encoding: 8bit
>
> Fixes lockups due to CP read GPUVM faults when running piglit on Cape
> Verde.
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
>
> If the PACKET3_PFP_SYNC_ME packet was already supported before Cayman,
> it might be a good idea to do this wherever possible, to avoid any
> other issues the PFP running ahead of semaphore waits might cause.
>
>   drivers/gpu/drm/radeon/cik.c         | 17 +++++++++++++++++
>   drivers/gpu/drm/radeon/ni.c          | 33 +++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/radeon/nid.h         |  2 ++
>   drivers/gpu/drm/radeon/radeon_asic.c |  4 ++--
>   drivers/gpu/drm/radeon/radeon_asic.h |  4 ++++
>   5 files changed, 58 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
> index 81d07e6..49707ac 100644
> --- a/drivers/gpu/drm/radeon/cik.c
> +++ b/drivers/gpu/drm/radeon/cik.c
> @@ -3920,6 +3920,17 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev,
>   	radeon_ring_write(ring, 0);
>   }
>   
> +/**
> + * cik_semaphore_ring_emit - emit a semaphore on the CP ring
> + *
> + * @rdev: radeon_device pointer
> + * @ring: radeon ring buffer object
> + * @semaphore: radeon semaphore object
> + * @emit_wait: Is this a sempahore wait?
> + *
> + * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
> + * from running ahead of semaphore waits.
> + */
>   bool cik_semaphore_ring_emit(struct radeon_device *rdev,
>   			     struct radeon_ring *ring,
>   			     struct radeon_semaphore *semaphore,
> @@ -3932,6 +3943,12 @@ bool cik_semaphore_ring_emit(struct radeon_device *rdev,
>   	radeon_ring_write(ring, lower_32_bits(addr));
>   	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
>   
> +	if (emit_wait) {
> +		/* Prevent the PFP from running ahead of the semaphore wait */
> +		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
> +		radeon_ring_write(ring, 0x0);
> +	}
> +
>   	return true;
>   }
>   
> diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
> index ba89375..4e586c7 100644
> --- a/drivers/gpu/drm/radeon/ni.c
> +++ b/drivers/gpu/drm/radeon/ni.c
> @@ -1363,6 +1363,39 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
>   	radeon_ring_write(ring, 0);
>   }
>   
> +
> +/**
> + * cayman_semaphore_ring_emit - emit a semaphore on the CP ring
> + *
> + * @rdev: radeon_device pointer
> + * @ring: radeon ring buffer object
> + * @semaphore: radeon semaphore object
> + * @emit_wait: Is this a sempahore wait?
> + *
> + * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
> + * from running ahead of semaphore waits.
> + */
> +bool cayman_semaphore_ring_emit(struct radeon_device *rdev,
> +				struct radeon_ring *ring,
> +				struct radeon_semaphore *semaphore,
> +				bool emit_wait)
> +{
> +	uint64_t addr = semaphore->gpu_addr;
> +	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
> +
> +	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
> +	radeon_ring_write(ring, lower_32_bits(addr));
> +	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
> +
> +	if (emit_wait) {
> +		/* Prevent the PFP from running ahead of the semaphore wait */
> +		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
> +		radeon_ring_write(ring, 0x0);
> +	}
> +
> +	return true;
> +}
> +
>   void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
>   {
>   	struct radeon_ring *ring = &rdev->ring[ib->ring];
> diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
> index 2e12e4d..dba6d37 100644
> --- a/drivers/gpu/drm/radeon/nid.h
> +++ b/drivers/gpu/drm/radeon/nid.h
> @@ -1131,6 +1131,8 @@
>   #define	PACKET3_DRAW_INDEX_MULTI_ELEMENT		0x36
>   #define	PACKET3_WRITE_DATA				0x37
>   #define	PACKET3_MEM_SEMAPHORE				0x39
> +#              define PACKET3_SEM_SEL_SIGNAL	    (0x6 << 29)
> +#              define PACKET3_SEM_SEL_WAIT	    (0x7 << 29)
>   #define	PACKET3_MPEG_INDEX				0x3A
>   #define	PACKET3_WAIT_REG_MEM				0x3C
>   #define	PACKET3_MEM_WRITE				0x3D
> diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
> index eeeeabe..67eb267 100644
> --- a/drivers/gpu/drm/radeon/radeon_asic.c
> +++ b/drivers/gpu/drm/radeon/radeon_asic.c
> @@ -1555,7 +1555,7 @@ static struct radeon_asic_ring cayman_gfx_ring = {
>   	.ib_execute = &cayman_ring_ib_execute,
>   	.ib_parse = &evergreen_ib_parse,
>   	.emit_fence = &cayman_fence_ring_emit,
> -	.emit_semaphore = &r600_semaphore_ring_emit,
> +	.emit_semaphore = &cayman_semaphore_ring_emit,
>   	.cs_parse = &evergreen_cs_parse,
>   	.ring_test = &r600_ring_test,
>   	.ib_test = &r600_ib_test,
> @@ -1804,7 +1804,7 @@ static struct radeon_asic_ring si_gfx_ring = {
>   	.ib_execute = &si_ring_ib_execute,
>   	.ib_parse = &si_ib_parse,
>   	.emit_fence = &si_fence_ring_emit,
> -	.emit_semaphore = &r600_semaphore_ring_emit,
> +	.emit_semaphore = &cayman_semaphore_ring_emit,
>   	.cs_parse = NULL,
>   	.ring_test = &r600_ring_test,
>   	.ib_test = &r600_ib_test,
> diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
> index 275a5dc..6b4c757 100644
> --- a/drivers/gpu/drm/radeon/radeon_asic.h
> +++ b/drivers/gpu/drm/radeon/radeon_asic.h
> @@ -590,6 +590,10 @@ int sumo_dpm_force_performance_level(struct radeon_device *rdev,
>    */
>   void cayman_fence_ring_emit(struct radeon_device *rdev,
>   			    struct radeon_fence *fence);
> +bool cayman_semaphore_ring_emit(struct radeon_device *rdev,
> +				struct radeon_ring *cp,
> +				struct radeon_semaphore *semaphore,
> +				bool emit_wait);
>   void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev);
>   int cayman_init(struct radeon_device *rdev);
>   void cayman_fini(struct radeon_device *rdev);



More information about the dri-devel mailing list