[PATCH libdrm] drm: fix race issue

Liu, Monk Monk.Liu at amd.com
Mon Aug 7 14:29:34 UTC 2017


Christian

>   		output->buf_handle = bo;
>   		output->alloc_size = bo->alloc_size; @@ -466,6 +461,8 @@ int 
> amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu)
>   	bo->cpu_map_count = 1;
>   	pthread_mutex_unlock(&bo->cpu_access_mutex);
>   
> +	amdgpu_add_handle_to_table(bo);

This one is a wrong base cherry-pick issue, I didn't tend to add this line, sorry for confuse .... my bad and thanks for the catch


> int amdgpu_bo_import(amdgpu_device_handle dev,
>   		/* Get a KMS handle. */
>   		r = drmPrimeFDToHandle(dev->fd, shared_handle, &handle);
>   		if (r) {
> +			pthread_mutex_unlock(&dev->bo_table_mutex);
>   			return r;
>   		}


This is another fix, I think it can be separated with main patch, this is just an obvious mutex_unlock missing in our code


For the main patch, I rework one and will sent for review again 

BR Monk


-----Original Message-----
From: Christian König [mailto:deathsimple at vodafone.de] 
Sent: Monday, August 7, 2017 9:32 PM
To: Liu, Monk <Monk.Liu at amd.com>; amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH libdrm] drm: fix race issue

Am 07.08.2017 um 13:44 schrieb Monk Liu:
> From: Monk Liu <monk.liu at amd.com>
>
> there is race issue between two threads on amdgpu_bo_free and 
> amdgpu_bo_import, this patch tend to fix it by moving the 
> pthread_mutex_lock out of bo_internal and cover the update_reference 
> function.
>
> besides the mutex_unlock in bo_import should also cover bo refcount 
> increasement.

Nice catch, but the solution is to heavy.

>
> Change-Id: I8ec5a2879dda0e6468864fc64e6b46059482998b
> Find-by: Deng hui <hui.deng at amd.com>
> Signed-off-by: Monk Liu <monk.liu at amd.com>
> ---
>   amdgpu/amdgpu_bo.c       | 13 +++++--------
>   amdgpu/amdgpu_internal.h | 17 +++++++++++++++--
>   2 files changed, 20 insertions(+), 10 deletions(-)
>
> diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c index 
> ec99488..10a3efe 100644
> --- a/amdgpu/amdgpu_bo.c
> +++ b/amdgpu/amdgpu_bo.c
> @@ -52,27 +52,22 @@ static void amdgpu_close_kms_handle(amdgpu_device_handle dev,
>   	drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &args);
>   }
>   
> -void amdgpu_bo_free_internal(amdgpu_bo_handle bo)
> +drm_private void amdgpu_bo_free_internal(amdgpu_bo_handle bo)
>   {
>   	/* Remove the buffer from the hash tables. */
> -	pthread_mutex_lock(&bo->dev->bo_table_mutex);
>   	util_hash_table_remove(bo->dev->bo_handles,
>   			       (void*)(uintptr_t)bo->handle);
>   	if (bo->flink_name) {
>   		util_hash_table_remove(bo->dev->bo_flink_names,
>   				       (void*)(uintptr_t)bo->flink_name);
>   	}
> -	pthread_mutex_unlock(&bo->dev->bo_table_mutex);
>   
>   	/* Release CPU access. */
>   	if (bo->cpu_map_count > 0) {
>   		bo->cpu_map_count = 1;
>   		amdgpu_bo_cpu_unmap(bo);
>   	}
> -
>   	amdgpu_close_kms_handle(bo->dev, bo->handle);
> -	pthread_mutex_destroy(&bo->cpu_access_mutex);
> -	free(bo);
>   }
>   
>   int amdgpu_bo_alloc(amdgpu_device_handle dev, @@ -297,6 +292,7 @@ 
> int amdgpu_bo_import(amdgpu_device_handle dev,
>   		/* Get a KMS handle. */
>   		r = drmPrimeFDToHandle(dev->fd, shared_handle, &handle);
>   		if (r) {
> +			pthread_mutex_unlock(&dev->bo_table_mutex);
>   			return r;
>   		}
>   
> @@ -339,10 +335,9 @@ int amdgpu_bo_import(amdgpu_device_handle dev,
>   	}
>   
>   	if (bo) {
> -		pthread_mutex_unlock(&dev->bo_table_mutex);
> -
>   		/* The buffer already exists, just bump the refcount. */
>   		atomic_inc(&bo->refcount);
> +		pthread_mutex_unlock(&dev->bo_table_mutex);

Actually the problem is only here, that one needs to use
atomic_inc_unless() before the mutex is dropped and return only if we could successfully acquire a reference to the BO.

All other changes are unnecessary.

>   
>   		output->buf_handle = bo;
>   		output->alloc_size = bo->alloc_size; @@ -466,6 +461,8 @@ int 
> amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu)
>   	bo->cpu_map_count = 1;
>   	pthread_mutex_unlock(&bo->cpu_access_mutex);
>   
> +	amdgpu_add_handle_to_table(bo);

Well that is clearly a NAK. IIRC we discussed that before and came to the conclusion that it isn't a good idea.

But it looks unrelated to the other changes, so why do you want to add it now?

Regards,
Christian.

> +
>   	*cpu = ptr;
>   	return 0;
>   }
> diff --git a/amdgpu/amdgpu_internal.h b/amdgpu/amdgpu_internal.h index 
> 0508131..e1a6559 100644
> --- a/amdgpu/amdgpu_internal.h
> +++ b/amdgpu/amdgpu_internal.h
> @@ -30,6 +30,9 @@
>   
>   #include <assert.h>
>   #include <pthread.h>
> +#include <stdlib.h>
> +
> +#include "libdrm_macros.h"
>   #include "xf86atomic.h"
>   #include "amdgpu.h"
>   #include "util_double_list.h"
> @@ -193,8 +196,18 @@ static inline bool update_references(atomic_t *dst, atomic_t *src)
>   static inline void amdgpu_bo_reference(struct amdgpu_bo **dst,
>   					struct amdgpu_bo *src)
>   {
> -	if (update_references(&(*dst)->refcount, &src->refcount))
> -		amdgpu_bo_free_internal(*dst);
> +	struct amdgpu_bo* bo = *dst;
> +	assert(bo!=NULL);
> +
> +	pthread_mutex_lock(&bo->dev->bo_table_mutex);
> +
> +	if (update_references(&bo->refcount, src?&src->refcount:NULL)) {
> +		amdgpu_bo_free_internal(bo);
> +		pthread_mutex_destroy(&bo->cpu_access_mutex);
> +		pthread_mutex_unlock(&bo->dev->bo_table_mutex);
> +		free(bo);
> +	} else
> +		pthread_mutex_unlock(&bo->dev->bo_table_mutex);
>   	*dst = src;
>   }
>   




More information about the amd-gfx mailing list