[PATCH 1/2] drm/radeon: Make sure CS mutex is held across GPU reset.

Konrad Rzeszutek Wilk konrad.wilk at oracle.com
Fri Sep 16 01:31:44 PDT 2011


On Thu, Sep 15, 2011 at 08:21:00PM +0200, Michel Dänzer wrote:
> From: Michel Dänzer <michel.daenzer at amd.com>
> 
> This was only the case if the GPU reset was triggered from the CS ioctl,
> otherwise other processes could happily enter the CS ioctl and wreak havoc
> during the GPU reset.
> 
> This is a little complicated because the GPU reset can be triggered from the
> CS ioctl, in which case we're already holding the mutex, or from other call
> paths, in which case we need to lock the mutex. AFAICT the mutex API doesn't
> allow nested locking or finding out the mutex owner, so we need to handle this
> with some helper functions.
> 
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
>  drivers/gpu/drm/radeon/radeon.h        |   60 ++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/radeon/radeon_cs.c     |   14 ++++----
>  drivers/gpu/drm/radeon/radeon_device.c |   16 ++++++++
>  3 files changed, 83 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index ef0e0e0..89304d9 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -1203,6 +1203,8 @@ struct radeon_device {
>  	struct radeon_pm		pm;
>  	uint32_t			bios_scratch[RADEON_BIOS_NUM_SCRATCH];
>  	struct mutex			cs_mutex;
> +	struct task_struct		*cs_mutex_owner;
> +	struct mutex			cs_mutex_owner_mutex;

That is a bit of 'mutex.. mutex'. What about just having the same
name as before?

>  	struct radeon_wb		wb;
>  	struct radeon_dummy_page	dummy_page;
>  	bool				gpu_lockup;
> @@ -1241,6 +1243,64 @@ struct radeon_device {
>  	struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS];
>  };
>  
> +
> +/*
> + * CS mutex helpers
> + */
> +
> +static inline void cs_mutex_lock(struct radeon_device *rdev)
> +{
> +	mutex_lock(&rdev->cs_mutex);
> +
> +	mutex_lock(&rdev->cs_mutex_owner_mutex);
> +	rdev->cs_mutex_owner = current;
> +	mutex_unlock(&rdev->cs_mutex_owner_mutex);
> +}
> +
> +static inline void cs_mutex_unlock(struct radeon_device *rdev)
> +{
> +	mutex_lock(&rdev->cs_mutex_owner_mutex);
> +	rdev->cs_mutex_owner = NULL;
> +	mutex_unlock(&rdev->cs_mutex_owner_mutex);
> +
> +	mutex_unlock(&rdev->cs_mutex);
> +}
> +
> +/*
> + * Check if this process locked the CS mutex already; if it didn't, lock it.
> + *
> + * Returns:
> + * true: This function locked the mutex.
> + * false: This function didn't lock the mutex (this process already locked it
> + * before), so the caller probably shouldn't unlock it.
> + */
> +static inline int cs_mutex_ensure_locked(struct radeon_device *rdev)
> +{
> +	int took_mutex;
> +	int have_mutex;

I think you meant 'bool'?

> +
> +	mutex_lock(&rdev->cs_mutex_owner_mutex);
> +	took_mutex = mutex_trylock(&rdev->cs_mutex);
> +	if (took_mutex) {
> +		/* The mutex was unlocked before, so it's ours now */
> +		rdev->cs_mutex_owner = current;
> +		have_mutex = true;

consider you set that here..
> +	} else {
> +		/* Somebody already locked the mutex. Was it this process? */
> +		have_mutex = (rdev->cs_mutex_owner == current);
> +	}
> +	mutex_unlock(&rdev->cs_mutex_owner_mutex);
> +
> +	if (!have_mutex) {
> +		/* Another process locked the mutex, take it */
> +		cs_mutex_lock(rdev);
> +		took_mutex = true;
> +	}
> +
> +	return took_mutex;

and if it is really going to be bool, you might as well change the
return signature and make it the function decleration return bool
instead of int.

> +}
> +
> +
>  int radeon_device_init(struct radeon_device *rdev,
>  		       struct drm_device *ddev,
>  		       struct pci_dev *pdev,
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index fae00c0..61e3063 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -222,7 +222,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>  	struct radeon_cs_chunk *ib_chunk;
>  	int r;
>  
> -	mutex_lock(&rdev->cs_mutex);
> +	cs_mutex_lock(rdev);
>  	/* initialize parser */
>  	memset(&parser, 0, sizeof(struct radeon_cs_parser));
>  	parser.filp = filp;
> @@ -233,14 +233,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>  	if (r) {
>  		DRM_ERROR("Failed to initialize parser !\n");
>  		radeon_cs_parser_fini(&parser, r);
> -		mutex_unlock(&rdev->cs_mutex);
> +		cs_mutex_unlock(rdev);
>  		return r;
>  	}
>  	r =  radeon_ib_get(rdev, &parser.ib);
>  	if (r) {
>  		DRM_ERROR("Failed to get ib !\n");
>  		radeon_cs_parser_fini(&parser, r);
> -		mutex_unlock(&rdev->cs_mutex);
> +		cs_mutex_unlock(rdev);
>  		return r;
>  	}
>  	r = radeon_cs_parser_relocs(&parser);
> @@ -248,7 +248,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>  		if (r != -ERESTARTSYS)
>  			DRM_ERROR("Failed to parse relocation %d!\n", r);
>  		radeon_cs_parser_fini(&parser, r);
> -		mutex_unlock(&rdev->cs_mutex);
> +		cs_mutex_unlock(rdev);
>  		return r;
>  	}
>  	/* Copy the packet into the IB, the parser will read from the
> @@ -260,14 +260,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>  	if (r || parser.parser_error) {
>  		DRM_ERROR("Invalid command stream !\n");
>  		radeon_cs_parser_fini(&parser, r);
> -		mutex_unlock(&rdev->cs_mutex);
> +		cs_mutex_unlock(rdev);
>  		return r;
>  	}
>  	r = radeon_cs_finish_pages(&parser);
>  	if (r) {
>  		DRM_ERROR("Invalid command stream !\n");
>  		radeon_cs_parser_fini(&parser, r);
> -		mutex_unlock(&rdev->cs_mutex);
> +		cs_mutex_unlock(rdev);
>  		return r;
>  	}
>  	r = radeon_ib_schedule(rdev, parser.ib);
> @@ -275,7 +275,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>  		DRM_ERROR("Failed to schedule IB !\n");
>  	}
>  	radeon_cs_parser_fini(&parser, r);
> -	mutex_unlock(&rdev->cs_mutex);
> +	cs_mutex_unlock(rdev);
>  	return r;
>  }
>  
> diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
> index 5f0fd85..5f3d02d 100644
> --- a/drivers/gpu/drm/radeon/radeon_device.c
> +++ b/drivers/gpu/drm/radeon/radeon_device.c
> @@ -712,6 +712,8 @@ int radeon_device_init(struct radeon_device *rdev,
>  
>  	/* mutex initialization are all done here so we
>  	 * can recall function without having locking issues */
> +	mutex_init(&rdev->cs_mutex_owner_mutex);
> +	rdev->cs_mutex_owner = NULL;
>  	mutex_init(&rdev->cs_mutex);
>  	mutex_init(&rdev->ib_pool.mutex);
>  	mutex_init(&rdev->cp.mutex);
> @@ -949,6 +951,16 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>  {
>  	int r;
>  	int resched;
> +	int took_cs_mutex;
> +
> +	/* Prevent CS ioctl from interfering */
> +	took_cs_mutex = cs_mutex_ensure_locked(rdev);
> +	if (!rdev->gpu_lockup) {
> +		DRM_DEBUG("GPU already reset by previous CS mutex holder\n");
> +		if (took_cs_mutex)
> +			cs_mutex_unlock(rdev);
> +		return 0;
> +	}
>  
>  	radeon_save_bios_scratch_regs(rdev);
>  	/* block TTM */
> @@ -962,8 +974,12 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>  		radeon_restore_bios_scratch_regs(rdev);
>  		drm_helper_resume_force_mode(rdev->ddev);
>  		ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
> +		if (took_cs_mutex)
> +			cs_mutex_unlock(rdev);
>  		return 0;
>  	}
> +	if (took_cs_mutex)
> +		cs_mutex_unlock(rdev);
>  	/* bad news, how to tell it to userspace ? */
>  	dev_info(rdev->dev, "GPU reset failed\n");
>  	return r;
> -- 
> 1.7.6.3
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel


More information about the dri-devel mailing list