[PATCH 1/2] drm/radeon: Make sure CS mutex is held across GPU reset.
Konrad Rzeszutek Wilk
konrad.wilk at oracle.com
Fri Sep 16 01:31:44 PDT 2011
On Thu, Sep 15, 2011 at 08:21:00PM +0200, Michel Dänzer wrote:
> From: Michel Dänzer <michel.daenzer at amd.com>
>
> This was only the case if the GPU reset was triggered from the CS ioctl,
> otherwise other processes could happily enter the CS ioctl and wreak havoc
> during the GPU reset.
>
> This is a little complicated because the GPU reset can be triggered from the
> CS ioctl, in which case we're already holding the mutex, or from other call
> paths, in which case we need to lock the mutex. AFAICT the mutex API doesn't
> allow nested locking or finding out the mutex owner, so we need to handle this
> with some helper functions.
>
> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
> ---
> drivers/gpu/drm/radeon/radeon.h | 60 ++++++++++++++++++++++++++++++++
> drivers/gpu/drm/radeon/radeon_cs.c | 14 ++++----
> drivers/gpu/drm/radeon/radeon_device.c | 16 ++++++++
> 3 files changed, 83 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index ef0e0e0..89304d9 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -1203,6 +1203,8 @@ struct radeon_device {
> struct radeon_pm pm;
> uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH];
> struct mutex cs_mutex;
> + struct task_struct *cs_mutex_owner;
> + struct mutex cs_mutex_owner_mutex;
That is a bit of 'mutex.. mutex'. What about just having the same
name as before?
> struct radeon_wb wb;
> struct radeon_dummy_page dummy_page;
> bool gpu_lockup;
> @@ -1241,6 +1243,64 @@ struct radeon_device {
> struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS];
> };
>
> +
> +/*
> + * CS mutex helpers
> + */
> +
> +static inline void cs_mutex_lock(struct radeon_device *rdev)
> +{
> + mutex_lock(&rdev->cs_mutex);
> +
> + mutex_lock(&rdev->cs_mutex_owner_mutex);
> + rdev->cs_mutex_owner = current;
> + mutex_unlock(&rdev->cs_mutex_owner_mutex);
> +}
> +
> +static inline void cs_mutex_unlock(struct radeon_device *rdev)
> +{
> + mutex_lock(&rdev->cs_mutex_owner_mutex);
> + rdev->cs_mutex_owner = NULL;
> + mutex_unlock(&rdev->cs_mutex_owner_mutex);
> +
> + mutex_unlock(&rdev->cs_mutex);
> +}
> +
> +/*
> + * Check if this process locked the CS mutex already; if it didn't, lock it.
> + *
> + * Returns:
> + * true: This function locked the mutex.
> + * false: This function didn't lock the mutex (this process already locked it
> + * before), so the caller probably shouldn't unlock it.
> + */
> +static inline int cs_mutex_ensure_locked(struct radeon_device *rdev)
> +{
> + int took_mutex;
> + int have_mutex;
I think you meant 'bool'?
> +
> + mutex_lock(&rdev->cs_mutex_owner_mutex);
> + took_mutex = mutex_trylock(&rdev->cs_mutex);
> + if (took_mutex) {
> + /* The mutex was unlocked before, so it's ours now */
> + rdev->cs_mutex_owner = current;
> + have_mutex = true;
consider you set that here..
> + } else {
> + /* Somebody already locked the mutex. Was it this process? */
> + have_mutex = (rdev->cs_mutex_owner == current);
> + }
> + mutex_unlock(&rdev->cs_mutex_owner_mutex);
> +
> + if (!have_mutex) {
> + /* Another process locked the mutex, take it */
> + cs_mutex_lock(rdev);
> + took_mutex = true;
> + }
> +
> + return took_mutex;
and if it is really going to be bool, you might as well change the
return signature and make it the function decleration return bool
instead of int.
> +}
> +
> +
> int radeon_device_init(struct radeon_device *rdev,
> struct drm_device *ddev,
> struct pci_dev *pdev,
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index fae00c0..61e3063 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -222,7 +222,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> struct radeon_cs_chunk *ib_chunk;
> int r;
>
> - mutex_lock(&rdev->cs_mutex);
> + cs_mutex_lock(rdev);
> /* initialize parser */
> memset(&parser, 0, sizeof(struct radeon_cs_parser));
> parser.filp = filp;
> @@ -233,14 +233,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> if (r) {
> DRM_ERROR("Failed to initialize parser !\n");
> radeon_cs_parser_fini(&parser, r);
> - mutex_unlock(&rdev->cs_mutex);
> + cs_mutex_unlock(rdev);
> return r;
> }
> r = radeon_ib_get(rdev, &parser.ib);
> if (r) {
> DRM_ERROR("Failed to get ib !\n");
> radeon_cs_parser_fini(&parser, r);
> - mutex_unlock(&rdev->cs_mutex);
> + cs_mutex_unlock(rdev);
> return r;
> }
> r = radeon_cs_parser_relocs(&parser);
> @@ -248,7 +248,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> if (r != -ERESTARTSYS)
> DRM_ERROR("Failed to parse relocation %d!\n", r);
> radeon_cs_parser_fini(&parser, r);
> - mutex_unlock(&rdev->cs_mutex);
> + cs_mutex_unlock(rdev);
> return r;
> }
> /* Copy the packet into the IB, the parser will read from the
> @@ -260,14 +260,14 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> if (r || parser.parser_error) {
> DRM_ERROR("Invalid command stream !\n");
> radeon_cs_parser_fini(&parser, r);
> - mutex_unlock(&rdev->cs_mutex);
> + cs_mutex_unlock(rdev);
> return r;
> }
> r = radeon_cs_finish_pages(&parser);
> if (r) {
> DRM_ERROR("Invalid command stream !\n");
> radeon_cs_parser_fini(&parser, r);
> - mutex_unlock(&rdev->cs_mutex);
> + cs_mutex_unlock(rdev);
> return r;
> }
> r = radeon_ib_schedule(rdev, parser.ib);
> @@ -275,7 +275,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> DRM_ERROR("Failed to schedule IB !\n");
> }
> radeon_cs_parser_fini(&parser, r);
> - mutex_unlock(&rdev->cs_mutex);
> + cs_mutex_unlock(rdev);
> return r;
> }
>
> diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
> index 5f0fd85..5f3d02d 100644
> --- a/drivers/gpu/drm/radeon/radeon_device.c
> +++ b/drivers/gpu/drm/radeon/radeon_device.c
> @@ -712,6 +712,8 @@ int radeon_device_init(struct radeon_device *rdev,
>
> /* mutex initialization are all done here so we
> * can recall function without having locking issues */
> + mutex_init(&rdev->cs_mutex_owner_mutex);
> + rdev->cs_mutex_owner = NULL;
> mutex_init(&rdev->cs_mutex);
> mutex_init(&rdev->ib_pool.mutex);
> mutex_init(&rdev->cp.mutex);
> @@ -949,6 +951,16 @@ int radeon_gpu_reset(struct radeon_device *rdev)
> {
> int r;
> int resched;
> + int took_cs_mutex;
> +
> + /* Prevent CS ioctl from interfering */
> + took_cs_mutex = cs_mutex_ensure_locked(rdev);
> + if (!rdev->gpu_lockup) {
> + DRM_DEBUG("GPU already reset by previous CS mutex holder\n");
> + if (took_cs_mutex)
> + cs_mutex_unlock(rdev);
> + return 0;
> + }
>
> radeon_save_bios_scratch_regs(rdev);
> /* block TTM */
> @@ -962,8 +974,12 @@ int radeon_gpu_reset(struct radeon_device *rdev)
> radeon_restore_bios_scratch_regs(rdev);
> drm_helper_resume_force_mode(rdev->ddev);
> ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
> + if (took_cs_mutex)
> + cs_mutex_unlock(rdev);
> return 0;
> }
> + if (took_cs_mutex)
> + cs_mutex_unlock(rdev);
> /* bad news, how to tell it to userspace ? */
> dev_info(rdev->dev, "GPU reset failed\n");
> return r;
> --
> 1.7.6.3
>
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
More information about the dri-devel
mailing list