[Intel-gfx] [PATCH 1/3] drm/i915: Skip the ERR_PTR error state

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Dec 6 12:57:23 UTC 2018


On 06/12/2018 08:44, Chris Wilson wrote:
> Although commit fb6f0b64e455 ("drm/i915: Prevent machine hang from
> Broxton's vtd w/a and error capture") applied cleanly after a 24 month
> hiatus, the code had moved on with new methods for peeking and fetching
> the captured gpu info. Make sure we catch all uses of the stashed error
> state and avoid dereferencing the error pointer.
> 
> Fixes: fb6f0b64e455 ("drm/i915: Prevent machine hang from Broxton's vtd w/a and error capture")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c   | 11 ++++++++++-
>   drivers/gpu/drm/i915/i915_gpu_error.c | 12 +++++++++---
>   drivers/gpu/drm/i915/i915_sysfs.c     |  3 +++
>   3 files changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 38dcee1ca062..f8aa8586c9a6 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -984,6 +984,9 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
>   	intel_runtime_pm_get(i915);
>   	gpu = i915_capture_gpu_state(i915);
>   	intel_runtime_pm_put(i915);
> +
> +	if (IS_ERR(gpu))
> +		return PTR_ERR(gpu);
>   	if (!gpu)
>   		return -ENOMEM;
>   
> @@ -1018,7 +1021,13 @@ i915_error_state_write(struct file *filp,
>   
>   static int i915_error_state_open(struct inode *inode, struct file *file)
>   {
> -	file->private_data = i915_first_error_state(inode->i_private);
> +	struct i915_gpu_state *error;
> +
> +	error = i915_first_error_state(inode->i_private);
> +	if (IS_ERR(error))
> +		return PTR_ERR(error);
> +
> +	file->private_data  = error;
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 07465123c166..7ca6f3f31d41 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1907,6 +1907,11 @@ i915_capture_gpu_state(struct drm_i915_private *i915)
>   {
>   	struct i915_gpu_state *error;
>   
> +	/* Check if GPU capture has been disabled */
> +	error = READ_ONCE(i915->gpu_error.first_error);
> +	if (IS_ERR(error))
> +		return error;
> +
>   	error = kzalloc(sizeof(*error), GFP_ATOMIC);
>   	if (!error)
>   		return NULL;

Looks like it would be cleaner to return ERR_PTR(-ENOMEM) here. At least 
the i915_gpu_info_open hunk would be.

Then in drm-tip I see i915_capture_error_state as a caller which needs 
to handle ERR_PTR now as well, no?

> @@ -1987,7 +1992,7 @@ i915_first_error_state(struct drm_i915_private *i915)
>   
>   	spin_lock_irq(&i915->gpu_error.lock);
>   	error = i915->gpu_error.first_error;
> -	if (error)
> +	if (!IS_ERR_OR_NULL(error))
>   		i915_gpu_state_get(error);
>   	spin_unlock_irq(&i915->gpu_error.lock);
>   
> @@ -2000,10 +2005,11 @@ void i915_reset_error_state(struct drm_i915_private *i915)
>   
>   	spin_lock_irq(&i915->gpu_error.lock);
>   	error = i915->gpu_error.first_error;
> -	i915->gpu_error.first_error = NULL;
> +	if (!IS_ERR_OR_NULL(error)) /* once disabled, always disabled */

Hm, this will apply on transient -ENOMEM as well.

> +		i915->gpu_error.first_error = NULL;
>   	spin_unlock_irq(&i915->gpu_error.lock);
>   
> -	if (!IS_ERR(error))
> +	if (!IS_ERR_OR_NULL(error))
>   		i915_gpu_state_put(error);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index 535caebd9813..370b7497e9df 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -521,6 +521,9 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
>   	ssize_t ret;
>   
>   	gpu = i915_first_error_state(i915);
> +	if (IS_ERR(gpu))
> +		return PTR_ERR(gpu);
> +
>   	if (gpu) {
>   		ret = i915_gpu_state_copy_to_buffer(gpu, buf, off, count);
>   		i915_gpu_state_put(gpu);#
> 

A single control block:

if (!IS_ERR_OR_NULL) {
	...
} else {
	...
}

seems like it would do the trick.

Regards,

Tvrtko


More information about the Intel-gfx mailing list