[Nouveau] [RFC PATCH 01/29] therm: split return code and value in nvkm_get_temp

Pierre Moreau pierre.morrow at free.fr
Sun Oct 8 10:16:02 UTC 2017


As you changed the return value of `temp_get()` to solely be the error code, or
absence of an error, I would change all those tests that checked whether the
returned value was strictly less, or greater than, 0 to now only compare
against 0 (no error). For example,

  if (therm && therm->attr_get && nvkm_therm_temp_get(therm, &val) < 0)
  if (therm->func->temp_get(therm, &val) >= 0)

could become

  if (therm && therm->attr_get && nvkm_therm_temp_get(therm, &val))
  if (!therm->func->temp_get(therm, &val))

to match other error checking code.

More comments below

On 2017-09-15 — 17:11, Karol Herbst wrote:
> The current hwmon code doesn't check if the returned value was actually an
> error.
> 
> Since Kepler temperature sensors are able to report negative values.

I assume those negative values are not for error reporting, but more if you
buried your GPU in the snow somewhere in the Antarctic and still want a valid
temperature to be reported.

> Since Pascal (and maybe earlier) we have sensors with improved precision.
> 
> Adjust the nvkm_get_temp method to be able to deal with those changes
> and let hwmon return an error properly.
> 
> Signed-off-by: Karol Herbst <karolherbst at gmail.com>
> ---
>  drm/nouveau/include/nvkm/subdev/therm.h |  2 +-
>  drm/nouveau/nouveau_hwmon.c             | 15 +++++++++------
>  drm/nouveau/nvkm/subdev/therm/base.c    | 19 ++++++++++++++-----
>  drm/nouveau/nvkm/subdev/therm/g84.c     | 11 ++++++-----
>  drm/nouveau/nvkm/subdev/therm/gp100.c   |  9 +++++----
>  drm/nouveau/nvkm/subdev/therm/nv40.c    |  9 +++------
>  drm/nouveau/nvkm/subdev/therm/nv50.c    |  9 +++------
>  drm/nouveau/nvkm/subdev/therm/priv.h    |  4 ++--
>  drm/nouveau/nvkm/subdev/therm/temp.c    | 16 ++++++++++++----
>  9 files changed, 55 insertions(+), 39 deletions(-)
> 
> diff --git a/drm/nouveau/include/nvkm/subdev/therm.h b/drm/nouveau/include/nvkm/subdev/therm.h
> index 9841f076..8c84017f 100644
> --- a/drm/nouveau/include/nvkm/subdev/therm.h
> +++ b/drm/nouveau/include/nvkm/subdev/therm.h
> @@ -86,7 +86,7 @@ struct nvkm_therm {
>  	int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int);
>  };
>  
> -int nvkm_therm_temp_get(struct nvkm_therm *);
> +int nvkm_therm_temp_get(struct nvkm_therm *, int *);
>  int nvkm_therm_fan_sense(struct nvkm_therm *);
>  int nvkm_therm_cstate(struct nvkm_therm *, int, int);
>  
> diff --git a/drm/nouveau/nouveau_hwmon.c b/drm/nouveau/nouveau_hwmon.c
> index 7c965648..f1914d9a 100644
> --- a/drm/nouveau/nouveau_hwmon.c
> +++ b/drm/nouveau/nouveau_hwmon.c
> @@ -326,8 +326,9 @@ nouveau_temp_is_visible(const void *data, u32 attr, int channel)
>  {
>  	struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data);
>  	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
> +	int val;
>  
> -	if (therm && therm->attr_get && nvkm_therm_temp_get(therm) < 0)
> +	if (therm && therm->attr_get && nvkm_therm_temp_get(therm, &val) < 0)
>  		return 0;
>  
>  	switch (attr) {
> @@ -421,15 +422,16 @@ nouveau_temp_read(struct device *dev, u32 attr, int channel, long *val)
>  	struct drm_device *drm_dev = dev_get_drvdata(dev);
>  	struct nouveau_drm *drm = nouveau_drm(drm_dev);
>  	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
> -	int ret;
> +	int ret = 0;
> +	int temp;
>  
>  	if (!therm || !therm->attr_get)
>  		return -EOPNOTSUPP;
>  
>  	switch (attr) {
>  	case hwmon_temp_input:
> -		ret = nvkm_therm_temp_get(therm);
> -		*val = ret < 0 ? ret : (ret * 1000);
> +		ret = nvkm_therm_temp_get(therm, &temp);
> +		*val = temp * 1000;
>  		break;
>  	case hwmon_temp_max:
>  		*val = therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK)
> @@ -459,7 +461,7 @@ nouveau_temp_read(struct device *dev, u32 attr, int channel, long *val)
>  		return -EOPNOTSUPP;
>  	}
>  
> -	return 0;
> +	return ret;
>  }
>  
>  static int
> @@ -713,6 +715,7 @@ nouveau_hwmon_init(struct drm_device *dev)
>  	struct device *hwmon_dev;
>  	int ret = 0;
>  	int i = 0;
> +	int val;
>  
>  	hwmon = drm->hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
>  	if (!hwmon)
> @@ -720,7 +723,7 @@ nouveau_hwmon_init(struct drm_device *dev)
>  	hwmon->dev = dev;
>  
>  	if (therm && therm->attr_get && therm->attr_set) {
> -		if (nvkm_therm_temp_get(therm) >= 0)
> +		if (nvkm_therm_temp_get(therm, &val) >= 0)
>  			special_groups[i++] = &temp1_auto_point_sensor_group;
>  		if (therm->fan_get && therm->fan_get(therm) >= 0)
>  			special_groups[i++] = &pwm_fan_sensor_group;
> diff --git a/drm/nouveau/nvkm/subdev/therm/base.c b/drm/nouveau/nvkm/subdev/therm/base.c
> index f27fc6d0..8fa691fb 100644
> --- a/drm/nouveau/nvkm/subdev/therm/base.c
> +++ b/drm/nouveau/nvkm/subdev/therm/base.c
> @@ -24,22 +24,26 @@
>  #include "priv.h"
>  
>  int
> -nvkm_therm_temp_get(struct nvkm_therm *therm)
> +nvkm_therm_temp_get(struct nvkm_therm *therm, int *val)
>  {
>  	if (therm->func->temp_get)
> -		return therm->func->temp_get(therm);
> +		return therm->func->temp_get(therm, val);
>  	return -ENODEV;
>  }
>  
>  static int
>  nvkm_therm_update_trip(struct nvkm_therm *therm)
>  {
> +	int temp, ret;
>  	struct nvbios_therm_trip_point *trip = therm->fan->bios.trip,
>  				       *cur_trip = NULL,
>  				       *last_trip = therm->last_trip;
> -	u8  temp = therm->func->temp_get(therm);
>  	u16 duty, i;
>  
> +	ret = therm->func->temp_get(therm, &temp);
> +	if (ret < 0)
> +		return ret;
> +
>  	/* look for the trip point corresponding to the current temperature */
>  	cur_trip = NULL;
>  	for (i = 0; i < therm->fan->bios.nr_fan_trip; i++) {
> @@ -67,9 +71,13 @@ static int
>  nvkm_therm_compute_linear_duty(struct nvkm_therm *therm, u8 linear_min_temp,
>                                 u8 linear_max_temp)
>  {
> -	u8  temp = therm->func->temp_get(therm);
> +	int temp, ret;
>  	u16 duty;
>  
> +	ret = therm->func->temp_get(therm, &temp);
> +	if (ret < 0)
> +		return ret;
> +
>  	/* handle the non-linear part first */
>  	if (temp < linear_min_temp)
>  		return therm->fan->bios.min_duty;
> @@ -182,6 +190,7 @@ nvkm_therm_fan_mode(struct nvkm_therm *therm, int mode)
>  {
>  	struct nvkm_subdev *subdev = &therm->subdev;
>  	struct nvkm_device *device = subdev->device;
> +	int val;
>  	static const char *name[] = {
>  		"disabled",
>  		"manual",
> @@ -197,7 +206,7 @@ nvkm_therm_fan_mode(struct nvkm_therm *therm, int mode)
>  	/* do not allow automatic fan management if the thermal sensor is
>  	 * not available */
>  	if (mode == NVKM_THERM_CTRL_AUTO &&
> -	    therm->func->temp_get(therm) < 0)
> +	    therm->func->temp_get(therm, &val) < 0)
>  		return -EINVAL;
>  
>  	if (therm->mode == mode)
> diff --git a/drm/nouveau/nvkm/subdev/therm/g84.c b/drm/nouveau/nvkm/subdev/therm/g84.c
> index 96f8da40..3bb2d829 100644
> --- a/drm/nouveau/nvkm/subdev/therm/g84.c
> +++ b/drm/nouveau/nvkm/subdev/therm/g84.c
> @@ -27,14 +27,15 @@
>  #include <subdev/fuse.h>
>  
>  int
> -g84_temp_get(struct nvkm_therm *therm)
> +g84_temp_get(struct nvkm_therm *therm, int *val)
>  {
>  	struct nvkm_device *device = therm->subdev.device;
>  
> -	if (nvkm_fuse_read(device->fuse, 0x1a8) == 1)
> -		return nvkm_rd32(device, 0x20400);
> -	else
> +	if (nvkm_fuse_read(device->fuse, 0x1a8) != 1)
>  		return -ENODEV;
> +
> +	*val = nvkm_rd32(device, 0x20400);

You could allow passing NULL values for `val` for when you are not interested
by the results, as you did in some places further up.
I would probably check for NULL values for `val`, and for `therm` though it
seems to be an implicit rule that the caller should be the one doing the check.

> +	return 0;
>  }
>  
>  void
> @@ -115,7 +116,7 @@ g84_therm_threshold_hyst_emulation(struct nvkm_therm *therm,
>  	}
>  
>  	/* fix the state (in case someone reprogrammed the alarms) */
> -	cur = therm->func->temp_get(therm);
> +	therm->func->temp_get(therm, &cur);

Should there be a check for the return value of the `temp_get()` function here,
and error out if an error code was returned? In any case, you are the behaviour
here, as if `temp_get()` previously returned an error code, the if-statement
would always be false, whereas now, it’s undecided as `cur` will not even be
initialised in that case!

I am not sure whether it should be the caller of `temp_get()` or `temp_get()`
itself which should initialise a default value for the `val` argument (probably
the latter).

>  	if (new_state == NVKM_THERM_THRS_LOWER && cur > thrs->temp)
>  		new_state = NVKM_THERM_THRS_HIGHER;
>  	else if (new_state == NVKM_THERM_THRS_HIGHER &&
> diff --git a/drm/nouveau/nvkm/subdev/therm/gp100.c b/drm/nouveau/nvkm/subdev/therm/gp100.c
> index 9f0dea3f..d8206748 100644
> --- a/drm/nouveau/nvkm/subdev/therm/gp100.c
> +++ b/drm/nouveau/nvkm/subdev/therm/gp100.c
> @@ -24,7 +24,7 @@
>  #include "priv.h"
>  
>  static int
> -gp100_temp_get(struct nvkm_therm *therm)
> +gp100_temp_get(struct nvkm_therm *therm, int *val)
>  {
>  	struct nvkm_device *device = therm->subdev.device;
>  	struct nvkm_subdev *subdev = &therm->subdev;
> @@ -36,9 +36,10 @@ gp100_temp_get(struct nvkm_therm *therm)
>  		nvkm_trace(subdev, "reading temperature from SHADOWed sensor\n");
>  
>  	/* device valid */
> -	if (tsensor & 0x20000000)
> -		return (inttemp >> 8);
> -	else
> +	if (tsensor & 0x20000000) {
> +		*val = inttemp >> 8;
> +		return 0;
> +	} else
>  		return -ENODEV;
>  }
>  
> diff --git a/drm/nouveau/nvkm/subdev/therm/nv40.c b/drm/nouveau/nvkm/subdev/therm/nv40.c
> index 2c92ffb5..cfd5b215 100644
> --- a/drm/nouveau/nvkm/subdev/therm/nv40.c
> +++ b/drm/nouveau/nvkm/subdev/therm/nv40.c
> @@ -70,7 +70,7 @@ nv40_sensor_setup(struct nvkm_therm *therm)
>  }
>  
>  static int
> -nv40_temp_get(struct nvkm_therm *therm)
> +nv40_temp_get(struct nvkm_therm *therm, int *val)
>  {
>  	struct nvkm_device *device = therm->subdev.device;
>  	struct nvbios_therm_sensor *sensor = &therm->bios_sensor;
> @@ -95,11 +95,8 @@ nv40_temp_get(struct nvkm_therm *therm)
>  	core_temp = core_temp + sensor->offset_num / sensor->offset_den;
>  	core_temp = core_temp + sensor->offset_constant - 8;
>  
> -	/* reserve negative temperatures for errors */
> -	if (core_temp < 0)
> -		core_temp = 0;
> -
> -	return core_temp;
> +	*val = core_temp;
> +	return 0;
>  }
>  
>  static int
> diff --git a/drm/nouveau/nvkm/subdev/therm/nv50.c b/drm/nouveau/nvkm/subdev/therm/nv50.c
> index 9b57b433..62ec4063 100644
> --- a/drm/nouveau/nvkm/subdev/therm/nv50.c
> +++ b/drm/nouveau/nvkm/subdev/therm/nv50.c
> @@ -126,7 +126,7 @@ nv50_sensor_setup(struct nvkm_therm *therm)
>  }
>  
>  static int
> -nv50_temp_get(struct nvkm_therm *therm)
> +nv50_temp_get(struct nvkm_therm *therm, int *val)
>  {
>  	struct nvkm_device *device = therm->subdev.device;
>  	struct nvbios_therm_sensor *sensor = &therm->bios_sensor;
> @@ -143,11 +143,8 @@ nv50_temp_get(struct nvkm_therm *therm)
>  	core_temp = core_temp + sensor->offset_num / sensor->offset_den;
>  	core_temp = core_temp + sensor->offset_constant - 8;
>  
> -	/* reserve negative temperatures for errors */
> -	if (core_temp < 0)
> -		core_temp = 0;
> -
> -	return core_temp;
> +	*val = core_temp;
> +	return 0;
>  }
>  
>  static void
> diff --git a/drm/nouveau/nvkm/subdev/therm/priv.h b/drm/nouveau/nvkm/subdev/therm/priv.h
> index 1f46e371..b325ec5f 100644
> --- a/drm/nouveau/nvkm/subdev/therm/priv.h
> +++ b/drm/nouveau/nvkm/subdev/therm/priv.h
> @@ -91,7 +91,7 @@ struct nvkm_therm_func {
>  	int (*pwm_set)(struct nvkm_therm *, int line, u32, u32);
>  	int (*pwm_clock)(struct nvkm_therm *, int line);
>  
> -	int (*temp_get)(struct nvkm_therm *);
> +	int (*temp_get)(struct nvkm_therm *, int *);
>  
>  	int (*fan_sense)(struct nvkm_therm *);
>  
> @@ -105,7 +105,7 @@ int  nv50_fan_pwm_get(struct nvkm_therm *, int, u32 *, u32 *);
>  int  nv50_fan_pwm_set(struct nvkm_therm *, int, u32, u32);
>  int  nv50_fan_pwm_clock(struct nvkm_therm *, int);
>  
> -int  g84_temp_get(struct nvkm_therm *);
> +int  g84_temp_get(struct nvkm_therm *, int *);
>  void g84_sensor_setup(struct nvkm_therm *);
>  void g84_therm_fini(struct nvkm_therm *);
>  
> diff --git a/drm/nouveau/nvkm/subdev/therm/temp.c b/drm/nouveau/nvkm/subdev/therm/temp.c
> index ddb2b2c6..5ec2dfb3 100644
> --- a/drm/nouveau/nvkm/subdev/therm/temp.c
> +++ b/drm/nouveau/nvkm/subdev/therm/temp.c
> @@ -86,7 +86,10 @@ nvkm_therm_sensor_event(struct nvkm_therm *therm, enum nvkm_therm_thrs thrs,
>  	static const char * const thresholds[] = {
>  		"fanboost", "downclock", "critical", "shutdown"
>  	};
> -	int temperature = therm->func->temp_get(therm);
> +	int temperature;
> +
> +	if (therm->func->temp_get(therm, &temperature) < 0)
> +		return;
>  
>  	if (thrs < 0 || thrs > 3)
>  		return;
> @@ -140,7 +143,10 @@ nvkm_therm_threshold_hyst_polling(struct nvkm_therm *therm,
>  {
>  	enum nvkm_therm_thrs_direction direction;
>  	enum nvkm_therm_thrs_state prev_state, new_state;
> -	int temp = therm->func->temp_get(therm);
> +	int temp;
> +
> +	if (therm->func->temp_get(therm, &temp) < 0)
> +		return;
>  
>  	prev_state = nvkm_therm_sensor_get_threshold_state(therm, thrs_name);
>  
> @@ -166,6 +172,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm)
>  	struct nvbios_therm_sensor *sensor = &therm->bios_sensor;
>  	struct nvkm_timer *tmr = therm->subdev.device->timer;
>  	unsigned long flags;
> +	int val;
>  
>  	spin_lock_irqsave(&therm->sensor.alarm_program_lock, flags);
>  
> @@ -185,7 +192,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm)
>  	spin_unlock_irqrestore(&therm->sensor.alarm_program_lock, flags);
>  
>  	/* schedule the next poll in one second */
> -	if (therm->func->temp_get(therm) >= 0)
> +	if (therm->func->temp_get(therm, &val) >= 0)
>  		nvkm_timer_alarm(tmr, 1000000000ULL, alarm);
>  }
>  
> @@ -227,9 +234,10 @@ nvkm_therm_sensor_fini(struct nvkm_therm *therm, bool suspend)
>  void
>  nvkm_therm_sensor_preinit(struct nvkm_therm *therm)
>  {
> +	int val;
>  	const char *sensor_avail = "yes";
>  
> -	if (therm->func->temp_get(therm) < 0)
> +	if (therm->func->temp_get(therm, &val) < 0)
>  		sensor_avail = "no";
>  
>  	nvkm_debug(&therm->subdev, "internal sensor: %s\n", sensor_avail);
> -- 
> 2.14.1
> 
> _______________________________________________
> Nouveau mailing list
> Nouveau at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/nouveau
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/nouveau/attachments/20171008/050cfaac/attachment.sig>


More information about the Nouveau mailing list