[Nouveau] [RFC PATCH 01/29] therm: split return code and value in nvkm_get_temp

Karol Herbst karolherbst at gmail.com
Sun Oct 8 10:41:06 UTC 2017


On Sun, Oct 8, 2017 at 12:16 PM, Pierre Moreau <pierre.morrow at free.fr> wrote:
> As you changed the return value of `temp_get()` to solely be the error code, or
> absence of an error, I would change all those tests that checked whether the
> returned value was strictly less, or greater than, 0 to now only compare
> against 0 (no error). For example,
>
>   if (therm && therm->attr_get && nvkm_therm_temp_get(therm, &val) < 0)
>   if (therm->func->temp_get(therm, &val) >= 0)
>
> could become
>
>   if (therm && therm->attr_get && nvkm_therm_temp_get(therm, &val))
>   if (!therm->func->temp_get(therm, &val))
>

right, makes sense.

> to match other error checking code.
>
> More comments below
>
> On 2017-09-15 — 17:11, Karol Herbst wrote:
>> The current hwmon code doesn't check if the returned value was actually an
>> error.
>>
>> Since Kepler temperature sensors are able to report negative values.
>
> I assume those negative values are not for error reporting, but more if you
> buried your GPU in the snow somewhere in the Antarctic and still want a valid
> temperature to be reported.

well, nobody ever tested that, but at least on Kepler technically we
have two new registers related to therm, one offset and one value. If
the offset is 0x60, value needs to be substracted by that offset to
get the real temperature. No idea if that works for real though, but
why would they add it anyhow if not?

>
>> Since Pascal (and maybe earlier) we have sensors with improved precision.
>>
>> Adjust the nvkm_get_temp method to be able to deal with those changes
>> and let hwmon return an error properly.
>>
>> Signed-off-by: Karol Herbst <karolherbst at gmail.com>
>> ---
>>  drm/nouveau/include/nvkm/subdev/therm.h |  2 +-
>>  drm/nouveau/nouveau_hwmon.c             | 15 +++++++++------
>>  drm/nouveau/nvkm/subdev/therm/base.c    | 19 ++++++++++++++-----
>>  drm/nouveau/nvkm/subdev/therm/g84.c     | 11 ++++++-----
>>  drm/nouveau/nvkm/subdev/therm/gp100.c   |  9 +++++----
>>  drm/nouveau/nvkm/subdev/therm/nv40.c    |  9 +++------
>>  drm/nouveau/nvkm/subdev/therm/nv50.c    |  9 +++------
>>  drm/nouveau/nvkm/subdev/therm/priv.h    |  4 ++--
>>  drm/nouveau/nvkm/subdev/therm/temp.c    | 16 ++++++++++++----
>>  9 files changed, 55 insertions(+), 39 deletions(-)
>>
>> diff --git a/drm/nouveau/include/nvkm/subdev/therm.h b/drm/nouveau/include/nvkm/subdev/therm.h
>> index 9841f076..8c84017f 100644
>> --- a/drm/nouveau/include/nvkm/subdev/therm.h
>> +++ b/drm/nouveau/include/nvkm/subdev/therm.h
>> @@ -86,7 +86,7 @@ struct nvkm_therm {
>>       int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int);
>>  };
>>
>> -int nvkm_therm_temp_get(struct nvkm_therm *);
>> +int nvkm_therm_temp_get(struct nvkm_therm *, int *);
>>  int nvkm_therm_fan_sense(struct nvkm_therm *);
>>  int nvkm_therm_cstate(struct nvkm_therm *, int, int);
>>
>> diff --git a/drm/nouveau/nouveau_hwmon.c b/drm/nouveau/nouveau_hwmon.c
>> index 7c965648..f1914d9a 100644
>> --- a/drm/nouveau/nouveau_hwmon.c
>> +++ b/drm/nouveau/nouveau_hwmon.c
>> @@ -326,8 +326,9 @@ nouveau_temp_is_visible(const void *data, u32 attr, int channel)
>>  {
>>       struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data);
>>       struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
>> +     int val;
>>
>> -     if (therm && therm->attr_get && nvkm_therm_temp_get(therm) < 0)
>> +     if (therm && therm->attr_get && nvkm_therm_temp_get(therm, &val) < 0)
>>               return 0;
>>
>>       switch (attr) {
>> @@ -421,15 +422,16 @@ nouveau_temp_read(struct device *dev, u32 attr, int channel, long *val)
>>       struct drm_device *drm_dev = dev_get_drvdata(dev);
>>       struct nouveau_drm *drm = nouveau_drm(drm_dev);
>>       struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
>> -     int ret;
>> +     int ret = 0;
>> +     int temp;
>>
>>       if (!therm || !therm->attr_get)
>>               return -EOPNOTSUPP;
>>
>>       switch (attr) {
>>       case hwmon_temp_input:
>> -             ret = nvkm_therm_temp_get(therm);
>> -             *val = ret < 0 ? ret : (ret * 1000);
>> +             ret = nvkm_therm_temp_get(therm, &temp);
>> +             *val = temp * 1000;
>>               break;
>>       case hwmon_temp_max:
>>               *val = therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK)
>> @@ -459,7 +461,7 @@ nouveau_temp_read(struct device *dev, u32 attr, int channel, long *val)
>>               return -EOPNOTSUPP;
>>       }
>>
>> -     return 0;
>> +     return ret;
>>  }
>>
>>  static int
>> @@ -713,6 +715,7 @@ nouveau_hwmon_init(struct drm_device *dev)
>>       struct device *hwmon_dev;
>>       int ret = 0;
>>       int i = 0;
>> +     int val;
>>
>>       hwmon = drm->hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
>>       if (!hwmon)
>> @@ -720,7 +723,7 @@ nouveau_hwmon_init(struct drm_device *dev)
>>       hwmon->dev = dev;
>>
>>       if (therm && therm->attr_get && therm->attr_set) {
>> -             if (nvkm_therm_temp_get(therm) >= 0)
>> +             if (nvkm_therm_temp_get(therm, &val) >= 0)
>>                       special_groups[i++] = &temp1_auto_point_sensor_group;
>>               if (therm->fan_get && therm->fan_get(therm) >= 0)
>>                       special_groups[i++] = &pwm_fan_sensor_group;
>> diff --git a/drm/nouveau/nvkm/subdev/therm/base.c b/drm/nouveau/nvkm/subdev/therm/base.c
>> index f27fc6d0..8fa691fb 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/base.c
>> +++ b/drm/nouveau/nvkm/subdev/therm/base.c
>> @@ -24,22 +24,26 @@
>>  #include "priv.h"
>>
>>  int
>> -nvkm_therm_temp_get(struct nvkm_therm *therm)
>> +nvkm_therm_temp_get(struct nvkm_therm *therm, int *val)
>>  {
>>       if (therm->func->temp_get)
>> -             return therm->func->temp_get(therm);
>> +             return therm->func->temp_get(therm, val);
>>       return -ENODEV;
>>  }
>>
>>  static int
>>  nvkm_therm_update_trip(struct nvkm_therm *therm)
>>  {
>> +     int temp, ret;
>>       struct nvbios_therm_trip_point *trip = therm->fan->bios.trip,
>>                                      *cur_trip = NULL,
>>                                      *last_trip = therm->last_trip;
>> -     u8  temp = therm->func->temp_get(therm);
>>       u16 duty, i;
>>
>> +     ret = therm->func->temp_get(therm, &temp);
>> +     if (ret < 0)
>> +             return ret;
>> +
>>       /* look for the trip point corresponding to the current temperature */
>>       cur_trip = NULL;
>>       for (i = 0; i < therm->fan->bios.nr_fan_trip; i++) {
>> @@ -67,9 +71,13 @@ static int
>>  nvkm_therm_compute_linear_duty(struct nvkm_therm *therm, u8 linear_min_temp,
>>                                 u8 linear_max_temp)
>>  {
>> -     u8  temp = therm->func->temp_get(therm);
>> +     int temp, ret;
>>       u16 duty;
>>
>> +     ret = therm->func->temp_get(therm, &temp);
>> +     if (ret < 0)
>> +             return ret;
>> +
>>       /* handle the non-linear part first */
>>       if (temp < linear_min_temp)
>>               return therm->fan->bios.min_duty;
>> @@ -182,6 +190,7 @@ nvkm_therm_fan_mode(struct nvkm_therm *therm, int mode)
>>  {
>>       struct nvkm_subdev *subdev = &therm->subdev;
>>       struct nvkm_device *device = subdev->device;
>> +     int val;
>>       static const char *name[] = {
>>               "disabled",
>>               "manual",
>> @@ -197,7 +206,7 @@ nvkm_therm_fan_mode(struct nvkm_therm *therm, int mode)
>>       /* do not allow automatic fan management if the thermal sensor is
>>        * not available */
>>       if (mode == NVKM_THERM_CTRL_AUTO &&
>> -         therm->func->temp_get(therm) < 0)
>> +         therm->func->temp_get(therm, &val) < 0)
>>               return -EINVAL;
>>
>>       if (therm->mode == mode)
>> diff --git a/drm/nouveau/nvkm/subdev/therm/g84.c b/drm/nouveau/nvkm/subdev/therm/g84.c
>> index 96f8da40..3bb2d829 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/g84.c
>> +++ b/drm/nouveau/nvkm/subdev/therm/g84.c
>> @@ -27,14 +27,15 @@
>>  #include <subdev/fuse.h>
>>
>>  int
>> -g84_temp_get(struct nvkm_therm *therm)
>> +g84_temp_get(struct nvkm_therm *therm, int *val)
>>  {
>>       struct nvkm_device *device = therm->subdev.device;
>>
>> -     if (nvkm_fuse_read(device->fuse, 0x1a8) == 1)
>> -             return nvkm_rd32(device, 0x20400);
>> -     else
>> +     if (nvkm_fuse_read(device->fuse, 0x1a8) != 1)
>>               return -ENODEV;
>> +
>> +     *val = nvkm_rd32(device, 0x20400);
>
> You could allow passing NULL values for `val` for when you are not interested
> by the results, as you did in some places further up.
> I would probably check for NULL values for `val`, and for `therm` though it
> seems to be an implicit rule that the caller should be the one doing the check.

well, I think we should do all NULL checks inside the "public" subdev
functions. g84_temp_get is internal to the subdev, so we can assume
therm never to be NULL.
Will go through those and see where some are missing related to my changes.

>
>> +     return 0;
>>  }
>>
>>  void
>> @@ -115,7 +116,7 @@ g84_therm_threshold_hyst_emulation(struct nvkm_therm *therm,
>>       }
>>
>>       /* fix the state (in case someone reprogrammed the alarms) */
>> -     cur = therm->func->temp_get(therm);
>> +     therm->func->temp_get(therm, &cur);
>
> Should there be a check for the return value of the `temp_get()` function here,
> and error out if an error code was returned? In any case, you are the behaviour
> here, as if `temp_get()` previously returned an error code, the if-statement
> would always be false, whereas now, it’s undecided as `cur` will not even be
> initialised in that case!
>
> I am not sure whether it should be the caller of `temp_get()` or `temp_get()`
> itself which should initialise a default value for the `val` argument (probably
> the latter).

I think error checking is the right way to go here, then we don't need
a default value. Values in an error case are usually not trustworthy
anyhow.

>
>>       if (new_state == NVKM_THERM_THRS_LOWER && cur > thrs->temp)
>>               new_state = NVKM_THERM_THRS_HIGHER;
>>       else if (new_state == NVKM_THERM_THRS_HIGHER &&
>> diff --git a/drm/nouveau/nvkm/subdev/therm/gp100.c b/drm/nouveau/nvkm/subdev/therm/gp100.c
>> index 9f0dea3f..d8206748 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/gp100.c
>> +++ b/drm/nouveau/nvkm/subdev/therm/gp100.c
>> @@ -24,7 +24,7 @@
>>  #include "priv.h"
>>
>>  static int
>> -gp100_temp_get(struct nvkm_therm *therm)
>> +gp100_temp_get(struct nvkm_therm *therm, int *val)
>>  {
>>       struct nvkm_device *device = therm->subdev.device;
>>       struct nvkm_subdev *subdev = &therm->subdev;
>> @@ -36,9 +36,10 @@ gp100_temp_get(struct nvkm_therm *therm)
>>               nvkm_trace(subdev, "reading temperature from SHADOWed sensor\n");
>>
>>       /* device valid */
>> -     if (tsensor & 0x20000000)
>> -             return (inttemp >> 8);
>> -     else
>> +     if (tsensor & 0x20000000) {
>> +             *val = inttemp >> 8;
>> +             return 0;
>> +     } else
>>               return -ENODEV;
>>  }
>>
>> diff --git a/drm/nouveau/nvkm/subdev/therm/nv40.c b/drm/nouveau/nvkm/subdev/therm/nv40.c
>> index 2c92ffb5..cfd5b215 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/nv40.c
>> +++ b/drm/nouveau/nvkm/subdev/therm/nv40.c
>> @@ -70,7 +70,7 @@ nv40_sensor_setup(struct nvkm_therm *therm)
>>  }
>>
>>  static int
>> -nv40_temp_get(struct nvkm_therm *therm)
>> +nv40_temp_get(struct nvkm_therm *therm, int *val)
>>  {
>>       struct nvkm_device *device = therm->subdev.device;
>>       struct nvbios_therm_sensor *sensor = &therm->bios_sensor;
>> @@ -95,11 +95,8 @@ nv40_temp_get(struct nvkm_therm *therm)
>>       core_temp = core_temp + sensor->offset_num / sensor->offset_den;
>>       core_temp = core_temp + sensor->offset_constant - 8;
>>
>> -     /* reserve negative temperatures for errors */
>> -     if (core_temp < 0)
>> -             core_temp = 0;
>> -
>> -     return core_temp;
>> +     *val = core_temp;
>> +     return 0;
>>  }
>>
>>  static int
>> diff --git a/drm/nouveau/nvkm/subdev/therm/nv50.c b/drm/nouveau/nvkm/subdev/therm/nv50.c
>> index 9b57b433..62ec4063 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/nv50.c
>> +++ b/drm/nouveau/nvkm/subdev/therm/nv50.c
>> @@ -126,7 +126,7 @@ nv50_sensor_setup(struct nvkm_therm *therm)
>>  }
>>
>>  static int
>> -nv50_temp_get(struct nvkm_therm *therm)
>> +nv50_temp_get(struct nvkm_therm *therm, int *val)
>>  {
>>       struct nvkm_device *device = therm->subdev.device;
>>       struct nvbios_therm_sensor *sensor = &therm->bios_sensor;
>> @@ -143,11 +143,8 @@ nv50_temp_get(struct nvkm_therm *therm)
>>       core_temp = core_temp + sensor->offset_num / sensor->offset_den;
>>       core_temp = core_temp + sensor->offset_constant - 8;
>>
>> -     /* reserve negative temperatures for errors */
>> -     if (core_temp < 0)
>> -             core_temp = 0;
>> -
>> -     return core_temp;
>> +     *val = core_temp;
>> +     return 0;
>>  }
>>
>>  static void
>> diff --git a/drm/nouveau/nvkm/subdev/therm/priv.h b/drm/nouveau/nvkm/subdev/therm/priv.h
>> index 1f46e371..b325ec5f 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/priv.h
>> +++ b/drm/nouveau/nvkm/subdev/therm/priv.h
>> @@ -91,7 +91,7 @@ struct nvkm_therm_func {
>>       int (*pwm_set)(struct nvkm_therm *, int line, u32, u32);
>>       int (*pwm_clock)(struct nvkm_therm *, int line);
>>
>> -     int (*temp_get)(struct nvkm_therm *);
>> +     int (*temp_get)(struct nvkm_therm *, int *);
>>
>>       int (*fan_sense)(struct nvkm_therm *);
>>
>> @@ -105,7 +105,7 @@ int  nv50_fan_pwm_get(struct nvkm_therm *, int, u32 *, u32 *);
>>  int  nv50_fan_pwm_set(struct nvkm_therm *, int, u32, u32);
>>  int  nv50_fan_pwm_clock(struct nvkm_therm *, int);
>>
>> -int  g84_temp_get(struct nvkm_therm *);
>> +int  g84_temp_get(struct nvkm_therm *, int *);
>>  void g84_sensor_setup(struct nvkm_therm *);
>>  void g84_therm_fini(struct nvkm_therm *);
>>
>> diff --git a/drm/nouveau/nvkm/subdev/therm/temp.c b/drm/nouveau/nvkm/subdev/therm/temp.c
>> index ddb2b2c6..5ec2dfb3 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/temp.c
>> +++ b/drm/nouveau/nvkm/subdev/therm/temp.c
>> @@ -86,7 +86,10 @@ nvkm_therm_sensor_event(struct nvkm_therm *therm, enum nvkm_therm_thrs thrs,
>>       static const char * const thresholds[] = {
>>               "fanboost", "downclock", "critical", "shutdown"
>>       };
>> -     int temperature = therm->func->temp_get(therm);
>> +     int temperature;
>> +
>> +     if (therm->func->temp_get(therm, &temperature) < 0)
>> +             return;
>>
>>       if (thrs < 0 || thrs > 3)
>>               return;
>> @@ -140,7 +143,10 @@ nvkm_therm_threshold_hyst_polling(struct nvkm_therm *therm,
>>  {
>>       enum nvkm_therm_thrs_direction direction;
>>       enum nvkm_therm_thrs_state prev_state, new_state;
>> -     int temp = therm->func->temp_get(therm);
>> +     int temp;
>> +
>> +     if (therm->func->temp_get(therm, &temp) < 0)
>> +             return;
>>
>>       prev_state = nvkm_therm_sensor_get_threshold_state(therm, thrs_name);
>>
>> @@ -166,6 +172,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm)
>>       struct nvbios_therm_sensor *sensor = &therm->bios_sensor;
>>       struct nvkm_timer *tmr = therm->subdev.device->timer;
>>       unsigned long flags;
>> +     int val;
>>
>>       spin_lock_irqsave(&therm->sensor.alarm_program_lock, flags);
>>
>> @@ -185,7 +192,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm)
>>       spin_unlock_irqrestore(&therm->sensor.alarm_program_lock, flags);
>>
>>       /* schedule the next poll in one second */
>> -     if (therm->func->temp_get(therm) >= 0)
>> +     if (therm->func->temp_get(therm, &val) >= 0)
>>               nvkm_timer_alarm(tmr, 1000000000ULL, alarm);
>>  }
>>
>> @@ -227,9 +234,10 @@ nvkm_therm_sensor_fini(struct nvkm_therm *therm, bool suspend)
>>  void
>>  nvkm_therm_sensor_preinit(struct nvkm_therm *therm)
>>  {
>> +     int val;
>>       const char *sensor_avail = "yes";
>>
>> -     if (therm->func->temp_get(therm) < 0)
>> +     if (therm->func->temp_get(therm, &val) < 0)
>>               sensor_avail = "no";
>>
>>       nvkm_debug(&therm->subdev, "internal sensor: %s\n", sensor_avail);
>> --
>> 2.14.1
>>
>> _______________________________________________
>> Nouveau mailing list
>> Nouveau at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/nouveau


More information about the Nouveau mailing list