[Nouveau] [PATCH 03/32] therm: Split return code and value in nvkm_get_temp

Karol Herbst kherbst at redhat.com
Wed Nov 22 01:42:45 UTC 2017


On Wed, Nov 22, 2017 at 1:32 AM, Martin Peres <martin.peres at free.fr> wrote:
> On 17/11/17 02:04, Karol Herbst wrote:
>> The current hwmon code doesn't check if the returned value was actually an
>> error.
>>
>> Since Kepler temperature sensors are able to report negative values. Those
>> negative values are not for error reporting, but rather when you buried
>> your GPU in snow somewhere in Antarctica and still want a valid
>> temperature to be reported (unverified).
>>
>> Since Pascal (and maybe earlier) we have sensors with improved precision.
>>
>> Adjust the nvkm_get_temp method to be able to deal with those changes
>> and let hwmon return an error properly.
>
> Where did you get this information? And if it is the case, then we will
> royally screw up the value because we don't even know on how many bits
> 0x20400 uses...
>
> If you are indeed right, I would like to see g84.c's temp_get().
>

Nvidia uses 0x020460 on Pascal and I thought we had that in envytools already?

[0] 228.412618 MMIO32 R 0x020460 0x20002f20 PTHERM.I2C_SLAVE+0x60 => 0x20002f20
[0] 229.413039 MMIO32 R 0x020460 0x20002f10 PTHERM.I2C_SLAVE+0x60 => 0x20002f10
[0] 230.416976 MMIO32 R 0x020460 0x20002f68 PTHERM.I2C_SLAVE+0x60 => 0x20002f68
[0] 231.417407 MMIO32 R 0x020460 0x20002fe8 PTHERM.I2C_SLAVE+0x60 => 0x20002fe8
[0] 232.417742 MMIO32 R 0x020460 0x20002ff0 PTHERM.I2C_SLAVE+0x60 => 0x20002ff0
[0] 233.417742 MMIO32 R 0x020460 0x20003020 PTHERM.I2C_SLAVE+0x60 => 0x20003020

or nvapeek 0x20400 && nvapeek 0x020460;
00020400: 0000002c
00020460: 20002c60

>>
>> Signed-off-by: Karol Herbst <karolherbst at gmail.com>
>> ---
>>  drm/nouveau/include/nvkm/subdev/clk.h   |  4 ++--
>>  drm/nouveau/include/nvkm/subdev/therm.h |  2 +-
>>  drm/nouveau/nouveau_hwmon.c             | 15 +++++++++------
>>  drm/nouveau/nvkm/subdev/clk/base.c      |  2 +-
>>  drm/nouveau/nvkm/subdev/therm/base.c    | 19 ++++++++++++++-----
>>  drm/nouveau/nvkm/subdev/therm/g84.c     | 13 ++++++++-----
>>  drm/nouveau/nvkm/subdev/therm/gp100.c   |  9 +++++----
>>  drm/nouveau/nvkm/subdev/therm/nv40.c    |  9 +++------
>>  drm/nouveau/nvkm/subdev/therm/nv50.c    |  9 +++------
>>  drm/nouveau/nvkm/subdev/therm/priv.h    |  4 ++--
>>  drm/nouveau/nvkm/subdev/therm/temp.c    | 16 ++++++++++++----
>>  11 files changed, 60 insertions(+), 42 deletions(-)
>>
>> diff --git a/drm/nouveau/include/nvkm/subdev/clk.h b/drm/nouveau/include/nvkm/subdev/clk.h
>> index e5275f74..506f8cc6 100644
>> --- a/drm/nouveau/include/nvkm/subdev/clk.h
>> +++ b/drm/nouveau/include/nvkm/subdev/clk.h
>> @@ -100,7 +100,7 @@ struct nvkm_clk {
>>       int ustate_dc; /* user-requested (-1 disabled, -2 perfmon) */
>>       int astate; /* perfmon adjustment (base) */
>>       int dstate; /* display adjustment (min+) */
>> -     u8  temp;
>> +     int temp;
>>
>>       bool allow_reclock;
>>  #define NVKM_CLK_BOOST_NONE 0x0
>> @@ -122,7 +122,7 @@ int nvkm_clk_read(struct nvkm_clk *, enum nv_clk_src);
>>  int nvkm_clk_ustate(struct nvkm_clk *, int req, int pwr);
>>  int nvkm_clk_astate(struct nvkm_clk *, int req, int rel, bool wait);
>>  int nvkm_clk_dstate(struct nvkm_clk *, int req, int rel);
>> -int nvkm_clk_tstate(struct nvkm_clk *, u8 temperature);
>> +int nvkm_clk_tstate(struct nvkm_clk *, int temperature);
>>
>>  int nv04_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
>>  int nv40_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
>> diff --git a/drm/nouveau/include/nvkm/subdev/therm.h b/drm/nouveau/include/nvkm/subdev/therm.h
>> index 9841f076..8c84017f 100644
>> --- a/drm/nouveau/include/nvkm/subdev/therm.h
>> +++ b/drm/nouveau/include/nvkm/subdev/therm.h
>> @@ -86,7 +86,7 @@ struct nvkm_therm {
>>       int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int);
>>  };
>>
>> -int nvkm_therm_temp_get(struct nvkm_therm *);
>> +int nvkm_therm_temp_get(struct nvkm_therm *, int *);
>>  int nvkm_therm_fan_sense(struct nvkm_therm *);
>>  int nvkm_therm_cstate(struct nvkm_therm *, int, int);
>>
>> diff --git a/drm/nouveau/nouveau_hwmon.c b/drm/nouveau/nouveau_hwmon.c
>> index 7c965648..7486e4af 100644
>> --- a/drm/nouveau/nouveau_hwmon.c
>> +++ b/drm/nouveau/nouveau_hwmon.c
>> @@ -326,8 +326,9 @@ nouveau_temp_is_visible(const void *data, u32 attr, int channel)
>>  {
>>       struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data);
>>       struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
>> +     int val;
>>
>> -     if (therm && therm->attr_get && nvkm_therm_temp_get(therm) < 0)
>> +     if (therm && therm->attr_get && nvkm_therm_temp_get(therm, &val))
>>               return 0;
>>
>>       switch (attr) {
>> @@ -421,15 +422,16 @@ nouveau_temp_read(struct device *dev, u32 attr, int channel, long *val)
>>       struct drm_device *drm_dev = dev_get_drvdata(dev);
>>       struct nouveau_drm *drm = nouveau_drm(drm_dev);
>>       struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
>> -     int ret;
>> +     int ret = 0;
>> +     int temp;
>>
>>       if (!therm || !therm->attr_get)
>>               return -EOPNOTSUPP;
>>
>>       switch (attr) {
>>       case hwmon_temp_input:
>> -             ret = nvkm_therm_temp_get(therm);
>> -             *val = ret < 0 ? ret : (ret * 1000);
>> +             ret = nvkm_therm_temp_get(therm, &temp);
>> +             *val = temp * 1000;
>>               break;
>>       case hwmon_temp_max:
>>               *val = therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK)
>> @@ -459,7 +461,7 @@ nouveau_temp_read(struct device *dev, u32 attr, int channel, long *val)
>>               return -EOPNOTSUPP;
>>       }
>>
>> -     return 0;
>> +     return ret;
>>  }
>>
>>  static int
>> @@ -713,6 +715,7 @@ nouveau_hwmon_init(struct drm_device *dev)
>>       struct device *hwmon_dev;
>>       int ret = 0;
>>       int i = 0;
>> +     int val;
>>
>>       hwmon = drm->hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
>>       if (!hwmon)
>> @@ -720,7 +723,7 @@ nouveau_hwmon_init(struct drm_device *dev)
>>       hwmon->dev = dev;
>>
>>       if (therm && therm->attr_get && therm->attr_set) {
>> -             if (nvkm_therm_temp_get(therm) >= 0)
>> +             if (!nvkm_therm_temp_get(therm, &val))
>>                       special_groups[i++] = &temp1_auto_point_sensor_group;
>>               if (therm->fan_get && therm->fan_get(therm) >= 0)
>>                       special_groups[i++] = &pwm_fan_sensor_group;
>> diff --git a/drm/nouveau/nvkm/subdev/clk/base.c b/drm/nouveau/nvkm/subdev/clk/base.c
>> index e4c8d310..0b28dbb9 100644
>> --- a/drm/nouveau/nvkm/subdev/clk/base.c
>> +++ b/drm/nouveau/nvkm/subdev/clk/base.c
>> @@ -540,7 +540,7 @@ nvkm_clk_astate(struct nvkm_clk *clk, int req, int rel, bool wait)
>>  }
>>
>>  int
>> -nvkm_clk_tstate(struct nvkm_clk *clk, u8 temp)
>> +nvkm_clk_tstate(struct nvkm_clk *clk, int temp)
>>  {
>>       if (clk->temp == temp)
>>               return 0;
>> diff --git a/drm/nouveau/nvkm/subdev/therm/base.c b/drm/nouveau/nvkm/subdev/therm/base.c
>> index f27fc6d0..8e5f6f7f 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/base.c
>> +++ b/drm/nouveau/nvkm/subdev/therm/base.c
>> @@ -24,22 +24,26 @@
>>  #include "priv.h"
>>
>>  int
>> -nvkm_therm_temp_get(struct nvkm_therm *therm)
>> +nvkm_therm_temp_get(struct nvkm_therm *therm, int *val)
>>  {
>>       if (therm->func->temp_get)
>> -             return therm->func->temp_get(therm);
>> +             return therm->func->temp_get(therm, val);
>>       return -ENODEV;
>>  }
>>
>>  static int
>>  nvkm_therm_update_trip(struct nvkm_therm *therm)
>>  {
>> +     int temp, ret;
>>       struct nvbios_therm_trip_point *trip = therm->fan->bios.trip,
>>                                      *cur_trip = NULL,
>>                                      *last_trip = therm->last_trip;
>> -     u8  temp = therm->func->temp_get(therm);
>>       u16 duty, i;
>>
>> +     ret = therm->func->temp_get(therm, &temp);
>> +     if (ret < 0)
>> +             return ret;
>> +
>>       /* look for the trip point corresponding to the current temperature */
>>       cur_trip = NULL;
>>       for (i = 0; i < therm->fan->bios.nr_fan_trip; i++) {
>> @@ -67,9 +71,13 @@ static int
>>  nvkm_therm_compute_linear_duty(struct nvkm_therm *therm, u8 linear_min_temp,
>>                                 u8 linear_max_temp)
>>  {
>> -     u8  temp = therm->func->temp_get(therm);
>> +     int temp, ret;
>>       u16 duty;
>>
>> +     ret = therm->func->temp_get(therm, &temp);
>> +     if (ret < 0)
>> +             return ret;
>> +
>>       /* handle the non-linear part first */
>>       if (temp < linear_min_temp)
>>               return therm->fan->bios.min_duty;
>> @@ -182,6 +190,7 @@ nvkm_therm_fan_mode(struct nvkm_therm *therm, int mode)
>>  {
>>       struct nvkm_subdev *subdev = &therm->subdev;
>>       struct nvkm_device *device = subdev->device;
>> +     int val;
>>       static const char *name[] = {
>>               "disabled",
>>               "manual",
>> @@ -197,7 +206,7 @@ nvkm_therm_fan_mode(struct nvkm_therm *therm, int mode)
>>       /* do not allow automatic fan management if the thermal sensor is
>>        * not available */
>>       if (mode == NVKM_THERM_CTRL_AUTO &&
>> -         therm->func->temp_get(therm) < 0)
>> +         therm->func->temp_get(therm, &val))
>>               return -EINVAL;
>>
>>       if (therm->mode == mode)
>> diff --git a/drm/nouveau/nvkm/subdev/therm/g84.c b/drm/nouveau/nvkm/subdev/therm/g84.c
>> index 96f8da40..81c0bda8 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/g84.c
>> +++ b/drm/nouveau/nvkm/subdev/therm/g84.c
>> @@ -27,14 +27,15 @@
>>  #include <subdev/fuse.h>
>>
>>  int
>> -g84_temp_get(struct nvkm_therm *therm)
>> +g84_temp_get(struct nvkm_therm *therm, int *val)
>>  {
>>       struct nvkm_device *device = therm->subdev.device;
>>
>> -     if (nvkm_fuse_read(device->fuse, 0x1a8) == 1)
>> -             return nvkm_rd32(device, 0x20400);
>> -     else
>> +     if (nvkm_fuse_read(device->fuse, 0x1a8) != 1)
>>               return -ENODEV;
>> +
>> +     *val = nvkm_rd32(device, 0x20400);
>> +     return 0;
>>  }
>>
>>  void
>> @@ -114,8 +115,10 @@ g84_therm_threshold_hyst_emulation(struct nvkm_therm *therm,
>>               new_state = NVKM_THERM_THRS_LOWER;
>>       }
>>
>> +     if (therm->func->temp_get(therm, &cur))
>> +             return;
>> +
>>       /* fix the state (in case someone reprogrammed the alarms) */
>> -     cur = therm->func->temp_get(therm);
>>       if (new_state == NVKM_THERM_THRS_LOWER && cur > thrs->temp)
>>               new_state = NVKM_THERM_THRS_HIGHER;
>>       else if (new_state == NVKM_THERM_THRS_HIGHER &&
>> diff --git a/drm/nouveau/nvkm/subdev/therm/gp100.c b/drm/nouveau/nvkm/subdev/therm/gp100.c
>> index 9f0dea3f..d8206748 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/gp100.c
>> +++ b/drm/nouveau/nvkm/subdev/therm/gp100.c
>> @@ -24,7 +24,7 @@
>>  #include "priv.h"
>>
>>  static int
>> -gp100_temp_get(struct nvkm_therm *therm)
>> +gp100_temp_get(struct nvkm_therm *therm, int *val)
>>  {
>>       struct nvkm_device *device = therm->subdev.device;
>>       struct nvkm_subdev *subdev = &therm->subdev;
>> @@ -36,9 +36,10 @@ gp100_temp_get(struct nvkm_therm *therm)
>>               nvkm_trace(subdev, "reading temperature from SHADOWed sensor\n");
>>
>>       /* device valid */
>> -     if (tsensor & 0x20000000)
>> -             return (inttemp >> 8);
>> -     else
>> +     if (tsensor & 0x20000000) {
>> +             *val = inttemp >> 8;
>> +             return 0;
>> +     } else
>>               return -ENODEV;
>>  }
>>
>> diff --git a/drm/nouveau/nvkm/subdev/therm/nv40.c b/drm/nouveau/nvkm/subdev/therm/nv40.c
>> index 2c92ffb5..cfd5b215 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/nv40.c
>> +++ b/drm/nouveau/nvkm/subdev/therm/nv40.c
>> @@ -70,7 +70,7 @@ nv40_sensor_setup(struct nvkm_therm *therm)
>>  }
>>
>>  static int
>> -nv40_temp_get(struct nvkm_therm *therm)
>> +nv40_temp_get(struct nvkm_therm *therm, int *val)
>>  {
>>       struct nvkm_device *device = therm->subdev.device;
>>       struct nvbios_therm_sensor *sensor = &therm->bios_sensor;
>> @@ -95,11 +95,8 @@ nv40_temp_get(struct nvkm_therm *therm)
>>       core_temp = core_temp + sensor->offset_num / sensor->offset_den;
>>       core_temp = core_temp + sensor->offset_constant - 8;
>>
>> -     /* reserve negative temperatures for errors */
>> -     if (core_temp < 0)
>> -             core_temp = 0;
>> -
>> -     return core_temp;
>> +     *val = core_temp;
>> +     return 0;
>>  }
>>
>>  static int
>> diff --git a/drm/nouveau/nvkm/subdev/therm/nv50.c b/drm/nouveau/nvkm/subdev/therm/nv50.c
>> index 9b57b433..62ec4063 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/nv50.c
>> +++ b/drm/nouveau/nvkm/subdev/therm/nv50.c
>> @@ -126,7 +126,7 @@ nv50_sensor_setup(struct nvkm_therm *therm)
>>  }
>>
>>  static int
>> -nv50_temp_get(struct nvkm_therm *therm)
>> +nv50_temp_get(struct nvkm_therm *therm, int *val)
>>  {
>>       struct nvkm_device *device = therm->subdev.device;
>>       struct nvbios_therm_sensor *sensor = &therm->bios_sensor;
>> @@ -143,11 +143,8 @@ nv50_temp_get(struct nvkm_therm *therm)
>>       core_temp = core_temp + sensor->offset_num / sensor->offset_den;
>>       core_temp = core_temp + sensor->offset_constant - 8;
>>
>> -     /* reserve negative temperatures for errors */
>> -     if (core_temp < 0)
>> -             core_temp = 0;
>> -
>> -     return core_temp;
>> +     *val = core_temp;
>> +     return 0;
>>  }
>>
>>  static void
>> diff --git a/drm/nouveau/nvkm/subdev/therm/priv.h b/drm/nouveau/nvkm/subdev/therm/priv.h
>> index 1f46e371..b325ec5f 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/priv.h
>> +++ b/drm/nouveau/nvkm/subdev/therm/priv.h
>> @@ -91,7 +91,7 @@ struct nvkm_therm_func {
>>       int (*pwm_set)(struct nvkm_therm *, int line, u32, u32);
>>       int (*pwm_clock)(struct nvkm_therm *, int line);
>>
>> -     int (*temp_get)(struct nvkm_therm *);
>> +     int (*temp_get)(struct nvkm_therm *, int *);
>>
>>       int (*fan_sense)(struct nvkm_therm *);
>>
>> @@ -105,7 +105,7 @@ int  nv50_fan_pwm_get(struct nvkm_therm *, int, u32 *, u32 *);
>>  int  nv50_fan_pwm_set(struct nvkm_therm *, int, u32, u32);
>>  int  nv50_fan_pwm_clock(struct nvkm_therm *, int);
>>
>> -int  g84_temp_get(struct nvkm_therm *);
>> +int  g84_temp_get(struct nvkm_therm *, int *);
>>  void g84_sensor_setup(struct nvkm_therm *);
>>  void g84_therm_fini(struct nvkm_therm *);
>>
>> diff --git a/drm/nouveau/nvkm/subdev/therm/temp.c b/drm/nouveau/nvkm/subdev/therm/temp.c
>> index ddb2b2c6..e7b8cbe2 100644
>> --- a/drm/nouveau/nvkm/subdev/therm/temp.c
>> +++ b/drm/nouveau/nvkm/subdev/therm/temp.c
>> @@ -86,7 +86,10 @@ nvkm_therm_sensor_event(struct nvkm_therm *therm, enum nvkm_therm_thrs thrs,
>>       static const char * const thresholds[] = {
>>               "fanboost", "downclock", "critical", "shutdown"
>>       };
>> -     int temperature = therm->func->temp_get(therm);
>> +     int temperature;
>> +
>> +     if (therm->func->temp_get(therm, &temperature))
>> +             return;
>>
>>       if (thrs < 0 || thrs > 3)
>>               return;
>> @@ -140,7 +143,10 @@ nvkm_therm_threshold_hyst_polling(struct nvkm_therm *therm,
>>  {
>>       enum nvkm_therm_thrs_direction direction;
>>       enum nvkm_therm_thrs_state prev_state, new_state;
>> -     int temp = therm->func->temp_get(therm);
>> +     int temp;
>> +
>> +     if (therm->func->temp_get(therm, &temp))
>> +             return;
>>
>>       prev_state = nvkm_therm_sensor_get_threshold_state(therm, thrs_name);
>>
>> @@ -166,6 +172,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm)
>>       struct nvbios_therm_sensor *sensor = &therm->bios_sensor;
>>       struct nvkm_timer *tmr = therm->subdev.device->timer;
>>       unsigned long flags;
>> +     int val;
>>
>>       spin_lock_irqsave(&therm->sensor.alarm_program_lock, flags);
>>
>> @@ -185,7 +192,7 @@ alarm_timer_callback(struct nvkm_alarm *alarm)
>>       spin_unlock_irqrestore(&therm->sensor.alarm_program_lock, flags);
>>
>>       /* schedule the next poll in one second */
>> -     if (therm->func->temp_get(therm) >= 0)
>> +     if (!therm->func->temp_get(therm, &val))
>>               nvkm_timer_alarm(tmr, 1000000000ULL, alarm);
>>  }
>>
>> @@ -227,9 +234,10 @@ nvkm_therm_sensor_fini(struct nvkm_therm *therm, bool suspend)
>>  void
>>  nvkm_therm_sensor_preinit(struct nvkm_therm *therm)
>>  {
>> +     int val;
>>       const char *sensor_avail = "yes";
>>
>> -     if (therm->func->temp_get(therm) < 0)
>> +     if (therm->func->temp_get(therm, &val))
>>               sensor_avail = "no";
>>
>>       nvkm_debug(&therm->subdev, "internal sensor: %s\n", sensor_avail);
>>
>
> _______________________________________________
> Nouveau mailing list
> Nouveau at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/nouveau


More information about the Nouveau mailing list