[PATCH v1] drm/i915/selftests: Improve RC6 power measurement and error handling
Jani Nikula
jani.nikula at linux.intel.com
Tue Feb 11 15:53:11 UTC 2025
On Tue, 11 Feb 2025, Sk Anirban <sk.anirban at intel.com> wrote:
> Improve RC6 power measurement and error handling. Add detailed error
> messages, and introduce a threshold check for RC6 residency.
Improve how? Why?
BR,
Jani.
>
> Signed-off-by: Sk Anirban <sk.anirban at intel.com>
> ---
> drivers/gpu/drm/i915/gt/selftest_rc6.c | 61 +++++++++++++++++---------
> 1 file changed, 40 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
> index 908483ab0bc8..30bc2ff040ce 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
> @@ -33,15 +33,20 @@ int live_rc6_manual(void *arg)
> {
> struct intel_gt *gt = arg;
> struct intel_rc6 *rc6 = &gt->rc6;
> - u64 rc0_power, rc6_power;
> + struct intel_rps *rps = &gt->rps;
> intel_wakeref_t wakeref;
> + u64 sleep_time = 1000;
> + u32 rc0_freq = 0;
> + u32 rc6_freq = 0;
> + u64 rc0_power[3];
> + u64 rc6_power[3];
> bool has_power;
> + u64 threshold;
> ktime_t dt;
> u64 res[2];
> int err = 0;
> - u32 rc0_freq = 0;
> - u32 rc6_freq = 0;
> - struct intel_rps *rps = &gt->rps;
> + u64 diff;
> +
>
> /*
> * Our claim is that we can "encourage" the GPU to enter rc6 at will.
> @@ -65,9 +70,9 @@ int live_rc6_manual(void *arg)
> res[0] = rc6_residency(rc6);
>
> dt = ktime_get();
> - rc0_power = librapl_energy_uJ();
> - msleep(1000);
> - rc0_power = librapl_energy_uJ() - rc0_power;
> + rc0_power[0] = librapl_energy_uJ();
> + msleep(sleep_time);
> + rc0_power[1] = librapl_energy_uJ() - rc0_power[0];
> dt = ktime_sub(ktime_get(), dt);
> res[1] = rc6_residency(rc6);
> rc0_freq = intel_rps_read_actual_frequency_fw(rps);
> @@ -79,11 +84,12 @@ int live_rc6_manual(void *arg)
> }
>
> if (has_power) {
> - rc0_power = div64_u64(NSEC_PER_SEC * rc0_power,
> - ktime_to_ns(dt));
> - if (!rc0_power) {
> + rc0_power[2] = div64_u64(NSEC_PER_SEC * rc0_power[1],
> + ktime_to_ns(dt));
> +
> + if (!rc0_power[2]) {
> if (rc0_freq)
> - pr_debug("No power measured while in RC0! GPU Freq: %u in RC0\n",
> + pr_debug("No power measured while in RC0! GPU Freq: %uMHz in RC0\n",
> rc0_freq);
> else
> pr_err("No power and freq measured while in RC0\n");
> @@ -98,10 +104,10 @@ int live_rc6_manual(void *arg)
> res[0] = rc6_residency(rc6);
> intel_uncore_forcewake_flush(rc6_to_uncore(rc6), FORCEWAKE_ALL);
> dt = ktime_get();
> - rc6_power = librapl_energy_uJ();
> - msleep(1000);
> + rc6_power[0] = librapl_energy_uJ();
> + msleep(sleep_time);
> rc6_freq = intel_rps_read_actual_frequency_fw(rps);
> - rc6_power = librapl_energy_uJ() - rc6_power;
> + rc6_power[1] = librapl_energy_uJ() - rc6_power[0];
> dt = ktime_sub(ktime_get(), dt);
> res[1] = rc6_residency(rc6);
> if (res[1] == res[0]) {
> @@ -112,14 +118,27 @@ int live_rc6_manual(void *arg)
> err = -EINVAL;
> }
>
> + diff = res[1] - res[0];
> + threshold = (9 * NSEC_PER_MSEC * sleep_time) / 10;
> + if (diff < threshold) {
> + pr_err("Did not enter RC6 properly, RC6 start residency=%lluns, RC6 end residency=%lluns\n",
> + res[0], res[1]);
> + err = -EINVAL;
> + }
> +
> if (has_power) {
> - rc6_power = div64_u64(NSEC_PER_SEC * rc6_power,
> - ktime_to_ns(dt));
> - pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n",
> - rc0_power, rc6_power);
> - if (2 * rc6_power > rc0_power) {
> - pr_err("GPU leaked energy while in RC6! GPU Freq: %u in RC6 and %u in RC0\n",
> - rc6_freq, rc0_freq);
> + rc6_power[2] = div64_u64(NSEC_PER_SEC * rc6_power[1],
> + ktime_to_ns(dt));
> + pr_info("GPU consumed %lluuW in RC0 and %lluuW in RC6\n",
> + rc0_power[2], rc6_power[2]);
> +
> + if (2 * rc6_power[2] > rc0_power[2]) {
> + pr_err("GPU leaked energy while in RC6!\n"
> + "GPU Freq: %uMHz in RC6 and %uMHz in RC0\n"
> + "RC0 energy before & after sleep respectively: %lluuJ %lluuJ\n"
> + "RC6 energy before & after sleep respectively: %lluuJ %lluuJ\n",
> + rc6_freq, rc0_freq, rc0_power[0], rc0_power[1],
> + rc6_power[0], rc6_power[1]);
> err = -EINVAL;
> goto out_unlock;
> }
--
Jani Nikula, Intel
More information about the Intel-gfx
mailing list