[PATCH v5] drm/i915/selftests: Implement frequency logging for energy reading validation
Anirban, Sk
sk.anirban at intel.com
Fri Oct 11 12:36:24 UTC 2024
On 11-10-2024 11:52, Anirban, Sk wrote:
>
>
> On 10-10-2024 23:45, Pottumuttu, Sai Teja wrote:
>>
>> On 10-10-2024 23:04, Sk Anirban wrote:
>>> Introduce RC6 & RC0 frequency logging mechanism to ensure accurate
>>> energy readings aimed at addressing GPU energy leaks and power
>>> measurement failures.
>>> This enhancement will help ensure the accuracy of energy readings.
>>>
>>> v2:
>>> - Improved commit message.
>>> v3:
>>> - Used pr_err log to display frequency (Anshuman)
>>> - Sorted headers alphabetically (Sai Teja)
>>> v4:
>>> - Improved commit message
>>> - Fix pr_err log (Sai Teja)
>>> v5:
>>> -Add error & debug logging for RC0 power and frequency checks
>>> (Anshuman)
>>>
>>> Signed-off-by: Sk Anirban <sk.anirban at intel.com>
>>> Reviewed-by: Sai Teja Pottumuttu <sai.teja.pottumuttu at intel.com>
>>> ---
>>> drivers/gpu/drm/i915/gt/selftest_rc6.c | 20 ++++++++++++++++----
>>> 1 file changed, 16 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c
>>> b/drivers/gpu/drm/i915/gt/selftest_rc6.c
>>> index 1aa1446c8fb0..0cf86fed39ca 100644
>>> --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c
>>> +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
>>> @@ -8,6 +8,7 @@
>>> #include "intel_gpu_commands.h"
>>> #include "intel_gt_requests.h"
>>> #include "intel_ring.h"
>>> +#include "intel_rps.h"
>>> #include "selftest_rc6.h"
>>> #include "selftests/i915_random.h"
>>> @@ -38,6 +39,9 @@ int live_rc6_manual(void *arg)
>>> ktime_t dt;
>>> u64 res[2];
>>> int err = 0;
>>> + u32 rc0_freq = 0;
>>> + u32 rc6_freq = 0;
>>> + struct intel_rps *rps = >->rps;
>>> /*
>>> * Our claim is that we can "encourage" the GPU to enter rc6
>>> at will.
>>> @@ -66,6 +70,7 @@ int live_rc6_manual(void *arg)
>>> rc0_power = librapl_energy_uJ() - rc0_power;
>>> dt = ktime_sub(ktime_get(), dt);
>>> res[1] = rc6_residency(rc6);
>>> + rc0_freq = intel_rps_read_actual_frequency(rps);
>>> if ((res[1] - res[0]) >> 10) {
>>> pr_err("RC6 residency increased by %lldus while disabled
>>> for 1000ms!\n",
>>> (res[1] - res[0]) >> 10);
>>> @@ -77,9 +82,14 @@ int live_rc6_manual(void *arg)
>>> rc0_power = div64_u64(NSEC_PER_SEC * rc0_power,
>>> ktime_to_ns(dt));
>>> if (!rc0_power) {
>>> - pr_err("No power measured while in RC0\n");
>>> - err = -EINVAL;
>>> - goto out_unlock;
>>> + if (rc0_freq)
>>> + pr_debug("No power measured while in RC0! GPU Freq:
>>> %u in RC0\n",
>>> + rc0_freq);
>> This line is not indented properly.
will fix this issue.
>>> + else {
>>> + pr_err("No power and freq measured while in RC0\n");
>>> + err = -EINVAL;
>>> + goto out_unlock;
>> AFAIU we should have the EINVAL and goto out_unlock irrespective of
>> rc0_freq. Reason being, if rc0_power is not measured then the
>> comparison we are doing with rc6_power below becomes invalid so I
>> think we shouldn't proceed if rc0_power is not measured and just goto
>> out_unlock with EINVAL. Am I missing something?
the main purpose was to print the message as debug_log if there is any
issue with the power measurement, although we may include a check
whether rc_0 power is there or not before the final validation of "(2 *
rc6_power > rc0_power)". @Anshuman, please provide your inputs here.
>>> + }
>>> }
>>> }
>>> @@ -91,6 +101,7 @@ int live_rc6_manual(void *arg)
>>> dt = ktime_get();
>>> rc6_power = librapl_energy_uJ();
>>> msleep(100);
>>> + rc6_freq = intel_rps_read_actual_frequency(rps);
>>> rc6_power = librapl_energy_uJ() - rc6_power;
>>> dt = ktime_sub(ktime_get(), dt);
>>> res[1] = rc6_residency(rc6);
>>> @@ -108,7 +119,8 @@ int live_rc6_manual(void *arg)
>>> pr_info("GPU consumed %llduW in RC0 and %llduW in RC6\n",
>>> rc0_power, rc6_power);
>>> if (2 * rc6_power > rc0_power) {
>>> - pr_err("GPU leaked energy while in RC6!\n");
>>> + pr_err("GPU leaked energy while in RC6! GPU Freq: %u in
>>> RC6 and %u in RC0\n",
>>> + rc6_freq, rc0_freq);
>>> err = -EINVAL;
>>> goto out_unlock;
>>> }
More information about the Intel-gfx
mailing list