[PATCH i-g-t 8/8] tests/intel/xe_drm_fdinfo: Stop asserting on usage percentage
Lucas De Marchi
lucas.demarchi at intel.com
Sat Jan 4 07:15:48 UTC 2025
It's unreliable to assert on the usage percentage considering 2 data
points as it still depends on the CPU scheduling not preempting tasks in
the wrong moment. On a normal use case of a top-like application, the
value not accounted for would simply show up in the next sample without
much issue. For a test assertion, it's better to check that the value
reported via fdinfo is reasonably close to the one saved by the GPU in
the spin. It's still allowed some error because there are a few GPU
ticks of difference due to the **GPU** scheduling the contexts.
Cc: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
---
tests/intel/xe_drm_fdinfo.c | 49 +++++++++++++++++++++++--------------
1 file changed, 31 insertions(+), 18 deletions(-)
diff --git a/tests/intel/xe_drm_fdinfo.c b/tests/intel/xe_drm_fdinfo.c
index 1089e5119..120436fbe 100644
--- a/tests/intel/xe_drm_fdinfo.c
+++ b/tests/intel/xe_drm_fdinfo.c
@@ -3,6 +3,8 @@
* Copyright © 2023 Intel Corporation
*/
+#include <math.h>
+
#include "igt.h"
#include "igt_core.h"
#include "igt_device.h"
@@ -371,7 +373,8 @@ static void basic_engine_utilization(int xe)
static void
check_results(struct pceu_cycles *s1, struct pceu_cycles *s2,
- int class, int width, enum expected_load expected_load)
+ int class, int width, uint32_t spin_stamp,
+ enum expected_load expected_load)
{
double percent;
u64 den, num;
@@ -383,12 +386,9 @@ check_results(struct pceu_cycles *s1, struct pceu_cycles *s2,
num = s2[class].cycles - s1[class].cycles;
den = s2[class].total_cycles - s1[class].total_cycles;
- percent = (num * 100.0) / (den + 1);
-
- /* for parallel submission scale the busyness with width */
- percent /= width;
- igt_debug("%s: percent: %f\n", engine_map[class], percent);
+ percent = (num * 100.0) / (den + 1) / width;
+ igt_debug("%s: percent: %.2f%%\n", engine_map[class], percent);
switch (expected_load) {
case EXPECTED_LOAD_IDLE:
@@ -396,11 +396,12 @@ check_results(struct pceu_cycles *s1, struct pceu_cycles *s2,
break;
case EXPECTED_LOAD_FULL:
/*
- * We are still relying on CPU sleep time and there could be
- * some imprecision when calculating the load. Use a 5% margin.
+ * percentage error between value saved by gpu in xe_spin and what
+ * is reported via fdinfo
*/
- igt_assert_lt_double(95.0, percent);
- igt_assert_lt_double(percent, 105.0);
+ percent = fabs((num - spin_stamp) * 100.0) / (spin_stamp + 1);
+ igt_debug("%s: error: %.2f%%\n", engine_map[class], percent);
+ igt_assert_lt_double(percent, 5.0);
break;
}
}
@@ -438,14 +439,17 @@ utilization_single(int fd, struct drm_xe_engine_class_instance *hwe, unsigned in
expected_load = flags & TEST_BUSY ?
EXPECTED_LOAD_FULL : EXPECTED_LOAD_IDLE;
- check_results(pceu1[0], pceu2[0], hwe->engine_class, 1, expected_load);
+
+ check_results(pceu1[0], pceu2[0], hwe->engine_class, 1,
+ cork ? cork->spin->timestamp : 0, expected_load);
if (flags & TEST_ISOLATION) {
/*
* Load from one client shouldn't spill on another,
* so check for idle
*/
- check_results(pceu1[1], pceu2[1], hwe->engine_class, 1, EXPECTED_LOAD_IDLE);
+ check_results(pceu1[1], pceu2[1], hwe->engine_class, 1, 0,
+ EXPECTED_LOAD_IDLE);
close(new_fd);
}
@@ -461,6 +465,7 @@ utilization_single_destroy_queue(int fd, struct drm_xe_engine_class_instance *hw
struct pceu_cycles pceu1[DRM_XE_ENGINE_CLASS_COMPUTE + 1];
struct pceu_cycles pceu2[DRM_XE_ENGINE_CLASS_COMPUTE + 1];
struct xe_cork *cork;
+ uint32_t timestamp;
uint32_t vm;
vm = xe_vm_create(fd, 0, 0);
@@ -472,13 +477,15 @@ utilization_single_destroy_queue(int fd, struct drm_xe_engine_class_instance *hw
/* destroy queue before sampling again */
xe_cork_sync_end(fd, cork);
+ timestamp = cork->spin->timestamp;
xe_cork_destroy(fd, cork);
read_engine_cycles(fd, pceu2);
xe_vm_destroy(fd, vm);
- check_results(pceu1, pceu2, hwe->engine_class, 1, EXPECTED_LOAD_FULL);
+ check_results(pceu1, pceu2, hwe->engine_class, 1, timestamp,
+ EXPECTED_LOAD_FULL);
}
static void
@@ -503,7 +510,8 @@ utilization_others_idle(int fd, struct drm_xe_engine_class_instance *hwe)
enum expected_load expected_load = hwe->engine_class != class ?
EXPECTED_LOAD_IDLE : EXPECTED_LOAD_FULL;
- check_results(pceu1, pceu2, class, 1, expected_load);
+ check_results(pceu1, pceu2, class, 1, cork->spin->timestamp,
+ expected_load);
}
xe_cork_destroy(fd, cork);
@@ -547,7 +555,8 @@ utilization_others_full_load(int fd, struct drm_xe_engine_class_instance *hwe)
if (!cork[class])
continue;
- check_results(pceu1, pceu2, class, 1, expected_load);
+ check_results(pceu1, pceu2, class, 1, cork[class]->spin->timestamp,
+ expected_load);
xe_cork_destroy(fd, cork[class]);
}
@@ -585,7 +594,9 @@ utilization_all_full_load(int fd)
if (!cork[class])
continue;
- check_results(pceu1, pceu2, class, 1, EXPECTED_LOAD_FULL);
+ check_results(pceu1, pceu2, class, 1,
+ cork[class]->spin->timestamp,
+ EXPECTED_LOAD_FULL);
xe_cork_destroy(fd, cork[class]);
}
@@ -657,14 +668,16 @@ utilization_multi(int fd, int gt, int class, unsigned int flags)
expected_load = flags & TEST_BUSY ?
EXPECTED_LOAD_FULL : EXPECTED_LOAD_IDLE;
- check_results(pceu[0], pceu[1], class, width, expected_load);
+
+ check_results(pceu[0], pceu[1], class, width,
+ cork ? cork->spin->timestamp : 0, expected_load);
if (flags & TEST_ISOLATION) {
/*
* Load from one client shouldn't spill on another,
* so check for idle
*/
- check_results(pceu_spill[0], pceu_spill[1], class, width,
+ check_results(pceu_spill[0], pceu_spill[1], class, width, 0,
EXPECTED_LOAD_IDLE);
close(fd_spill);
}
--
2.47.0
More information about the igt-dev
mailing list