[PATCH 6/7] drm/xe/client: Print runtime to fdinfo

Lucas De Marchi lucas.demarchi at intel.com
Tue Apr 16 03:04:53 UTC 2024


Print the accumulated runtime, per client, when printing fdinfo.
Each time a query is done, it first does 2 things:

1) Loop through all the exec queues for the current client and
   accumulate the runtime, per engine class. CTX_TIMESTAMP is used for
   that, read from the context image.

2) Read a "GPU timestamp" that can be used to tell how much GPU time
   has passed and that has the same unit/ref-clock as the counter
   recording the runtime. RING_TIMESTAMP is used for that, read via
   MMIO.

This second part is done once per engine class, since the register is
replicated on all engines but reports the same timestamp, at least on
the current GPUs this was tested on. It could be simplified to a single
read, but in order to play safe and cover the case where the clock
differs in future for primary/media GTs, or across engine classes, just
read it per class.

This is exported to userspace as 2 numbers in fdinfo:

	drm-engine-<class>: <GPU_TIMESTAMP> <RUNTIME> ticks

Userspace is expected to collect at least 2 samples, which allows the
engine busyness to be derived as:

		    RUNTIME1 - RUNTIME0
	busyness = ---------------------
			  T1 - T0

where T is the GPU_TIMESTAMP read together with the corresponding
RUNTIME sample.
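
For illustration, a minimal userspace sketch of that calculation (the
fdinfo path, the fd number and the "rcs" class name here are example
assumptions, not mandated by this patch):

	#include <inttypes.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	/* Find "<key>\t<ts> <runtime> ticks" in an fdinfo file */
	static int read_sample(const char *path, const char *key,
			       uint64_t *ts, uint64_t *rt)
	{
		char line[256];
		int found = 0;
		FILE *f = fopen(path, "r");

		if (!f)
			return -1;

		while (!found && fgets(line, sizeof(line), f))
			found = !strncmp(line, key, strlen(key)) &&
				sscanf(line + strlen(key),
				       " %" SCNu64 " %" SCNu64,
				       ts, rt) == 2;

		fclose(f);
		return found ? 0 : -1;
	}

	int main(void)
	{
		/* fdinfo of an example DRM fd owned by this process */
		const char *path = "/proc/self/fdinfo/3";
		uint64_t ts0, rt0, ts1, rt1;

		read_sample(path, "drm-engine-rcs:", &ts0, &rt0);
		sleep(2);
		read_sample(path, "drm-engine-rcs:", &ts1, &rt1);

		printf("rcs busyness: %.1f%%\n",
		       100.0 * (rt1 - rt0) / (double)(ts1 - ts0));
		return 0;
	}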

When calculating the overall system busyness, userspace can loop through
all the clients and add up all the numbers. Since the GPU timestamp
will be slightly different across clients, some fluctuation in accuracy
is expected, but that may be improved with a better hardware/GuC
interface in future, while maintaining the UAPI.

Another thing to point out is that userspace is expected to read any 2
samples every few seconds. Given the update frequency of the counters
involved and that CTX_TIMESTAMP is 32b, it is expected to wrap every
25 ~ 30 seconds. This could be mitigated by adding a workqueue to
accumulate the counters every so often, but that is additional
complexity for something userspace already does every few seconds in
tools like gputop (from igt), htop, nvtop, etc.
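
Sampling within the wrap period is enough because an unsigned 32b
subtraction stays correct across a single wrap when accumulating into a
64b total; an illustrative sketch (read_ctx_timestamp() is a
hypothetical helper, not the exact xe_exec_queue_update_runtime()
implementation):

	u32 new_ts = read_ctx_timestamp(q);	/* hypothetical helper */

	/*
	 * Correct delta even if new_ts wrapped past the previous
	 * sample once, as long as less than one full wrap elapsed
	 */
	q->runtime += (u32)(new_ts - q->old_ts);
	q->old_ts = new_ts;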

Test-with: https://lore.kernel.org/igt-dev/20240405060056.59379-1-lucas.demarchi@intel.com/
Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
---
 drivers/gpu/drm/xe/xe_drm_client.c | 81 +++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 08f0b7c95901..79eb453bfb14 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -2,6 +2,7 @@
 /*
  * Copyright © 2023 Intel Corporation
  */
+#include "xe_drm_client.h"
 
 #include <drm/drm_print.h>
 #include <drm/xe_drm.h>
@@ -12,7 +13,10 @@
 #include "xe_bo.h"
 #include "xe_bo_types.h"
 #include "xe_device_types.h"
-#include "xe_drm_client.h"
+#include "xe_exec_queue.h"
+#include "xe_gt.h"
+#include "xe_hw_engine.h"
+#include "xe_pm.h"
 #include "xe_trace.h"
 
 /**
@@ -179,6 +183,80 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
 	}
 }
 
+static const u64 class_to_mask[] = {
+	[XE_ENGINE_CLASS_RENDER] = XE_HW_ENGINE_RCS_MASK,
+	[XE_ENGINE_CLASS_VIDEO_DECODE] = XE_HW_ENGINE_VCS_MASK,
+	[XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_HW_ENGINE_VECS_MASK,
+	[XE_ENGINE_CLASS_COPY] = XE_HW_ENGINE_BCS_MASK,
+	[XE_ENGINE_CLASS_OTHER] = XE_HW_ENGINE_GSCCS_MASK,
+	[XE_ENGINE_CLASS_COMPUTE] = XE_HW_ENGINE_CCS_MASK,
+};
+
+static void show_runtime(struct drm_printer *p, struct drm_file *file)
+{
+	struct xe_file *xef = file->driver_priv;
+	struct xe_device *xe = xef->xe;
+	struct xe_gt *gt;
+	struct xe_hw_engine *hwe;
+	struct xe_exec_queue *q;
+	unsigned long i, id_hwe, id_gt, capacity[XE_ENGINE_CLASS_MAX] = { };
+	u64 gpu_timestamp, engine_mask = 0;
+	bool gpu_stamp = false;
+
+	xe_pm_runtime_get(xe);
+
+	mutex_lock(&xef->exec_queue.lock);
+	xa_for_each(&xef->exec_queue.xa, i, q)
+		xe_exec_queue_update_runtime(q);
+	mutex_unlock(&xef->exec_queue.lock);
+
+	for_each_gt(gt, xe, id_gt)
+		engine_mask |= gt->info.engine_mask;
+
+	BUILD_BUG_ON(ARRAY_SIZE(class_to_mask) != XE_ENGINE_CLASS_MAX);
+	for (i = 0; i < XE_ENGINE_CLASS_MAX; i++)
+		capacity[i] = hweight64(engine_mask & class_to_mask[i]);
+
+	/*
+	 * Iterate over all engines, printing the accumulated
+	 * runtime for this xef per engine class
+	 */
+	for_each_gt(gt, xe, id_gt) {
+		xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+		for_each_hw_engine(hwe, gt, id_hwe) {
+			const char *class_name;
+
+			if (!capacity[hwe->class])
+				continue;
+
+			/*
+			 * Read the GPU timestamp once, from the first
+			 * engine we encounter, and reuse it for all classes
+			 */
+			if (!gpu_stamp) {
+				gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
+				gpu_stamp = true;
+			}
+
+			class_name = xe_hw_engine_class_to_str(hwe->class);
+
+			drm_printf(p, "drm-engine-%s:\t%llu %llu ticks\n",
+				   class_name, gpu_timestamp,
+				   xef->runtime[hwe->class]);
+
+			if (capacity[hwe->class] > 1)
+				drm_printf(p, "drm-engine-capacity-%s:\t%lu\n",
+					   class_name, capacity[hwe->class]);
+
+			/* engine class already handled, skip next iterations */
+			capacity[hwe->class] = 0;
+		}
+		xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	}
+
+	xe_pm_runtime_put(xe);
+}
+
 /**
  * xe_drm_client_fdinfo() - Callback for fdinfo interface
  * @p: The drm_printer ptr
@@ -192,5 +270,6 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
 void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
 {
 	show_meminfo(p, file);
+	show_runtime(p, file);
 }
 #endif
-- 
2.43.0


