[PATCH 8/8] drm/i915: Fallback to hw context runtime when sw tracking is not available

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Jan 8 11:18:49 UTC 2020


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

In GuC mode we do not receive the context switch interrupts needed to
accurately track context runtimes.

We can fall back to using the PPHWSP counter updated by the GPU on context save.

QQQ
Downsides are: 1) we do not see the currently executing batch and 2) with a
12MHz command streamer timestamp timer frequency the 32-bit counter wraps
every ~358 seconds. This also makes endless OpenCL batches with heartbeats
turned off a problem.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/i915_drm_client.c | 34 ++++++++++++++++++++------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 55b2f86cc4c1..0b84ae528dcc 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -58,6 +58,24 @@ sw_busy_add(struct i915_gem_context *ctx, unsigned int engine_class)
 	return total;
 }
 
+static u64
+hw_busy_add(struct i915_gem_context *ctx, unsigned int engine_class)
+{
+	struct i915_gem_engines *engines = rcu_dereference(ctx->engines);
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+	u64 total = 0;
+
+	for_each_gem_engine(ce, engines, it) {
+		if (ce->engine->uabi_class != engine_class)
+			continue;
+
+		total += intel_context_get_hw_runtime_ns(ce);
+	}
+
+	return total;
+}
+
 static ssize_t
 show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
 {
@@ -68,12 +86,14 @@ show_client_busy(struct device *kdev, struct device_attribute *attr, char *buf)
 	struct i915_gem_context *ctx;
 	u64 total = 0;
 
-	if (i915_attr->no_busy_stats)
-		return -ENODEV;
-
 	rcu_read_lock();
-	list_for_each_entry_rcu(ctx, list, client_link)
-		total += sw_busy_add(ctx, engine_class);
+	if (i915_attr->no_busy_stats) {
+		list_for_each_entry_rcu(ctx, list, client_link)
+			total += hw_busy_add(ctx, engine_class);
+	} else {
+		list_for_each_entry_rcu(ctx, list, client_link)
+			total += sw_busy_add(ctx, engine_class);
+	}
 	rcu_read_unlock();
 
 	return snprintf(buf, PAGE_SIZE, "%llu\n", total);
@@ -164,7 +184,7 @@ __i915_drm_client_register(struct i915_drm_client *client,
 			if (ret) {
 				int j, k;
 
-				/* Unwind if not available. */
+				/* Unwind and fallback if not available. */
 				j = 0;
 				for_each_uabi_engine(engine, i915) {
 					if (j++ == i)
@@ -181,7 +201,7 @@ __i915_drm_client_register(struct i915_drm_client *client,
 				}
 
 				dev_notice_once(i915->drm.dev,
-						"Engine busy stats not available! (%d)",
+						"Reduced accuracy context runtime mode (%d)",
 						ret);
 				break;
 			}
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list