[Intel-gfx] [PATCH] drm/i915/perf: allow holding preemption on filtered ctx

Lionel Landwerlin lionel.g.landwerlin at intel.com
Tue Jun 5 16:19:11 UTC 2018


We would like to make use of perf in Vulkan. The Vulkan API is much
lower level than OpenGL, with applications directly exposed to the
concept of command buffers (pretty much equivalent to our batch
buffers). In Vulkan, queries are always limited in scope to a command
buffer. In OpenGL, the lack of command buffer concept meant that
queries' duration could span multiple command buffers.

With that restriction gone in Vulkan, we would like to simplify
measuring performance just by measuring the deltas between the counter
snapshots written by 2 MI_RECORD_PERF_COUNT commands, rather than the
more complex scheme with currently have in the GL driver, using 2
MI_RECORD_PERF_COUNT commands and doing some post processing on the
stream of OA reports to remove any unrelated deltas in the OA stream.

Disabling preemption only apply to the context with which want to
query performance counters and is considered a privileged operation
(disabled by default).

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.h |  6 ++++++
 drivers/gpu/drm/i915/i915_perf.c        | 24 ++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_lrc.c        |  4 +++-
 include/uapi/drm/i915_drm.h             |  8 ++++++++
 4 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index f40d85448a28..17e8efe0397e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -187,6 +187,12 @@ struct i915_gem_context {
 	/** remap_slice: Bitmask of cache lines that need remapping */
 	u8 remap_slice;
 
+	/*
+	 * perf_disabled_preemption: Disable preemption for batch buffers
+	 * submitted on this context.
+	 */
+	bool perf_disabled_preemption;
+
 	/** handles_vma: rbtree to look up our context specific obj/vma for
 	 * the user handle. (user handles are per fd, but the binding is
 	 * per vm, which may be one per context or shared with the global GTT)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 02f8034bb699..735babf272a9 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -354,6 +354,7 @@ struct perf_open_properties {
 	u32 sample_flags;
 
 	u64 single_context:1;
+	u64 context_disable_preemption:1;
 	u64 ctx_handle;
 
 	/* OA sampling state */
@@ -1362,6 +1363,8 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	dev_priv->perf.oa.exclusive_stream = NULL;
 	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+	if (stream->ctx)
+		stream->ctx->perf_disabled_preemption = false;
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	free_oa_buffer(dev_priv);
@@ -2200,6 +2203,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		goto err_enable;
 	}
 
+	if (props->context_disable_preemption)
+		stream->ctx->perf_disabled_preemption = true;
+
 	stream->ops = &i915_oa_stream_ops;
 
 	dev_priv->perf.oa.exclusive_stream = stream;
@@ -2663,6 +2669,14 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 		}
 	}
 
+	if (props->context_disable_preemption) {
+		if (!specific_ctx) {
+			ret = -EINVAL;
+			goto err;
+		}
+		privileged_op = true;
+	}
+
 	/*
 	 * On Haswell the OA unit supports clock gating off for a specific
 	 * context and in this mode there's no visibility of metrics for the
@@ -2677,8 +2691,10 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
 	 * enable the OA unit by default.
 	 */
-	if (IS_HASWELL(dev_priv) && specific_ctx)
+	if (IS_HASWELL(dev_priv) && specific_ctx &&
+	    !props->context_disable_preemption) {
 		privileged_op = false;
+	}
 
 	/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
 	 * we check a dev.i915.perf_stream_paranoid sysctl option
@@ -2687,7 +2703,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	 */
 	if (privileged_op &&
 	    i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
-		DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n");
+		DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
 		ret = -EACCES;
 		goto err_ctx;
 	}
@@ -2879,6 +2895,10 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 			props->oa_periodic = true;
 			props->oa_period_exponent = value;
 			break;
+		case DRM_I915_PERF_PROP_DISABLE_PREEMPTION:
+			if (value)
+				props->context_disable_preemption = 1;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
 			return -EINVAL;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7d63c6d2f687..417ca93ea606 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2099,7 +2099,9 @@ static int gen8_emit_bb_start(struct i915_request *rq,
 	 *
 	 * That satisfies both the GPGPU w/a and our heavy-handed paranoia.
 	 */
-	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	*cs++ = MI_ARB_ON_OFF |
+		(rq->gem_context->perf_disabled_preemption ?
+		 MI_ARB_DISABLE : MI_ARB_ENABLE);
 
 	/* FIXME(BDW): Address space and security selectors. */
 	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3d9c44f35df4..4cb80082df7e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1553,6 +1553,14 @@ enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_OA_EXPONENT,
 
+	/**
+	 * Specifying this property is only valid when specify a context to
+	 * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property
+	 * will disable preemption for the particular context we want to
+	 * measure.
+	 */
+	DRM_I915_PERF_PROP_DISABLE_PREEMPTION,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
-- 
2.17.1



More information about the Intel-gfx mailing list