[Intel-gfx] [PATCH v14 10/12] drm/i915: add a new perf configuration execbuf parameter

Lionel Landwerlin lionel.g.landwerlin at intel.com
Thu Sep 5 12:04:49 UTC 2019


We want the ability to dispatch a set of command buffer to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
    Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
    Move OA config vma to active before writing the ringbuffer (Chris)

v4: Reuse i915_user_extension_fn
    Serialize requests with OA config updates

v5: Check that the chained extension is only present once (Chris)
    Unpin oa_vma in main path (Chris)

v6: Use BIT_ULL (Chris)

v7: Hold drm.struct_mutex when serializing the request with OA config (Chris)

v8: Remove active request from engine (Lionel)

v9: Move fetching OA configuration pass engine pinning (Lionel)
    Lock VMA before moving to active (Chris)

v10: Fix leak on perf_fd (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 147 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_getparam.c          |   4 +
 include/uapi/drm/i915_drm.h                   |  39 +++++
 3 files changed, 188 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 46ad8d9642d1..d416b60c94bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -24,6 +24,7 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
+#include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
@@ -284,7 +285,12 @@ struct i915_execbuffer {
 	struct {
 		u64 flags; /** Available extensions parameters */
 		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+		struct drm_i915_gem_execbuffer_ext_perf perf_config;
 	} extensions;
+
+	struct file *perf_file;
+	struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. */
+	struct i915_vma *oa_vma;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1152,6 +1158,58 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 	return err;
 }
 
+
+static int
+eb_get_oa_config(struct i915_execbuffer *eb)
+{
+	struct drm_i915_gem_object *oa_bo;
+	int err = 0;
+
+	eb->perf_file = NULL;
+	eb->oa_config = NULL;
+	eb->oa_vma = NULL;
+
+	if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0)
+		return 0;
+
+	eb->perf_file = fget(eb->extensions.perf_config.perf_fd);
+	if (!eb->perf_file)
+		return -EINVAL;
+
+	err = i915_mutex_lock_interruptible(&eb->i915->drm);
+	if (err)
+		return err;
+
+	if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream)
+		err = -EINVAL;
+
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+
+	if (err)
+		return err;
+
+	if (eb->i915->perf.exclusive_stream->engine != eb->engine)
+		return -EINVAL;
+
+	err = i915_perf_get_oa_config_and_bo(
+		eb->i915->perf.exclusive_stream,
+		eb->extensions.perf_config.oa_config,
+		&eb->oa_config, &oa_bo);
+	if (err)
+		return err;
+
+	eb->oa_vma = i915_vma_instance(oa_bo,
+				       &eb->engine->gt->ggtt->vm, NULL);
+	i915_gem_object_put(oa_bo);
+	if (IS_ERR(eb->oa_vma)) {
+		err = PTR_ERR(eb->oa_vma);
+		eb->oa_vma = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct i915_vma *vma,
 			     unsigned int len)
@@ -2051,6 +2109,54 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
 	spin_unlock(&file_priv->mm.lock);
 }
 
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+	struct i915_perf_stream *perf_stream;
+	int err;
+
+	if (!eb->oa_config)
+		return 0;
+
+	perf_stream = eb->perf_file->private_data;
+
+	err = mutex_lock_interruptible(&perf_stream->config_mutex);
+	if (err)
+		return err;
+
+	err = i915_active_request_set(&perf_stream->active_config_rq,
+				      eb->request);
+	if (err)
+		goto out;
+
+	/*
+	 * If the config hasn't changed, skip reconfiguring the HW (this is
+	 * subject to a delay we want to avoid has much as possible).
+	 */
+	if (eb->oa_config == perf_stream->oa_config)
+		goto out;
+
+	i915_vma_lock(eb->oa_vma);
+	err = i915_request_await_object(eb->request, eb->oa_vma->obj, false); /* await_resv already! */
+	if (!err)
+		err = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
+	i915_vma_unlock(eb->oa_vma);
+	if (err)
+		goto out;
+
+	err = eb->engine->emit_bb_start(eb->request,
+					eb->oa_vma->node.start,
+					0, I915_DISPATCH_SECURE);
+	if (err)
+		goto out;
+
+	swap(eb->oa_config, perf_stream->oa_config);
+
+out:
+	mutex_unlock(&perf_stream->config_mutex);
+
+	return err;
+}
+
 static int eb_submit(struct i915_execbuffer *eb)
 {
 	int err;
@@ -2077,6 +2183,10 @@ static int eb_submit(struct i915_execbuffer *eb)
 			return err;
 	}
 
+	err = eb_oa_config(eb);
+	if (err)
+		return err;
+
 	err = eb->engine->emit_bb_start(eb->request,
 					eb->batch->node.start +
 					eb->batch_start_offset,
@@ -2643,8 +2753,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
 	return 0;
 }
 
+static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
+{
+	struct i915_execbuffer *eb = data;
+
+	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
+		return -EINVAL;
+
+	if (copy_from_user(&eb->extensions.perf_config, ext,
+			   sizeof(eb->extensions.perf_config)))
+		return -EFAULT;
+
+	eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
+
+	return 0;
+}
+
 static const i915_user_extension_fn execbuf_extensions[] = {
         [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
+        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
 };
 
 static int
@@ -2769,9 +2896,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_context;
 
+	err = eb_get_oa_config(&eb);
+	if (err)
+		goto err_oa_config;
+
 	err = i915_mutex_lock_interruptible(dev);
 	if (err)
-		goto err_engine;
+		goto err_oa_config;
 
 	err = eb_relocate(&eb);
 	if (err) {
@@ -2889,6 +3020,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
+	if (eb.oa_vma) {
+		err = i915_vma_pin(eb.oa_vma, 0, 0, PIN_GLOBAL);
+		if (err)
+			goto err_request;
+	}
+
 	/*
 	 * Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
@@ -2929,7 +3066,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (eb.exec)
 		eb_release_vmas(&eb);
 	mutex_unlock(&dev->struct_mutex);
-err_engine:
+err_oa_config:
+	if (eb.perf_file)
+		fput(eb.perf_file);
+	if (eb.oa_config)
+		i915_oa_config_put(eb.oa_config);
+	if (eb.oa_vma)
+		i915_vma_unpin(eb.oa_vma);
 	eb_unpin_engine(&eb);
 err_context:
 	i915_gem_context_put(eb.gem_context);
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index bd41cc5ce906..39d4c2c2e0f4 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -161,6 +161,10 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_PERF_REVISION:
 		value = i915_perf_ioctl_version();
 		break;
+	case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+		/* Obviously requires perf support. */
+		value = i915->perf.initialized;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e98c9a7baa91..3166c9ca85f3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -624,6 +624,16 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PERF_REVISION	55
 
+/*
+ * Request an i915/perf performance configuration change before running the
+ * commands given in an execbuf.
+ *
+ * Performance configuration ID and the file descriptor of the i915 perf
+ * stream are given through drm_i915_gem_execbuffer_ext_perf. See
+ * I915_EXEC_EXT.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext {
 	 */
 	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
 
+	/**
+	 * This identifier is associated with
+	 * drm_i915_gem_execbuffer_perf_ext.
+	 */
+	DRM_I915_GEM_EXECBUFFER_EXT_PERF,
+
 	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
 };
 
@@ -1056,6 +1072,29 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences {
 	__u64 values_ptr;
 };
 
+struct drm_i915_gem_execbuffer_ext_perf {
+	struct i915_user_extension base;
+
+	/**
+	 * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
+	 * This is used to identify that the application requesting a HW
+	 * performance configuration change actually has a right to do so
+	 * because it also has access the i915-perf stream.
+	 */
+	__s32 perf_fd;
+
+	/**
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 pad;
+
+	/**
+	 * OA configuration ID to switch to before executing the commands
+	 * associated to the execbuf.
+	 */
+	__u64 oa_config;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
-- 
2.23.0



More information about the Intel-gfx mailing list