[PATCH 8/9] drm/i915: add a new perf configuration execbuf parameter

Chris Wilson chris at chris-wilson.co.uk
Wed Oct 9 08:53:50 UTC 2019


From: Lionel Landwerlin <lionel.g.landwerlin at intel.com>

We want the ability to dispatch a set of command buffer to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
    Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
    Move OA config vma to active before writing the ringbuffer (Chris)

v4: Reuse i915_user_extension_fn
    Serialize requests with OA config updates

v5: Check that the chained extension is only present once (Chris)
    Unpin oa_vma in main path (Chris)

v6: Use BIT_ULL (Chris)

v7: Hold drm.struct_mutex when serializing the request with OA config (Chris)

v8: Remove active request from engine (Lionel)

v9: Move fetching OA configuration pass engine pinning (Lionel)
    Lock VMA before moving to active (Chris)

v10: Fix leak on perf_fd (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 149 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_getparam.c          |   4 +
 include/uapi/drm/i915_drm.h                   |  38 ++++-
 3 files changed, 186 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 41bfd97785da..efa00435bab8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -24,6 +24,7 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
+#include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
@@ -47,6 +48,8 @@ enum {
 #define __EXEC_INTERNAL_FLAGS	(~0u << 30)
 #define UPDATE			PIN_OFFSET_FIXED
 
+#define EXTRA_VMA 2
+
 #define BATCH_OFFSET_BIAS (256*1024)
 
 #define __I915_EXEC_ILLEGAL_FLAGS \
@@ -278,6 +281,12 @@ struct i915_execbuffer {
 		struct drm_syncobj **syncobj;
 		unsigned long count;
 	} fences;
+
+	struct { /* HW configuration for OA. */
+		struct file *file;
+		struct i915_oa_config *config;
+		struct i915_vma *vma;
+	} oa;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -899,6 +908,17 @@ put_fence_array(const struct i915_execbuffer *eb)
 	__free_fence_array(eb->fences.syncobj, eb->fences.count);
 }
 
+static void
+put_perf_config(const struct i915_execbuffer *eb)
+{
+	if (!eb->oa.config)
+		return;
+
+	i915_vma_put(eb->oa.vma);
+	i915_oa_config_put(eb->oa.config);
+	fput(eb->oa.file);
+}
+
 static void eb_destroy(const struct i915_execbuffer *eb)
 {
 	GEM_BUG_ON(eb->reloc_cache.rq);
@@ -910,6 +930,7 @@ static void eb_destroy(const struct i915_execbuffer *eb)
 		kfree(eb->buckets);
 
 	put_fence_array(eb);
+	put_perf_config(eb);
 }
 
 static inline u64
@@ -2072,6 +2093,58 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
 	spin_unlock(&file_priv->mm.lock);
 }
 
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+	struct i915_perf_stream *stream;
+	struct i915_perf *perf;
+	int err;
+
+	if (!eb->oa.config)
+		return 0;
+
+	stream = eb->oa.file->private_data;
+	perf = stream->perf;
+
+	err = mutex_lock_interruptible(&perf->lock);
+	if (err)
+		return err;
+
+	if (stream != perf->exclusive_stream) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (stream->engine != eb->engine) { /* FIXME: virtual selection */
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = i915_active_fence_set(&perf->active_config, eb->request);
+	if (err)
+		goto out;
+
+	/*
+	 * If the config hasn't changed, skip reconfiguring the HW (this is
+	 * subject to a delay we want to avoid has much as possible).
+	 *
+	 * After a GPU reset, the oa config will not be recovered.
+	 */
+	if (eb->oa.config == stream->oa_config)
+		goto out;
+
+	err = eb->engine->emit_bb_start(eb->request,
+					eb->oa.vma->node.start, 0,
+					I915_DISPATCH_SECURE); /* ggtt */
+	if (err)
+		goto out;
+
+	swap(eb->oa.config, stream->oa_config);
+
+out:
+	mutex_unlock(&perf->lock);
+	return err;
+}
+
 static int eb_submit(struct i915_execbuffer *eb)
 {
 	int err;
@@ -2098,6 +2171,10 @@ static int eb_submit(struct i915_execbuffer *eb)
 			return err;
 	}
 
+	err = eb_oa_config(eb);
+	if (err)
+		return err;
+
 	err = eb->engine->emit_bb_start(eb->request,
 					eb->batch->node.start +
 					eb->batch_start_offset,
@@ -2455,8 +2532,60 @@ signal_fence_array(struct i915_execbuffer *eb)
 	}
 }
 
+static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
+{
+	struct i915_gem_execbuffer_ext_perf perf_config;
+	struct i915_execbuffer *eb = data;
+	struct i915_perf_stream *stream;
+	struct i915_oa_config *config;
+	struct i915_vma *vma;
+	struct file *file;
+	int err;
+
+	if (eb->oa.config)
+		return -EINVAL; /* exclusive */
+
+	if (copy_from_user(&perf_config, ext, sizeof(perf_config)))
+		return -EFAULT;
+
+	if (perf_config.flags)
+		return -EINVAL;
+
+	file = fget(perf_config.perf_fd);
+	if (!file)
+		return -EINVAL;
+
+	stream = i915_perf_file_get_stream(file);
+	if (!stream)
+		return -EINVAL;
+
+	config = i915_perf_get_oa_config(stream->perf, perf_config.oa_config);
+	if (!config) {
+		err = -EINVAL;
+		goto err_file;
+	}
+
+	vma = i915_perf_stream_get_oa_vma(stream, config);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_config;
+	}
+
+	eb->oa.vma = vma;
+	eb->oa.file = file;
+	eb->oa.config = config;
+	return 0;
+
+err_config:
+	i915_oa_config_put(config);
+err_file:
+	fput(file);
+	return err;
+}
+
 static const i915_user_extension_fn execbuf_extensions[] = {
 	[I915_EXEC_EXT_FENCE_ARRAY] = parse_fence_array,
+	[I915_EXEC_EXT_PERF_CONFIG] = parse_perf_config,
 };
 
 int i915_gem_execbuffer_ext_version(void)
@@ -2506,11 +2635,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		args->flags |= __EXEC_HAS_RELOC;
 
 	memset(&eb.fences, 0, sizeof(eb.fences));
+	eb.oa.config = NULL;
 
 	eb.exec = exec;
-	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
+	eb.vma = (struct i915_vma **)(exec + args->buffer_count + EXTRA_VMA);
 	eb.vma[0] = NULL;
-	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
+	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + EXTRA_VMA);
 
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
 	reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -2628,6 +2758,17 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
+	if (eb.oa.config) {
+		err = i915_vma_pin(eb.oa.vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+		if (err)
+			goto err_vma;
+
+		eb.vma[eb.buffer_count] = eb.oa.vma;
+		eb.flags[eb.buffer_count] = __EXEC_OBJECT_HAS_PIN;
+		eb.oa.vma->exec_flags = &eb.flags[eb.buffer_count];
+		eb.buffer_count++;
+	}
+
 	if (eb.batch_len == 0)
 		eb.batch_len = eb.batch->size - eb.batch_start_offset;
 
@@ -2805,7 +2946,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
 	/* Copy in the exec list from userland */
 	exec_list = kvmalloc_array(count, sizeof(*exec_list),
 				   __GFP_NOWARN | GFP_KERNEL);
-	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+	exec2_list = kvmalloc_array(count + EXTRA_VMA, eb_element_size(),
 				    __GFP_NOWARN | GFP_KERNEL);
 	if (exec_list == NULL || exec2_list == NULL) {
 		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
@@ -2880,7 +3021,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 
 	/* Allocate an extra slot for use by the command parser */
-	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+	exec2_list = kvmalloc_array(count + EXTRA_VMA, eb_element_size(),
 				    __GFP_NOWARN | GFP_KERNEL);
 	if (exec2_list == NULL) {
 		DRM_DEBUG("Failed to allocate exec list for %zd buffers\n",
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index bdbe5e4508a4..ef390080bd3b 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -165,6 +165,10 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_EXEC_EXT_VERSION:
 		value = i915_gem_execbuffer_ext_version();
 		break;
+	case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+		/* Obviously requires perf support. */
+		value = !!i915->perf.i915;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3b958f941b8c..9dd0c857e66b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -624,6 +624,16 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_EXEC_EXT_VERSION  55
 
+/*
+ * Request an i915/perf performance configuration change before running the
+ * commands given in an execbuf.
+ *
+ * Performance configuration ID and the file descriptor of the i915 perf
+ * stream are given through drm_i915_gem_execbuffer_ext_perf. See
+ * I915_EXEC_USE_EXTENSIONS.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1027,6 +1037,29 @@ struct i915_gem_execbuffer_ext_fence_array {
 	__u64 count; /* number of fences */
 };
 
+struct i915_gem_execbuffer_ext_perf {
+	struct i915_user_extension base; /* .name = I915_EXEC_EXT_PERF_CONFIG */
+
+	/*
+	 * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
+	 * This is used to identify that the application requesting a HW
+	 * performance configuration change actually has a right to do so
+	 * because it also has access the i915-perf stream.
+	 */
+	__s32 perf_fd;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * OA configuration ID to switch to before executing the commands
+	 * associated to the execbuf.
+	 */
+	__u64 oa_config;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
@@ -1053,9 +1086,11 @@ struct drm_i915_gem_execbuffer2 {
 	 * single struct drm_i915_gem_base_execbuffer_ext and num_cliprects is
 	 * 0. Extensions:
 	 *  - i915_gem_execbuffer_ext_fence_array (I915_EXEC_EXT_FENCE_ARRAY)
+	 *  - i915_gem_execbuffer_ext_perf (I915_EXEC_EXT_PERF_CONFIG)
 	 */
 	__u64 cliprects_ptr;
 #define I915_EXEC_EXT_FENCE_ARRAY 0
+#define I915_EXEC_EXT_PERF_CONFIG 1
 
 	__u64 flags;
 #define I915_EXEC_RING_MASK              (0x3f)
@@ -1065,7 +1100,8 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BLT                    (3<<0)
 #define I915_EXEC_VEBOX                  (4<<0)
 
-/* Used for switching the constants addressing mode on gen4+ RENDER ring.
+/*
+ * Used for switching the constants addressing mode on gen4+ RENDER ring.
  * Gen6+ only supports relative addressing to dynamic state (default) and
  * absolute addressing.
  *
-- 
2.23.0



More information about the Intel-gfx-trybot mailing list