[Intel-gfx] [PATCH v2 8/9] drm/i915: add a new perf configuration execbuf parameter
Chris Wilson
chris at chris-wilson.co.uk
Wed Oct 9 06:42:23 UTC 2019
From: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
We want the ability to dispatch a set of command buffer to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.
v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
Move oa_config vma to active (Chris)
v3: Don't drop the lock for engine lookup (Chris)
Move OA config vma to active before writing the ringbuffer (Chris)
v4: Reuse i915_user_extension_fn
Serialize requests with OA config updates
v5: Check that the chained extension is only present once (Chris)
Unpin oa_vma in main path (Chris)
v6: Use BIT_ULL (Chris)
v7: Hold drm.struct_mutex when serializing the request with OA config (Chris)
v8: Remove active request from engine (Lionel)
v9: Move fetching OA configuration pass engine pinning (Lionel)
Lock VMA before moving to active (Chris)
v10: Fix leak on perf_fd (Lionel)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 141 +++++++++++++++++-
drivers/gpu/drm/i915/i915_getparam.c | 4 +
include/uapi/drm/i915_drm.h | 44 +++++-
3 files changed, 182 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b308503ec7ce..5de119ddea1c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -24,6 +24,7 @@
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_gem_ioctls.h"
+#include "i915_perf.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
@@ -47,6 +48,8 @@ enum {
#define __EXEC_INTERNAL_FLAGS (~0u << 30)
#define UPDATE PIN_OFFSET_FIXED
+#define EXTRA_VMA 2
+
#define BATCH_OFFSET_BIAS (256*1024)
#define __I915_EXEC_ILLEGAL_FLAGS \
@@ -273,6 +276,12 @@ struct i915_execbuffer {
*/
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
+
+ struct { /* HW configuration for OA. */
+ struct file *file;
+ struct i915_oa_config *config;
+ struct i915_vma *vma;
+ } oa;
};
#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -889,6 +898,12 @@ static void eb_destroy(const struct i915_execbuffer *eb)
if (eb->lut_size > 0)
kfree(eb->buckets);
+
+ if (eb->oa.config) {
+ i915_vma_put(eb->oa.vma);
+ i915_oa_config_put(eb->oa.config);
+ fput(eb->oa.file);
+ }
}
static inline u64
@@ -2051,6 +2066,58 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
spin_unlock(&file_priv->mm.lock);
}
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+ struct i915_perf_stream *stream;
+ struct i915_perf *perf;
+ int err;
+
+ if (!eb->oa.config)
+ return 0;
+
+ stream = eb->oa.file->private_data;
+ perf = stream->perf;
+
+ err = mutex_lock_interruptible(&perf->lock);
+ if (err)
+ return err;
+
+ if (stream != perf->exclusive_stream) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (stream->engine != eb->engine) { /* FIXME: virtual selection */
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = i915_active_fence_set(&perf->active_config, eb->request);
+ if (err)
+ goto out;
+
+ /*
+ * If the config hasn't changed, skip reconfiguring the HW (this is
+ * subject to a delay we want to avoid has much as possible).
+ *
+ * After a GPU reset, the oa config will not be recovered.
+ */
+ if (eb->oa.config == stream->oa_config)
+ goto out;
+
+ err = eb->engine->emit_bb_start(eb->request,
+ eb->oa.vma->node.start, 0,
+ I915_DISPATCH_SECURE); /* ggtt */
+ if (err)
+ goto out;
+
+ swap(eb->oa.config, stream->oa_config);
+
+out:
+ mutex_unlock(&perf->lock);
+ return err;
+}
+
static int eb_submit(struct i915_execbuffer *eb)
{
int err;
@@ -2077,6 +2144,10 @@ static int eb_submit(struct i915_execbuffer *eb)
return err;
}
+ err = eb_oa_config(eb);
+ if (err)
+ return err;
+
err = eb->engine->emit_bb_start(eb->request,
eb->batch->node.start +
eb->batch_start_offset,
@@ -2427,7 +2498,56 @@ signal_fence_array(struct i915_execbuffer *eb,
}
}
+static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
+{
+ struct drm_i915_gem_execbuffer_ext_perf perf_config;
+ struct i915_execbuffer *eb = data;
+ struct i915_perf_stream *stream;
+ struct i915_oa_config *config;
+ struct i915_vma *vma;
+ struct file *file;
+ int err;
+
+ if (eb->oa.config)
+ return -EINVAL; /* exclusive */
+
+ if (copy_from_user(&perf_config, ext, sizeof(perf_config)))
+ return -EFAULT;
+
+ file = fget(perf_config.perf_fd);
+ if (!file)
+ return -EINVAL;
+
+ stream = i915_perf_file_get_stream(file);
+ if (!stream)
+ return -EINVAL;
+
+ config = i915_perf_get_oa_config(stream->perf, perf_config.oa_config);
+ if (!config) {
+ err = -EINVAL;
+ goto err_file;
+ }
+
+ vma = i915_perf_stream_get_oa_vma(stream, config);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_config;
+ }
+
+ eb->oa.vma = vma;
+ eb->oa.file = file;
+ eb->oa.config = config;
+ return 0;
+
+err_config:
+ i915_oa_config_put(config);
+err_file:
+ fput(file);
+ return err;
+}
+
static const i915_user_extension_fn execbuf_extensions[] = {
+ [I915_EXEC_EXT_PERF_CONFIG] = parse_perf_config,
};
static int
@@ -2467,10 +2587,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
args->flags |= __EXEC_HAS_RELOC;
+ eb.oa.config = NULL;
+
eb.exec = exec;
- eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
+ eb.vma = (struct i915_vma **)(exec + args->buffer_count + EXTRA_VMA);
eb.vma[0] = NULL;
- eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
+ eb.flags = (unsigned int *)(eb.vma + args->buffer_count + EXTRA_VMA);
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
reloc_cache_init(&eb.reloc_cache, eb.i915);
@@ -2588,6 +2710,17 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
}
+ if (eb.oa.config) {
+ err = i915_vma_pin(eb.oa.vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (err)
+ goto err_vma;
+
+ eb.vma[eb.buffer_count] = eb.oa.vma;
+ eb.flags[eb.buffer_count] = __EXEC_OBJECT_HAS_PIN;
+ eb.oa.vma->exec_flags = &eb.flags[eb.buffer_count];
+ eb.buffer_count++;
+ }
+
if (eb.batch_len == 0)
eb.batch_len = eb.batch->size - eb.batch_start_offset;
@@ -2768,7 +2901,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
/* Copy in the exec list from userland */
exec_list = kvmalloc_array(count, sizeof(*exec_list),
__GFP_NOWARN | GFP_KERNEL);
- exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+ exec2_list = kvmalloc_array(count + EXTRA_VMA, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec_list == NULL || exec2_list == NULL) {
DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
@@ -2844,7 +2977,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
/* Allocate an extra slot for use by the command parser */
- exec2_list = kvmalloc_array(count + 1, eb_element_size(),
+ exec2_list = kvmalloc_array(count + EXTRA_VMA, eb_element_size(),
__GFP_NOWARN | GFP_KERNEL);
if (exec2_list == NULL) {
DRM_DEBUG("Failed to allocate exec list for %zd buffers\n",
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index ad33fbe90a28..6ad290b14e12 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -160,6 +160,10 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_PERF_REVISION:
value = i915_perf_ioctl_version();
break;
+ case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+ /* Obviously requires perf support. */
+ value = !!i915->perf.i915;
+ break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 06c5c187264b..2ce498e498a2 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -618,6 +618,16 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_PERF_REVISION 54
+/*
+ * Request an i915/perf performance configuration change before running the
+ * commands given in an execbuf.
+ *
+ * Performance configuration ID and the file descriptor of the i915 perf
+ * stream are given through drm_i915_gem_execbuffer_ext_perf. See
+ * I915_EXEC_USE_EXTENSIONS.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 55
+
/* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam {
@@ -1014,6 +1024,29 @@ struct drm_i915_gem_exec_fence {
__u32 flags;
};
+struct drm_i915_gem_execbuffer_ext_perf {
+ struct i915_user_extension base; /* .name = I915_EXEC_EXT_PERF_CONFIG */
+
+ /*
+ * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
+ * This is used to identify that the application requesting a HW
+ * performance configuration change actually has a right to do so
+ * because it also has access the i915-perf stream.
+ */
+ __s32 perf_fd;
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 flags;
+
+ /*
+ * OA configuration ID to switch to before executing the commands
+ * associated to the execbuf.
+ */
+ __u64 oa_config;
+};
+
struct drm_i915_gem_execbuffer2 {
/**
* List of gem_exec_object2 structs
@@ -1038,9 +1071,13 @@ struct drm_i915_gem_execbuffer2 {
*
* If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a
* single struct drm_i915_gem_base_execbuffer_ext and num_cliprects is
- * 0.
+ * 0. Extensions:
+ * drm_i915_gem_execbuffer_ext_perf (I915_EXEC_EXT_PERF_CONFIG)
*/
__u64 cliprects_ptr;
+#define I915_EXEC_EXT_PERF_CONFIG 0
+
+ __u64 flags;
#define I915_EXEC_RING_MASK (0x3f)
#define I915_EXEC_DEFAULT (0<<0)
#define I915_EXEC_RENDER (1<<0)
@@ -1048,7 +1085,8 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_BLT (3<<0)
#define I915_EXEC_VEBOX (4<<0)
-/* Used for switching the constants addressing mode on gen4+ RENDER ring.
+/*
+ * Used for switching the constants addressing mode on gen4+ RENDER ring.
* Gen6+ only supports relative addressing to dynamic state (default) and
* absolute addressing.
*
@@ -1058,7 +1096,7 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */
#define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6)
#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
- __u64 flags;
+
__u64 rsvd1; /* now used for context info */
__u64 rsvd2;
};
--
2.23.0
More information about the Intel-gfx
mailing list