[Intel-gfx] [PATCH v6 08/11] drm/i915: add a new perf configuration execbuf parameter

Lionel Landwerlin lionel.g.landwerlin at intel.com
Mon Jul 1 11:34:34 UTC 2019


We want the ability to dispatch a set of command buffer to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
    Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
    Move OA config vma to active before writing the ringbuffer (Chris)

v4: Reuse i915_user_extension_fn
    Serialize requests with OA config updates

v5: Check that the chained extension is only present once (Chris)
    Unpin oa_vma in main path (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 124 +++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   2 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   9 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   1 +
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   4 +-
 drivers/gpu/drm/i915/i915_drv.c               |   4 +
 drivers/gpu/drm/i915/i915_drv.h               |   8 +-
 drivers/gpu/drm/i915/i915_perf.c              |  23 ++--
 include/uapi/drm/i915_drm.h                   |  37 ++++++
 9 files changed, 196 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index d3004fc1f995..f92bace9caff 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -283,7 +283,12 @@ struct i915_execbuffer {
 	struct {
 		u64 flags; /** Available extensions parameters */
 		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+		struct drm_i915_gem_execbuffer_ext_perf perf_config;
 	} extensions;
+
+	struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. */
+	struct drm_i915_gem_object *oa_bo;
+	struct i915_vma *oa_vma;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1210,6 +1215,21 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 	return err;
 }
 
+
+static int
+get_execbuf_oa_config(struct i915_execbuffer *eb)
+{
+	eb->oa_config = NULL;
+	eb->oa_vma = NULL;
+	eb->oa_bo = NULL;
+
+	if ((eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0)
+		return 0;
+
+	return i915_perf_get_oa_config(eb->i915, eb->extensions.perf_config.oa_config,
+				       &eb->oa_config, &eb->oa_bo);
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct i915_vma *vma,
 			     unsigned int len)
@@ -2072,6 +2092,40 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
 	list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
 }
 
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+	int err;
+
+	if (!eb->oa_config)
+		return 0;
+
+	err = i915_active_request_set(&eb->engine->last_oa_config,
+				      eb->request);
+	if (err)
+		return err;
+
+	/*
+	 * If the config hasn't changed, skip reconfiguring the HW (this is
+	 * subject to a delay we want to avoid has much as possible).
+	 */
+	if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
+		return 0;
+
+	err = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
+	if (err)
+		return err;
+
+	err = eb->engine->emit_bb_start(eb->request,
+					eb->oa_vma->node.start,
+					0, I915_DISPATCH_SECURE);
+	if (err)
+		return err;
+
+	swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config);
+
+	return 0;
+}
+
 static int eb_submit(struct i915_execbuffer *eb)
 {
 	int err;
@@ -2098,6 +2152,10 @@ static int eb_submit(struct i915_execbuffer *eb)
 			return err;
 	}
 
+	err = eb_oa_config(eb);
+	if (err)
+		return err;
+
 	err = eb->engine->emit_bb_start(eb->request,
 					eb->batch->node.start +
 					eb->batch_start_offset,
@@ -2528,8 +2586,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
 	return 0;
 }
 
+static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
+{
+	struct i915_execbuffer *eb = data;
+
+	if (eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
+		return -EINVAL;
+
+	if (copy_from_user(&eb->extensions.perf_config, ext,
+			   sizeof(eb->extensions.perf_config)))
+		return -EFAULT;
+
+	eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
+
+	return 0;
+}
+
 static const i915_user_extension_fn execbuf_extensions[] = {
         [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
+        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
 };
 
 static int
@@ -2634,9 +2709,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
+	err = get_execbuf_oa_config(&eb);
+	if (err)
+		goto err_oa_config;
+
 	err = eb_create(&eb);
 	if (err)
-		goto err_out_fence;
+		goto err_oa_config;
 
 	GEM_BUG_ON(!eb.lut_size);
 
@@ -2661,6 +2740,27 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_unlock;
 
+	if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+		struct file *perf_file;
+
+		if (!intel_engine_has_oa(eb.engine)) {
+			err = -ENODEV;
+			goto err_engine;
+		}
+
+		perf_file = fget(eb.extensions.perf_config.perf_fd);
+		if (!perf_file)
+			goto err_engine;
+
+		if (perf_file->private_data != eb.i915->perf.oa.exclusive_stream)
+			err = -EINVAL;
+
+		fput(perf_file);
+
+		if (unlikely(err))
+			goto err_engine;
+	}
+
 	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
 	if (unlikely(err))
 		goto err_engine;
@@ -2781,6 +2881,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
+	if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+		eb.oa_vma = i915_vma_instance(eb.oa_bo,
+					      &eb.engine->i915->ggtt.vm, NULL);
+		if (unlikely(IS_ERR(eb.oa_vma))) {
+			err = PTR_ERR(eb.oa_vma);
+			eb.oa_vma = NULL;
+			goto err_request;
+		}
+
+		err = i915_vma_pin(eb.oa_vma, 0, 0, PIN_GLOBAL);
+		if (err)
+			goto err_request;
+	}
+
 	/*
 	 * Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
@@ -2825,7 +2939,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
 	eb_destroy(&eb);
-err_out_fence:
+err_oa_config:
+	if (eb.oa_config) {
+		i915_gem_object_put(eb.oa_bo);
+		i915_oa_config_put(eb.oa_config);
+	}
+	if (eb.oa_vma)
+		i915_vma_unpin(eb.oa_vma);
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
 err_exec_fence:
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index d1508f0b4c84..b3ee3e3b58dd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -859,6 +859,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 
 	engine->set_default_submission(engine);
 
+	INIT_ACTIVE_REQUEST(&engine->last_oa_config);
+
 	return 0;
 
 err_unpin:
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 7e056114344e..39da40937e7f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -363,6 +363,8 @@ struct intel_engine_cs {
 	struct i915_wa_list wa_list;
 	struct i915_wa_list whitelist;
 
+	struct i915_active_request last_oa_config;
+
 	u32             irq_keep_mask; /* always keep these interrupts */
 	u32		irq_enable_mask; /* bitmask to enable ring interrupt */
 	void		(*irq_enable)(struct intel_engine_cs *engine);
@@ -446,6 +448,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
 #define I915_ENGINE_IS_VIRTUAL       BIT(5)
+#define I915_ENGINE_HAS_OA           BIT(6)
 	unsigned int flags;
 
 	/*
@@ -541,6 +544,12 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_IS_VIRTUAL;
 }
 
+static inline bool
+intel_engine_has_oa(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_HAS_OA;
+}
+
 #define instdone_slice_mask(dev_priv__) \
 	(IS_GEN(dev_priv__, 7) ? \
 	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index cce8337bdf9c..001b696834f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2769,6 +2769,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 		engine->init_context = gen8_init_rcs_context;
 		engine->emit_flush = gen8_emit_flush_render;
 		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+		engine->flags |= I915_ENGINE_HAS_OA;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 02a4a52e2019..476d465f701c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -2206,8 +2206,10 @@ static void setup_rcs(struct intel_engine_cs *engine)
 		engine->irq_enable_mask = I915_USER_INTERRUPT;
 	}
 
-	if (IS_HASWELL(i915))
+	if (IS_HASWELL(i915)) {
 		engine->emit_bb_start = hsw_emit_bb_start;
+		engine->flags |= I915_ENGINE_HAS_OA;
+	}
 
 	engine->resume = rcs_resume;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 088f9d2af3fa..a08c6123bf38 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -487,6 +487,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_PERF_REVISION:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+		/* Obviously requires perf support. */
+		value = dev_priv->perf.initialized;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fe93a260bd28..664554114718 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1116,7 +1116,7 @@ struct i915_oa_config {
 
 	struct list_head vma_link;
 
-	atomic_t ref_count;
+	struct kref ref;
 };
 
 struct i915_perf_stream;
@@ -2615,6 +2615,12 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
 			    int metrics_set,
 			    struct i915_oa_config **out_config,
 			    struct drm_i915_gem_object **out_obj);
+void i915_oa_config_release(struct kref *ref);
+
+static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
+{
+	kref_put(&oa_config->ref, i915_oa_config_release);
+}
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 03e6908282e3..6b659b5f1948 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -367,10 +367,9 @@ struct perf_open_properties {
 	int oa_period_exponent;
 };
 
-static void put_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
-	if (!atomic_dec_and_test(&oa_config->ref_count))
-		return;
+	struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref);
 
 	if (oa_config->obj) {
 		struct drm_i915_private *i915 = oa_config->i915;
@@ -488,7 +487,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
 	}
 
 	if (out_config) {
-		atomic_inc(&oa_config->ref_count);
+		kref_get(&oa_config->ref);
 		*out_config = oa_config;
 	}
 
@@ -510,7 +509,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
 	mutex_unlock(&i915->perf.metrics_lock);
 
 	if (ret && out_config) {
-		put_oa_config(oa_config);
+		i915_oa_config_put(oa_config);
 		*out_config = NULL;
 	}
 
@@ -1484,7 +1483,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
-	put_oa_config(stream->oa_config);
+	i915_oa_config_put(stream->oa_config);
 
 	if (dev_priv->perf.oa.spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -2479,7 +2478,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	free_oa_buffer(dev_priv);
 
 err_oa_buf_alloc:
-	put_oa_config(stream->oa_config);
+	i915_oa_config_put(stream->oa_config);
 
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
@@ -3296,7 +3295,7 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
 		goto sysfs_error;
 
 	dev_priv->perf.oa.test_config.i915 = dev_priv;
-	atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);
+	kref_init(&dev_priv->perf.oa.test_config.ref);
 
 	goto exit;
 
@@ -3553,7 +3552,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 	}
 
 	oa_config->i915 = dev_priv;
-	atomic_set(&oa_config->ref_count, 1);
+	kref_init(&oa_config->ref);
 
 	if (!uuid_is_valid(args->uuid)) {
 		DRM_DEBUG("Invalid uuid format for OA config\n");
@@ -3652,7 +3651,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 sysfs_err:
 	mutex_unlock(&dev_priv->perf.metrics_lock);
 reg_err:
-	put_oa_config(oa_config);
+	i915_oa_config_put(oa_config);
 	DRM_DEBUG("Failed to add new OA config\n");
 	return err;
 }
@@ -3708,7 +3707,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
 	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
 
-	put_oa_config(oa_config);
+	i915_oa_config_put(oa_config);
 
 	return 0;
 
@@ -3878,7 +3877,7 @@ static int destroy_config(int id, void *p, void *data)
 {
 	struct i915_oa_config *oa_config = p;
 
-	put_oa_config(oa_config);
+	i915_oa_config_put(oa_config);
 
 	return 0;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6bd76a0d29e5..96bc09af8cc4 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -623,6 +623,16 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
 
+/*
+ * Request an i915/perf performance configuration change before running the
+ * commands given in an execbuf.
+ *
+ * Performance configuration ID and the file descriptor of the i915 perf
+ * stream are given through drm_i915_gem_execbuffer_ext_perf. See
+ * I915_EXEC_EXT.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1025,6 +1035,12 @@ enum drm_i915_gem_execbuffer_ext {
 	 */
 	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 0,
 
+	/**
+	 * This identifier is associated with
+	 * drm_i915_gem_execbuffer_perf_ext.
+	 */
+	DRM_I915_GEM_EXECBUFFER_EXT_PERF,
+
 	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
 };
 
@@ -1055,6 +1071,27 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences {
 	__u64 values_ptr;
 };
 
+struct drm_i915_gem_execbuffer_ext_perf {
+	struct i915_user_extension base;
+
+	/**
+	 * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
+	 * This is used to identify that the application
+	 */
+	__s32 perf_fd;
+
+	/**
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 pad;
+
+	/**
+	 * OA configuration ID to switch to before executing the commands
+	 * associated to the execbuf.
+	 */
+	__u64 oa_config;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
-- 
2.21.0.392.gf8f6787159e



More information about the Intel-gfx mailing list