[Intel-gfx] [PATCH 05/11] drm/i915/perf: allow for CS OA configs to be created lazily
Chris Wilson
chris at chris-wilson.co.uk
Wed Oct 9 20:36:35 UTC 2019
From: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.
We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer is
executed with a given OA configuration.
This mechanism essentially allows the userspace driver to go through
several OA configurations without having to open/close the i915/perf
stream.
v2: No need for locking on OA config object creation (Chris)
Flush cpu mapping of OA config (Chris)
v3: Properly deal with the perf_metric lock (Chris/Lionel)
v4: Fix oa config unref/put when not found (Lionel)
v5: Allocate BOs for configurations on the stream instead of globally
(Lionel)
v6: Fix 64bit division (Chris)
v7: Store allocated config BOs into the stream (Lionel)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk> (v4)
---
drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 +
drivers/gpu/drm/i915/i915_perf.c | 107 +++++++++++--------
drivers/gpu/drm/i915/i915_perf.h | 24 +++++
drivers/gpu/drm/i915/i915_perf_types.h | 23 ++--
4 files changed, 102 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index b0227ab2fe1b..0987100c786b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,6 +138,7 @@
/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
#define MI_LRI_CS_MMIO (1<<19)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index bf99eaf2315f..8c06f42720d6 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -369,52 +369,52 @@ struct perf_open_properties {
struct intel_engine_cs *engine;
};
+struct i915_oa_config_bo {
+ struct llist_node node;
+
+ struct i915_oa_config *oa_config;
+ struct i915_vma *vma;
+};
+
static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
-static void free_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
{
+ struct i915_oa_config *oa_config =
+ container_of(ref, typeof(*oa_config), ref);
+
if (!PTR_ERR(oa_config->flex_regs))
kfree(oa_config->flex_regs);
if (!PTR_ERR(oa_config->b_counter_regs))
kfree(oa_config->b_counter_regs);
if (!PTR_ERR(oa_config->mux_regs))
kfree(oa_config->mux_regs);
- kfree(oa_config);
-}
-
-static void put_oa_config(struct i915_oa_config *oa_config)
-{
- if (!atomic_dec_and_test(&oa_config->ref_count))
- return;
- free_oa_config(oa_config);
+ kfree_rcu(oa_config, rcu);
}
-static int get_oa_config(struct i915_perf *perf,
- int metrics_set,
- struct i915_oa_config **out_config)
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
{
- int ret;
-
- if (metrics_set == 1) {
- *out_config = &perf->test_config;
- atomic_inc(&perf->test_config.ref_count);
- return 0;
- }
-
- ret = mutex_lock_interruptible(&perf->metrics_lock);
- if (ret)
- return ret;
+ struct i915_oa_config *oa_config;
- *out_config = idr_find(&perf->metrics_idr, metrics_set);
- if (!*out_config)
- ret = -EINVAL;
+ rcu_read_lock();
+ if (metrics_set == 1)
+ oa_config = &perf->test_config;
else
- atomic_inc(&(*out_config)->ref_count);
+ oa_config = idr_find(&perf->metrics_idr, metrics_set);
+ if (oa_config)
+ oa_config = i915_oa_config_get(oa_config);
+ rcu_read_unlock();
- mutex_unlock(&perf->metrics_lock);
+ return oa_config;
+}
- return ret;
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+ i915_oa_config_put(oa_bo->oa_config);
+ i915_vma_put(oa_bo->vma);
+ kfree(oa_bo);
}
static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
@@ -1337,6 +1337,16 @@ free_oa_buffer(struct i915_perf_stream *stream)
stream->oa_buffer.vaddr = NULL;
}
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+ struct i915_oa_config_bo *oa_bo, *tmp;
+
+ i915_oa_config_put(stream->oa_config);
+ llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
+ free_oa_config_bo(oa_bo);
+}
+
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct i915_perf *perf = stream->perf;
@@ -1353,7 +1363,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
free_oa_buffer(stream);
put_oa_context(stream);
- put_oa_config(stream->oa_config);
+ free_oa_configs(stream);
if (perf->spurious_report_rs.missed) {
DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -1500,10 +1510,6 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
goto err_unpin;
}
- DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
- i915_ggtt_offset(stream->oa_buffer.vma),
- stream->oa_buffer.vaddr);
-
return 0;
err_unpin:
@@ -2194,9 +2200,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
}
}
- ret = get_oa_config(perf, props->metrics_set, &stream->oa_config);
- if (ret) {
+ stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
+ if (!stream->oa_config) {
DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
+ ret = -EINVAL;
goto err_config;
}
@@ -2213,6 +2220,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
goto err_enable;
}
+ DRM_DEBUG("opening stream oa config uuid=%s\n",
+ stream->oa_config->uuid);
+
hrtimer_init(&stream->poll_check_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
stream->poll_check_timer.function = oa_poll_check_timer_cb;
@@ -2228,7 +2238,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
free_oa_buffer(stream);
err_oa_buf_alloc:
- put_oa_config(stream->oa_config);
+ free_oa_configs(stream);
err_config:
put_oa_context(stream);
@@ -3049,7 +3059,8 @@ void i915_perf_register(struct drm_i915_private *i915)
if (ret)
goto sysfs_error;
- atomic_set(&perf->test_config.ref_count, 1);
+ perf->test_config.perf = perf;
+ kref_init(&perf->test_config.ref);
goto exit;
@@ -3307,7 +3318,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
return -ENOMEM;
}
- atomic_set(&oa_config->ref_count, 1);
+ oa_config->perf = perf;
+ kref_init(&oa_config->ref);
if (!uuid_is_valid(args->uuid)) {
DRM_DEBUG("Invalid uuid format for OA config\n");
@@ -3406,7 +3418,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
sysfs_err:
mutex_unlock(&perf->metrics_lock);
reg_err:
- put_oa_config(oa_config);
+ i915_oa_config_put(oa_config);
DRM_DEBUG("Failed to add new OA config\n");
return err;
}
@@ -3442,13 +3454,13 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
ret = mutex_lock_interruptible(&perf->metrics_lock);
if (ret)
- goto lock_err;
+ return ret;
oa_config = idr_find(&perf->metrics_idr, *arg);
if (!oa_config) {
DRM_DEBUG("Failed to remove unknown OA config\n");
ret = -ENOENT;
- goto config_err;
+ goto err_unlock;
}
GEM_BUG_ON(*arg != oa_config->id);
@@ -3458,13 +3470,16 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
idr_remove(&perf->metrics_idr, *arg);
+ mutex_unlock(&perf->metrics_lock);
+
DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
- put_oa_config(oa_config);
+ i915_oa_config_put(oa_config);
+
+ return 0;
-config_err:
+err_unlock:
mutex_unlock(&perf->metrics_lock);
-lock_err:
return ret;
}
@@ -3634,7 +3649,7 @@ void i915_perf_init(struct drm_i915_private *i915)
static int destroy_config(int id, void *p, void *data)
{
- put_oa_config(p);
+ i915_oa_config_put(p);
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index 295e33e8eef7..4ceebce72060 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -6,6 +6,7 @@
#ifndef __I915_PERF_H__
#define __I915_PERF_H__
+#include <linux/kref.h>
#include <linux/types.h>
#include "i915_perf_types.h"
@@ -13,6 +14,7 @@
struct drm_device;
struct drm_file;
struct drm_i915_private;
+struct i915_oa_config;
struct intel_context;
struct intel_engine_cs;
@@ -28,7 +30,29 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+
void i915_oa_init_reg_state(const struct intel_context *ce,
const struct intel_engine_cs *engine);
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set);
+
+static inline struct i915_oa_config *
+i915_oa_config_get(struct i915_oa_config *oa_config)
+{
+ if (kref_get_unless_zero(&oa_config->ref))
+ return oa_config;
+ else
+ return NULL;
+}
+
+void i915_oa_config_release(struct kref *ref);
+static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
+{
+ if (!oa_config)
+ return;
+
+ kref_put(&oa_config->ref, i915_oa_config_release);
+}
+
#endif /* __I915_PERF_H__ */
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index 8cfe598352a8..b1b7c8d04f50 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -9,7 +9,7 @@
#include <linux/atomic.h>
#include <linux/device.h>
#include <linux/hrtimer.h>
-#include <linux/list.h>
+#include <linux/llist.h>
#include <linux/poll.h>
#include <linux/sysfs.h>
#include <linux/types.h>
@@ -22,6 +22,7 @@
struct drm_i915_private;
struct file;
struct i915_gem_context;
+struct i915_perf;
struct i915_vma;
struct intel_context;
struct intel_engine_cs;
@@ -37,6 +38,8 @@ struct i915_oa_reg {
};
struct i915_oa_config {
+ struct i915_perf *perf;
+
char uuid[UUID_STRING_LEN + 1];
int id;
@@ -51,7 +54,8 @@ struct i915_oa_config {
struct attribute *attrs[2];
struct device_attribute sysfs_metric_id;
- atomic_t ref_count;
+ struct kref ref;
+ struct rcu_head rcu;
};
struct i915_perf_stream;
@@ -179,6 +183,12 @@ struct i915_perf_stream {
*/
struct i915_oa_config *oa_config;
+ /**
+ * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
+ * each time @oa_config changes.
+ */
+ struct llist_head oa_config_bos;
+
/**
* @pinned_ctx: The OA context specific information.
*/
@@ -333,13 +343,13 @@ struct i915_perf {
/*
* Lock associated with adding/modifying/removing OA configs
- * in dev_priv->perf.metrics_idr.
+ * in perf->metrics_idr.
*/
struct mutex metrics_lock;
/*
- * List of dynamic configurations, you need to hold
- * dev_priv->perf.metrics_lock to access it.
+ * List of dynamic configurations (struct i915_oa_config), you
+ * need to hold perf->metrics_lock to access it.
*/
struct idr metrics_idr;
@@ -352,8 +362,7 @@ struct i915_perf {
/*
* The stream currently using the OA unit. If accessed
* outside a syscall associated to its file
- * descriptor, you need to hold
- * dev_priv->drm.struct_mutex.
+ * descriptor.
*/
struct i915_perf_stream *exclusive_stream;
--
2.23.0
More information about the Intel-gfx
mailing list