[Intel-gfx] [PATCH v12 06/11] drm/i915/perf: allow for CS OA configs to be created lazily
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Fri Aug 30 14:47:21 UTC 2019
Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.
We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer is
executed with a given OA configuration.
This mechanism essentially allows the userspace driver to go through
several OA configurations without having to open/close the i915/perf
stream.
v2: No need for locking on OA config object creation (Chris)
Flush cpu mapping of OA config (Chris)
v3: Properly deal with the perf_metric lock (Chris/Lionel)
v4: Fix oa config unref/put when not found (Lionel)
v5: Allocate BOs for configurations on the stream instead of globally
(Lionel)
v6: Fix 64bit division (Chris)
v7: Store allocated config BOs into the stream (Lionel)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk> (v4)
---
drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 +
drivers/gpu/drm/i915/i915_drv.h | 19 +-
drivers/gpu/drm/i915/i915_perf.c | 270 ++++++++++++++++---
drivers/gpu/drm/i915/i915_perf.h | 26 ++
4 files changed, 274 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 86e00a2db8a4..a7f1377a54a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -133,6 +133,7 @@
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 274a1193d4f0..c0ff6f0fd33e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -990,6 +990,8 @@ struct i915_oa_reg {
};
struct i915_oa_config {
+ struct drm_i915_private *i915;
+
char uuid[UUID_STRING_LEN + 1];
int id;
@@ -1004,7 +1006,7 @@ struct i915_oa_config {
struct attribute *attrs[2];
struct device_attribute sysfs_metric_id;
- atomic_t ref_count;
+ struct kref ref;
};
struct i915_perf_stream;
@@ -1126,11 +1128,22 @@ struct i915_perf_stream {
*/
const struct i915_perf_stream_ops *ops;
+ /**
+ * @config_mutex: Protects access to @oa_config & @oa_config_bos.
+ */
+ struct mutex config_mutex;
+
/**
* @oa_config: The OA configuration used by the stream.
*/
struct i915_oa_config *oa_config;
+ /**
+ * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
+ * each time @oa_config changes.
+ */
+ struct list_head oa_config_bos;
+
/**
* The OA context specific information.
*/
@@ -1661,8 +1674,8 @@ struct drm_i915_private {
struct mutex metrics_lock;
/*
- * List of dynamic configurations, you need to hold
- * dev_priv->perf.metrics_lock to access it.
+ * List of dynamic configurations (struct i915_oa_config), you
+ * need to hold dev_priv->perf.metrics_lock to access it.
*/
struct idr metrics_idr;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e8f4ebcc9bbc..08869660f1f2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -367,11 +367,19 @@ struct perf_open_properties {
struct intel_engine_cs *engine;
};
+struct i915_oa_config_bo {
+ struct list_head link;
+
+ struct i915_oa_config *oa_config;
+ struct drm_i915_gem_object *bo;
+};
+
static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
-static void free_oa_config(struct drm_i915_private *dev_priv,
- struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
{
+ struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref);
+
if (!PTR_ERR(oa_config->flex_regs))
kfree(oa_config->flex_regs);
if (!PTR_ERR(oa_config->b_counter_regs))
@@ -381,40 +389,194 @@ static void free_oa_config(struct drm_i915_private *dev_priv,
kfree(oa_config);
}
-static void put_oa_config(struct drm_i915_private *dev_priv,
- struct i915_oa_config *oa_config)
+static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs)
{
- if (!atomic_dec_and_test(&oa_config->ref_count))
- return;
+ u32 i;
+
+ for (i = 0; i < n_regs; i++) {
+ if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+ u32 n_lri = min(n_regs - i,
+ (u32) MI_LOAD_REGISTER_IMM_MAX_REGS);
- free_oa_config(dev_priv, oa_config);
+ *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+ }
+ *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+ *cs++ = reg_data[i].value;
+ }
+
+ return cs;
}
-static int get_oa_config(struct drm_i915_private *dev_priv,
- int metrics_set,
- struct i915_oa_config **out_config)
+static struct i915_oa_config_bo* alloc_oa_config_buffer(struct drm_i915_private *i915,
+ struct i915_oa_config *oa_config)
{
- int ret;
+ struct i915_oa_config_bo *oa_bo;
+ size_t config_length = 0;
+ u32 *cs;
+ int err;
+
+ oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
+ if (!oa_bo)
+ return ERR_PTR(-ENOMEM);
+
+ oa_bo->oa_config = i915_oa_config_get(oa_config);
+
+ if (oa_config->mux_regs_len > 0) {
+ config_length += DIV_ROUND_UP(oa_config->mux_regs_len,
+ MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+ config_length += oa_config->mux_regs_len * 8;
+ }
+ if (oa_config->b_counter_regs_len > 0) {
+ config_length += DIV_ROUND_UP(oa_config->b_counter_regs_len,
+ MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+ config_length += oa_config->b_counter_regs_len * 8;
+ }
+ if (oa_config->flex_regs_len > 0) {
+ config_length += DIV_ROUND_UP(oa_config->flex_regs_len,
+ MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+ config_length += oa_config->flex_regs_len * 8;
+ }
+ config_length += 4; /* MI_BATCH_BUFFER_END */
+ config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE);
+
+ oa_bo->bo = i915_gem_object_create_shmem(i915, config_length);
+ if (IS_ERR(oa_bo->bo)) {
+ err = PTR_ERR(oa_bo->bo);
+ goto err_oa_config;
+ }
+
+ cs = i915_gem_object_pin_map(oa_bo->bo, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_oa_bo;
+ }
+
+ cs = write_cs_mi_lri(cs, oa_config->mux_regs, oa_config->mux_regs_len);
+ cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len);
+ cs = write_cs_mi_lri(cs, oa_config->flex_regs, oa_config->flex_regs_len);
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(oa_bo->bo);
+ i915_gem_object_unpin_map(oa_bo->bo);
+
+ return oa_bo;
+
+err_oa_bo:
+ i915_gem_object_put(oa_bo->bo);
+err_oa_config:
+ i915_oa_config_put(oa_bo->oa_config);
+ kfree(oa_bo);
+
+ return ERR_PTR(err);
+}
+
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+ int metrics_set,
+ struct i915_oa_config **out_config)
+{
+ struct i915_oa_config *oa_config;
+ int err;
+
+ if (!i915->perf.initialized)
+ return -ENODEV;
+
+ err = mutex_lock_interruptible(&i915->perf.metrics_lock);
+ if (err)
+ return err;
if (metrics_set == 1) {
- *out_config = &dev_priv->perf.test_config;
- atomic_inc(&dev_priv->perf.test_config.ref_count);
- return 0;
+ oa_config = &i915->perf.test_config;
+ } else {
+ oa_config = idr_find(&i915->perf.metrics_idr, metrics_set);
+ if (!oa_config)
+ err = -EINVAL;
}
- ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
- if (ret)
- return ret;
+ if (!err)
+ *out_config = i915_oa_config_get(oa_config);
- *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
- if (!*out_config)
- ret = -EINVAL;
- else
- atomic_inc(&(*out_config)->ref_count);
+ mutex_unlock(&i915->perf.metrics_lock);
- mutex_unlock(&dev_priv->perf.metrics_lock);
+ return err;
+}
- return ret;
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+ i915_oa_config_put(oa_bo->oa_config);
+ i915_gem_object_put(oa_bo->bo);
+ kfree(oa_bo);
+}
+
+int i915_perf_get_oa_config_and_bo(struct i915_perf_stream *stream,
+ int metrics_set,
+ struct i915_oa_config **out_config,
+ struct drm_i915_gem_object **out_obj)
+{
+ struct drm_i915_private *i915 = stream->dev_priv;
+ struct i915_oa_config *oa_config;
+ int err = 0;
+
+ if (!i915->perf.initialized)
+ return -ENODEV;
+
+ err = i915_perf_get_oa_config(i915, metrics_set, &oa_config);
+ if (err)
+ return err;
+
+ if (out_config)
+ *out_config = oa_config;
+
+ if (out_obj) {
+ struct i915_oa_config_bo *oa_bo = NULL, *oa_bo_iter;
+
+ /* Look for the buffer in the already allocated BOs attached
+ * to the stream.
+ */
+ err = mutex_lock_interruptible(&stream->config_mutex);
+ if (err)
+ goto err;
+
+ list_for_each_entry(oa_bo_iter, &stream->oa_config_bos, link) {
+ if (oa_bo_iter->oa_config == oa_config &&
+ memcmp(oa_bo_iter->oa_config->uuid,
+ oa_config->uuid,
+ sizeof(oa_config->uuid)) == 0) {
+ oa_bo = oa_bo_iter;
+ break;
+ }
+ }
+
+ mutex_unlock(&stream->config_mutex);
+
+ if (!oa_bo) {
+ oa_bo = alloc_oa_config_buffer(i915, oa_config);
+ if (IS_ERR(oa_bo)) {
+ err = PTR_ERR(oa_bo);
+ goto err;
+ }
+
+ err = mutex_lock_interruptible(&stream->config_mutex);
+ if (err) {
+ free_oa_config_bo(oa_bo);
+ goto err;
+ }
+
+ list_add(&oa_bo->link, &stream->oa_config_bos);
+
+ mutex_unlock(&stream->config_mutex);
+ }
+
+ *out_obj = i915_gem_object_get(oa_bo->bo);
+ }
+
+err:
+ if (err) {
+ i915_oa_config_put(oa_config);
+ *out_config = NULL;
+ }
+
+ return err;
}
static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
@@ -1362,6 +1524,18 @@ free_oa_buffer(struct i915_perf_stream *stream)
stream->oa_buffer.vaddr = NULL;
}
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+ struct i915_oa_config_bo *oa_bo, *tmp;
+
+ i915_oa_config_put(stream->oa_config);
+ list_for_each_entry_safe(oa_bo, tmp, &stream->oa_config_bos, link) {
+ list_del(&oa_bo->link);
+ free_oa_config_bo(oa_bo);
+ }
+}
+
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
@@ -1385,7 +1559,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
if (stream->ctx)
oa_put_render_ctx_id(stream);
- put_oa_config(dev_priv, stream->oa_config);
+ free_oa_configs(stream);
if (dev_priv->perf.spurious_report_rs.missed) {
DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -2199,6 +2373,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return -EINVAL;
}
+ mutex_init(&stream->config_mutex);
+
stream->sample_size = sizeof(struct drm_i915_perf_record_header);
format_size = dev_priv->perf.oa_formats[props->oa_format].size;
@@ -2227,7 +2403,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
}
}
- ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
+ ret = i915_perf_get_oa_config(dev_priv, props->metrics_set,
+ &stream->oa_config);
if (ret) {
DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
goto err_config;
@@ -2265,6 +2442,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
goto err_enable;
}
+ DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
+
mutex_unlock(&dev_priv->drm.struct_mutex);
hrtimer_init(&stream->poll_check_timer,
@@ -2284,11 +2463,13 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
free_oa_buffer(stream);
err_oa_buf_alloc:
- put_oa_config(dev_priv, stream->oa_config);
+ i915_oa_config_put(stream->oa_config);
intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
+ free_oa_configs(stream);
+
err_config:
if (stream->ctx)
oa_put_render_ctx_id(stream);
@@ -2306,8 +2487,12 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
return;
stream = engine->i915->perf.exclusive_stream;
- if (stream)
+
+ if (stream) {
+ mutex_lock(&stream->config_mutex);
gen8_update_reg_state_unlocked(stream, ce, regs, stream->oa_config);
+ mutex_unlock(&stream->config_mutex);
+ }
}
/**
@@ -2650,7 +2835,9 @@ static int i915_perf_release(struct inode *inode, struct file *file)
struct drm_i915_private *dev_priv = stream->dev_priv;
mutex_lock(&dev_priv->perf.lock);
+
i915_perf_destroy_locked(stream);
+
mutex_unlock(&dev_priv->perf.lock);
/* Release the reference the perf stream kept on the driver. */
@@ -2759,6 +2946,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
goto err_ctx;
}
+ INIT_LIST_HEAD(&stream->oa_config_bos);
stream->dev_priv = dev_priv;
stream->ctx = specific_ctx;
@@ -3085,7 +3273,8 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
if (ret)
goto sysfs_error;
- atomic_set(&dev_priv->perf.test_config.ref_count, 1);
+ dev_priv->perf.test_config.i915 = dev_priv;
+ kref_init(&dev_priv->perf.test_config.ref);
goto exit;
@@ -3341,7 +3530,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
return -ENOMEM;
}
- atomic_set(&oa_config->ref_count, 1);
+ oa_config->i915 = dev_priv;
+ kref_init(&oa_config->ref);
if (!uuid_is_valid(args->uuid)) {
DRM_DEBUG("Invalid uuid format for OA config\n");
@@ -3440,7 +3630,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
sysfs_err:
mutex_unlock(&dev_priv->perf.metrics_lock);
reg_err:
- put_oa_config(dev_priv, oa_config);
+ i915_oa_config_put(oa_config);
DRM_DEBUG("Failed to add new OA config\n");
return err;
}
@@ -3476,13 +3666,13 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
if (ret)
- goto lock_err;
+ return ret;
oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
if (!oa_config) {
DRM_DEBUG("Failed to remove unknown OA config\n");
ret = -ENOENT;
- goto config_err;
+ goto err_unlock;
}
GEM_BUG_ON(*arg != oa_config->id);
@@ -3492,13 +3682,16 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
idr_remove(&dev_priv->perf.metrics_idr, *arg);
+ mutex_unlock(&dev_priv->perf.metrics_lock);
+
DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
- put_oa_config(dev_priv, oa_config);
+ i915_oa_config_put(oa_config);
+
+ return 0;
-config_err:
+err_unlock:
mutex_unlock(&dev_priv->perf.metrics_lock);
-lock_err:
return ret;
}
@@ -3668,10 +3861,9 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
static int destroy_config(int id, void *p, void *data)
{
- struct drm_i915_private *dev_priv = data;
struct i915_oa_config *oa_config = p;
- put_oa_config(dev_priv, oa_config);
+ i915_oa_config_put(oa_config);
return 0;
}
@@ -3685,7 +3877,7 @@ void i915_perf_fini(struct drm_i915_private *dev_priv)
if (!dev_priv->perf.initialized)
return;
- idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
+ idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, NULL);
idr_destroy(&dev_priv->perf.metrics_idr);
unregister_sysctl_table(dev_priv->perf.sysctl_header);
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index 95549de65212..a216e9e2de15 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -7,10 +7,14 @@
#define __I915_PERF_H__
#include <linux/types.h>
+#include <linux/kref.h>
struct drm_device;
struct drm_file;
+struct drm_i915_gem_object;
struct drm_i915_private;
+struct i915_oa_config;
+struct i915_perf_stream;
struct intel_context;
struct intel_engine_cs;
@@ -29,5 +33,27 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
void i915_oa_init_reg_state(struct intel_engine_cs *engine,
struct intel_context *ce,
u32 *reg_state);
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+ int metrics_set,
+ struct i915_oa_config **out_config);
+int i915_perf_get_oa_config_and_bo(struct i915_perf_stream *stream,
+ int metrics_set,
+ struct i915_oa_config **out_config,
+ struct drm_i915_gem_object **out_obj);
+void i915_oa_config_release(struct kref *ref);
+
+static inline struct i915_oa_config *i915_oa_config_get(struct i915_oa_config *oa_config)
+{
+ kref_get(&oa_config->ref);
+ return oa_config;
+}
+
+static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
+{
+ if (!oa_config)
+ return;
+
+ kref_put(&oa_config->ref, i915_oa_config_release);
+}
#endif /* __I915_PERF_H__ */
--
2.23.0
More information about the Intel-gfx
mailing list