[Intel-gfx] [PATCH 10/14] drm/i915: Link perf stream structures with Engines
Sagar Arun Kamble
sagar.a.kamble at intel.com
Thu Sep 7 10:06:10 UTC 2017
To allow opening CS perf streams for other engines, this patch
associates an exclusive stream (and its SRCU) with each engine and
moves specific_ctx_id, poll_wq and pollin into the per-stream structure.
Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 26 +++--
drivers/gpu/drm/i915/i915_perf.c | 168 +++++++++++++++-----------------
drivers/gpu/drm/i915/intel_engine_cs.c | 4 +
drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +
drivers/gpu/drm/i915/intel_ringbuffer.h | 8 ++
5 files changed, 104 insertions(+), 104 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c38cf5..8bd8c0a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2068,6 +2068,11 @@ struct i915_perf_stream {
*/
struct drm_i915_private *dev_priv;
+ /**
+ * @engine: Engine to which this stream corresponds.
+ */
+ struct intel_engine_cs *engine;
+
/**
* @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
* properties given when opening a stream, representing the contents
@@ -2132,6 +2137,10 @@ struct i915_perf_stream {
u32 last_ctx_id;
u64 last_pid;
u32 last_tag;
+
+ u32 specific_ctx_id;
+ wait_queue_head_t poll_wq;
+ bool pollin;
};
/**
@@ -2620,6 +2629,8 @@ struct drm_i915_private {
*/
struct idr metrics_idr;
+ struct hrtimer poll_check_timer;
+
/*
* Lock associated with anything below within this structure
* except exclusive_stream.
@@ -2627,21 +2638,6 @@ struct drm_i915_private {
struct mutex lock;
struct {
- /*
- * The stream currently using the OA unit. If accessed
- * outside a syscall associated to its file
- * descriptor, you need to hold
- * dev_priv->drm.struct_mutex.
- */
- struct i915_perf_stream *exclusive_stream;
- struct srcu_struct srcu;
-
- u32 specific_ctx_id;
-
- struct hrtimer poll_check_timer;
- wait_queue_head_t poll_wq;
- bool pollin;
-
/**
* For rate limiting any notifications of spurious
* invalid OA reports
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index b44d76e..aad8b23 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -551,20 +551,20 @@ void i915_perf_emit_sample_capture(struct drm_i915_gem_request *request,
u32 tag)
{
struct drm_i915_private *dev_priv = request->i915;
+ struct intel_engine_cs *engine = request->engine;
struct i915_perf_stream *stream;
int idx;
if (!dev_priv->perf.initialized)
return;
- idx = srcu_read_lock(&dev_priv->perf.oa.srcu);
- stream = srcu_dereference(dev_priv->perf.oa.exclusive_stream,
- &dev_priv->perf.oa.srcu);
+ idx = srcu_read_lock(&engine->srcu);
+ stream = srcu_dereference(engine->exclusive_stream,
+ &engine->srcu);
if (stream && stream->enabled && stream->cs_mode)
stream->ops->emit_sample_capture(stream, request,
preallocate, tag);
-
- srcu_read_unlock(&dev_priv->perf.oa.srcu, idx);
+ srcu_read_unlock(&engine->srcu, idx);
}
/**
@@ -663,18 +663,19 @@ static void i915_perf_stream_patch_request(struct i915_perf_stream *stream,
void i915_perf_patch_request(struct drm_i915_gem_request *request)
{
struct drm_i915_private *dev_priv = request->i915;
+ struct intel_engine_cs *engine = request->engine;
struct i915_perf_stream *stream;
int idx;
if (!dev_priv->perf.initialized)
return;
- idx = srcu_read_lock(&dev_priv->perf.oa.srcu);
- stream = srcu_dereference(dev_priv->perf.oa.exclusive_stream,
- &dev_priv->perf.oa.srcu);
+ idx = srcu_read_lock(&engine->srcu);
+ stream = srcu_dereference(engine->exclusive_stream,
+ &engine->srcu);
if (stream && stream->enabled && stream->cs_mode)
stream->ops->patch_request(stream, request);
- srcu_read_unlock(&dev_priv->perf.oa.srcu, idx);
+ srcu_read_unlock(&engine->srcu, idx);
}
/**
@@ -1198,18 +1199,18 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* switches since it's not-uncommon for periodic samples to
* identify a switch before any 'context switch' report.
*/
- if (!dev_priv->perf.oa.exclusive_stream->ctx ||
- dev_priv->perf.oa.specific_ctx_id == ctx_id ||
+ if (!stream->ctx ||
+ stream->specific_ctx_id == ctx_id ||
(dev_priv->perf.oa.oa_buffer.last_ctx_id ==
- dev_priv->perf.oa.specific_ctx_id) ||
+ stream->specific_ctx_id) ||
reason & OAREPORT_REASON_CTX_SWITCH) {
/*
* While filtering for a single context we avoid
* leaking the IDs of other contexts.
*/
- if (dev_priv->perf.oa.exclusive_stream->ctx &&
- dev_priv->perf.oa.specific_ctx_id != ctx_id) {
+ if (stream->ctx &&
+ stream->specific_ctx_id != ctx_id) {
report32[2] = INVALID_CTX_ID;
}
@@ -1851,7 +1852,7 @@ static int i915_perf_stream_wait_unlocked(struct i915_perf_stream *stream)
}
}
- return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
+ return wait_event_interruptible(stream->poll_wq,
stream_have_data_unlocked(stream));
}
@@ -1870,9 +1871,7 @@ static void i915_perf_stream_poll_wait(struct i915_perf_stream *stream,
struct file *file,
poll_table *wait)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
-
- poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
+ poll_wait(file, &stream->poll_wq, wait);
}
/**
@@ -1920,7 +1919,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
struct drm_i915_private *dev_priv = stream->dev_priv;
if (i915.enable_execlists)
- dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id;
+ stream->specific_ctx_id = stream->ctx->hw_id;
else {
struct intel_engine_cs *engine = dev_priv->engine[RCS];
struct intel_ring *ring;
@@ -1947,7 +1946,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
* i915_ggtt_offset() on the fly) considering the difference
* with gen8+ and execlists
*/
- dev_priv->perf.oa.specific_ctx_id =
+ stream->specific_ctx_id =
i915_ggtt_offset(stream->ctx->engine[engine->id].state);
}
@@ -1966,13 +1965,13 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
struct drm_i915_private *dev_priv = stream->dev_priv;
if (i915.enable_execlists) {
- dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
+ stream->specific_ctx_id = INVALID_CTX_ID;
} else {
struct intel_engine_cs *engine = dev_priv->engine[RCS];
mutex_lock(&dev_priv->drm.struct_mutex);
- dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
+ stream->specific_ctx_id = INVALID_CTX_ID;
engine->context_unpin(engine, stream->ctx);
mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -2034,23 +2033,24 @@ static void free_perf_samples(struct i915_perf_stream *stream)
static void i915_perf_stream_destroy(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct intel_engine_cs *engine = stream->engine;
struct i915_perf_stream *engine_stream;
int idx;
- idx = srcu_read_lock(&dev_priv->perf.oa.srcu);
- engine_stream = srcu_dereference(dev_priv->perf.oa.exclusive_stream,
- &dev_priv->perf.oa.srcu);
+ idx = srcu_read_lock(&engine->srcu);
+ engine_stream = srcu_dereference(engine->exclusive_stream,
+ &engine->srcu);
if (WARN_ON(stream != engine_stream))
return;
- srcu_read_unlock(&dev_priv->perf.oa.srcu, idx);
+ srcu_read_unlock(&engine->srcu, idx);
/*
* Unset exclusive_stream first, it will be checked while disabling
* the metric set on gen8+.
*/
mutex_lock(&dev_priv->drm.struct_mutex);
- rcu_assign_pointer(dev_priv->perf.oa.exclusive_stream, NULL);
- synchronize_srcu(&dev_priv->perf.oa.srcu);
+ rcu_assign_pointer(engine->exclusive_stream, NULL);
+ synchronize_srcu(&engine->srcu);
mutex_unlock(&dev_priv->drm.struct_mutex);
if (stream->using_oa) {
@@ -2118,11 +2118,6 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
* memory...
*/
memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
-
- /* Maybe make ->pollin per-stream state if we support multiple
- * concurrent streams in the future.
- */
- dev_priv->perf.oa.pollin = false;
}
static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
@@ -2176,12 +2171,6 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
* memory...
*/
memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
-
- /*
- * Maybe make ->pollin per-stream state if we support multiple
- * concurrent streams in the future.
- */
- dev_priv->perf.oa.pollin = false;
}
static int alloc_obj(struct drm_i915_private *dev_priv,
@@ -2720,6 +2709,7 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
static void gen7_oa_enable(struct drm_i915_private *dev_priv)
{
struct i915_perf_stream *stream;
+ struct intel_engine_cs *engine = dev_priv->engine[RCS];
int idx;
/*
@@ -2733,13 +2723,12 @@ static void gen7_oa_enable(struct drm_i915_private *dev_priv)
*/
gen7_init_oa_buffer(dev_priv);
- idx = srcu_read_lock(&dev_priv->perf.oa.srcu);
- stream = srcu_dereference(dev_priv->perf.oa.exclusive_stream,
- &dev_priv->perf.oa.srcu);
+ idx = srcu_read_lock(&engine->srcu);
+ stream = srcu_dereference(engine->exclusive_stream,
+ &engine->srcu);
if (!stream->enabled) {
- struct i915_gem_context *ctx =
- dev_priv->perf.oa.exclusive_stream->ctx;
- u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;
+ struct i915_gem_context *ctx = stream->ctx;
+ u32 ctx_id = stream->specific_ctx_id;
bool periodic = dev_priv->perf.oa.periodic;
u32 period_exponent = dev_priv->perf.oa.period_exponent;
u32 report_format = dev_priv->perf.oa.oa_buffer.format;
@@ -2754,7 +2743,7 @@ static void gen7_oa_enable(struct drm_i915_private *dev_priv)
GEN7_OACONTROL_ENABLE);
} else
I915_WRITE(GEN7_OACONTROL, 0);
- srcu_read_unlock(&dev_priv->perf.oa.srcu, idx);
+ srcu_read_unlock(&engine->srcu, idx);
}
static void gen8_oa_enable(struct drm_i915_private *dev_priv)
@@ -2799,7 +2788,7 @@ static void i915_perf_stream_enable(struct i915_perf_stream *stream)
dev_priv->perf.oa.ops.oa_enable(dev_priv);
if (stream->cs_mode || dev_priv->perf.oa.periodic)
- hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
+ hrtimer_start(&dev_priv->perf.poll_check_timer,
ns_to_ktime(POLL_PERIOD),
HRTIMER_MODE_REL_PINNED);
}
@@ -2827,7 +2816,7 @@ static void i915_perf_stream_disable(struct i915_perf_stream *stream)
struct drm_i915_private *dev_priv = stream->dev_priv;
if (stream->cs_mode || dev_priv->perf.oa.periodic)
- hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
+ hrtimer_cancel(&dev_priv->perf.poll_check_timer);
if (stream->cs_mode)
i915_perf_stream_release_samples(stream);
@@ -2930,20 +2919,21 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
return -ENODEV;
}
+ engine = dev_priv->engine[RCS];
/*
* To avoid the complexity of having to accurately filter
* counter reports and marshal to the appropriate client
* we currently only allow exclusive access
*/
- idx = srcu_read_lock(&dev_priv->perf.oa.srcu);
+ idx = srcu_read_lock(&engine->srcu);
curr_stream = srcu_dereference(
- dev_priv->perf.oa.exclusive_stream,
- &dev_priv->perf.oa.srcu);
+ engine->exclusive_stream,
+ &engine->srcu);
if (curr_stream) {
DRM_ERROR("Stream already opened\n");
return -EBUSY;
}
- srcu_read_unlock(&dev_priv->perf.oa.srcu, idx);
+ srcu_read_unlock(&engine->srcu, idx);
if (!props->oa_format) {
DRM_DEBUG("OA report format not specified\n");
@@ -2957,8 +2947,8 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
return -EINVAL;
}
- engine = dev_priv->engine[RCS];
stream->using_oa = true;
+ stream->engine = engine;
format_size =
dev_priv->perf.oa.oa_formats[props->oa_format].size;
@@ -3076,17 +3066,19 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
stream->last_tag = INVALID_TAG;
}
- idx = srcu_read_lock(&dev_priv->perf.oa.srcu);
+ engine = dev_priv->engine[props->engine];
+ idx = srcu_read_lock(&engine->srcu);
curr_stream = srcu_dereference(
- dev_priv->perf.oa.exclusive_stream,
- &dev_priv->perf.oa.srcu);
+ engine->exclusive_stream,
+ &engine->srcu);
if (curr_stream) {
DRM_ERROR("Stream already opened\n");
ret = -EINVAL;
goto err_enable;
}
- srcu_read_unlock(&dev_priv->perf.oa.srcu, idx);
+ srcu_read_unlock(&engine->srcu, idx);
+ stream->engine = engine;
ret = alloc_cs_buffer(stream);
if (ret)
goto err_enable;
@@ -3095,6 +3087,8 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
}
stream->ops = &perf_stream_ops;
+ init_waitqueue_head(&stream->poll_wq);
+ stream->pollin = false;
/* Lock device for exclusive_stream access late because
* enable_metric_set() might lock as well on gen8+.
@@ -3102,7 +3096,7 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
ret = i915_mutex_lock_interruptible(&dev_priv->drm);
if (ret)
goto err_lock;
- rcu_assign_pointer(dev_priv->perf.oa.exclusive_stream, stream);
+ rcu_assign_pointer(engine->exclusive_stream, stream);
mutex_unlock(&dev_priv->drm.struct_mutex);
return 0;
@@ -3136,7 +3130,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
if (engine->id != RCS)
return;
- stream = engine->i915->perf.oa.exclusive_stream;
+ stream = engine->exclusive_stream;
if (stream)
gen8_update_reg_state_unlocked(ctx, reg_state, stream->oa_config);
}
@@ -3251,7 +3245,7 @@ static ssize_t i915_perf_read(struct file *file,
* before reporting another POLLIN event.
*/
if (ret >= 0 || ret == -EAGAIN)
- dev_priv->perf.oa.pollin = false;
+ stream->pollin = false;
return ret;
}
@@ -3261,21 +3255,21 @@ static enum hrtimer_restart poll_check_timer_cb(struct hrtimer *hrtimer)
struct i915_perf_stream *stream;
struct drm_i915_private *dev_priv =
container_of(hrtimer, typeof(*dev_priv),
- perf.oa.poll_check_timer);
+ perf.poll_check_timer);
int idx;
struct intel_engine_cs *engine;
enum intel_engine_id id;
for_each_engine(engine, dev_priv, id) {
- idx = srcu_read_lock(&dev_priv->perf.oa.srcu);
- stream = srcu_dereference(dev_priv->perf.oa.exclusive_stream,
- &dev_priv->perf.oa.srcu);
+ idx = srcu_read_lock(&engine->srcu);
+ stream = srcu_dereference(engine->exclusive_stream,
+ &engine->srcu);
if (stream && stream->enabled &&
stream_have_data_unlocked(stream)) {
- dev_priv->perf.oa.pollin = true;
- wake_up(&dev_priv->perf.oa.poll_wq);
+ stream->pollin = true;
+ wake_up(&stream->poll_wq);
}
- srcu_read_unlock(&dev_priv->perf.oa.srcu, idx);
+ srcu_read_unlock(&engine->srcu, idx);
}
hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
@@ -3314,7 +3308,7 @@ static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv,
* the hrtimer/oa_poll_check_timer_cb to notify us when there are
* samples to read.
*/
- if (dev_priv->perf.oa.pollin)
+ if (stream->pollin)
events |= POLLIN;
return events;
@@ -4374,20 +4368,24 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
void i915_perf_streams_mark_idle(struct drm_i915_private *dev_priv)
{
struct i915_perf_stream *stream;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
int idx;
- idx = srcu_read_lock(&dev_priv->perf.oa.srcu);
- stream = srcu_dereference(dev_priv->perf.oa.exclusive_stream,
- &dev_priv->perf.oa.srcu);
- if (stream && stream->enabled && stream->cs_mode) {
- struct reservation_object *resv =
- stream->cs_buffer.vma->resv;
-
- reservation_object_lock(resv, NULL);
- reservation_object_add_excl_fence(resv, NULL);
- reservation_object_unlock(resv);
+ for_each_engine(engine, dev_priv, id) {
+ idx = srcu_read_lock(&engine->srcu);
+ stream = srcu_dereference(engine->exclusive_stream,
+ &engine->srcu);
+ if (stream && stream->enabled && stream->cs_mode) {
+ struct reservation_object *resv =
+ stream->cs_buffer.vma->resv;
+
+ reservation_object_lock(resv, NULL);
+ reservation_object_add_excl_fence(resv, NULL);
+ reservation_object_unlock(resv);
+ }
+ srcu_read_unlock(&engine->srcu, idx);
}
- srcu_read_unlock(&dev_priv->perf.oa.srcu, idx);
}
/**
@@ -4483,19 +4481,13 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
}
if (dev_priv->perf.oa.timestamp_frequency) {
- hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
+ hrtimer_init(&dev_priv->perf.poll_check_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- dev_priv->perf.oa.poll_check_timer.function =
- poll_check_timer_cb;
- init_waitqueue_head(&dev_priv->perf.oa.poll_wq);
+ dev_priv->perf.poll_check_timer.function = poll_check_timer_cb;
mutex_init(&dev_priv->perf.lock);
spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
- /* Perf stream related initialization for Engine */
- rcu_assign_pointer(dev_priv->perf.oa.exclusive_stream, NULL);
- init_srcu_struct(&dev_priv->perf.oa.srcu);
-
oa_sample_rate_hard_limit =
dev_priv->perf.oa.timestamp_frequency / 2;
dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
@@ -4533,7 +4525,5 @@ void i915_perf_fini(struct drm_i915_private *dev_priv)
memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));
- cleanup_srcu_struct(&dev_priv->perf.oa.srcu);
-
dev_priv->perf.initialized = false;
}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index ae66834..f3199b9 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -317,6 +317,10 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
goto cleanup;
GEM_BUG_ON(!engine->submit_request);
+
+ /* Perf stream related initialization for Engine */
+ rcu_assign_pointer(engine->exclusive_stream, NULL);
+ init_srcu_struct(&engine->srcu);
}
return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cdf084e..501c544 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1622,6 +1622,8 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
intel_engine_cleanup_common(engine);
+ cleanup_srcu_struct(&engine->srcu);
+
dev_priv->engine[engine->id] = NULL;
kfree(engine);
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 02d8974..2fcfaf4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -441,6 +441,14 @@ struct intel_engine_cs {
* certain bits to encode the command length in the header).
*/
u32 (*get_cmd_length_mask)(u32 cmd_header);
+
+ /*
+ * For RCS, this stream can use the OA unit. All accesses are guarded
+ * by SRCU, including those in syscalls, as we can't hold
+ * dev_priv->drm.struct_mutex in the execbuf path.
+ */
+ struct i915_perf_stream __rcu *exclusive_stream;
+ struct srcu_struct srcu;
};
static inline unsigned int
--
1.9.1
More information about the Intel-gfx
mailing list