[Intel-gfx] [PATCH v5 09/10] drm/i915/perf: execute OA configuration from command stream
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Thu Jun 27 08:00:44 UTC 2019
We haven't run into issues with writing the global OA/NOA
register configuration from the CPU so far, but HW engineers
recommend doing this from the command streamer instead.
Since we have a command buffer prepared for the execbuffer side of
things, we can reuse that approach here too.
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
drivers/gpu/drm/i915/i915_perf.c | 203 +++++++++++++++----------------
1 file changed, 100 insertions(+), 103 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index bf4f5fee6764..7e636463e1f5 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -389,6 +389,19 @@ void i915_oa_config_release(struct kref *ref)
kfree(oa_config);
}
+static void i915_oa_config_dispose_buffers(struct drm_i915_private *i915)
+{
+ struct i915_oa_config *oa_config, *next;
+
+ mutex_lock(&i915->perf.metrics_lock);
+ list_for_each_entry_safe(oa_config, next, &i915->perf.metrics_buffers, vma_link) {
+ list_del(&oa_config->vma_link);
+ i915_gem_object_put(oa_config->obj);
+ oa_config->obj = NULL;
+ }
+ mutex_unlock(&i915->perf.metrics_lock);
+}
+
static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs)
{
u32 i;
@@ -1813,67 +1826,86 @@ static int alloc_noa_wait(struct drm_i915_private *i915)
return ret;
}
-static void config_oa_regs(struct drm_i915_private *dev_priv,
- const struct i915_oa_reg *regs,
- u32 n_regs)
+static int config_oa_regs(struct drm_i915_private *i915,
+ struct i915_oa_config *oa_config)
{
- u32 i;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ long timeout;
+ u32 *cs;
+ int err;
- for (i = 0; i < n_regs; i++) {
- const struct i915_oa_reg *reg = regs + i;
+ rq = i915_request_create(i915->engine[RCS0]->kernel_context);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
- I915_WRITE(reg->addr, reg->value);
+ err = i915_active_request_set(&i915->engine[RCS0]->last_oa_config,
+ rq);
+ if (err) {
+ i915_request_add(rq);
+ return err;
+ }
+
+ vma = i915_vma_instance(oa_config->obj, &i915->ggtt.vm, NULL);
+ if (unlikely(IS_ERR(vma))) {
+ i915_request_add(rq);
+ return PTR_ERR(vma);
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err) {
+ i915_request_add(rq);
+ return err;
}
+
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (err) {
+ i915_vma_unpin(vma);
+ i915_request_add(rq);
+ return err;
+ }
+
+ cs = intel_ring_begin(rq, INTEL_GEN(i915) >= 8 ? 4 : 2);
+ if (IS_ERR(cs)) {
+ i915_vma_unpin(vma);
+ i915_request_add(rq);
+ return PTR_ERR(cs);
+ }
+
+ if (INTEL_GEN(i915) > 8) {
+ *cs++ = MI_BATCH_BUFFER_START_GEN8;
+ *cs++ = lower_32_bits(vma->node.start);
+ *cs++ = upper_32_bits(vma->node.start);
+ *cs++ = MI_NOOP;
+ } else {
+ *cs++ = MI_BATCH_BUFFER_START;
+ *cs++ = vma->node.start;
+ }
+
+ intel_ring_advance(rq, cs);
+
+ i915_vma_unpin(vma);
+
+ i915_request_add(rq);
+
+ i915_request_get(rq);
+ timeout = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(rq);
+
+ return timeout < 0 ? err : 0;
}
static int hsw_enable_metric_set(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- const struct i915_oa_config *oa_config = stream->oa_config;
- /* PRM:
- *
- * OA unit is using “crclk” for its functionality. When trunk
- * level clock gating takes place, OA clock would be gated,
- * unable to count the events from non-render clock domain.
- * Render clock gating must be disabled when OA is enabled to
- * count the events from non-render domain. Unit level clock
- * gating for RCS should also be disabled.
- */
I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
~GEN7_DOP_CLOCK_GATE_ENABLE));
I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
GEN6_CSUNIT_CLOCK_GATE_DISABLE));
- config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
-
- /* It apparently takes a fairly long time for a new MUX
- * configuration to be be applied after these register writes.
- * This delay duration was derived empirically based on the
- * render_basic config but hopefully it covers the maximum
- * configuration latency.
- *
- * As a fallback, the checks in _append_oa_reports() to skip
- * invalid OA reports do also seem to work to discard reports
- * generated before this config has completed - albeit not
- * silently.
- *
- * Unfortunately this is essentially a magic number, since we
- * don't currently know of a reliable mechanism for predicting
- * how long the MUX config will take to apply and besides
- * seeing invalid reports we don't know of a reliable way to
- * explicitly check that the MUX config has landed.
- *
- * It's even possible we've miss characterized the underlying
- * problem - it just seems like the simplest explanation why
- * a delay at this location would mitigate any invalid reports.
- */
- usleep_range(15000, 20000);
-
- config_oa_regs(dev_priv, oa_config->b_counter_regs,
- oa_config->b_counter_regs_len);
-
- return 0;
+ return config_oa_regs(dev_priv, stream->oa_config);
}
static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
@@ -1978,7 +2010,6 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
{
unsigned int map_type = i915_coherent_map_type(dev_priv);
struct i915_gem_context *ctx;
- struct i915_request *rq;
int ret;
lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -2037,14 +2068,9 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
}
/*
- * Apply the configuration by doing one context restore of the edited
- * context image.
+ * The above configuration will be applied when config_oa_regs()
+ * is called.
*/
- rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- i915_request_add(rq);
return 0;
}
@@ -2093,35 +2119,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
if (ret)
return ret;
- config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
-
- /* It apparently takes a fairly long time for a new MUX
- * configuration to be be applied after these register writes.
- * This delay duration was derived empirically based on the
- * render_basic config but hopefully it covers the maximum
- * configuration latency.
- *
- * As a fallback, the checks in _append_oa_reports() to skip
- * invalid OA reports do also seem to work to discard reports
- * generated before this config has completed - albeit not
- * silently.
- *
- * Unfortunately this is essentially a magic number, since we
- * don't currently know of a reliable mechanism for predicting
- * how long the MUX config will take to apply and besides
- * seeing invalid reports we don't know of a reliable way to
- * explicitly check that the MUX config has landed.
- *
- * It's even possible we've miss characterized the underlying
- * problem - it just seems like the simplest explanation why
- * a delay at this location would mitigate any invalid reports.
- */
- usleep_range(15000, 20000);
-
- config_oa_regs(dev_priv, oa_config->b_counter_regs,
- oa_config->b_counter_regs_len);
-
- return 0;
+ return config_oa_regs(dev_priv, stream->oa_config);
}
static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
@@ -2292,6 +2290,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
struct perf_open_properties *props)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
+ struct drm_i915_gem_object *obj;
int format_size;
int ret;
@@ -2376,13 +2375,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
}
}
- ret = i915_perf_get_oa_config(dev_priv, props->metrics_set,
- &stream->oa_config, NULL);
- if (ret) {
- DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
- goto err_config;
- }
-
ret = alloc_noa_wait(dev_priv);
if (ret) {
DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
@@ -2412,6 +2404,19 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
if (ret)
goto err_lock;
+ ret = i915_perf_get_oa_config(dev_priv, props->metrics_set,
+ &stream->oa_config, &obj);
+ if (ret) {
+ DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
+ goto err_config;
+ }
+
+ /*
+ * We just need the buffer to be created, but not our own reference on
+ * it as the oa_config already has one.
+ */
+ i915_gem_object_put(obj);
+
stream->ops = &i915_oa_stream_ops;
dev_priv->perf.oa.exclusive_stream = stream;
@@ -2430,14 +2435,16 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
err_enable:
dev_priv->perf.oa.exclusive_stream = NULL;
dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
+
+err_config:
+ i915_oa_config_put(stream->oa_config);
+ i915_oa_config_dispose_buffers(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
err_lock:
free_oa_buffer(dev_priv);
err_oa_buf_alloc:
- i915_oa_config_put(stream->oa_config);
-
intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
@@ -2446,9 +2453,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
mutex_unlock(&dev_priv->drm.struct_mutex);
err_noa_wait_alloc:
- i915_oa_config_put(stream->oa_config);
-
-err_config:
if (stream->ctx)
oa_put_render_ctx_id(stream);
@@ -2810,20 +2814,13 @@ static int i915_perf_release(struct inode *inode, struct file *file)
{
struct i915_perf_stream *stream = file->private_data;
struct drm_i915_private *dev_priv = stream->dev_priv;
- struct i915_oa_config *oa_config, *next;
mutex_lock(&dev_priv->perf.lock);
i915_perf_destroy_locked(stream);
/* Dispose of all oa config batch buffers. */
- mutex_lock(&dev_priv->perf.metrics_lock);
- list_for_each_entry_safe(oa_config, next, &dev_priv->perf.metrics_buffers, vma_link) {
- list_del(&oa_config->vma_link);
- i915_gem_object_put(oa_config->obj);
- oa_config->obj = NULL;
- }
- mutex_unlock(&dev_priv->perf.metrics_lock);
+ i915_oa_config_dispose_buffers(dev_priv);
mutex_unlock(&dev_priv->perf.lock);
--
2.21.0.392.gf8f6787159e
More information about the Intel-gfx
mailing list