[Intel-gfx] [RFC PATCH 2/3] drm/i915/perf: allow for CS OA configs to be created lazily
Chris Wilson
chris at chris-wilson.co.uk
Mon Oct 8 15:34:40 UTC 2018
Quoting Lionel Landwerlin (2018-10-08 16:18:21)
> Here we introduce a mechanism by which the execbuf part of the i915
> driver will be able to request that a batch buffer containing the
> programming for a particular OA config be created.
>
> We'll execute these OA configuration buffers right before executing a
> set of userspace commands so that a particular user batchbuffer can be
> executed with a given OA configuration.
>
> This mechanism essentially allows the userspace driver to go through
> several OA configurations without having to open/close the i915/perf
> stream.
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 22 ++-
> drivers/gpu/drm/i915/i915_perf.c | 195 ++++++++++++++++++----
> drivers/gpu/drm/i915/intel_gpu_commands.h | 1 +
> 3 files changed, 187 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2264b30ce51a..a35715cd7608 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1378,6 +1378,10 @@ struct i915_oa_config {
> struct attribute *attrs[2];
> struct device_attribute sysfs_metric_id;
>
> + struct i915_vma *vma;
> +
> + struct list_head vma_link;
> +
> atomic_t ref_count;
> };
>
> @@ -1979,11 +1983,21 @@ struct drm_i915_private {
> struct mutex metrics_lock;
>
> /*
> - * List of dynamic configurations, you need to hold
> - * dev_priv->perf.metrics_lock to access it.
> + * List of dynamic configurations (struct i915_oa_config), you
> + * need to hold dev_priv->perf.metrics_lock to access it.
> */
> struct idr metrics_idr;
>
> + /*
> + * List of dynamic configurations (struct i915_oa_config)
> + * which have an allocated buffer in GGTT for reconfiguration,
> + * you need to hold dev_priv->perf.metrics_lock to access it.
> +        * Elements are added to the list lazily on execbuf (when a
> + * particular configuration is requested). The list is freed
> + * upon closing the perf stream.
> + */
> + struct list_head metrics_buffers;
> +
> /*
> * Lock associated with anything below within this structure
> * except exclusive_stream.
> @@ -3315,6 +3329,10 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
> void i915_oa_init_reg_state(struct intel_engine_cs *engine,
> struct i915_gem_context *ctx,
> uint32_t *reg_state);
> +int i915_perf_get_oa_config(struct drm_i915_private *i915,
> + int metrics_set,
> + struct i915_oa_config **out_config,
> + struct i915_vma **out_vma);
>
> /* i915_gem_evict.c */
> int __must_check i915_gem_evict_something(struct i915_address_space *vm,
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index e2a96b6844fe..39c5b44862d4 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -364,9 +364,16 @@ struct perf_open_properties {
> int oa_period_exponent;
> };
>
> -static void free_oa_config(struct drm_i915_private *dev_priv,
> - struct i915_oa_config *oa_config)
> +static void put_oa_config(struct i915_oa_config *oa_config)
> {
> + if (!atomic_dec_and_test(&oa_config->ref_count))
> + return;
> +
> + if (oa_config->vma) {
> + list_del(&oa_config->vma_link);
> + i915_vma_put(oa_config->vma);
> + }
> +
> if (!PTR_ERR(oa_config->flex_regs))
> kfree(oa_config->flex_regs);
> if (!PTR_ERR(oa_config->b_counter_regs))
> @@ -376,38 +383,152 @@ static void free_oa_config(struct drm_i915_private *dev_priv,
> kfree(oa_config);
> }
>
> -static void put_oa_config(struct drm_i915_private *dev_priv,
> - struct i915_oa_config *oa_config)
> +static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs)
> {
> - if (!atomic_dec_and_test(&oa_config->ref_count))
> - return;
> + u32 i;
>
> - free_oa_config(dev_priv, oa_config);
> + for (i = 0; i < n_regs; i++) {
> + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
> + u32 n_lri = min(n_regs - i,
> + (u32) MI_LOAD_REGISTER_IMM_MAX_REGS);
> +
> + *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
> + }
> + *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
> + *cs++ = reg_data[i].value;
> + }
> +
> + return cs;
> }
>
> -static int get_oa_config(struct drm_i915_private *dev_priv,
> - int metrics_set,
> - struct i915_oa_config **out_config)
> +static int alloc_oa_config_buffer(struct drm_i915_private *i915,
> + struct i915_oa_config *oa_config)
> {
> + struct drm_i915_gem_object *bo;
> + size_t config_length = 0;
> int ret;
> + u32 *cs;
>
> - if (metrics_set == 1) {
> - *out_config = &dev_priv->perf.oa.test_config;
> - atomic_inc(&dev_priv->perf.oa.test_config.ref_count);
> - return 0;
> + if (oa_config->mux_regs_len > 0) {
> + config_length += DIV_ROUND_UP(oa_config->mux_regs_len,
> + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
> + config_length += oa_config->mux_regs_len * 8;
> }
> + if (oa_config->b_counter_regs_len > 0) {
> + config_length += DIV_ROUND_UP(oa_config->b_counter_regs_len,
> + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
> + config_length += oa_config->b_counter_regs_len * 8;
> + }
> + if (oa_config->flex_regs_len > 0) {
> + config_length += DIV_ROUND_UP(oa_config->flex_regs_len,
> + MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
> + config_length += oa_config->flex_regs_len * 8;
> + }
> + config_length += 4; /* MI_BATCH_BUFFER_END */
> + config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE);
>
> - ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
> + ret = i915_mutex_lock_interruptible(&i915->drm);
> if (ret)
> return ret;
>
> - *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
> - if (!*out_config)
> - ret = -EINVAL;
> - else
> - atomic_inc(&(*out_config)->ref_count);
> + bo = i915_gem_object_create(i915, config_length);
> + if (IS_ERR(bo)) {
> + ret = PTR_ERR(bo);
> + goto unlock;
> + }
> +
> + ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
Don't enable snoop on a batchbuffer.
> + if (ret)
> + goto err_unref;
>
> - mutex_unlock(&dev_priv->perf.metrics_lock);
> + oa_config->vma = i915_gem_object_ggtt_pin(bo, NULL, 0, config_length, 0);
Why have you pinned it?
> + if (IS_ERR(oa_config->vma)) {
> + ret = PTR_ERR(oa_config->vma);
> + oa_config->vma = NULL;
> + goto err_unref;
> + }
> +
> + cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
> + if (IS_ERR(cs)) {
> + ret = PTR_ERR(cs);
> + goto err_unpin;
> + }
> +
> + memset(cs, 0, config_length);
Already zero. Or use create_internal to avoid shmemfs overhead. And
since you write all bytes, you can just ignore it.
> + cs = write_cs_mi_lri(cs, oa_config->mux_regs, oa_config->mux_regs_len);
> + cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len);
> + cs = write_cs_mi_lri(cs, oa_config->flex_regs, oa_config->flex_regs_len);
> +
> + *cs++ = MI_BATCH_BUFFER_END;
> +
> + i915_gem_object_unpin_map(bo);
> +
> + goto unlock;
> +
> +err_unpin:
> + __i915_vma_unpin(oa_config->vma);
> +
> +err_unref:
> + oa_config->vma = NULL;
> + i915_gem_object_put(bo);
> +
> +unlock:
> + mutex_unlock(&i915->drm.struct_mutex);
> + return ret;
> +}
> +
> +int i915_perf_get_oa_config(struct drm_i915_private *i915,
> + int metrics_set,
> + struct i915_oa_config **out_config,
> + struct i915_vma **out_vma)
> +{
> + int ret = 0;
> + struct i915_oa_config *oa_config;
> +
> + if (!i915->perf.initialized)
> + return -ENODEV;
> +
> + ret = mutex_lock_interruptible(&i915->perf.metrics_lock);
> + if (ret)
> + return ret;
> +
> + if (metrics_set == 1) {
> + oa_config = &i915->perf.oa.test_config;
> + } else {
> + oa_config = idr_find(&i915->perf.metrics_idr, metrics_set);
> + if (!oa_config) {
> + ret = -EINVAL;
> + goto unlock;
> + }
> + }
> +
> + if (out_config) {
> + atomic_inc(&oa_config->ref_count);
> + *out_config = oa_config;
> + }
> +
> + if (out_vma) {
> + if (oa_config->vma) {
> + *out_vma = i915_vma_get(oa_config->vma);
> + } else {
> + ret = alloc_oa_config_buffer(i915, oa_config);
> + if (ret) {
> + goto err_buf_alloc;
> + } else {
> + list_add(&oa_config->vma_link,
> + &i915->perf.metrics_buffers);
> + *out_vma = i915_vma_get(oa_config->vma);
> + }
> + }
Where is out_vma used, so we can check whether the lifetime tracking is ok?
So far you are releasing it before we know it is idle.
-Chris
More information about the Intel-gfx
mailing list