[Intel-gfx] [PATCH v5 07/10] drm/i915: add a new perf configuration execbuf parameter
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Thu Jun 27 12:32:13 UTC 2019
On 27/06/2019 12:45, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-06-27 09:00:42)
>> We want the ability to dispatch a set of command buffer to the
>> hardware, each with a different OA configuration. To achieve this, we
>> reuse a couple of fields from the execbuf2 struct (I CAN HAZ
>> execbuf3?) to notify what OA configuration should be used for a batch
>> buffer. This requires the process making the execbuf with this flag to
>> also own the perf fd at the time of execbuf.
>>
>> v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
>> Move oa_config vma to active (Chris)
>>
>> v3: Don't drop the lock for engine lookup (Chris)
>> Move OA config vma to active before writing the ringbuffer (Chris)
>>
>> v4: Reuse i915_user_extension_fn
>> Serialize requests with OA config updates
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> ---
>> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 130 +++++++++++++++++-
>> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +
>> drivers/gpu/drm/i915/gt/intel_engine_types.h | 9 ++
>> drivers/gpu/drm/i915/gt/intel_lrc.c | 1 +
>> drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 4 +-
>> drivers/gpu/drm/i915/i915_drv.c | 4 +
>> drivers/gpu/drm/i915/i915_drv.h | 10 +-
>> drivers/gpu/drm/i915/i915_perf.c | 26 ++--
>> include/uapi/drm/i915_drm.h | 37 +++++
>> 9 files changed, 208 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> index 476fade6fcab..18ae3e8a4e02 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> @@ -283,7 +283,11 @@ struct i915_execbuffer {
>> struct {
>> u64 flags; /** Available extensions parameters */
>> struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
>> + struct drm_i915_gem_execbuffer_ext_perf perf_config;
>> } extensions;
>> +
>> + struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. */
>> + struct drm_i915_gem_object *oa_bo;
>> };
>>
>> #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
>> @@ -1210,6 +1214,34 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
>> return err;
>> }
>>
>> +
>> +static int
>> +get_execbuf_oa_config(struct drm_i915_private *dev_priv,
>> + s32 perf_fd, u64 oa_config_id,
>> + struct i915_oa_config **out_oa_config,
>> + struct drm_i915_gem_object **out_oa_obj)
>> +{
>> + struct file *perf_file;
>> + int ret;
>> +
>> + if (!dev_priv->perf.oa.exclusive_stream)
>> + return -EINVAL;
>> +
>> + perf_file = fget(perf_fd);
>> + if (!perf_file)
>> + return -EINVAL;
>> +
>> + if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream)
>> + return -EINVAL;
>> +
>> + fput(perf_file);
>> +
>> + ret = i915_perf_get_oa_config(dev_priv, oa_config_id,
>> + out_oa_config, out_oa_obj);
>> +
>> + return ret;
>> +}
>> +
>> static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>> struct i915_vma *vma,
>> unsigned int len)
>> @@ -2072,6 +2104,64 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
>> list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
>> }
>>
>> +static int eb_oa_config(struct i915_execbuffer *eb)
>> +{
>> + struct i915_vma *oa_vma;
>> + int err;
>> +
>> + if (!eb->oa_config)
>> + return 0;
>> +
>> + err = i915_active_request_set(&eb->engine->last_oa_config,
>> + eb->request);
>> + if (err)
>> + return err;
>> +
>> + /*
>> + * If the perf stream was opened with hold preemption, flag the
>> + * request properly so that the priority of the request is bumped once
>> + * it reaches the execlist ports.
>> + */
>> + if (eb->i915->perf.oa.exclusive_stream->hold_preemption)
>> + eb->request->has_perf = true;
> Leave this chunk for the next battle^W patch.
Oops.
>
>> + /*
>> + * If the config hasn't changed, skip reconfiguring the HW (this is
>> + * subject to a delay we want to avoid has much as possible).
>> + */
>> + if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
>> + return 0;
>> +
>> + oa_vma = i915_vma_instance(eb->oa_bo,
>> + &eb->engine->i915->ggtt.vm, NULL);
>> + if (unlikely(IS_ERR(oa_vma)))
>> + return PTR_ERR(oa_vma);
>> +
>> + err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
>> + if (err)
>> + return err;
> Ugh. We should not be pinned after creating the request. Might not
> matter -- it's just reservation under its own lock that must not be
> crossed with the timeline lock currently held here.
Should I move this into get_execbuf_oa_config() ?
>
>> +
>> + err = i915_vma_move_to_active(oa_vma, eb->request, 0);
> You can unpin immediately after move_to_active (as the active is treated
> by being pinned).
>
> I'm thinking this will be replaced with just i915_active_ref() as you
> only care about the vma.
>
>> + if (err) {
>> + i915_vma_unpin(oa_vma);
>> + return err;
>> + }
>> +
>> + err = eb->engine->emit_bb_start(eb->request,
>> + oa_vma->node.start,
>> + 0, I915_DISPATCH_SECURE);
>> + if (err) {
>> + i915_vma_unpin(oa_vma);
>> + return err;
>> + }
>> +
>> + i915_vma_unpin(oa_vma);
>> +
>> + swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config);
>> +
>> + return 0;
>> +}
>> +
>> static int eb_submit(struct i915_execbuffer *eb)
>> {
>> int err;
>> @@ -2098,6 +2188,10 @@ static int eb_submit(struct i915_execbuffer *eb)
>> return err;
>> }
>>
>> + err = eb_oa_config(eb);
>> + if (err)
>> + return err;
>> +
>> err = eb->engine->emit_bb_start(eb->request,
>> eb->batch->node.start +
>> eb->batch_start_offset,
>> @@ -2520,8 +2614,22 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
>> return 0;
>> }
>>
>> +static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
>> +{
>> + struct i915_execbuffer *eb = data;
>> +
>> + if (copy_from_user(&eb->extensions.perf_config, ext,
>> + sizeof(eb->extensions.perf_config)))
>> + return -EFAULT;
>> +
>> + eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
>> +
>> + return 0;
>> +}
>> +
>> static const i915_user_extension_fn execbuf_extensions[] = {
>> [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
>> + [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
>> };
>>
>> static int
>> @@ -2573,6 +2681,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>> eb.buffer_count = args->buffer_count;
>> eb.batch_start_offset = args->batch_start_offset;
>> eb.batch_len = args->batch_len;
>> + eb.oa_config = NULL;
>>
>> eb.batch_flags = 0;
>> if (args->flags & I915_EXEC_SECURE) {
>> @@ -2651,9 +2760,23 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>> if (unlikely(err))
>> goto err_unlock;
>>
>> + if (eb.extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
>> + if (!intel_engine_has_oa(eb.engine)) {
>> + err = -ENODEV;
>> + goto err_engine;
>> + }
>> +
>> + err = get_execbuf_oa_config(eb.i915,
>> + eb.extensions.perf_config.perf_fd,
>> + eb.extensions.perf_config.oa_config,
>> + &eb.oa_config, &eb.oa_bo);
>> + if (err)
>> + goto err_engine;
> Why is this under the struct_mutex?
Access to dev_priv->perf.oa.exclusive_stream must be under struct_mutex.
Also because we verify that the engine actually has OA support.
I could split the getting the configuration part away.
>
>> + }
>> +
>> err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
>> if (unlikely(err))
>> - goto err_engine;
>> + goto err_oa;
>>
>> err = eb_relocate(&eb);
>> if (err) {
>> @@ -2806,6 +2929,11 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>> err_vma:
>> if (eb.exec)
>> eb_release_vmas(&eb);
>> +err_oa:
>> + if (eb.oa_config) {
>> + i915_gem_object_put(eb.oa_bo);
>> + i915_oa_config_put(eb.oa_config);
>> + }
>> err_engine:
>> eb_unpin_context(&eb);
>> err_unlock:
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> index 4961f74fd902..ba4d2c1abd1a 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
>> @@ -859,6 +859,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>>
>> engine->set_default_submission(engine);
>>
>> + INIT_ACTIVE_REQUEST(&engine->last_oa_config);
>> +
>> return 0;
>>
>> err_unpin:
>> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> index 7e056114344e..39da40937e7f 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> @@ -363,6 +363,8 @@ struct intel_engine_cs {
>> struct i915_wa_list wa_list;
>> struct i915_wa_list whitelist;
>>
>> + struct i915_active_request last_oa_config;
>> +
>> u32 irq_keep_mask; /* always keep these interrupts */
>> u32 irq_enable_mask; /* bitmask to enable ring interrupt */
>> void (*irq_enable)(struct intel_engine_cs *engine);
>> @@ -446,6 +448,7 @@ struct intel_engine_cs {
>> #define I915_ENGINE_HAS_SEMAPHORES BIT(3)
>> #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
>> #define I915_ENGINE_IS_VIRTUAL BIT(5)
>> +#define I915_ENGINE_HAS_OA BIT(6)
>> unsigned int flags;
>>
>> /*
>> @@ -541,6 +544,12 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine)
>> return engine->flags & I915_ENGINE_IS_VIRTUAL;
>> }
>>
>> +static inline bool
>> +intel_engine_has_oa(const struct intel_engine_cs *engine)
>> +{
>> + return engine->flags & I915_ENGINE_HAS_OA;
>> +}
>> +
>> #define instdone_slice_mask(dev_priv__) \
>> (IS_GEN(dev_priv__, 7) ? \
>> 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> index 28685ba91a2c..a6ea468986d1 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
>> @@ -2763,6 +2763,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
>> engine->init_context = gen8_init_rcs_context;
>> engine->emit_flush = gen8_emit_flush_render;
>> engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
>> + engine->flags |= I915_ENGINE_HAS_OA;
>> }
>>
>> return 0;
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> index f094406dcc56..a1aee092d0a3 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
>> @@ -2192,8 +2192,10 @@ static void setup_rcs(struct intel_engine_cs *engine)
>> engine->irq_enable_mask = I915_USER_INTERRUPT;
>> }
>>
>> - if (IS_HASWELL(i915))
>> + if (IS_HASWELL(i915)) {
>> engine->emit_bb_start = hsw_emit_bb_start;
>> + engine->flags |= I915_ENGINE_HAS_OA;
>> + }
>>
>> engine->resume = rcs_resume;
>> }
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
>> index 48f9009a6318..ef54cf1c54a5 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -486,6 +486,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
>> case I915_PARAM_PERF_REVISION:
>> value = 1;
>> break;
>> + case I915_PARAM_HAS_EXEC_PERF_CONFIG:
>> + /* Obviously requires perf support. */
>> + value = dev_priv->perf.initialized;
>> + break;
>> default:
>> DRM_DEBUG("Unknown parameter %d\n", param->param);
>> return -EINVAL;
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 0323c8182a67..51b103493623 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1096,6 +1096,8 @@ struct i915_oa_reg {
>> };
>>
>> struct i915_oa_config {
>> + struct drm_i915_private *i915;
>> +
>> char uuid[UUID_STRING_LEN + 1];
>> int id;
>>
>> @@ -1114,7 +1116,7 @@ struct i915_oa_config {
>>
>> struct list_head vma_link;
>>
>> - atomic_t ref_count;
>> + struct kref ref;
>> };
>>
>> struct i915_perf_stream;
>> @@ -2612,6 +2614,12 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
>> int metrics_set,
>> struct i915_oa_config **out_config,
>> struct drm_i915_gem_object **out_obj);
>> +void i915_oa_config_release(struct kref *ref);
>> +
>> +static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
>> +{
>> + kref_put(&oa_config->ref, i915_oa_config_release);
>> +}
>>
>> /* i915_gem_evict.c */
>> int __must_check i915_gem_evict_something(struct i915_address_space *vm,
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index b2f5ba87921c..34d74fa64c74 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -366,10 +366,11 @@ struct perf_open_properties {
>> int oa_period_exponent;
>> };
>>
>> -static void put_oa_config(struct i915_oa_config *oa_config)
>> +void i915_oa_config_release(struct kref *ref)
>> {
>> - if (!atomic_dec_and_test(&oa_config->ref_count))
>> - return;
>> + struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref);
>> +
>> + lockdep_assert_held(&oa_config->i915->perf.metrics_lock);
> That's not true for the eb paths.
Actually that's not true anywhere...
>
>>
>> if (oa_config->obj) {
>> list_del(&oa_config->vma_link);
>> @@ -485,7 +486,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
>> }
>>
>> if (out_config) {
>> - atomic_inc(&oa_config->ref_count);
>> + kref_get(&oa_config->ref);
>> *out_config = oa_config;
>> }
>>
>> @@ -507,7 +508,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915,
>>
>> err_buf_alloc:
>> if (out_config) {
>> - put_oa_config(oa_config);
>> + i915_oa_config_put(oa_config);
>> *out_config = NULL;
>> }
>> unlock:
>> @@ -1483,7 +1484,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>> if (stream->ctx)
>> oa_put_render_ctx_id(stream);
>>
>> - put_oa_config(stream->oa_config);
>> + i915_oa_config_put(stream->oa_config);
>>
>> if (dev_priv->perf.oa.spurious_report_rs.missed) {
>> DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
>> @@ -2430,7 +2431,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>> free_oa_buffer(dev_priv);
>>
>> err_oa_buf_alloc:
>> - put_oa_config(stream->oa_config);
>> + i915_oa_config_put(stream->oa_config);
>>
>> intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
>> intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
>> @@ -3246,7 +3247,7 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
>> if (ret)
>> goto sysfs_error;
>>
>> - atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);
>> + dev_priv->perf.oa.test_config.i915 = dev_priv;
>>
>> goto exit;
>>
>> @@ -3502,7 +3503,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
>> return -ENOMEM;
>> }
>>
>> - atomic_set(&oa_config->ref_count, 1);
>> + oa_config->i915 = dev_priv;
>> + kref_init(&oa_config->ref);
>>
>> if (!uuid_is_valid(args->uuid)) {
>> DRM_DEBUG("Invalid uuid format for OA config\n");
>> @@ -3601,7 +3603,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
>> sysfs_err:
>> mutex_unlock(&dev_priv->perf.metrics_lock);
>> reg_err:
>> - put_oa_config(oa_config);
>> + i915_oa_config_put(oa_config);
>> DRM_DEBUG("Failed to add new OA config\n");
>> return err;
>> }
>> @@ -3655,7 +3657,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
>>
>> DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
>>
>> - put_oa_config(oa_config);
>> + i915_oa_config_put(oa_config);
>>
>> config_err:
>> mutex_unlock(&dev_priv->perf.metrics_lock);
>> @@ -3824,7 +3826,7 @@ static int destroy_config(int id, void *p, void *data)
>> {
>> struct i915_oa_config *oa_config = p;
>>
>> - put_oa_config(oa_config);
>> + i915_oa_config_put(oa_config);
>>
>> return 0;
>> }
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index b7fe1915e34d..5938a73963fe 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -623,6 +623,16 @@ typedef struct drm_i915_irq_wait {
>> */
>> #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
>>
>> +/*
>> + * Request an i915/perf performance configuration change before running the
>> + * commands given in an execbuf.
>> + *
>> + * Performance configuration ID and the file descriptor of the i915 perf
>> + * stream are given through drm_i915_gem_execbuffer_ext_perf. See
>> + * I915_EXEC_EXT.
>> + */
>> +#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
>> +
>> /* Must be kept compact -- no holes and well documented */
>>
>> typedef struct drm_i915_getparam {
>> @@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext {
>> */
>> DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
>>
>> + /**
>> + * This identifier is associated with
>> + * drm_i915_gem_execbuffer_perf_ext.
>> + */
>> + DRM_I915_GEM_EXECBUFFER_EXT_PERF,
>> +
>> DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
>> };
>>
>> @@ -1054,6 +1070,27 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences {
>> __u64 values_ptr;
>> };
>>
>> +struct drm_i915_gem_execbuffer_ext_perf {
>> + struct i915_user_extension base;
>> +
>> + /**
>> + * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
>> + * This is used to identify that the application
>> + */
>> + __s32 perf_fd;
>> +
>> + /**
>> + * Unused for now. Must be cleared to zero.
>> + */
>> + __u32 pad;
>> +
>> + /**
>> + * OA configuration ID to switch to before executing the commands
>> + * associated to the execbuf.
>> + */
>> + __u64 oa_config;
>> +};
>> +
>> struct drm_i915_gem_execbuffer2 {
>> /**
>> * List of gem_exec_object2 structs
>> --
>> 2.21.0.392.gf8f6787159e
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list