[Intel-gfx] [PATCH v12 09/11] drm/i915: add a new perf configuration execbuf parameter
Chris Wilson
chris at chris-wilson.co.uk
Fri Aug 30 15:32:48 UTC 2019
Quoting Lionel Landwerlin (2019-08-30 15:47:24)
> +static int
> +get_execbuf_oa_config(struct i915_execbuffer *eb)
> +{
> + int err = 0;
> +
> + eb->perf_file = NULL;
> + eb->oa_config = NULL;
> + eb->oa_vma = NULL;
> + eb->oa_bo = NULL;
> +
> + if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0)
> + return 0;
> +
> + eb->perf_file = fget(eb->extensions.perf_config.perf_fd);
> + if (!eb->perf_file)
> + return -EINVAL;
> +
> + err = i915_mutex_lock_interruptible(&eb->i915->drm);
> + if (err) {
> + fput(eb->perf_file);
> + eb->perf_file = NULL;
> + return err;
> + }
> +
> + if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream)
> + err = -EINVAL;
> +
> + mutex_unlock(&eb->i915->drm.struct_mutex);
So why can i915->perf.execlusive_stream not change after this point?
> +
> + if (!err) {
> + err = i915_perf_get_oa_config_and_bo(
> + eb->i915->perf.exclusive_stream,
> + eb->extensions.perf_config.oa_config,
> + &eb->oa_config, &eb->oa_bo);
> + }
> +
> + return err;
> +}
> +
> static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
> struct i915_vma *vma,
> unsigned int len)
> @@ -2051,6 +2098,50 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
> spin_unlock(&file_priv->mm.lock);
> }
>
> +static int eb_oa_config(struct i915_execbuffer *eb)
> +{
> + struct i915_perf_stream *perf_stream;
> + int err;
> +
> + if (!eb->oa_config)
> + return 0;
> +
> + perf_stream = eb->perf_file->private_data;
> +
> + err = mutex_lock_interruptible(&perf_stream->config_mutex);
> + if (err)
> + return err;
> +
> + err = i915_active_request_set(&perf_stream->active_config_rq,
> + eb->request);
> + if (err)
> + goto out;
> +
> + /*
> + * If the config hasn't changed, skip reconfiguring the HW (this is
> + * subject to a delay we want to avoid has much as possible).
> + */
> + if (eb->oa_config == perf_stream->oa_config)
> + goto out;
> +
We need to wait for any exclusive fences in case we migrate this object
in future:
i915_vma_lock(eb->oa_vma);
err = i915_request_await_object(eb->request, eb->oa_vma->obj, false); /* await_resv already! */
if (err = 0)
err = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
i915_vma_unlock(eb->oa_vma);
Though this raises an interesting point, we do not actually want to emit
a semaphore here (albeit the engine is fixed atm) as we are after the
point where we have declared all semaphore waits completed. (Not an
issue to worry about right now!)
> + if (err)
> + goto out;
> +
> + err = eb->engine->emit_bb_start(eb->request,
> + eb->oa_vma->node.start,
> + 0, I915_DISPATCH_SECURE);
> + if (err)
> + goto out;
> +
> + swap(eb->oa_config, perf_stream->oa_config);
> +
> +out:
> + mutex_unlock(&perf_stream->config_mutex);
> +
> + return err;
> +}
> +
> static int eb_submit(struct i915_execbuffer *eb)
> {
> int err;
> @@ -2077,6 +2168,10 @@ static int eb_submit(struct i915_execbuffer *eb)
> return err;
> }
>
> + err = eb_oa_config(eb);
> + if (err)
> + return err;
Ok, definitely needs to be after the waits!
> +
> err = eb->engine->emit_bb_start(eb->request,
> eb->batch->node.start +
> eb->batch_start_offset,
> @@ -2643,8 +2738,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
> return 0;
> }
>
> +static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
> +{
> + struct i915_execbuffer *eb = data;
> +
> + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
> + return -EINVAL;
> +
> + if (copy_from_user(&eb->extensions.perf_config, ext,
> + sizeof(eb->extensions.perf_config)))
> + return -EFAULT;
> +
> + eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
> +
> + return 0;
> +}
> +
> static const i915_user_extension_fn execbuf_extensions[] = {
> [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
> + [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
> };
>
> static int
> @@ -2755,10 +2867,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> }
> }
>
> - err = eb_create(&eb);
> + err = get_execbuf_oa_config(&eb);
> if (err)
> goto err_out_fence;
>
> + err = eb_create(&eb);
> + if (err)
> + goto err_oa_config;
> +
> GEM_BUG_ON(!eb.lut_size);
>
> err = eb_select_context(&eb);
> @@ -2769,6 +2885,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> if (unlikely(err))
> goto err_context;
>
> + if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
> + struct i915_perf_stream *perf_stream =
> + eb.perf_file->private_data;
> +
> + if (perf_stream->engine != eb.engine) {
> + err = -EINVAL;
> + goto err_engine;
> + }
> + }
> +
> err = i915_mutex_lock_interruptible(dev);
> if (err)
> goto err_engine;
> @@ -2889,6 +3015,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> }
> }
>
> + if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
> + eb.oa_vma = i915_vma_instance(eb.oa_bo,
> + &eb.engine->i915->ggtt.vm, NULL);
eb.engine->gt->ggtt.vm
The i915_vma_instance() can be created outside of the struct_mutex, and
once we have the oa_vma, we don't need to keep a oa_bo pointer.
Later we can do the i915_vma_pin() before struct_mutex as well.
Thanks, that's looking a better with regard the plan to eliminate
struct_mutex.
-Chris
More information about the Intel-gfx
mailing list