[Intel-gfx] [PATCH v12 09/11] drm/i915: add a new perf configuration execbuf parameter

Fri Aug 30 15:32:48 UTC 2019

Quoting Lionel Landwerlin (2019-08-30 15:47:24)
> +static int
> +get_execbuf_oa_config(struct i915_execbuffer *eb)
> +{
> +       int err = 0;
> +
> +       eb->perf_file = NULL;
> +       eb->oa_config = NULL;
> +       eb->oa_vma = NULL;
> +       eb->oa_bo = NULL;
> +
> +       if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0)
> +               return 0;
> +
> +       eb->perf_file = fget(eb->extensions.perf_config.perf_fd);
> +       if (!eb->perf_file)
> +               return -EINVAL;
> +
> +       err = i915_mutex_lock_interruptible(&eb->i915->drm);
> +       if (err) {
> +               fput(eb->perf_file);
> +               eb->perf_file = NULL;
> +               return err;
> +       }
> +
> +       if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream)
> +               err = -EINVAL;
> +
> +       mutex_unlock(&eb->i915->drm.struct_mutex);

So why can i915->perf.execlusive_stream not change after this point?

> +
> +       if (!err) {
> +               err = i915_perf_get_oa_config_and_bo(
> +                       eb->i915->perf.exclusive_stream,
> +                       eb->extensions.perf_config.oa_config,
> +                       &eb->oa_config, &eb->oa_bo);
> +       }
> +
> +       return err;
> +}
> +
>  static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>                              struct i915_vma *vma,
>                              unsigned int len)
> @@ -2051,6 +2098,50 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
>         spin_unlock(&file_priv->mm.lock);
>  }
>  
> +static int eb_oa_config(struct i915_execbuffer *eb)
> +{
> +       struct i915_perf_stream *perf_stream;
> +       int err;
> +
> +       if (!eb->oa_config)
> +               return 0;
> +
> +       perf_stream = eb->perf_file->private_data;
> +
> +       err = mutex_lock_interruptible(&perf_stream->config_mutex);
> +       if (err)
> +               return err;
> +
> +       err = i915_active_request_set(&perf_stream->active_config_rq,
> +                                     eb->request);
> +       if (err)
> +               goto out;
> +
> +       /*
> +        * If the config hasn't changed, skip reconfiguring the HW (this is
> +        * subject to a delay we want to avoid has much as possible).
> +        */
> +       if (eb->oa_config == perf_stream->oa_config)
> +               goto out;
> +

We need to wait for any exclusive fences in case we migrate this object
in future:

i915_vma_lock(eb->oa_vma);
err = i915_request_await_object(eb->request, eb->oa_vma->obj, false); /* await_resv already! */
if (err = 0)
	err = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
i915_vma_unlock(eb->oa_vma);

Though this raises an interesting point, we do not actually want to emit
a semaphore here (albeit the engine is fixed atm) as we are after the
point where we have declared all semaphore waits completed. (Not an
issue to worry about right now!)

> +       if (err)
> +               goto out;
> +
> +       err = eb->engine->emit_bb_start(eb->request,
> +                                       eb->oa_vma->node.start,
> +                                       0, I915_DISPATCH_SECURE);
> +       if (err)
> +               goto out;
> +
> +       swap(eb->oa_config, perf_stream->oa_config);
> +
> +out:
> +       mutex_unlock(&perf_stream->config_mutex);
> +
> +       return err;
> +}
> +
>  static int eb_submit(struct i915_execbuffer *eb)
>  {
>         int err;
> @@ -2077,6 +2168,10 @@ static int eb_submit(struct i915_execbuffer *eb)
>                         return err;
>         }
>  
> +       err = eb_oa_config(eb);
> +       if (err)
> +               return err;

Ok, definitely needs to be after the waits!

> +
>         err = eb->engine->emit_bb_start(eb->request,
>                                         eb->batch->node.start +
>                                         eb->batch_start_offset,
> @@ -2643,8 +2738,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
>         return 0;
>  }
>  
> +static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
> +{
> +       struct i915_execbuffer *eb = data;
> +
> +       if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
> +               return -EINVAL;
> +
> +       if (copy_from_user(&eb->extensions.perf_config, ext,
> +                          sizeof(eb->extensions.perf_config)))
> +               return -EFAULT;
> +
> +       eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
> +
> +       return 0;
> +}
> +
>  static const i915_user_extension_fn execbuf_extensions[] = {
>          [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
> +        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
>  };
>  
>  static int
> @@ -2755,10 +2867,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>                 }
>         }
>  
> -       err = eb_create(&eb);
> +       err = get_execbuf_oa_config(&eb);
>         if (err)
>                 goto err_out_fence;
>  
> +       err = eb_create(&eb);
> +       if (err)
> +               goto err_oa_config;
> +
>         GEM_BUG_ON(!eb.lut_size);
>  
>         err = eb_select_context(&eb);
> @@ -2769,6 +2885,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>         if (unlikely(err))
>                 goto err_context;
>  
> +       if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
> +               struct i915_perf_stream *perf_stream =
> +                       eb.perf_file->private_data;
> +
> +               if (perf_stream->engine != eb.engine) {
> +                       err = -EINVAL;
> +                       goto err_engine;
> +               }
> +       }
> +
>         err = i915_mutex_lock_interruptible(dev);
>         if (err)
>                 goto err_engine;
> @@ -2889,6 +3015,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>                 }
>         }
>  
> +       if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
> +               eb.oa_vma = i915_vma_instance(eb.oa_bo,
> +                                             &eb.engine->i915->ggtt.vm, NULL);

eb.engine->gt->ggtt.vm

The i915_vma_instance() can be created outside of the struct_mutex, and
once we have the oa_vma, we don't need to keep a oa_bo pointer.

Later we can do the i915_vma_pin() before struct_mutex as well.

Thanks, that's looking a better with regard the plan to eliminate
struct_mutex.
-Chris