[Intel-gfx] [PATCH v12 09/11] drm/i915: add a new perf configuration execbuf parameter

Fri Aug 30 21:03:33 UTC 2019

On 30/08/2019 18:32, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-08-30 15:47:24)
>> +static int
>> +get_execbuf_oa_config(struct i915_execbuffer *eb)
>> +{
>> +       int err = 0;
>> +
>> +       eb->perf_file = NULL;
>> +       eb->oa_config = NULL;
>> +       eb->oa_vma = NULL;
>> +       eb->oa_bo = NULL;
>> +
>> +       if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0)
>> +               return 0;
>> +
>> +       eb->perf_file = fget(eb->extensions.perf_config.perf_fd);
>> +       if (!eb->perf_file)
>> +               return -EINVAL;
>> +
>> +       err = i915_mutex_lock_interruptible(&eb->i915->drm);
>> +       if (err) {
>> +               fput(eb->perf_file);
>> +               eb->perf_file = NULL;
>> +               return err;
>> +       }
>> +
>> +       if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream)
>> +               err = -EINVAL;
>> +
>> +       mutex_unlock(&eb->i915->drm.struct_mutex);
> So why can i915->perf.execlusive_stream not change after this point?

It changes only when the last FD reference is dropped, so if we got it 
here there is at least one ref from the app doing the ioctl and we grab 
another ref on it which hold onto until we return.

>
>> +
>> +       if (!err) {
>> +               err = i915_perf_get_oa_config_and_bo(
>> +                       eb->i915->perf.exclusive_stream,
>> +                       eb->extensions.perf_config.oa_config,
>> +                       &eb->oa_config, &eb->oa_bo);
>> +       }
>> +
>> +       return err;
>> +}
>> +
>>   static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>>                               struct i915_vma *vma,
>>                               unsigned int len)
>> @@ -2051,6 +2098,50 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
>>          spin_unlock(&file_priv->mm.lock);
>>   }
>>   
>> +static int eb_oa_config(struct i915_execbuffer *eb)
>> +{
>> +       struct i915_perf_stream *perf_stream;
>> +       int err;
>> +
>> +       if (!eb->oa_config)
>> +               return 0;
>> +
>> +       perf_stream = eb->perf_file->private_data;
>> +
>> +       err = mutex_lock_interruptible(&perf_stream->config_mutex);
>> +       if (err)
>> +               return err;
>> +
>> +       err = i915_active_request_set(&perf_stream->active_config_rq,
>> +                                     eb->request);
>> +       if (err)
>> +               goto out;
>> +
>> +       /*
>> +        * If the config hasn't changed, skip reconfiguring the HW (this is
>> +        * subject to a delay we want to avoid has much as possible).
>> +        */
>> +       if (eb->oa_config == perf_stream->oa_config)
>> +               goto out;
>> +
> We need to wait for any exclusive fences in case we migrate this object
> in future:
>
> i915_vma_lock(eb->oa_vma);
> err = i915_request_await_object(eb->request, eb->oa_vma->obj, false); /* await_resv already! */
> if (err = 0)
> 	err = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
> i915_vma_unlock(eb->oa_vma);
>
> Though this raises an interesting point, we do not actually want to emit
> a semaphore here (albeit the engine is fixed atm) as we are after the
> point where we have declared all semaphore waits completed. (Not an
> issue to worry about right now!)

If we do that for this VMA, don't we have to do it for any?

If that's happening in a different function, I could put it there.

>
>> +       if (err)
>> +               goto out;
>> +
>> +       err = eb->engine->emit_bb_start(eb->request,
>> +                                       eb->oa_vma->node.start,
>> +                                       0, I915_DISPATCH_SECURE);
>> +       if (err)
>> +               goto out;
>> +
>> +       swap(eb->oa_config, perf_stream->oa_config);
>> +
>> +out:
>> +       mutex_unlock(&perf_stream->config_mutex);
>> +
>> +       return err;
>> +}
>> +
>>   static int eb_submit(struct i915_execbuffer *eb)
>>   {
>>          int err;
>> @@ -2077,6 +2168,10 @@ static int eb_submit(struct i915_execbuffer *eb)
>>                          return err;
>>          }
>>   
>> +       err = eb_oa_config(eb);
>> +       if (err)
>> +               return err;
> Ok, definitely needs to be after the waits!
>
>> +
>>          err = eb->engine->emit_bb_start(eb->request,
>>                                          eb->batch->node.start +
>>                                          eb->batch_start_offset,
>> @@ -2643,8 +2738,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
>>          return 0;
>>   }
>>   
>> +static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
>> +{
>> +       struct i915_execbuffer *eb = data;
>> +
>> +       if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
>> +               return -EINVAL;
>> +
>> +       if (copy_from_user(&eb->extensions.perf_config, ext,
>> +                          sizeof(eb->extensions.perf_config)))
>> +               return -EFAULT;
>> +
>> +       eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
>> +
>> +       return 0;
>> +}
>> +
>>   static const i915_user_extension_fn execbuf_extensions[] = {
>>           [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
>> +        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
>>   };
>>   
>>   static int
>> @@ -2755,10 +2867,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>                  }
>>          }
>>   
>> -       err = eb_create(&eb);
>> +       err = get_execbuf_oa_config(&eb);
>>          if (err)
>>                  goto err_out_fence;
>>   
>> +       err = eb_create(&eb);
>> +       if (err)
>> +               goto err_oa_config;
>> +
>>          GEM_BUG_ON(!eb.lut_size);
>>   
>>          err = eb_select_context(&eb);
>> @@ -2769,6 +2885,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>          if (unlikely(err))
>>                  goto err_context;
>>   
>> +       if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
>> +               struct i915_perf_stream *perf_stream =
>> +                       eb.perf_file->private_data;
>> +
>> +               if (perf_stream->engine != eb.engine) {
>> +                       err = -EINVAL;
>> +                       goto err_engine;
>> +               }
>> +       }
>> +
>>          err = i915_mutex_lock_interruptible(dev);
>>          if (err)
>>                  goto err_engine;
>> @@ -2889,6 +3015,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>                  }
>>          }
>>   
>> +       if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
>> +               eb.oa_vma = i915_vma_instance(eb.oa_bo,
>> +                                             &eb.engine->i915->ggtt.vm, NULL);
> eb.engine->gt->ggtt.vm

Thanks!

>
> The i915_vma_instance() can be created outside of the struct_mutex, and
> once we have the oa_vma, we don't need to keep a oa_bo pointer.
>
> Later we can do the i915_vma_pin() before struct_mutex as well.
>
> Thanks, that's looking a better with regard the plan to eliminate
> struct_mutex.
> -Chris
>