[Intel-gfx] [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream

Mon Sep 2 11:17:52 UTC 2019

On 30/08/2019 18:48, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-08-30 15:47:23)
>>   err_unpin:
>> -       __i915_vma_unpin(vma);
>> +       mutex_lock(&i915->drm.struct_mutex);
>> +       i915_vma_unpin_and_release(&vma, 0);
>> +       mutex_unlock(&i915->drm.struct_mutex);
> Strangely unpin_and_release() doesn't need the mutex. But I can clean
> that up later.
>
>>   
>>   err_unref:
>>          i915_gem_object_put(bo);
>> @@ -1947,50 +1961,69 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>>          return ret;
>>   }
>>   
>> -static void config_oa_regs(struct drm_i915_private *dev_priv,
>> -                          const struct i915_oa_reg *regs,
>> -                          u32 n_regs)
>> +static int emit_oa_config(struct drm_i915_private *i915,
>> +                         struct i915_perf_stream *stream)
>>   {
>> -       u32 i;
>> +       struct i915_request *rq;
>> +       struct i915_vma *vma;
>> +       u32 *cs;
>> +       int err;
>>   
>> -       for (i = 0; i < n_regs; i++) {
>> -               const struct i915_oa_reg *reg = regs + i;
>> +       lockdep_assert_held(&stream->config_mutex);
>>   
>> -               I915_WRITE(reg->addr, reg->value);
>> +       rq = i915_request_create(stream->engine->kernel_context);
>> +       if (IS_ERR(rq))
>> +               return PTR_ERR(rq);
>> +
>> +       err = i915_active_request_set(&stream->active_config_rq,
>> +                                     rq);
>> +       if (err)
>> +               goto err_add_request;
>> +
>> +       vma = i915_vma_instance(stream->initial_oa_config_bo,
>> +                               &i915->ggtt.vm, NULL);
> Safer with stream->engine->gt->ggtt.vm
>
>> +       if (unlikely(IS_ERR(vma))) {
>> +               err = PTR_ERR(vma);
>> +               goto err_add_request;
>>          }
>> -}
>>   
>> -static void delay_after_mux(void)
>> -{
>> -       /*
>> -        * It apparently takes a fairly long time for a new MUX
>> -        * configuration to be be applied after these register writes.
>> -        * This delay duration was derived empirically based on the
>> -        * render_basic config but hopefully it covers the maximum
>> -        * configuration latency.
>> -        *
>> -        * As a fallback, the checks in _append_oa_reports() to skip
>> -        * invalid OA reports do also seem to work to discard reports
>> -        * generated before this config has completed - albeit not
>> -        * silently.
>> -        *
>> -        * Unfortunately this is essentially a magic number, since we
>> -        * don't currently know of a reliable mechanism for predicting
>> -        * how long the MUX config will take to apply and besides
>> -        * seeing invalid reports we don't know of a reliable way to
>> -        * explicitly check that the MUX config has landed.
>> -        *
>> -        * It's even possible we've miss characterized the underlying
>> -        * problem - it just seems like the simplest explanation why
>> -        * a delay at this location would mitigate any invalid reports.
>> -        */
>> -       usleep_range(15000, 20000);
>> +       err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
>> +       if (err)
>> +               goto err_add_request;
> Don't pin inside a request. do the pining before i915_request_create().
> One day, we may need to allocate a request to do the pin.
>
> Be safe,
>
> i915_vma_lock(vma);
> err = i915_request_await_object(rq, vma->obj, 0);
> (yes, we need a better wrapper here)
> if (err)
>> +       err = i915_vma_move_to_active(vma, rq, 0);
> i915_vma_unlock(vma);
>> +       if (err)
>> +               goto err_vma_unpin;
>> +
>
>> @@ -2658,16 +2684,31 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>          stream->ops = &i915_oa_stream_ops;
>>          dev_priv->perf.exclusive_stream = stream;
>>   
>> +       mutex_lock(&stream->config_mutex);
>>          ret = dev_priv->perf.ops.enable_metric_set(stream);
>>          if (ret) {
>>                  DRM_DEBUG("Unable to enable metric set\n");
>> -               goto err_enable;
>> +               /*
>> +                * Ignore the return value since we already have an error from
>> +                * the enable vfunc.
>> +                */
>> +               i915_active_request_retire(&stream->active_config_rq,
>> +                                          &stream->config_mutex);
>> +       } else {
>> +               ret = i915_active_request_retire(&stream->active_config_rq,
>> +                                                &stream->config_mutex);
> This function doesn't exist anymore. It's basically just waiting for the
> old request. Why do you need it? (Your request flow is otherwise ordered.)

I don't want to enable the OA reports to be written into the OA buffer 
until I know the configuration work has completed.

Otherwise it would write invalid/unconfigured data.

>
>> -       DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
>> -
>> +       mutex_unlock(&stream->config_mutex);
>>          mutex_unlock(&dev_priv->drm.struct_mutex);
>>   
>> +       i915_gem_object_put(stream->initial_oa_config_bo);
>> +       stream->initial_oa_config_bo = NULL;
>> +       if (ret)
>> +               goto err_enable;
> Is it because of this err that may end up freeing the stream? I would
> expect a similar wait-before-free on stream destroy.

Actually it's there, although a bit hidden :

static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
         struct drm_i915_private *dev_priv = stream->dev_priv;
         int err;

         BUG_ON(stream != dev_priv->perf.exclusive_stream);

         mutex_lock(&dev_priv->drm.struct_mutex);
         mutex_lock(&stream->config_mutex);
         dev_priv->perf.ops.disable_metric_set(stream);
=>   err = i915_active_request_retire(&stream->active_config_rq,
&stream->config_mutex);
         mutex_unlock(&stream->config_mutex);
         dev_priv->perf.exclusive_stream = NULL;
         mutex_unlock(&dev_priv->drm.struct_mutex);

>   In which case I
> would have the active request hold a reference on the stream. (And you
> might find it more convenient to use i915_active rather than the raw
> i915_active_request. i915_active is geared to tracking multiple
> timelines, so definitely overkill for you use case, just has more
> utility/mid-layer! built in)
> -Chris
>