[Intel-gfx] [PATCH v16 16/17] drm/i915/perf: notify sseu configuration changes

Matthew Auld matthew.william.auld at gmail.com
Fri Jun 9 17:56:55 UTC 2017


On 5 June 2017 at 15:48, Lionel Landwerlin
<lionel.g.landwerlin at intel.com> wrote:
> This adds the ability for userspace to request that the kernel track &
> record sseu configuration changes. These changes are inserted into the
> perf stream so that userspace can interpret the OA reports using the
> configuration applied at the time the OA reports where generated.
>
> v2: Handle timestamps wrapping around 32bits (Lionel)
>
> v3: Use void * instead of u8 * for buffer virtual address (Chris)
>     Use i915_vam_unpin_and_release (Chris)
>     Fix a deadlock when SSEU buffer overflows (Lionel)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  45 ++++
>  drivers/gpu/drm/i915/i915_gem_context.c |   3 +
>  drivers/gpu/drm/i915/i915_gem_context.h |  21 ++
>  drivers/gpu/drm/i915/i915_perf.c        | 454 ++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/intel_lrc.c        |   7 +-
>  include/uapi/drm/i915_drm.h             |  35 +++
>  6 files changed, 535 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7ef1908e3a98..82f4e52718eb 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1970,6 +1970,12 @@ struct i915_perf_stream {
>         bool noa_restore;
>
>         /**
> +        * @has_sseu: Whether the stream emits SSEU configuration changes
> +        * reports.
> +        */
> +       bool has_sseu;
> +
> +       /**
>          * @enabled: Whether the stream is currently enabled, considering
>          * whether the stream was opened in a disabled state and based
>          * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
> @@ -2508,6 +2514,44 @@ struct drm_i915_private {
>                          */
>                         atomic_t noa_restore;
>                 } oa;
> +
> +               struct {
> +                       /**
> +                        * Buffer containing change reports of the SSEU
> +                        * configuration.
> +                        */
> +                       struct i915_vma *vma;
> +                       void *vaddr;
> +
> +                       /**
> +                        * Scheduler write to the head, and the perf driver
> +                        * reads from tail.
> +                        */
> +                       u32 head;
> +                       u32 tail;
> +
> +                       /**
> +                        * Is the sseu buffer enabled.
> +                        */
> +                       bool enabled;
> +
> +                       /**
> +                        * Whether the buffer overflown.
> +                        */
> +                       bool overflow;
> +
> +                       /**
> +                        * Keeps track of how many times the OA unit has been
> +                        * enabled. This number is used to discard stale
> +                        * @perf_sseu in @i915_gem_context.
> +                        */
> +                       atomic64_t enable_no;
> +
> +                       /**
> +                        * Lock writes & tail pointer updates on this buffer.
> +                        */
> +                       spinlock_t ptr_lock;
> +               } sseu_buffer;
>         } perf;
>
>         /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
> @@ -3558,6 +3602,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>                             struct i915_gem_context *ctx,
>                             uint32_t *reg_state);
>  int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req);
> +void i915_perf_emit_sseu_config(struct drm_i915_gem_request *req);
>
>  /* i915_gem_evict.c */
>  int __must_check i915_gem_evict_something(struct i915_address_space *vm,
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 60d0bbf0ae2b..368bb527a75a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -510,6 +510,9 @@ mi_set_context(struct drm_i915_gem_request *req, u32 flags)
>         if (IS_ERR(cs))
>                 return PTR_ERR(cs);
>
> +       /* Notify perf of possible change in SSEU configuration. */
> +       i915_perf_emit_sseu_config(req);
> +
>         /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
>         if (INTEL_GEN(dev_priv) >= 7) {
>                 *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index e8247e2f6011..35ebdde1cd16 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -194,6 +194,27 @@ struct i915_gem_context {
>
>         /** remap_slice: Bitmask of cache lines that need remapping */
>         u8 remap_slice;
> +
> +       /**
> +        * @perf_sseu: last SSEU configuration notified to userspace
> +        *
> +        * SSEU configuration changes are notified to userspace through the
> +        * perf infrastructure. We keep track here of the last notified
> +        * configuration for a given context since configuration can change
> +        * per engine.
> +        */
> +       struct sseu_dev_info perf_sseu;
> +
> +       /**
> +        * @perf_enable_no: last perf enable identifier
> +        *
> +        * Userspace can enable/disable the perf infrastructure whenever it
> +        * wants without reconfiguring the OA unit. This number is updated at
> +        * the same time as @perf_sseu by copying
> +        * dev_priv->perf.oa.oa_sseu.enable_no which changes every time the
> +        * user enables the OA unit.
> +        */
> +       u64 perf_enable_no;
>  };
>
>  static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 3f49ce69641b..87ebc450c456 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -212,6 +212,15 @@
>   */
>  #define OA_BUFFER_SIZE         SZ_16M
>
> +/* Dimension the SSEU buffer based on the size of the OA buffer to ensure we
> + * don't run out of space before the OA unit notifies us new data is
> + * available.
> + */
> +#define SSEU_BUFFER_ITEM_SIZE  sizeof(struct perf_sseu_change)
> +#define SSEU_BUFFER_SIZE       ((OA_BUFFER_SIZE * SSEU_BUFFER_ITEM_SIZE) / \
> +                                256 /* Maximum report size */)
> +#define SSEU_BUFFER_NB_ITEM    (SSEU_BUFFER_SIZE / SSEU_BUFFER_ITEM_SIZE)
> +
>  #define OA_TAKEN(tail, head)   ((tail - head) & (OA_BUFFER_SIZE - 1))
>
>  /**
> @@ -352,6 +361,8 @@ struct perf_open_properties {
>
>         bool noa_restore;
>
> +       bool sseu_enable;
> +
>         /* OA sampling state */
>         int metrics_set;
>         int oa_format;
> @@ -359,6 +370,19 @@ struct perf_open_properties {
>         int oa_period_exponent;
>  };
>
> +struct perf_sseu_change {
> +       u32 timestamp;
> +       struct drm_i915_perf_sseu_change change;
> +};
> +
> +static void i915_oa_stream_disable(struct i915_perf_stream *stream);
> +static void i915_oa_stream_enable(struct i915_perf_stream *stream);
> +
> +static bool timestamp_passed(u32 t0, u32 t1)
> +{
> +  return (s32)(t0 - t1) >= 0;
Formatting seems off, also timestamp_passed_or_equal?

> +}
> +
>  static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
>  {
>         return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
> @@ -571,6 +595,87 @@ static int append_oa_sample(struct i915_perf_stream *stream,
>         return 0;
>  }
>
> +static int append_sseu_change(struct i915_perf_stream *stream,
> +                             char __user *buf,
> +                             size_t count,
> +                             size_t *offset,
> +                             const struct perf_sseu_change *sseu_report)
> +{
> +       struct drm_i915_perf_record_header header;
> +
> +       header.type = DRM_I915_PERF_RECORD_SSEU_CHANGE;
> +       header.pad = 0;
> +       header.size = sizeof(struct drm_i915_perf_record_header) +
> +               sizeof(struct drm_i915_perf_sseu_change);
> +
> +       if ((count - *offset) < header.size)
> +               return -ENOSPC;
> +
> +       buf += *offset;
> +       if (copy_to_user(buf, &header, sizeof(header)))
> +               return -EFAULT;
> +       buf += sizeof(header);
> +
> +       if (copy_to_user(buf, &sseu_report->change,
> +                        sizeof(struct drm_i915_perf_sseu_change)))
> +               return -EFAULT;
> +
> +       (*offset) += header.size;
> +
> +       return 0;
> +}
> +
> +static struct perf_sseu_change *
> +sseu_buffer_peek(struct drm_i915_private *dev_priv, u32 head, u32 tail)
> +{
> +       if (head == tail)
> +               return NULL;
> +
> +       return dev_priv->perf.sseu_buffer.vaddr + tail * SSEU_BUFFER_ITEM_SIZE;
> +}
> +
> +static struct perf_sseu_change *
> +sseu_buffer_advance(struct drm_i915_private *dev_priv, u32 head, u32 *tail)
> +{
> +       *tail = (*tail + 1) % SSEU_BUFFER_NB_ITEM;
> +
> +       return sseu_buffer_peek(dev_priv, head, *tail);
> +}
> +
> +static void
> +sseu_buffer_read_pointers(struct i915_perf_stream *stream, u32 *head, u32 *tail)
> +{
> +       struct drm_i915_private *dev_priv = stream->dev_priv;
> +
> +       if (!stream->has_sseu) {
> +               *head = 0;
> +               *tail = 0;
> +               return;
> +       }
> +
> +       spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> +       *head = dev_priv->perf.sseu_buffer.head;
> +       *tail = dev_priv->perf.sseu_buffer.tail;
> +
> +       spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +}
> +
> +static void
> +sseu_buffer_write_tail_pointer(struct i915_perf_stream *stream, u32 tail)
> +{
> +       struct drm_i915_private *dev_priv = stream->dev_priv;
> +
> +       if (!stream->has_sseu)
> +               return;
> +
> +       spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> +       dev_priv->perf.sseu_buffer.tail = tail;
> +
> +       spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +}
> +
>  /**
>   * Copies all buffered OA reports into userspace read() buffer.
>   * @stream: An i915-perf stream opened for OA metrics
> @@ -606,6 +711,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>         unsigned int aged_tail_idx;
>         u32 head, tail;
>         u32 taken;
> +       struct perf_sseu_change *sseu_report;
> +       u32 sseu_head, sseu_tail;
>         int ret = 0;
>
>         if (WARN_ON(!stream->enabled))
> @@ -646,6 +753,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>                       head, tail))
>                 return -EIO;
>
> +       /* Extract first SSEU report. */
> +       sseu_buffer_read_pointers(stream, &sseu_head, &sseu_tail);
> +       sseu_report = sseu_buffer_peek(dev_priv, sseu_head, sseu_tail);
>
>         for (/* none */;
>              (taken = OA_TAKEN(tail, head));
> @@ -686,6 +796,26 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>                         continue;
>                 }
>
> +               while (sseu_report &&
> +                      !timestamp_passed(sseu_report->timestamp, report32[1])) {
> +                       /*
> +                        * While filtering for a single context we avoid
> +                        * leaking the IDs of other contexts.
> +                        */
> +                       if (stream->ctx &&
> +                           stream->ctx->hw_id != sseu_report->change.hw_id) {
> +                               sseu_report->change.hw_id = INVALID_CTX_ID;
> +                       }
> +
> +                       ret = append_sseu_change(stream, buf, count,
> +                                                offset, sseu_report);
> +                       if (ret)
> +                               break;
> +
> +                       sseu_report = sseu_buffer_advance(dev_priv, sseu_head,
> +                                                         &sseu_tail);
> +               }
> +
>                 /*
>                  * XXX: Just keep the lower 21 bits for now since I'm not
>                  * entirely sure if the HW touches any of the higher bits in
> @@ -783,6 +913,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>                 spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
>         }
>
> +       /* Update tail pointer of the sseu buffer. */
> +       sseu_buffer_write_tail_pointer(stream, sseu_tail);
> +
>         return ret;
>  }
>
> @@ -812,12 +945,29 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
>                         size_t *offset)
>  {
>         struct drm_i915_private *dev_priv = stream->dev_priv;
> +       bool overflow = false;
>         u32 oastatus;
>         int ret;
>
>         if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
>                 return -EIO;
>
> +       spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +       overflow = dev_priv->perf.sseu_buffer.overflow;
> +       spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> +       if (overflow) {
> +               DRM_DEBUG("SSEU buffer overflow\n");
> +
> +               ret = append_oa_status(stream, buf, count, offset,
> +                                      DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> +               if (ret)
> +                       return ret;
> +
> +               i915_oa_stream_disable(stream);
> +               i915_oa_stream_enable(stream);
> +       }
> +
>         oastatus = I915_READ(GEN8_OASTATUS);
>
>         /*
> @@ -835,16 +985,18 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
>          * that something has gone quite badly wrong.
>          */
>         if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
> -               ret = append_oa_status(stream, buf, count, offset,
> -                                      DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> -               if (ret)
> -                       return ret;
> -
>                 DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
>                           dev_priv->perf.oa.period_exponent);
>
> -               dev_priv->perf.oa.ops.oa_disable(dev_priv);
> -               dev_priv->perf.oa.ops.oa_enable(dev_priv);
> +               if (!overflow) {
> +                       ret = append_oa_status(stream, buf, count, offset,
> +                                              DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> +                       if (ret)
> +                               return ret;
> +
> +                       i915_oa_stream_disable(stream);
> +                       i915_oa_stream_enable(stream);
> +               }
>
>                 /*
>                  * Note: .oa_enable() is expected to re-init the oabuffer and
> @@ -854,10 +1006,12 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
>         }
>
>         if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
> -               ret = append_oa_status(stream, buf, count, offset,
> -                                      DRM_I915_PERF_RECORD_OA_REPORT_LOST);
> -               if (ret)
> -                       return ret;
> +               if (!overflow) {
> +                       ret = append_oa_status(stream, buf, count, offset,
> +                                              DRM_I915_PERF_RECORD_OA_REPORT_LOST);
> +                       if (ret)
> +                               return ret;
> +               }
>                 I915_WRITE(GEN8_OASTATUS,
>                            oastatus & ~GEN8_OASTATUS_REPORT_LOST);
>         }
> @@ -900,6 +1054,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>         unsigned int aged_tail_idx;
>         u32 head, tail;
>         u32 taken;
> +       struct perf_sseu_change *sseu_report;
> +       u32 sseu_head, sseu_tail;
>         int ret = 0;
>
>         if (WARN_ON(!stream->enabled))
> @@ -937,6 +1093,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>                       head, tail))
>                 return -EIO;
>
> +       /* Extract first SSEU report. */
> +       sseu_buffer_read_pointers(stream, &sseu_head, &sseu_tail);
> +       sseu_report = sseu_buffer_peek(dev_priv, sseu_head, sseu_tail);
>
>         for (/* none */;
>              (taken = OA_TAKEN(tail, head));
> @@ -969,6 +1128,27 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>                         continue;
>                 }
>
> +               /* Emit any potential sseu report. */
> +               while (sseu_report &&
> +                      !timestamp_passed(sseu_report->timestamp, report32[1])) {
> +                       /*
> +                        * While filtering for a single context we avoid
> +                        * leaking the IDs of other contexts.
> +                        */
> +                       if (stream->ctx &&
> +                           stream->ctx->hw_id != sseu_report->change.hw_id) {
> +                               sseu_report->change.hw_id = INVALID_CTX_ID;
> +                       }
> +
> +                       ret = append_sseu_change(stream, buf, count,
> +                                                offset, sseu_report);
> +                       if (ret)
> +                               break;
> +
> +                       sseu_report = sseu_buffer_advance(dev_priv, sseu_head,
> +                                                         &sseu_tail);
> +               }
> +
>                 ret = append_oa_sample(stream, buf, count, offset, report);
>                 if (ret)
>                         break;
> @@ -998,6 +1178,9 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>                 spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
>         }
>
> +       /* Update tail pointer of the sseu buffer. */
> +       sseu_buffer_write_tail_pointer(stream, sseu_tail);
> +
>         return ret;
>  }
>
> @@ -1023,12 +1206,29 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
>                         size_t *offset)
>  {
>         struct drm_i915_private *dev_priv = stream->dev_priv;
> +       bool overflow = false;
>         u32 oastatus1;
>         int ret;
>
>         if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
>                 return -EIO;
>
> +       spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +       overflow = dev_priv->perf.sseu_buffer.overflow;
> +       spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> +       if (overflow) {
> +               DRM_DEBUG("SSEU buffer overflow\n");
> +
> +               ret = append_oa_status(stream, buf, count, offset,
> +                                      DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> +               if (ret)
> +                       return ret;
> +
> +               i915_oa_stream_disable(stream);
> +               i915_oa_stream_enable(stream);
> +       }
> +
>         oastatus1 = I915_READ(GEN7_OASTATUS1);
>
>         /* XXX: On Haswell we don't have a safe way to clear oastatus1
> @@ -1059,25 +1259,30 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
>          *   now.
>          */
>         if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
> -               ret = append_oa_status(stream, buf, count, offset,
> -                                      DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> -               if (ret)
> -                       return ret;
> -
>                 DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
>                           dev_priv->perf.oa.period_exponent);
>
> -               dev_priv->perf.oa.ops.oa_disable(dev_priv);
> -               dev_priv->perf.oa.ops.oa_enable(dev_priv);
> +               if (!overflow) {
> +                       ret = append_oa_status(stream, buf, count, offset,
> +                                              DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
> +                       if (ret)
> +                               return ret;
> +
> +                       i915_oa_stream_disable(stream);
> +                       i915_oa_stream_enable(stream);
> +               }
>
>                 oastatus1 = I915_READ(GEN7_OASTATUS1);
>         }
>
>         if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
> -               ret = append_oa_status(stream, buf, count, offset,
> -                                      DRM_I915_PERF_RECORD_OA_REPORT_LOST);
> -               if (ret)
> -                       return ret;
> +               if (!overflow) {
> +                       ret = append_oa_status(stream, buf, count, offset,
> +                                              DRM_I915_PERF_RECORD_OA_REPORT_LOST);
> +                       if (ret)
> +                               return ret;
> +               }
> +
>                 dev_priv->perf.oa.gen7_latched_oastatus1 |=
>                         GEN7_OASTATUS1_REPORT_LOST;
>         }
> @@ -1227,18 +1432,21 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
>  }
>
>  static void
> -free_oa_buffer(struct drm_i915_private *i915)
> +free_sseu_buffer(struct drm_i915_private *i915)
>  {
> -       mutex_lock(&i915->drm.struct_mutex);
> +       i915_gem_object_unpin_map(i915->perf.sseu_buffer.vma->obj);
> +       i915_vma_unpin_and_release(&i915->perf.sseu_buffer.vma);
>
> +       i915->perf.sseu_buffer.vaddr = NULL;
> +}
> +
> +static void
> +free_oa_buffer(struct drm_i915_private *i915)
> +{
>         i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj);
> -       i915_vma_unpin(i915->perf.oa.oa_buffer.vma);
> -       i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj);
> +       i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma);
>
> -       i915->perf.oa.oa_buffer.vma = NULL;
>         i915->perf.oa.oa_buffer.vaddr = NULL;
> -
> -       mutex_unlock(&i915->drm.struct_mutex);
>  }
>
>  static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
> @@ -1255,8 +1463,15 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>
>         dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
>
> +       mutex_lock(&dev_priv->drm.struct_mutex);
> +
> +       if (stream->has_sseu)
> +               free_sseu_buffer(dev_priv);
> +
>         free_oa_buffer(dev_priv);
>
> +       mutex_unlock(&dev_priv->drm.struct_mutex);
> +
>         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>         intel_runtime_pm_put(dev_priv);
>
> @@ -1269,6 +1484,97 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>         }
>  }
>
> +static void init_sseu_buffer(struct drm_i915_private *dev_priv,
> +                            bool need_lock)
> +{
> +       /* Cleanup auxilliary buffer. */
> +       memset(dev_priv->perf.sseu_buffer.vaddr, 0, SSEU_BUFFER_SIZE);
> +
> +       atomic64_inc(&dev_priv->perf.sseu_buffer.enable_no);
> +
> +       if (need_lock)
> +               spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> +       /* Initialize head & tail pointers. */
> +       dev_priv->perf.sseu_buffer.head = 0;
> +       dev_priv->perf.sseu_buffer.tail = 0;
> +       dev_priv->perf.sseu_buffer.overflow = false;
> +
> +       if (need_lock)
> +               spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +}
> +
> +static int alloc_sseu_buffer(struct drm_i915_private *dev_priv)
> +{
> +       struct drm_i915_gem_object *bo;
> +       struct i915_vma *vma;
> +       int ret = 0;
> +
> +       bo = i915_gem_object_create(dev_priv, SSEU_BUFFER_SIZE);
> +       if (IS_ERR(bo)) {
> +               DRM_ERROR("Failed to allocate SSEU buffer\n");
> +               ret = PTR_ERR(bo);
> +               goto out;
> +       }
> +
> +       vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SSEU_BUFFER_SIZE, 0);
> +       if (IS_ERR(vma)) {
> +               ret = PTR_ERR(vma);
> +               goto err_unref;
> +       }
> +       dev_priv->perf.sseu_buffer.vma = vma;
> +
> +       dev_priv->perf.sseu_buffer.vaddr =
> +               i915_gem_object_pin_map(bo, I915_MAP_WB);
> +       if (IS_ERR(dev_priv->perf.sseu_buffer.vaddr)) {
> +               ret = PTR_ERR(dev_priv->perf.sseu_buffer.vaddr);
> +               goto err_unpin;
> +       }
> +
> +       atomic64_set(&dev_priv->perf.sseu_buffer.enable_no, 0);
> +
> +       init_sseu_buffer(dev_priv, true);
> +
> +       DRM_DEBUG_DRIVER("OA SSEU Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
> +                        i915_ggtt_offset(dev_priv->perf.sseu_buffer.vma),
> +                        dev_priv->perf.sseu_buffer.vaddr);
> +
> +       goto out;
> +
> +err_unpin:
> +       __i915_vma_unpin(vma);
> +
> +err_unref:
> +       i915_gem_object_put(bo);
> +
> +       dev_priv->perf.sseu_buffer.vaddr = NULL;
> +       dev_priv->perf.sseu_buffer.vma = NULL;
> +
> +out:
> +       return ret;
> +}
> +
> +static void sseu_enable(struct drm_i915_private *dev_priv)
> +{
> +       spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> +       init_sseu_buffer(dev_priv, false);
> +
> +       dev_priv->perf.sseu_buffer.enabled = true;
> +
> +       spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +}
> +
> +static void sseu_disable(struct drm_i915_private *dev_priv)
> +{
> +       spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> +       dev_priv->perf.sseu_buffer.enabled = false;
> +       dev_priv->perf.sseu_buffer.overflow = false;
> +
> +       spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +}
> +
>  static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
>  {
>         u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
> @@ -1763,6 +2069,9 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>                 struct intel_context *ce = &ctx->engine[RCS];
>                 u32 *regs;
>
> +               memset(&ctx->perf_sseu, 0, sizeof(ctx->perf_sseu));
> +               ctx->perf_enable_no = 0;
> +
>                 /* OA settings will be set upon first use */
>                 if (!ce->state)
>                         continue;
> @@ -1927,6 +2236,9 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream)
>  {
>         struct drm_i915_private *dev_priv = stream->dev_priv;
>
> +       if (stream->has_sseu)
> +               sseu_enable(dev_priv);
> +
>         dev_priv->perf.oa.ops.oa_enable(dev_priv);
>
>         if (dev_priv->perf.oa.periodic)
> @@ -1957,6 +2269,9 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream)
>  {
>         struct drm_i915_private *dev_priv = stream->dev_priv;
>
> +       if (stream->has_sseu)
> +               sseu_disable(dev_priv);
> +
>         dev_priv->perf.oa.ops.oa_disable(dev_priv);
>
>         if (dev_priv->perf.oa.periodic)
> @@ -2087,6 +2402,14 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>                 atomic_inc(&dev_priv->perf.oa.noa_restore);
>         }
>
> +       if (props->sseu_enable) {
> +               ret = alloc_sseu_buffer(dev_priv);
> +               if (ret)
> +                       goto err_sseu_buf_alloc;
> +
> +               stream->has_sseu = true;
> +       }
> +
>         ret = alloc_oa_buffer(dev_priv);
>         if (ret)
>                 goto err_oa_buf_alloc;
> @@ -2119,6 +2442,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>  err_enable:
>         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>         intel_runtime_pm_put(dev_priv);
> +       free_sseu_buffer(dev_priv);
> +
> +err_sseu_buf_alloc:
>         free_oa_buffer(dev_priv);
>
>  err_oa_buf_alloc:
This looks bogus, it needs to be:

err_enable:
     free_oa_buffer()

err_oa_buf_alloc:
     if (stream->has_sseu)
         free_sseu_buffer()

err_sseu_buf_alloc:


> @@ -2212,6 +2538,69 @@ int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req)
>  }
>
>  /**
> + * i915_perf_emit_sseu_config - emit the SSEU configuration of a gem request
> + * @req: the request
> + *
> + * If the OA unit is enabled, the write the SSEU configuration of a
> + * given request into a circular buffer. The buffer is read at the
> + * same time as the OA buffer and its elements are inserted into the
> + * perf stream to notify the userspace of SSEU configuration changes.
> + * This is needed to interpret values from the OA reports.
> + */
> +void i915_perf_emit_sseu_config(struct drm_i915_gem_request *req)
> +{
> +       struct drm_i915_private *dev_priv = req->i915;
> +       struct perf_sseu_change *report;
> +       const struct sseu_dev_info *sseu;
> +       u32 head, timestamp;
> +       u64 perf_enable_no;
> +
> +       /* Perf not supported. */
> +       if (!dev_priv->perf.initialized)
> +               return;
> +
> +       /* Nothing's changed, no need to signal userspace. */
> +       sseu = &req->ctx->engine[req->engine->id].sseu;
> +       perf_enable_no = atomic64_read(&dev_priv->perf.sseu_buffer.enable_no);
> +       if (perf_enable_no == req->ctx->perf_enable_no &&
> +           memcmp(&req->ctx->perf_sseu, sseu, sizeof(*sseu)) == 0)
> +               return;
> +
> +       memcpy(&req->ctx->perf_sseu, sseu, sizeof(*sseu));
> +       req->ctx->perf_enable_no = perf_enable_no;
> +
> +       /* Read the timestamp outside the spin lock. */
> +       timestamp = I915_READ(RING_TIMESTAMP(RENDER_RING_BASE));
Except we then proceed to read it again inside the spinlock :)

> +
> +       spin_lock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +
> +       if (!dev_priv->perf.sseu_buffer.enabled)
> +               goto out;
> +
> +       head = dev_priv->perf.sseu_buffer.head;
> +       dev_priv->perf.sseu_buffer.head =
> +               (dev_priv->perf.sseu_buffer.head + 1) % SSEU_BUFFER_NB_ITEM;
> +
> +       /* This should never happen, unless we've wrongly dimensioned the SSEU
> +        * buffer. */
> +       if (dev_priv->perf.sseu_buffer.head == dev_priv->perf.sseu_buffer.tail) {
> +               dev_priv->perf.sseu_buffer.overflow = true;
> +               goto out;
> +       }
What is the intention here if we do overflow, we seem to bail
initially, but if we re-enter and we are still in an overflow state we
will continue writing out reports anyway?

> +
> +       report = dev_priv->perf.sseu_buffer.vaddr + head * SSEU_BUFFER_ITEM_SIZE;
> +       memset(report, 0, sizeof(*report));
> +
> +       report->timestamp = I915_READ(RING_TIMESTAMP(RENDER_RING_BASE));
> +       report->change.hw_id = req->ctx->hw_id;
> +       report->change.slice_mask = sseu->slice_mask;
> +       report->change.subslice_mask = sseu->subslice_mask;
> +
> +out:
> +       spin_unlock_irq(&dev_priv->perf.sseu_buffer.ptr_lock);
> +}
> +
> +/**
>   * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
>   * @stream: An i915 perf stream
>   * @file: An i915 perf stream file
> @@ -2867,6 +3256,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
>                 case DRM_I915_PERF_PROP_NOA_RESTORE:
>                         props->noa_restore = true;
>                         break;
> +               case DRM_I915_PERF_PROP_SSEU_CHANGE:
> +                       props->sseu_enable = true;
> +                       break;
>                 case DRM_I915_PERF_PROP_MAX:
>                         MISSING_CASE(id);
>                         return -EINVAL;
> @@ -3229,6 +3621,10 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>
>                 atomic_set(&dev_priv->perf.oa.noa_restore, 0);
>
> +               spin_lock_init(&dev_priv->perf.sseu_buffer.ptr_lock);
> +               dev_priv->perf.sseu_buffer.enabled = false;
> +               atomic64_set(&dev_priv->perf.sseu_buffer.enable_no, 0);
> +
>                 oa_sample_rate_hard_limit =
>                         dev_priv->perf.oa.timestamp_frequency / 2;
>                 dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ad37e2b236b0..e2f83a12337b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -350,8 +350,10 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>                 rq = port_unpack(&port[n], &count);
>                 if (rq) {
>                         GEM_BUG_ON(count > !n);
> -                       if (!count++)
> +                       if (!count++) {
> +                               i915_perf_emit_sseu_config(rq);
>                                 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> +                       }
>                         port_set(&port[n], port_pack(rq, count));
>                         desc = execlists_update_context(rq);
>                         GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
> @@ -1406,6 +1408,9 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
>                 req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine);
>         }
>
> +       /* Emit NOA config */
> +       i915_oa_emit_noa_config_locked(req);
> +
>         cs = intel_ring_begin(req, 4);
>         if (IS_ERR(cs))
>                 return PTR_ERR(cs);
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 27cc72a85d07..c82f7473e3a0 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -447,6 +447,16 @@ typedef struct drm_i915_setparam {
>         int value;
>  } drm_i915_setparam_t;
>
> +union drm_i915_gem_param_sseu {
> +       struct {
> +               __u8 slice_mask;
> +               __u8 subslice_mask;
> +               __u8 min_eu_per_subslice;
> +               __u8 max_eu_per_subslice;
> +       } packed;
> +       __u64 value;
> +};
Please kill?

> +
>  /* A memory manager for regions of shared memory:
>   */
>  #define I915_MEM_REGION_AGP 1
> @@ -1376,6 +1386,13 @@ enum drm_i915_perf_property_id {
>          */
>         DRM_I915_PERF_PROP_NOA_RESTORE,
>
> +       /**
> +        * Specifying this property will lead the perf driver to insert sseu
> +        * change reports (see @DRM_I915_PERF_RECORD_SSEU_CHANGE) in the
> +        * stream of reports.
> +        */
> +       DRM_I915_PERF_PROP_SSEU_CHANGE,
> +
>         DRM_I915_PERF_PROP_MAX /* non-ABI */
>  };
>
> @@ -1459,9 +1476,27 @@ enum drm_i915_perf_record_type {
>          */
>         DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3,
>
> +       /**
> +        * A change in the sseu configuration happened for a particular
> +        * context.
> +        *
> +        * struct {
> +        *     struct drm_i915_perf_record_header header;
> +        *     { struct drm_i915_perf_sseu_change change; } && DRM_I915_PERF_PROP_SSEU_CHANGE
> +        * };
> +        */
> +       DRM_I915_PERF_RECORD_SSEU_CHANGE = 4,
> +
>         DRM_I915_PERF_RECORD_MAX /* non-ABI */
>  };
>
> +struct drm_i915_perf_sseu_change {
> +       __u32 hw_id;
> +       __u16 pad;
> +       __u8 slice_mask;
> +       __u8 subslice_mask;
> +};
> +
>  #if defined(__cplusplus)
>  }
>  #endif
> --
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


More information about the Intel-gfx mailing list