[Intel-gfx] [PATCH 5/5] drm/i915/perf: improve tail race workaround

Matthew Auld matthew.william.auld at gmail.com
Wed Jan 25 16:22:07 UTC 2017


On 24 January 2017 at 01:25, Robert Bragg <robert at sixbynine.org> wrote:
> There's a HW race condition between OA unit tail pointer register
> updates and writes to memory whereby the tail pointer can sometimes get
> ahead of what's been written out to the OA buffer so far (in terms of
> what's visible to the CPU).
>
> Although this can be observed explicitly while copying reports to
> userspace by checking for a zeroed report-id field in tail reports, we
> want to account for this earlier, as part of the _oa_buffer_check to
> avoid lots of redundant read() attempts.
>
> Previously the driver used to define an effective tail pointer that
> lagged the real pointer by a 'tail margin' measured in bytes derived
> from OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
> Unfortunately this was flawed considering that the OA unit may also
> automatically generate non-periodic reports (such as on context switch)
> or the OA unit may be enabled without any periodic sampling.
>
> This improves how we define a tail pointer for reading that lags the
> real tail pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which
> gives enough time for the corresponding reports to become visible to the
> CPU.
>
> The driver now maintains two tail pointers:
>  1) An 'aging' tail with an associated timestamp that is tracked until we
>     can trust the corresponding data is visible to the CPU; at which point
>     it is considered 'aged'.
>  2) An 'aged' tail that can be used for read()ing.
>
> The two separate pointers let us decouple read()s from tail pointer aging.
>
> The tail pointers are checked and updated at a limited rate within a
> hrtimer callback (the same callback that is used for delivering POLLIN
> events) and since we're now measuring the wall clock time elapsed since
> a given tail pointer was read the mechanism no longer cares about
> the OA unit's periodic sampling frequency.
>
> The natural place to handle the tail pointer updates was in
> gen7_oa_buffer_is_empty() which is called as part of blocking reads and
> the hrtimer callback used for polling, and so this was renamed to
> oa_buffer_check() considering the added side effect while checking
> whether the buffer contains data.
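The aging/aged split took me a second read to follow, so just to check my
understanding: the tails[] handling boils down to roughly the sketch below
(quick userspace model, the names are mine, and I've left out the locking,
the bounds checking and the >= report_size throttling of aging updates):

#include <stdint.h>

#define INVALID_TAIL    0xffffffffu
#define TAIL_MARGIN_NS  100000ull       /* i.e. OA_TAIL_MARGIN_NSEC */

struct tail_state {
        uint32_t tails[2];      /* [aged_idx] readable, [!aged_idx] aging */
        unsigned int aged_idx;
        uint64_t aging_ts;
};

/* called at the hrtimer rate with the latest HW tail + current time */
void age_tails(struct tail_state *s, uint32_t hw_tail, uint64_t now)
{
        /* latch a new aging pointer if that slot is currently invalid */
        if (s->tails[!s->aged_idx] == INVALID_TAIL) {
                s->tails[!s->aged_idx] = hw_tail;
                s->aging_ts = now;
        }

        /* promote the aging pointer once it's old enough to trust */
        if (s->tails[!s->aged_idx] != INVALID_TAIL &&
            (now - s->aging_ts) > TAIL_MARGIN_NS) {
                s->aged_idx ^= 1;
                s->tails[!s->aged_idx] = INVALID_TAIL;
        }
}

/* read()s only ever consume up to s->tails[s->aged_idx] */

If I've got that right then the rest of the patch falls out naturally.
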
>
> Signed-off-by: Robert Bragg <robert at sixbynine.org>
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  59 ++++++++-
>  drivers/gpu/drm/i915/i915_perf.c | 275 ++++++++++++++++++++++++++-------------
>  2 files changed, 241 insertions(+), 93 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index e732d0b3bf65..7b2bdc6ccb26 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2038,7 +2038,7 @@ struct i915_oa_ops {
>                     size_t *offset);
>
>         /**
> -        * @oa_buffer_is_empty: Check if OA buffer empty (false positives OK)
> +        * @oa_buffer_check: Check for OA buffer data + update tail
>          *
>          * This is either called via fops or the poll check hrtimer (atomic
>          * ctx) without any locks taken.
> @@ -2051,7 +2051,7 @@ struct i915_oa_ops {
>          * here, which will be handled gracefully - likely resulting in an
>          * %EAGAIN error for userspace.
>          */
> -       bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv);
> +       bool (*oa_buffer_check)(struct drm_i915_private *dev_priv);
>  };
>
>  struct drm_i915_private {
> @@ -2383,8 +2383,6 @@ struct drm_i915_private {
>                         int period_exponent;
>                         int timestamp_frequency;
>
> -                       int tail_margin;
> -
>                         int metrics_set;
>
>                         const struct i915_oa_reg *mux_regs;
> @@ -2399,6 +2397,59 @@ struct drm_i915_private {
>                                 int format_size;
>
>                                 /**
> +                                * Locks reads and writes to all head/tail state
> +                                *
> +                                * Consider: the head and tail pointer state
> +                                * needs to be read consistently from a hrtimer
> +                                * callback (atomic context) and read() fop
> +                                * (user context), with tail pointer updates
> +                                * happening in atomic context, head updates
> +                                * in user context, and the (unlikely)
> +                                * possibility of read() errors needing to
> +                                * reset all head/tail state.
> +                                *
> +                                * Note: Contention or performance aren't
> +                                * currently a significant concern here
> +                                * considering the relatively low frequency of
> +                                * hrtimer callbacks (5ms period) and that
> +                                * reads typically only happen in response to a
> +                                * hrtimer event and likely complete before the
> +                                * next callback.
> +                                *
> +                                * Note: This lock is not held *while* reading
> +                                * and copying data to userspace so the value
> +                                * of head observed in hrtimer callbacks won't
> +                                * represent any partial consumption of data.
> +                                */
> +                               spinlock_t ptr_lock;
> +
> +                               /**
> +                                * One 'aging' tail pointer and one 'aged'
> +                                * tail pointer ready to be used for reading.
> +                                *
> +                                * Initial values of 0xffffffff are invalid
> +                                * and imply that an update is required
> +                                * (and should be ignored by an attempted
> +                                * read)
> +                                */
> +                               struct {
> +                                       u32 offset;
> +                               } tails[2];
> +
> +                               /**
> +                                * Index for the aged tail ready to read()
> +                                * data up to.
> +                                */
> +                               unsigned int aged_tail_idx;
> +
> +                               /**
> +                                * A monotonic timestamp for when the current
> +                                * aging tail pointer was read; used to
> +                                * determine when it is old enough to trust.
> +                                */
> +                               u64 aging_timestamp;
> +
> +                               /**
>                                  * Although we can always read back the head
>                                  * pointer register, we prefer to avoid
>                                  * trusting the HW state, just to avoid any
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 4b3babdbd79e..b4b7e9adb8de 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -205,23 +205,46 @@
>
>  #define OA_TAKEN(tail, head)   ((tail - head) & (OA_BUFFER_SIZE - 1))
>
> -/* There's a HW race condition between OA unit tail pointer register updates and
> +/**
> + * DOC: OA Tail Pointer Race
> + *
> + * There's a HW race condition between OA unit tail pointer register updates and
>   * writes to memory whereby the tail pointer can sometimes get ahead of what's
> - * been written out to the OA buffer so far.
> + * been written out to the OA buffer so far (in terms of what's visible to the
> + * CPU).
> + *
> + * Although this can be observed explicitly while copying reports to userspace
> + * by checking for a zeroed report-id field in tail reports, we want to account
> + * for this earlier, as part of the _oa_buffer_check to avoid lots of redundant
> + * read() attempts.
> + *
> + * In effect we define a tail pointer for reading that lags the real tail
> + * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
> + * time for the corresponding reports to become visible to the CPU.
> + *
> + * To manage this we actually track two tail pointers:
> + *  1) An 'aging' tail with an associated timestamp that is tracked until we
> + *     can trust the corresponding data is visible to the CPU; at which point
> + *     it is considered 'aged'.
> + *  2) An 'aged' tail that can be used for read()ing.
>   *
> - * Although this can be observed explicitly by checking for a zeroed report-id
> - * field in tail reports, it seems preferable to account for this earlier e.g.
> - * as part of the _oa_buffer_is_empty checks to minimize -EAGAIN polling cycles
> - * in this situation.
> + * The two separate pointers let us decouple read()s from tail pointer aging.
>   *
> - * To give time for the most recent reports to land before they may be copied to
> - * userspace, the driver operates as if the tail pointer effectively lags behind
> - * the HW tail pointer by 'tail_margin' bytes. The margin in bytes is calculated
> - * based on this constant in nanoseconds, the current OA sampling exponent
> - * and current report size.
> + * The tail pointers are checked and updated at a limited rate within a hrtimer
> + * callback (the same callback that is used for delivering POLLIN events).
>   *
> - * There is also a fallback check while reading to simply skip over reports with
> - * a zeroed report-id.
> + * Initially the tails are marked invalid with a value of 0xffffffff which
> + * indicates that an updated tail pointer is needed.
> + *
> + * Most of the implementation details for this workaround are in
> + * gen7_oa_buffer_check_unlocked() and gen7_append_oa_reports().
> + *
> + * Note for posterity: previously the driver used to define an effective tail
> + * pointer that lagged the real pointer by a 'tail margin' measured in bytes
> + * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
> + * This was flawed considering that the OA unit may also automatically generate
> + * non-periodic reports (such as on context switch) or the OA unit may be
> + * enabled without any periodic sampling.
>   */
>  #define OA_TAIL_MARGIN_NSEC    100000ULL
>
> @@ -308,26 +331,117 @@ struct perf_open_properties {
>         int oa_period_exponent;
>  };
>
> -/* NB: This is either called via fops or the poll check hrtimer (atomic ctx)
> +/**
> + * gen7_oa_buffer_check_unlocked - check for data and update tail ptr state
> + * @dev_priv: i915 device instance
>   *
> - * It's safe to read OA config state here unlocked, assuming that this is only
> - * called while the stream is enabled, while the global OA configuration can't
> - * be modified.
> + * This is either called via fops (for blocking reads in user ctx) or the poll
> + * check hrtimer (atomic ctx) to check the OA buffer tail pointer and whether
> + * there is data available for userspace to read.
>   *
> - * Note: we don't lock around the head/tail reads even though there's the slim
> - * possibility of read() fop errors forcing a re-init of the OA buffer
> - * pointers.  A race here could result in a false positive !empty status which
> - * is acceptable.
> + * This function is central to providing a workaround for the OA unit tail
> + * pointer having a race with respect to what data is visible to the CPU.
> + * It is responsible for reading tail pointers from the hardware and giving
> + * the pointers time to 'age' before they are made available for reading.
> + * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
> + *
> + * Besides returning true when there is data available to read() this function
> + * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
> + * and .aged_tail_idx state used for reading.
> + *
> + * Note: It's safe to read OA config state here unlocked, assuming that this is
> + * only called while the stream is enabled, while the global OA configuration
> + * can't be modified.
> + *
> + * Returns: %true if the OA buffer contains data, else %false
>   */
> -static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv)
> +static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
>  {
>         int report_size = dev_priv->perf.oa.oa_buffer.format_size;
> -       u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
> -       u32 head = dev_priv->perf.oa.oa_buffer.head;
> -       u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
> +       unsigned long flags;
> +       unsigned int aged_idx;
> +       u32 oastatus1;
> +       u32 head, hw_tail, aged_tail, aging_tail;
> +       u64 now;
> +
> +       /* We have to consider the (unlikely) possibility that read() errors
> +        * could result in an OA buffer reset which might reset the head,
> +        * tails[] and aged_tail state.
> +        */
> +       spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
> +
> +       /* NB: The head we observe here might effectively be a little out of
> +        * date (between head and tails[aged_idx].offset) if there is
> +        * currently a read() in progress.
> +        */
> +       head = dev_priv->perf.oa.oa_buffer.head;
> +
> +       aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
> +       aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
> +       aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;
> +
> +       oastatus1 = I915_READ(GEN7_OASTATUS1);
> +       hw_tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
> +
> +       /* The tail pointer increases in 64 byte increments,
> +        * not in report_size steps...
> +        */
> +       hw_tail &= ~(report_size - 1);
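For anyone else following along: this rounds the tail down to a multiple of
report_size, which relies on the report sizes all being powers of two (as the
comment further down notes); e.g. with a 256 byte format a hw_tail of 832
(0x340) becomes 768 (0x300).
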
> +
> +       now = ktime_get_mono_fast_ns();
> +
> +       /* Update the aging tail
> +        *
> +        * We throttle aging tail updates until we have a new tail that
> +        * represents >= one report more data than is already available for
> +        * reading. This ensures there will be enough data for a successful
> +        * read once this new pointer has aged and ensures we will give the new
> +        * pointer time to age.
> +        */
> +       if (aging_tail == 0xffffffff &&
> +           (aged_tail == 0xffffffff ||
Maybe put the 0xffffffff constant into a define.
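Something along these lines maybe (name is just a suggestion):

#define OA_TAIL_PTR_INVALID 0xffffffff

which would also make the aged/aging checks here and in
gen7_append_oa_reports() a bit easier to scan.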

> +            OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
> +               struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
> +               u32 gtt_offset = i915_ggtt_offset(vma);
> +
> +               /* Be paranoid and do a bounds check on the pointer read back
> +                * from hardware, just in case some spurious hardware condition
> +                * could put the tail out of bounds...
> +                */
> +               if (hw_tail >= gtt_offset &&
> +                   hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
> +                       dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
> +                               aging_tail = hw_tail;
> +                       dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
> +               } else {
> +                       DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
> +                                 hw_tail);
> +               }
> +       }
> +
> +       /* Update the aged tail
> +        *
> +        * Flip the tail pointer available for read()s once the aging tail is
> +        * old enough to trust that the corresponding data will be visible to
> +        * the CPU...
> +        */
> +       if (aging_tail != 0xffffffff &&
> +           ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
> +            OA_TAIL_MARGIN_NSEC)) {
> +
No need for the newline.

> +               aged_idx ^= 1;
> +               dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
>
> -       return OA_TAKEN(tail, head) <
> -               dev_priv->perf.oa.tail_margin + report_size;
> +               aged_tail = aging_tail;
> +
> +               /* Mark that we need a new pointer to start aging... */
> +               dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = 0xffffffff;
> +       }
> +
> +       spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
> +
> +       return aged_tail == 0xffffffff ?
> +               false : OA_TAKEN(aged_tail, head) >= report_size;
>  }
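Not an issue with this patch, but for anyone puzzling over the wraparound:
OA_TAKEN() already copes with the tail having wrapped back behind the head
because OA_BUFFER_SIZE is a power of two. A quick userspace check of the
arithmetic (assuming the 16MB size from OABUFFER_SIZE_16M below):

#include <stdio.h>
#include <stdint.h>

#define OA_BUFFER_SIZE  (16 * 1024 * 1024)     /* assumed: OABUFFER_SIZE_16M */
#define OA_TAKEN(tail, head)    (((tail) - (head)) & (OA_BUFFER_SIZE - 1))

int main(void)
{
        /* tail has wrapped past the end of the buffer, head has not yet */
        uint32_t head = OA_BUFFER_SIZE - 64;
        uint32_t tail = 128;

        /* prints taken = 192: 64 bytes before the wrap plus 128 after it */
        printf("taken = %u\n", (unsigned int)OA_TAKEN(tail, head));
        return 0;
}

so the >= report_size comparison here stays correct across the wrap.
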
>
>  /**
> @@ -442,58 +556,50 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>         struct drm_i915_private *dev_priv = stream->dev_priv;
>         int report_size = dev_priv->perf.oa.oa_buffer.format_size;
>         u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
> -       int tail_margin = dev_priv->perf.oa.tail_margin;
>         u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
>         u32 mask = (OA_BUFFER_SIZE - 1);
>         size_t start_offset = *offset;
> -       u32 head, oastatus1, tail;
> +       unsigned long flags;
> +       unsigned int aged_tail_idx;
> +       u32 head, tail;
>         u32 taken;
>         int ret = 0;
>
>         if (WARN_ON(!stream->enabled))
>                 return -EIO;
>
> -       head = dev_priv->perf.oa.oa_buffer.head - gtt_offset;
> +       spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
>
> -       /* An out of bounds or misaligned head pointer implies a driver bug
> -        * since we are in full control of head pointer which should only
> -        * be incremented by multiples of the report size (notably also
> -        * all a power of two).
> -        */
> -       if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size,
> -                     "Inconsistent OA buffer head pointer = %u\n", head))
> -               return -EIO;
> +       head = dev_priv->perf.oa.oa_buffer.head;
> +       aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
> +       tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
>
> -       oastatus1 = I915_READ(GEN7_OASTATUS1);
> -       tail = (oastatus1 & GEN7_OASTATUS1_TAIL_MASK) - gtt_offset;
> +       spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
>
> -       /* The OA unit is expected to wrap the tail pointer according to the OA
> -        * buffer size
> +       /* A tail value of 0xffffffff here means we're still waiting for the
> +        * poll hrtimer callback to give us a pointer
>          */
> -       if (tail > OA_BUFFER_SIZE) {
> -               DRM_ERROR("Inconsistent OA buffer tail pointer = %u: force restart\n",
> -                         tail);
> -               dev_priv->perf.oa.ops.oa_disable(dev_priv);
> -               dev_priv->perf.oa.ops.oa_enable(dev_priv);
> -               return -EIO;
> -       }
> -
> +       if (tail == 0xffffffff)
> +               return -EAGAIN;
>
> -       /* The tail pointer increases in 64 byte increments, not in report_size
> -        * steps...
> +       /* NB: oa_buffer.head/tail include the gtt_offset which we don't want
> +        * while indexing relative to oa_buf_base.
>          */
> -       tail &= ~(report_size - 1);
> +       head -= gtt_offset;
> +       tail -= gtt_offset;
>
> -       /* Move the tail pointer back by the current tail_margin to account for
> -        * the possibility that the latest reports may not have really landed
> -        * in memory yet...
> +       /* An out of bounds or misaligned head or tail pointer implies a driver
> +        * bug since we validate + align the tail pointers we read from the
> +        * hardware and we are in full control of the head pointer which should
> +        * only be incremented by multiples of the report size (notably also
> +        * all a power of two).
>          */
> +       if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
> +                     tail > OA_BUFFER_SIZE || tail % report_size,
> +                     "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
> +                     head, tail))
> +               return -EIO;
>
> -       if (OA_TAKEN(tail, head) < report_size + tail_margin)
> -               return -EAGAIN;
> -
> -       tail -= tail_margin;
> -       tail &= mask;
>
>         for (/* none */;
>              (taken = OA_TAKEN(tail, head));
> @@ -540,6 +646,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>
>
>         if (start_offset != *offset) {
> +               spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
> +
>                 /* We removed the gtt_offset for the copy loop above, indexing
>                  * relative to oa_buf_base so put back here...
>                  */
> @@ -549,6 +657,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>                            ((head & GEN7_OASTATUS2_HEAD_MASK) |
>                             OA_MEM_SELECT_GGTT));
>                 dev_priv->perf.oa.oa_buffer.head = head;
> +
> +               spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
>         }
>
>         return ret;
> @@ -659,14 +769,8 @@ static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
>         if (!dev_priv->perf.oa.periodic)
>                 return -EIO;
>
> -       /* Note: the oa_buffer_is_empty() condition is ok to run unlocked as it
> -        * just performs mmio reads of the OA buffer head + tail pointers and
> -        * it's assumed we're handling some operation that implies the stream
> -        * can't be destroyed until completion (such as a read()) that ensures
> -        * the device + OA buffer can't disappear
> -        */
>         return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
> -                                       !dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv));
> +                                       dev_priv->perf.oa.ops.oa_buffer_check(dev_priv));
>  }
>
>  /**
> @@ -809,6 +913,9 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>  static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
>  {
>         u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
>
>         /* Pre-DevBDW: OABUFFER must be set with counters off,
>          * before OASTATUS1, but after OASTATUS2
> @@ -820,6 +927,12 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
>
>         I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
>
> +       /* Mark that we need updated tail pointers to read from... */
> +       dev_priv->perf.oa.oa_buffer.tails[0].offset = 0xffffffff;
> +       dev_priv->perf.oa.oa_buffer.tails[1].offset = 0xffffffff;
> +
> +       spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
> +
>         /* On Haswell we have to track which OASTATUS1 flags we've
>          * already seen since they can't be cleared while periodic
>          * sampling is enabled.
> @@ -1077,12 +1190,6 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream)
>                 hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
>  }
>
> -static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
> -{
> -       return div_u64(1000000000ULL * (2ULL << exponent),
> -                      dev_priv->perf.oa.timestamp_frequency);
> -}
> -
>  static const struct i915_perf_stream_ops i915_oa_stream_ops = {
>         .destroy = i915_oa_stream_destroy,
>         .enable = i915_oa_stream_enable,
> @@ -1173,20 +1280,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>         dev_priv->perf.oa.metrics_set = props->metrics_set;
>
>         dev_priv->perf.oa.periodic = props->oa_periodic;
> -       if (dev_priv->perf.oa.periodic) {
> -               u32 tail;
> -
> +       if (dev_priv->perf.oa.periodic)
>                 dev_priv->perf.oa.period_exponent = props->oa_period_exponent;
>
> -               /* See comment for OA_TAIL_MARGIN_NSEC for details
> -                * about this tail_margin...
> -                */
> -               tail = div64_u64(OA_TAIL_MARGIN_NSEC,
> -                                oa_exponent_to_ns(dev_priv,
> -                                                  props->oa_period_exponent));
> -               dev_priv->perf.oa.tail_margin = (tail + 1) * format_size;
> -       }
> -
>         if (stream->ctx) {
>                 ret = oa_get_render_ctx_id(stream);
>                 if (ret)
> @@ -1359,7 +1455,7 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
>                 container_of(hrtimer, typeof(*dev_priv),
>                              perf.oa.poll_check_timer);
>
> -       if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) {
> +       if (dev_priv->perf.oa.ops.oa_buffer_check(dev_priv)) {
>                 dev_priv->perf.oa.pollin = true;
>                 wake_up(&dev_priv->perf.oa.poll_wq);
>         }
> @@ -2049,6 +2145,7 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>         INIT_LIST_HEAD(&dev_priv->perf.streams);
>         mutex_init(&dev_priv->perf.lock);
>         spin_lock_init(&dev_priv->perf.hook_lock);
> +       spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
>
>         dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer;
>         dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
> @@ -2056,8 +2153,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>         dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
>         dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
>         dev_priv->perf.oa.ops.read = gen7_oa_read;
> -       dev_priv->perf.oa.ops.oa_buffer_is_empty =
> -               gen7_oa_buffer_is_empty_fop_unlocked;
> +       dev_priv->perf.oa.ops.oa_buffer_check =
> +               gen7_oa_buffer_check_unlocked;
>
>         dev_priv->perf.oa.timestamp_frequency = 12500000;
Time for nuking, or does gen8+ need it?

Otherwise looks good:
Reviewed-by: Matthew Auld <matthew.auld at intel.com>

