[Intel-gfx] [PATCH v8 02/12] drm/i915: Add i915 perf infrastructure

sourab gupta sourab.gupta at intel.com
Mon Nov 7 08:40:53 UTC 2016


On Fri, 2016-11-04 at 06:19 -0700, Robert Bragg wrote:
> 
> 
> On Fri, Nov 4, 2016 at 8:59 AM, sourab gupta <sourab.gupta at intel.com>
> wrote:
>         On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote:
>         > Adds base i915 perf infrastructure for Gen performance
>         metrics.
>         >
>         > This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an
>         array of uint64
>         > properties to configure a stream of metrics and returns a
>         new fd usable
>         > with standard VFS system calls including read() to read
>         typed and sized
>         > records; ioctl() to enable or disable capture and poll() to
>         wait for
>         > data.
>         >
>         > A stream is opened something like:
>         >
>         >   uint64_t properties[] = {
>         >       /* Single context sampling */
>         >       DRM_I915_PERF_PROP_CTX_HANDLE,        ctx_handle,
>         >
>         >       /* Include OA reports in samples */
>         >       DRM_I915_PERF_PROP_SAMPLE_OA,         true,
>         >
>         >       /* OA unit configuration */
>         >       DRM_I915_PERF_PROP_OA_METRICS_SET,    metrics_set_id,
>         >       DRM_I915_PERF_PROP_OA_FORMAT,         report_format,
>         >       DRM_I915_PERF_PROP_OA_EXPONENT,       period_exponent,
>         >    };
>         >    struct drm_i915_perf_open_param parm = {
>         >       .flags = I915_PERF_FLAG_FD_CLOEXEC |
>         >                I915_PERF_FLAG_FD_NONBLOCK |
>         >                I915_PERF_FLAG_DISABLED,
>         >       .properties_ptr = (uint64_t)properties,
>         >       .num_properties = sizeof(properties) / 16,
>         >    };
>         >    int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN,
>         &param);
>         >
>         > Records read all start with a common { type, size } header
>         with
>         > DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample
>         records
>         > contain an extensible number of fields and it's the
>         > DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening
>         that
>         > determine what's included in every sample.
>         >
>         > No specific streams are supported yet so any attempt to open
>         a stream
>         > will return an error.
>         >
>         > v2:
>         >     use i915_gem_context_get() - Chris Wilson
>         > v3:
>         >     update read() interface to avoid passing state struct -
>         Chris Wilson
>         >     fix some rebase fallout, with i915-perf init/deinit
>         > v4:
>         >     s/DRM_IORW/DRM_IOW/ - Emil Velikov
>         >
>         > Signed-off-by: Robert Bragg <robert at sixbynine.org>
>         > ---
>         >  drivers/gpu/drm/i915/Makefile    |   3 +
>         >  drivers/gpu/drm/i915/i915_drv.c  |   4 +
>         >  drivers/gpu/drm/i915/i915_drv.h  |  91 ++++++++
>         >  drivers/gpu/drm/i915/i915_perf.c | 443
>         +++++++++++++++++++++++++++++++++++++++
>         >  include/uapi/drm/i915_drm.h      |  67 ++++++
>         >  5 files changed, 608 insertions(+)
>         >  create mode 100644 drivers/gpu/drm/i915/i915_perf.c
>         >
>         > diff --git a/drivers/gpu/drm/i915/Makefile
>         b/drivers/gpu/drm/i915/Makefile
>         > index 6123400..8d4e25f 100644
>         > --- a/drivers/gpu/drm/i915/Makefile
>         > +++ b/drivers/gpu/drm/i915/Makefile
>         > @@ -113,6 +113,9 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) +=
>         i915_gpu_error.o
>         >  # virtual gpu code
>         >  i915-y += i915_vgpu.o
>         >
>         > +# perf code
>         > +i915-y += i915_perf.o
>         > +
>         >  ifeq ($(CONFIG_DRM_I915_GVT),y)
>         >  i915-y += intel_gvt.o
>         >  include $(src)/gvt/Makefile
>         > diff --git a/drivers/gpu/drm/i915/i915_drv.c
>         b/drivers/gpu/drm/i915/i915_drv.c
>         > index af3559d..685c96e 100644
>         > --- a/drivers/gpu/drm/i915/i915_drv.c
>         > +++ b/drivers/gpu/drm/i915/i915_drv.c
>         > @@ -836,6 +836,8 @@ static int i915_driver_init_early(struct
>         drm_i915_private *dev_priv,
>         >
>         >       intel_detect_preproduction_hw(dev_priv);
>         >
>         > +     i915_perf_init(dev_priv);
>         > +
>         >       return 0;
>         >
>         >  err_workqueues:
>         > @@ -849,6 +851,7 @@ static int i915_driver_init_early(struct
>         drm_i915_private *dev_priv,
>         >   */
>         >  static void i915_driver_cleanup_early(struct
>         drm_i915_private *dev_priv)
>         >  {
>         > +     i915_perf_fini(dev_priv);
>         >       i915_gem_load_cleanup(&dev_priv->drm);
>         >       i915_workqueues_cleanup(dev_priv);
>         >  }
>         > @@ -2556,6 +2559,7 @@ static const struct drm_ioctl_desc
>         i915_ioctls[] = {
>         >       DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR,
>         i915_gem_userptr_ioctl, DRM_RENDER_ALLOW),
>         >       DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM,
>         i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW),
>         >       DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM,
>         i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW),
>         > +     DRM_IOCTL_DEF_DRV(I915_PERF_OPEN,
>         i915_perf_open_ioctl, DRM_RENDER_ALLOW),
>         >  };
>         >
>         >  static struct drm_driver driver = {
>         > diff --git a/drivers/gpu/drm/i915/i915_drv.h
>         b/drivers/gpu/drm/i915/i915_drv.h
>         > index 5a260db..7a65c0b 100644
>         > --- a/drivers/gpu/drm/i915/i915_drv.h
>         > +++ b/drivers/gpu/drm/i915/i915_drv.h
>         > @@ -1767,6 +1767,84 @@ struct intel_wm_config {
>         >       bool sprites_scaled;
>         >  };
>         >
>         > +struct i915_perf_stream;
>         > +
>         > +struct i915_perf_stream_ops {
>         > +     /* Enables the collection of HW samples, either in
>         response to
>         > +      * I915_PERF_IOCTL_ENABLE or implicitly called when
>         stream is
>         > +      * opened without I915_PERF_FLAG_DISABLED.
>         > +      */
>         > +     void (*enable)(struct i915_perf_stream *stream);
>         > +
>         > +     /* Disables the collection of HW samples, either in
>         response to
>         > +      * I915_PERF_IOCTL_DISABLE or implicitly called before
>         > +      * destroying the stream.
>         > +      */
>         > +     void (*disable)(struct i915_perf_stream *stream);
>         > +
>         > +     /* Return: true if any i915 perf records are ready to
>         read()
>         > +      * for this stream.
>         > +      */
>         > +     bool (*can_read)(struct i915_perf_stream *stream);
>         > +
>         > +     /* Call poll_wait, passing a wait queue that will be
>         woken
>         > +      * once there is something ready to read() for the
>         stream
>         > +      */
>         > +     void (*poll_wait)(struct i915_perf_stream *stream,
>         > +                       struct file *file,
>         > +                       poll_table *wait);
>         > +
>         > +     /* For handling a blocking read, wait until there is
>         something
>         > +      * to ready to read() for the stream. E.g. wait on the
>         same
>         > +      * wait queue that would be passed to poll_wait()
>         until
>         > +      * ->can_read() returns true (if its safe to call
>         ->can_read()
>         > +      * without the i915 perf lock held).
>         > +      */
>         > +     int (*wait_unlocked)(struct i915_perf_stream *stream);
>         > +
>         > +     /* read - Copy buffered metrics as records to
>         userspace
>         > +      * @buf: the userspace, destination buffer
>         > +      * @count: the number of bytes to copy, requested by
>         userspace
>         > +      * @offset: zero at the start of the read, updated as
>         the read
>         > +      *          proceeds, it represents how many bytes
>         have been
>         > +      *          copied so far and the buffer offset for
>         copying the
>         > +      *          next record.
>         > +      *
>         > +      * Copy as many buffered i915 perf samples and records
>         for
>         > +      * this stream to userspace as will fit in the given
>         buffer.
>         > +      *
>         > +      * Only write complete records; returning -ENOSPC if
>         there
>         > +      * isn't room for a complete record.
>         > +      *
>         > +      * Return any error condition that results in a short
>         read
>         > +      * such as -ENOSPC or -EFAULT, even though these may
>         be
>         > +      * squashed before returning to userspace.
>         > +      */
>         > +     int (*read)(struct i915_perf_stream *stream,
>         > +                 char __user *buf,
>         > +                 size_t count,
>         > +                 size_t *offset);
>         > +
>         > +     /* Cleanup any stream specific resources.
>         > +      *
>         > +      * The stream will always be disabled before this is
>         called.
>         > +      */
>         > +     void (*destroy)(struct i915_perf_stream *stream);
>         > +};
>         > +
>         > +struct i915_perf_stream {
>         > +     struct drm_i915_private *dev_priv;
>         > +
>         > +     struct list_head link;
>         > +
>         > +     u32 sample_flags;
>         > +
>         > +     struct i915_gem_context *ctx;
>         > +     bool enabled;
>         > +
>         > +     struct i915_perf_stream_ops *ops;
>         > +};
>         > +
>         >  struct drm_i915_private {
>         >       struct drm_device drm;
>         >
>         > @@ -2069,6 +2147,12 @@ struct drm_i915_private {
>         >
>         >       struct i915_runtime_pm pm;
>         >
>         > +     struct {
>         > +             bool initialized;
>         > +             struct mutex lock;
>         > +             struct list_head streams;
>         > +     } perf;
>         > +
>         >       /* Abstract the submission mechanism (legacy
>         ringbuffer or execlists) away */
>         >       struct {
>         >               void (*resume)(struct drm_i915_private *);
>         > @@ -3482,6 +3566,9 @@ int
>         i915_gem_context_setparam_ioctl(struct drm_device *dev, void
>         *data,
>         >  int i915_gem_context_reset_stats_ioctl(struct drm_device
>         *dev, void *data,
>         >                                      struct drm_file *file);
>         >
>         > +int i915_perf_open_ioctl(struct drm_device *dev, void
>         *data,
>         > +                      struct drm_file *file);
>         > +
>         >  /* i915_gem_evict.c */
>         >  int __must_check i915_gem_evict_something(struct
>         i915_address_space *vm,
>         >                                         u64 min_size, u64
>         alignment,
>         > @@ -3607,6 +3694,10 @@ int intel_engine_cmd_parser(struct
>         intel_engine_cs *engine,
>         >                           u32 batch_len,
>         >                           bool is_master);
>         >
>         > +/* i915_perf.c */
>         > +extern void i915_perf_init(struct drm_i915_private
>         *dev_priv);
>         > +extern void i915_perf_fini(struct drm_i915_private
>         *dev_priv);
>         > +
>         >  /* i915_suspend.c */
>         >  extern int i915_save_state(struct drm_device *dev);
>         >  extern int i915_restore_state(struct drm_device *dev);
>         > diff --git a/drivers/gpu/drm/i915/i915_perf.c
>         b/drivers/gpu/drm/i915/i915_perf.c
>         > new file mode 100644
>         > index 0000000..c45cf92
>         > --- /dev/null
>         > +++ b/drivers/gpu/drm/i915/i915_perf.c
>         > @@ -0,0 +1,443 @@
>         > +/*
>         > + * Copyright © 2015-2016 Intel Corporation
>         > + *
>         > + * Permission is hereby granted, free of charge, to any
>         person obtaining a
>         > + * copy of this software and associated documentation files
>         (the "Software"),
>         > + * to deal in the Software without restriction, including
>         without limitation
>         > + * the rights to use, copy, modify, merge, publish,
>         distribute, sublicense,
>         > + * and/or sell copies of the Software, and to permit
>         persons to whom the
>         > + * Software is furnished to do so, subject to the following
>         conditions:
>         > + *
>         > + * The above copyright notice and this permission notice
>         (including the next
>         > + * paragraph) shall be included in all copies or
>         substantial portions of the
>         > + * Software.
>         > + *
>         > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
>         ANY KIND, EXPRESS OR
>         > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>         MERCHANTABILITY,
>         > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
>         IN NO EVENT SHALL
>         > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
>         CLAIM, DAMAGES OR OTHER
>         > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>         OTHERWISE, ARISING
>         > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>         USE OR OTHER DEALINGS
>         > + * IN THE SOFTWARE.
>         > + *
>         > + * Authors:
>         > + *   Robert Bragg <robert at sixbynine.org>
>         > + */
>         > +
>         > +#include <linux/anon_inodes.h>
>         > +
>         > +#include "i915_drv.h"
>         > +
>         > +struct perf_open_properties {
>         > +     u32 sample_flags;
>         > +
>         > +     u64 single_context:1;
>         > +     u64 ctx_handle;
>         > +};
>         > +
>         > +static ssize_t i915_perf_read_locked(struct
>         i915_perf_stream *stream,
>         > +                                  struct file *file,
>         > +                                  char __user *buf,
>         > +                                  size_t count,
>         > +                                  loff_t *ppos)
>         > +{
>         > +     /* Note we keep the offset (aka bytes read) separate
>         from any
>         > +      * error status so that the final check for whether we
>         return
>         > +      * the bytes read with a higher precedence than any
>         error (see
>         > +      * comment below) doesn't need to be
>         handled/duplicated in
>         > +      * stream->ops->read() implementations.
>         > +      */
>         > +     size_t offset = 0;
>         > +     int ret = stream->ops->read(stream, buf, count,
>         &offset);
>         > +
>         > +     /* If we've successfully copied any data then
>         reporting that
>         > +      * takes precedence over any internal error status, so
>         the
>         > +      * data isn't lost.
>         > +      *
>         > +      * For example ret will be -ENOSPC whenever there is
>         more
>         > +      * buffered data than can be copied to userspace, but
>         that's
>         > +      * only interesting if we weren't able to copy some
>         data
>         > +      * because it implies the userspace buffer is too
>         small to
>         > +      * receive a single record (and we never split
>         records).
>         > +      *
>         > +      * Another case with ret == -EFAULT is more of a grey
>         area
>         > +      * since it would seem like bad form for userspace to
>         ask us
>         > +      * to overrun its buffer, but the user knows best:
>         > +      *
>         > +      *
>          http://yarchive.net/comp/linux/partial_reads_writes.html
>         > +      */
>         > +     return offset ?: (ret ?: -EAGAIN);
>         > +}
>         > +
>         > +static ssize_t i915_perf_read(struct file *file,
>         > +                           char __user *buf,
>         > +                           size_t count,
>         > +                           loff_t *ppos)
>         > +{
>         > +     struct i915_perf_stream *stream = file->private_data;
>         > +     struct drm_i915_private *dev_priv = stream->dev_priv;
>         > +     ssize_t ret;
>         > +
>         > +     if (!(file->f_flags & O_NONBLOCK)) {
>         > +             /* Allow false positives from
>         stream->ops->wait_unlocked.
>         > +              */
>         > +             do {
>         > +                     ret =
>         stream->ops->wait_unlocked(stream);
>         > +                     if (ret)
>         > +                             return ret;
>         > +
>         > +                     mutex_lock(&dev_priv->perf.lock);
>         
>         
>         Should interruptible version be used here, to allow for reads
>         to be
>         interrupted?
> 
> 
> Now that we don't have the context pin hook on haswell we
> could /almost/ get away without this lock except for its use to
> synchronize i915_perf_register with i915_perf_open_ioctl.
> 
> 
> Most of the i915-perf state access is synchronized as a result of
> being fops driven, so this perf.lock was added to deal with a few
> entrypoints outside of fops such as the contect pinning hook we used
> to have (though we avoid it in the hrtimer callback).
> 
> 
> 
> Although the recent change to remove the pin hook has made the lock
> look a bit redundant for now, I think I'd prefer to leave the locks as
> they are to avoid the churn with the gen8+ patches where we do have
> some other entrypoints into i915-perf outside of the fops.
> 
> 
> Given that though, there's currently not really much argument either
> way for them being interruptible. The expectation I have atm is that
> there shouldn't be anything running async within i915-perf outside of
> fops that's expected to be long running. We will probably also want to
> consider the risk of bouncing lots of reads, starving userspace and
> increasing the risk of a buffer overflow if this is interruptible.
> 
Well, that makes sense. I'm okay with rest of the interfaces exposed
here (Have been using them for my work too). So,
Reviewed-by: Sourab Gupta <sourab.gupta at intel.com>
> 
> - Robert
> 




More information about the Intel-gfx mailing list