[PATCH v8 11/12] drm/i915: Add more Haswell OA metric sets

Robert Bragg robert at sixbynine.org
Tue Nov 1 16:53:29 UTC 2016


On Tue, Nov 1, 2016 at 2:57 PM, Chris Wilson <chris at chris-wilson.co.uk>
wrote:

> On Fri, Oct 28, 2016 at 03:14:29AM +0100, Robert Bragg wrote:
> > This adds 'compute', 'compute extended', 'memory reads', 'memory writes'
> > and 'sampler balance' metric sets for Haswell.
> >
> > The code is auto generated from an XML description of metric sets,
> > currently maintained in gputop, ref:
> >
> >  https://github.com/rib/gputop
> >  > gputop-data/oa-*.xml
> >  > scripts/i915-perf-kernelgen.py
> >
> >  $ make -C gputop-data -f Makefile.xml
> >
> > Signed-off-by: Robert Bragg <robert at sixbynine.org>
> > Reviewed-by: Matthew Auld <matthew.auld at intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_oa_hsw.c | 559 ++++++++++++++++++++++++++++++
> ++++++-
> >  1 file changed, 558 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_oa_hsw.c
> b/drivers/gpu/drm/i915/i915_oa_hsw.c
> > index 6af25cf..4ddf756 100644
> > --- a/drivers/gpu/drm/i915/i915_oa_hsw.c
> > +++ b/drivers/gpu/drm/i915/i915_oa_hsw.c
> > @@ -31,9 +31,14 @@
> >
> >  enum metric_set_id {
> >       METRIC_SET_ID_RENDER_BASIC = 1,
> > +     METRIC_SET_ID_COMPUTE_BASIC,
> > +     METRIC_SET_ID_COMPUTE_EXTENDED,
> > +     METRIC_SET_ID_MEMORY_READS,
> > +     METRIC_SET_ID_MEMORY_WRITES,
> > +     METRIC_SET_ID_SAMPLER_BALANCE,
> >  };
> >
> > -int i915_oa_n_builtin_metric_sets_hsw = 1;
> > +int i915_oa_n_builtin_metric_sets_hsw = 6;
> >
> >  static const struct i915_oa_reg b_counter_config_render_basic[] = {
> >       { _MMIO(0x2724), 0x00800000 },
> > @@ -112,6 +117,298 @@ get_render_basic_mux_config(struct
> drm_i915_private *dev_priv,
> >       return mux_config_render_basic;
> >  }
> >
> > +static const struct i915_oa_reg b_counter_config_compute_basic[] = {
> > +     { _MMIO(0x2710), 0x00000000 },
> > +     { _MMIO(0x2714), 0x00800000 },
> > +     { _MMIO(0x2718), 0xaaaaaaaa },
> > +     { _MMIO(0x271c), 0xaaaaaaaa },
> > +     { _MMIO(0x2720), 0x00000000 },
> > +     { _MMIO(0x2724), 0x00800000 },
> > +     { _MMIO(0x2728), 0xaaaaaaaa },
> > +     { _MMIO(0x272c), 0xaaaaaaaa },
> > +     { _MMIO(0x2740), 0x00000000 },
> > +     { _MMIO(0x2744), 0x00000000 },
> > +     { _MMIO(0x2748), 0x00000000 },
> > +     { _MMIO(0x274c), 0x00000000 },
> > +     { _MMIO(0x2750), 0x00000000 },
> > +     { _MMIO(0x2754), 0x00000000 },
> > +     { _MMIO(0x2758), 0x00000000 },
> > +     { _MMIO(0x275c), 0x00000000 },
> > +     { _MMIO(0x236c), 0x00000000 },
> > +};
> > +
> > +static const struct i915_oa_reg mux_config_compute_basic[] = {
> > +     { _MMIO(0x253a4), 0x00000000 },
> > +     { _MMIO(0x2681c), 0x01f00800 },
> > +     { _MMIO(0x26820), 0x00001000 },
> > +     { _MMIO(0x2781c), 0x01f00800 },
> > +     { _MMIO(0x26520), 0x00000007 },
> > +     { _MMIO(0x265a0), 0x00000007 },
> > +     { _MMIO(0x25380), 0x00000010 },
> > +     { _MMIO(0x2538c), 0x00300000 },
> > +     { _MMIO(0x25384), 0xaa8aaaaa },
> > +     { _MMIO(0x25404), 0xffffffff },
> > +     { _MMIO(0x26800), 0x00004202 },
> > +     { _MMIO(0x26808), 0x00605817 },
> > +     { _MMIO(0x2680c), 0x10001005 },
> > +     { _MMIO(0x26804), 0x00000000 },
> > +     { _MMIO(0x27800), 0x00000102 },
> > +     { _MMIO(0x27808), 0x0c0701e0 },
> > +     { _MMIO(0x2780c), 0x000200a0 },
> > +     { _MMIO(0x27804), 0x00000000 },
> > +     { _MMIO(0x26484), 0x44000000 },
> > +     { _MMIO(0x26704), 0x44000000 },
> > +     { _MMIO(0x26500), 0x00000006 },
> > +     { _MMIO(0x26510), 0x00000001 },
> > +     { _MMIO(0x26504), 0x88000000 },
> > +     { _MMIO(0x26580), 0x00000006 },
> > +     { _MMIO(0x26590), 0x00000020 },
> > +     { _MMIO(0x26584), 0x00000000 },
> > +     { _MMIO(0x26104), 0x55822222 },
> > +     { _MMIO(0x26184), 0xaa866666 },
> > +     { _MMIO(0x25420), 0x08320c83 },
> > +     { _MMIO(0x25424), 0x06820c83 },
> > +     { _MMIO(0x2541c), 0x00000000 },
> > +     { _MMIO(0x25428), 0x00000c03 },
> > +};
> > +
> > +static const struct i915_oa_reg *
> > +get_compute_basic_mux_config(struct drm_i915_private *dev_priv,
> > +                          int *len)
> > +{
> > +     *len = ARRAY_SIZE(mux_config_compute_basic);
> > +     return mux_config_compute_basic;
> > +}
>
> > @@ -140,6 +437,106 @@ int i915_oa_select_metric_set_hsw(struct
> drm_i915_private *dev_priv)
> >                       ARRAY_SIZE(b_counter_config_render_basic);
> >
> >               return 0;
> > +     case METRIC_SET_ID_COMPUTE_BASIC:
> > +             dev_priv->perf.oa.mux_regs =
> > +                     get_compute_basic_mux_config(dev_priv,
> > +
> &dev_priv->perf.oa.mux_regs_len);
> > +             if (!dev_priv->perf.oa.mux_regs) {
> > +                     DRM_DEBUG_DRIVER("No suitable MUX config for
> \"COMPUTE_BASIC\" metric set");
> > +
> > +                     /* EINVAL because *_register_sysfs already checked
> this
> > +                      * and so it wouldn't have been advertised so
> userspace and
> > +                      * so shouldn't have been requested
> > +                      */
> > +                     return -EINVAL;
> > +             }
> > +
> > +             dev_priv->perf.oa.b_counter_regs =
> > +                     b_counter_config_compute_basic;
> > +             dev_priv->perf.oa.b_counter_regs_len =
> > +                     ARRAY_SIZE(b_counter_config_compute_basic);
> > +
> > +             return 0;
>
> >  int
> >  i915_perf_register_sysfs_hsw(struct drm_i915_private *dev_priv)
> >  {
> > @@ -178,9 +685,49 @@ i915_perf_register_sysfs_hsw(struct
> drm_i915_private *dev_priv)
> >               if (ret)
> >                       goto error_render_basic;
> >       }
> > +     if (get_compute_basic_mux_config(dev_priv, &mux_len)) {
>
> Why not use the derived state in dev_priv->perf.oa.mux_regs? Then we
> only expose what is initialised.
>

Although for Haswell none of our metric sets have conditional MUX
configurations, the generated code should already be in shape to only
advertise metric sets applicable to the system (which becomes an issue
for gen8+). This was changed relatively recently in the gen8+ series after
Mark Janes was hitting issues on Skylake in some of his tooling due to Mesa
advertising one of the compute metric sets that wasn't really available on
the system he had, which was only discoverable as a GL error when
attempting to use it.

The perf.oa.mux_regs state only pertains to one current metric set that the
OA unit has been configured with, after calling the generated
i915_oa_select_metric_set_hsw() function in hsw_enable_metric_set(). Until
an OA stream is opened and enabled, perf.oa.mux_regs won't be initialised.

Notably the recent change for gen8+ mentioned above was to have the
_select_metric_set_<gen>() code and the _register_sysfs_<gen>() code both
work in terms of the get_<metric_set>_mux_config() functions since it's
these functions that will check the fiddly SKU-specific details on gen8+ to
select the right MUX config or potentially fail if the metric set isn't
available on the current system. So for gen8+ we can expect
get_compute_basic_mux_config() will fail if the config isn't available, and
the metric set then won't be advertised via sysfs. On Haswell it looks a little redundant
having these get_ functions unconditionally return a pointer to a
corresponding array.

Hope that clarifies,
- Robert



> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/dri-devel/attachments/20161101/3e926cd9/attachment.html>


More information about the dri-devel mailing list