[Intel-gfx] [PATCH v3 1/1] drm/i915/ehl: Add sysfs interface to control class-of-service

Kumar Valsan, Prathap prathap.kumar.valsan at intel.com
Tue Oct 15 07:38:36 UTC 2019


On Mon, Oct 07, 2019 at 09:37:20PM +0100, Chris Wilson wrote:
> Quoting Prathap Kumar Valsan (2019-10-07 17:52:09)
> > To provide shared last-level-cache isolation to CPU workloads running
> > concurrently with GPU workloads, the GPU's allocation of cache lines
> > needs to be restricted to certain ways. The GPU hardware currently
> > supports four class-of-service (CLOS) levels, each with an associated
> > way mask. Each LLC MOCS register has a field to select the CLOS level,
> > so in order to set the GPU globally to a CLOS level, the driver needs
> > to program the entire MOCS table.
> > 
> > Hardware supports reading the supported way-mask configurations for
> > the GPU through a BIOS pcode interface. The sysfs interface consists of
> > two files--llc_clos_modes and llc_clos. The file llc_clos_modes is
> > read-only and lists the available way masks. The file llc_clos is
> > read/write: reading it shows the currently active way mask, and writing
> > a new way mask updates the GPU's active way mask.
> > 
> > Note of Caution: Restricting cache ways using this mechanism presents a
> > larger attack surface for side-channel attacks.
> > 
> > Example usage:
> > > cat /sys/class/drm/card0/llc_clos_modes
> > 0xfff 0xfc0 0xc00 0x800
> > 
> > >cat /sys/class/drm/card0/llc_clos
> > 0xfff
> > 
> > Update to a new CLOS:
> > echo "0x800" > /sys/class/drm/card0/llc_clos
> 
> So the first question is whether this is global on the device or local
> to the GT.

It is global to the device. I have sent an updated patch, which states
this explicitly in the commit message.
>  
> > Signed-off-by: Prathap Kumar Valsan <prathap.kumar.valsan at intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_lrc.c     |   7 +
> >  drivers/gpu/drm/i915/gt/intel_lrc_reg.h |   1 +
> >  drivers/gpu/drm/i915/gt/intel_mocs.c    | 216 +++++++++++++++++++++++-
> >  drivers/gpu/drm/i915/gt/intel_mocs.h    |   6 +-
> >  drivers/gpu/drm/i915/i915_drv.h         |   8 +
> >  drivers/gpu/drm/i915/i915_reg.h         |   1 +
> >  drivers/gpu/drm/i915/i915_sysfs.c       | 100 +++++++++++
> >  7 files changed, 337 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > index 468438fb47af..054051969d00 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> > @@ -2137,6 +2137,13 @@ __execlists_update_reg_state(const struct intel_context *ce,
> >                         intel_sseu_make_rpcs(engine->i915, &ce->sseu);
> >  
> >                 i915_oa_init_reg_state(ce, engine);
> > +               /*
> > +                * Gen11+ wants to support updating the LLC class-of-service
> > +                * via a sysfs interface. CLOS is defined in the MOCS registers
> > +                * and, for Gen11, MOCS is part of the context register state.
> > +                */
> > +               if (IS_GEN(engine->i915, 11))
> > +                       intel_mocs_init_reg_state(ce);
> 
> Do the filtering in intel_mocs_init_reg_state(). The intel_ prefix says
> it should be common to all.
> 
> Not IS_ELKHARTLAKE(), that is implied by the subject?
> 
The Gen11 pcode implementation supports reading the way mask, so I
updated the check to Gen11.
> > +static int
> > +mocs_store_clos(struct i915_request *rq,
> > +               struct intel_context *ce)
> > +{
> > +       struct drm_i915_mocs_table t;
> > +       unsigned int count, active_clos, index;
> > +       u32 offset;
> > +       u32 value;
> > +       u32 *cs;
> > +
> > +       if (!get_mocs_settings(rq->engine->gt, &t))
> > +               return -ENODEV;
> > +
> > +       count = t.n_entries;
> > +       active_clos = rq->i915->clos.active_clos;
> > +       cs = intel_ring_begin(rq, 4 * count);
> > +       if (IS_ERR(cs))
> > +               return PTR_ERR(cs);
> > +
> > +       offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
> > +
> > +       for (index = 0; index < count; index++) {
> > +               value = ce->lrc_reg_state[ctx_mocsN(index)];
> 
> Their context image is volatile at this point. They are pinned and
> active. If you must do a rmw, do it on the GPU. But do we not know the
> full value (as I don't expect it to be nonpriv)? [If doing rmw on the
> GPU, we'd probably have to use a secure batch here to avoid running out
> of ringspace.]
Right. Fixed.
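For context, the read-modify-write being discussed amounts to replacing
the LE_COS field in each MOCS control value. A standalone sketch of that
field update (the 2-bit-field-at-bit-15 layout here is only an assumption
for illustration; the real definition is the patch's LE_COS_MASK):

```c
#include <stdint.h>

/*
 * Illustrative bit layout only: the patch's LE_COS_MASK is the
 * authoritative definition of where the CLOS field lives.
 */
#define LE_COS_SHIFT 15u
#define LE_COS_MASK  (0x3u << LE_COS_SHIFT)

static uint32_t mocs_set_clos(uint32_t mocs, uint32_t clos)
{
	mocs &= ~LE_COS_MASK;                          /* clear old CLOS level */
	mocs |= (clos << LE_COS_SHIFT) & LE_COS_MASK;  /* set the new level */
	return mocs;
}
```

The review point is that this computation must be performed on a value
the GPU itself reads back, not on a snapshot of a live context image.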
> 
> > +               value &= ~LE_COS_MASK;
> > +               value |= FIELD_PREP(LE_COS_MASK, active_clos);
> > +
> > +               *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> > +               *cs++ = offset + ctx_mocsN(index) * sizeof(uint32_t);
> > +               *cs++ = 0;
> > +               *cs++ = value;
> > +       }
> > +
> > +       intel_ring_advance(rq, cs);
> > +
> > +       return 0;
> > +}
> > +
> > +static int modify_context_mocs(struct intel_context *ce)
> > +{
> > +       struct i915_request *rq;
> > +       int err;
> > +
> > +       lockdep_assert_held(&ce->pin_mutex);
> > +
> > +       rq = i915_request_create(ce->engine->kernel_context);
> > +       if (IS_ERR(rq))
> > +               return PTR_ERR(rq);
> > +
> > +       /* Serialise with the remote context */
> > +       err = intel_context_prepare_remote_request(ce, rq);
> > +       if (err == 0)
> > +               err = mocs_store_clos(rq, ce);
> > +
> > +       i915_request_add(rq);
> > +       return err;
> > +}
> > +
> > +static int intel_mocs_configure_context(struct i915_gem_context *ctx)
> > +{
> > +       struct i915_gem_engines_iter it;
> > +       struct intel_context *ce;
> > +       int err = 0;
> > +
> > +       for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
> > +               GEM_BUG_ON(ce == ce->engine->kernel_context);
> > +
> > +               if (ce->engine->class != RENDER_CLASS)
> > +                       continue;
> > +
> > +               err = intel_context_lock_pinned(ce);
> > +               if (err)
> > +                       break;
> > +
> > +               if (intel_context_is_pinned(ce))
> > +                       err = modify_context_mocs(ce);
> > +
> > +               intel_context_unlock_pinned(ce);
> > +               if (err)
> > +                       break;
> > +       }
> > +       i915_gem_context_unlock_engines(ctx);
> > +
> > +       return err;
> > +}
> > +
> > +static int intel_mocs_configure_all_contexts(struct intel_gt *gt)
> 
> So this on the GT,
> 
> > +{
> > +       struct drm_i915_private *i915 = gt->i915;
> > +       struct intel_engine_cs *engine;
> > +       struct i915_gem_context *ctx;
> > +       enum intel_engine_id id;
> > +       int err;
> > +
> > +       lockdep_assert_held(&i915->drm.struct_mutex);
> 
> Wrong lock entirely, you need the context lock for walking the
> i915->gem.contexts.list.
> 
> > +       /*
> > +        * The render engine's MOCS registers are context saved and
> > +        * restored to and from a context image, so for a MOCS update
> > +        * to take effect in existing contexts, their context images
> > +        * must be updated as well.
> > +        */
> > +       list_for_each_entry(ctx, &i915->gem.contexts.list, link) {
> > +               if (ctx == i915->kernel_context)
> > +                       continue;
> > +
> > +               err = intel_mocs_configure_context(ctx);
> 
> but this is global,
> 

Even though on Gen11 global and per-GT may not make much difference, I
made an attempt to move the global parts out of the GT code.
> > +               if (err)
> > +                       return err;
> > +       }
> > +
> > +       /*
> > +        * After updating all other contexts, update render context image of
> > +        * kernel context. Also update the MOCS of non-render engines.
> > +        */
> > +
> > +       for_each_engine(engine, i915, id) {
> 
> and here again you are on the gt.
> 
> > +               struct i915_request *rq;
> > +               struct drm_i915_mocs_table t;
> > +
> > +               rq = i915_request_create(engine->kernel_context);
> > +               if (IS_ERR(rq))
> > +                       return PTR_ERR(rq);
> > +
> > +               get_mocs_settings(rq->engine->gt, &t);
> > +               err = emit_mocs_control_table(rq, &t);
> > +               if (err) {
> > +                       i915_request_skip(rq, err);
> > +                       i915_request_add(rq);
> > +                       return err;
> > +               }
> > +
> > +               i915_request_add(rq);
> > +       }
> > +
> > +       return 0;
> > +}
> 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
> > index 2ae816b7ca19..65566457408e 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_mocs.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
> > @@ -49,13 +49,17 @@
> >   * context handling keep the MOCS in step.
> >   */
> >  
> > -struct i915_request;
> >  struct intel_engine_cs;
> > +struct intel_context;
> > +struct i915_request;
> >  struct intel_gt;
> 
> Our OCD is to keep these in alphabetical order.
Fixed.

Thank you,
Prathap
> -Chris

