[Intel-gfx] [PATCH 36/36] drm/i915: Support per-context user requests for GPU frequency control

Fri Nov 16 11:14:59 UTC 2018

Quoting Lionel Landwerlin (2018-11-09 19:51:17)
> I think we have some interest in reviving this for the performance query 
> use case.

How are performance queries related?

Regards, Joonas

> Is that on anybody's todo list?
> 
> Thanks,
> 
> -
> Lionel
> 
> On 14/03/2018 09:37, Chris Wilson wrote:
> > Often, we find ourselves facing a workload where the user knows in
> > advance what GPU frequency they require for it to complete in a timely
> > manner, and using past experience they can outperform the HW assisted
> > RPS autotuning. An example might be kodi (HTPC) where they know that
> > video decoding and compositing require a minimum frequency to avoid ever
> > dropping a frame, or conversely know when they are in a powersaving mode
> > and would rather have slower updates than ramp up the GPU frequency and
> > power consumption. Other workloads may defeat the autotuning entirely
> > and need manual control to meet their performance goals, e.g. bursty
> > applications which require low latency.
> >
> > To accommodate the varying needs of different applications, that may be
> > running concurrently, we want a more flexible system than a global limit
> > supplied by sysfs. To this end, we offer the application the option to
> > set their desired frequency bounds on the context itself, and apply those
> > bounds when we execute commands from the application, switching between
> > bounds just as easily as we switch between the clients themselves.
> >
> > The clients can query the range supported by the HW, or at least the
> > range they are restricted to, and then freely select frequencies within
> > that range that they want to run at. (They can select just a single
> > frequency if they so choose.) As this is subject to the global limit
> > supplied by the user in sysfs, and a client can only reduce the range of
> > frequencies they allow the HW to run at, we allow all clients to adjust
> > their request (and not restrict raising the minimum to privileged
> > CAP_SYS_NICE clients).
> >
> > Testcase: igt/gem_ctx_freq
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> > Cc: Praveen Paneri <praveen.paneri at intel.com>
> > Cc: Sagar A Kamble <sagar.a.kamble at intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c                |  16 ++-
> >   drivers/gpu/drm/i915/i915_drv.h                    |   5 +
> >   drivers/gpu/drm/i915/i915_gem_context.c            |  54 +++++++++
> >   drivers/gpu/drm/i915/i915_gem_context.h            |   3 +
> >   drivers/gpu/drm/i915/intel_gt_pm.c                 | 121 ++++++++++++++++---
> >   drivers/gpu/drm/i915/intel_gt_pm.h                 |   4 +
> >   drivers/gpu/drm/i915/intel_guc_submission.c        |  16 ++-
> >   drivers/gpu/drm/i915/intel_lrc.c                   |  15 +++
> >   .../gpu/drm/i915/selftests/i915_mock_selftests.h   |   1 +
> >   drivers/gpu/drm/i915/selftests/intel_gt_pm.c       | 130 +++++++++++++++++++++
> >   include/uapi/drm/i915_drm.h                        |  20 ++++
> >   11 files changed, 368 insertions(+), 17 deletions(-)
> >   create mode 100644 drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> >
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 7c7afdac8c8c..a21b9164ade8 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -2191,6 +2191,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
> >       struct drm_device *dev = &dev_priv->drm;
> >       struct intel_rps *rps = &dev_priv->gt_pm.rps;
> >       struct drm_file *file;
> > +     int n;
> >   
> >       seq_printf(m, "GPU busy? %s [%d requests]\n",
> >                  yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
> > @@ -2198,17 +2199,30 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
> >       seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
> >       seq_printf(m, "Boosts outstanding? %d\n",
> >                  atomic_read(&rps->num_waiters));
> > +     seq_printf(m, "Worker pending? %s\n", yesno(work_busy(&rps->work)));
> >       seq_printf(m, "Frequency requested %d [%d, %d]\n",
> >                  intel_gpu_freq(dev_priv, rps->freq),
> >                  intel_gpu_freq(dev_priv, rps->min),
> >                  intel_gpu_freq(dev_priv, rps->max));
> > -     seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
> > +     seq_printf(m, "  min hard:%d, soft:%d, ctx:%d, user:%d; max user:%d, ctx:%d, soft:%d, hard:%d\n",
> >                  intel_gpu_freq(dev_priv, rps->min_freq_hw),
> >                  intel_gpu_freq(dev_priv, rps->min_freq_soft),
> > +                intel_gpu_freq(dev_priv, rps->min_freq_context),
> >                  intel_gpu_freq(dev_priv, rps->min_freq_user),
> >                  intel_gpu_freq(dev_priv, rps->max_freq_user),
> > +                intel_gpu_freq(dev_priv, rps->max_freq_context),
> >                  intel_gpu_freq(dev_priv, rps->max_freq_soft),
> >                  intel_gpu_freq(dev_priv, rps->max_freq_hw));
> > +     seq_printf(m, "  engines min: [");
> > +     for (n = 0; n < ARRAY_SIZE(rps->min_freq_engine); n++)
> > +             seq_printf(m, "%s%d", n ? ", " : "",
> > +                        intel_gpu_freq(dev_priv, rps->min_freq_engine[n]));
> > +     seq_printf(m, "]\n  engines max: [");
> > +     for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++)
> > +             seq_printf(m, "%s%d", n ? ", " : "",
> > +                        intel_gpu_freq(dev_priv, rps->max_freq_engine[n]));
> > +     seq_printf(m, "]\n");
> > +
> >       seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
> >                  intel_gpu_freq(dev_priv, rps->idle_freq),
> >                  intel_gpu_freq(dev_priv, rps->efficient_freq),
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 82e9a58bd65f..d754d44cfbc2 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -731,6 +731,7 @@ struct intel_rps_ei {
> >   
> >   struct intel_rps {
> >       struct mutex lock;
> > +     spinlock_t engine_lock; /* protects updates to min/max_freq_context */
> >       struct work_struct work;
> >   
> >       bool active;
> > @@ -763,6 +764,10 @@ struct intel_rps {
> >       u8 max_freq_user;       /* Max frequency permitted by the driver */
> >       u8 min_freq_soft;
> >       u8 max_freq_soft;
> > +     u8 min_freq_context;    /* Min frequency permitted by the context */
> > +     u8 max_freq_context;    /* Max frequency permitted by the context */
> > +     u8 min_freq_engine[I915_NUM_ENGINES];
> > +     u8 max_freq_engine[I915_NUM_ENGINES];
> >   
> >       u8 idle_freq;           /* Frequency to request when we are idle */
> >       u8 efficient_freq;      /* AKA RPe. Pre-determined balanced frequency */
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index 65bf92658d92..1d36e2a02479 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -88,8 +88,10 @@
> >   #include <linux/log2.h>
> >   #include <drm/drmP.h>
> >   #include <drm/i915_drm.h>
> > +
> >   #include "i915_drv.h"
> >   #include "i915_trace.h"
> > +#include "intel_gt_pm.h"
> >   
> >   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
> >   
> > @@ -281,6 +283,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
> >       list_add_tail(&ctx->link, &dev_priv->contexts.list);
> >       ctx->i915 = dev_priv;
> >       ctx->priority = I915_PRIORITY_NORMAL;
> > +     ctx->min_freq = dev_priv->gt_pm.rps.min_freq_hw;
> > +     ctx->max_freq = dev_priv->gt_pm.rps.max_freq_hw;
> >   
> >       INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> >       INIT_LIST_HEAD(&ctx->handles_list);
> > @@ -715,6 +719,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> >   {
> >       struct drm_i915_file_private *file_priv = file->driver_priv;
> >       struct drm_i915_gem_context_param *args = data;
> > +     struct drm_i915_private *i915 = to_i915(dev);
> >       struct i915_gem_context *ctx;
> >       int ret = 0;
> >   
> > @@ -747,6 +752,19 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> >       case I915_CONTEXT_PARAM_PRIORITY:
> >               args->value = ctx->priority;
> >               break;
> > +     case I915_CONTEXT_PARAM_FREQUENCY:
> > +             if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
> > +                     ret = -ENODEV;
> > +             } else if (args->size) {
> > +                     ret = -EINVAL;
> > +             } else {
> > +                     u32 min = intel_gpu_freq(i915, ctx->min_freq);
> > +                     u32 max = intel_gpu_freq(i915, ctx->max_freq);
> > +
> > +                     args->value = I915_CONTEXT_SET_FREQUENCY(min, max);
> > +             }
> > +             break;
> > +
> >       default:
> >               ret = -EINVAL;
> >               break;
> > @@ -761,6 +779,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >   {
> >       struct drm_i915_file_private *file_priv = file->driver_priv;
> >       struct drm_i915_gem_context_param *args = data;
> > +     struct drm_i915_private *i915 = to_i915(dev);
> >       struct i915_gem_context *ctx;
> >       int ret;
> >   
> > @@ -821,6 +840,41 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> >                               ctx->priority = priority;
> >               }
> >               break;
> > +     case I915_CONTEXT_PARAM_FREQUENCY:
> > +             if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
> > +                     ret = -ENODEV;
> > +             } else if (args->size) {
> > +                     ret = -EINVAL;
> > +             } else {
> > +                     struct intel_rps *rps = &i915->gt_pm.rps;
> > +                     u32 min, max;
> > +
> > +                     min = I915_CONTEXT_MIN_FREQUENCY(args->value);
> > +                     min = intel_freq_opcode(i915, min);
> > +
> > +                     max = I915_CONTEXT_MAX_FREQUENCY(args->value);
> > +                     max = intel_freq_opcode(i915, max);
> > +
> > +                     /*
> > +                      * As we constrain the frequency request from the
> > +                      * context (application) by the sysadmin imposed limits,
> > +                      * it is reasonable to allow the application to
> > +                      * specify its preferred range within those limits.
> > +                      * That is we do not need to restrict requesting
> > +                      * a higher frequency to privileged (CAP_SYS_NICE)
> > +                      * processes.
> > +                      */
> > +                     if (max < min) {
> > +                             ret = -EINVAL;
> > +                     } else if (min < rps->min_freq_hw ||
> > +                                max > rps->max_freq_hw) {
> > +                             ret = -EINVAL;
> > +                     } else {
> > +                             ctx->min_freq = min;
> > +                             ctx->max_freq = max;
> > +                     }
> > +             }
> > +             break;
> >   
> >       default:
> >               ret = -EINVAL;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> > index 7854262ddfd9..98f7b71a787a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> > @@ -150,6 +150,9 @@ struct i915_gem_context {
> >        */
> >       int priority;
> >   
> > +     u32 min_freq;
> > +     u32 max_freq;
> > +
> >       /** ggtt_offset_bias: placement restriction for context objects */
> >       u32 ggtt_offset_bias;
> >   
> > diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> > index 9705205a26b5..4bbfb4080f8f 100644
> > --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> > @@ -402,10 +402,10 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
> >       GEM_BUG_ON(!rps->active);
> >   
> >       min = clamp_t(int,
> > -                   rps->min_freq_soft,
> > +                   max(rps->min_freq_soft, READ_ONCE(rps->min_freq_context)),
> >                     rps->min_freq_user, rps->max_freq_user);
> >       max = clamp_t(int,
> > -                   rps->max_freq_soft,
> > +                   min(rps->max_freq_soft, READ_ONCE(rps->max_freq_context)),
> >                     min, rps->max_freq_user);
> >       if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
> >               max = rps->boost_freq;
> > @@ -809,6 +809,75 @@ void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
> >       atomic_inc(client ? &client->boosts : &rps->boosts);
> >   }
> >   
> > +static void __rps_update_engine(struct intel_rps *rps,
> > +                             enum intel_engine_id idx,
> > +                             u32 min, u32 max)
> > +{
> > +     unsigned long flags;
> > +     bool update = false;
> > +     u32 old;
> > +     int n;
> > +
> > +     GEM_BUG_ON(min > max);
> > +
> > +     if (rps->min_freq_engine[idx] != min) {
> > +             spin_lock_irqsave(&rps->engine_lock, flags);
> > +
> > +             rps->min_freq_engine[idx] = min;
> > +
> > +             old = rps->min_freq_context;
> > +             rps->min_freq_context = rps->min_freq_engine[0];
> > +             for (n = 1; n < ARRAY_SIZE(rps->min_freq_engine); n++)
> > +                     if (rps->min_freq_engine[n] > rps->min_freq_context)
> > +                             rps->min_freq_context = rps->min_freq_engine[n];
> > +             update |= rps->min_freq_context != old;
> > +
> > +             spin_unlock_irqrestore(&rps->engine_lock, flags);
> > +     }
> > +
> > +     if (rps->max_freq_engine[idx] != max) {
> > +             spin_lock_irqsave(&rps->engine_lock, flags);
> > +
> > +             rps->max_freq_engine[idx] = max;
> > +
> > +             old = rps->max_freq_context;
> > +             rps->max_freq_context = rps->max_freq_engine[0];
> > +             for (n = 1; n < ARRAY_SIZE(rps->max_freq_engine); n++)
> > +                     if (rps->max_freq_engine[n] < rps->max_freq_context)
> > +                             rps->max_freq_context = rps->max_freq_engine[n];
> > +             update |= rps->max_freq_context != old;
> > +
> > +             spin_unlock_irqrestore(&rps->engine_lock, flags);
> > +     }
> > +
> > +     /* Kick the RPS worker to apply the updated constraints, as needed */
> > +     if (update && !atomic_read(&rps->num_waiters)) {
> > +             old = READ_ONCE(rps->freq);
> > +             if ((old < min || old > max))
> > +                     schedule_work(&rps->work);
> > +     }
> > +}
> > +
> > +void intel_rps_update_engine(const struct intel_engine_cs *engine,
> > +                          const struct i915_gem_context *ctx)
> > +{
> > +     struct intel_rps *rps = &engine->i915->gt_pm.rps;
> > +     u32 min, max;
> > +
> > +     if (!HAS_RPS(engine->i915))
> > +             return;
> > +
> > +     if (ctx) {
> > +             min = ctx->min_freq;
> > +             max = ctx->max_freq;
> > +     } else {
> > +             min = rps->min_freq_hw;
> > +             max = rps->max_freq_hw;
> > +     }
> > +
> > +     __rps_update_engine(rps, engine->id, min, max);
> > +}
> > +
> >   static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
> >   {
> >       I915_WRITE(GEN6_RC_CONTROL, 0);
> > @@ -2379,12 +2448,41 @@ void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
> >               gen6_reset_rps_interrupts(dev_priv);
> >   }
> >   
> > +static void intel_rps_init(struct intel_rps *rps)
> > +{
> > +     mutex_init(&rps->lock);
> > +     INIT_WORK(&rps->work, intel_rps_work);
> > +     spin_lock_init(&rps->engine_lock);
> > +}
> > +
> > +static void intel_rps_init__frequencies(struct intel_rps *rps)
> > +{
> > +     int n;
> > +
> > +     rps->max_freq_soft = rps->max_freq_hw;
> > +     rps->min_freq_soft = rps->min_freq_hw;
> > +
> > +     rps->max_freq_context = rps->max_freq_hw;
> > +     rps->min_freq_context = rps->min_freq_hw;
> > +     for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++) {
> > +             rps->max_freq_engine[n] = rps->max_freq_hw;
> > +             rps->min_freq_engine[n] = rps->min_freq_hw;
> > +     }
> > +
> > +     /* Finally allow us to boost to max by default */
> > +     rps->boost_freq = rps->max_freq_hw;
> > +     rps->idle_freq = rps->min_freq_hw;
> > +
> > +     rps->freq = rps->idle_freq;
> > +     rps->min = rps->min_freq_hw;
> > +     rps->max = rps->max_freq_hw;
> > +}
> > +
> >   void intel_gt_pm_init(struct drm_i915_private *dev_priv)
> >   {
> >       struct intel_rps *rps = &dev_priv->gt_pm.rps;
> >   
> > -     mutex_init(&rps->lock);
> > -     INIT_WORK(&rps->work, intel_rps_work);
> > +     intel_rps_init(rps);
> >   
> >       if (HAS_GUC_SCHED(dev_priv))
> >               rps->guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
> > @@ -2449,16 +2547,7 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
> >               }
> >       }
> >   
> > -     rps->max_freq_soft = rps->max_freq_hw;
> > -     rps->min_freq_soft = rps->min_freq_hw;
> > -
> > -     /* Finally allow us to boost to max by default */
> > -     rps->boost_freq = rps->max_freq_hw;
> > -     rps->idle_freq = rps->min_freq_hw;
> > -
> > -     rps->freq = rps->idle_freq;
> > -     rps->min = rps->min_freq_hw;
> > -     rps->max = rps->max_freq_hw;
> > +     intel_rps_init__frequencies(rps);
> >   
> >       if (HAS_LLC(dev_priv))
> >               gen6_update_ring_freq(dev_priv);
> > @@ -2703,3 +2792,7 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
> >   
> >       gen9_reset_guc_interrupts(dev_priv);
> >   }
> > +
> > +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> > +#include "selftests/intel_gt_pm.c"
> > +#endif
> > diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> > index 314912c15126..ef3f27eca529 100644
> > --- a/drivers/gpu/drm/i915/intel_gt_pm.h
> > +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> > @@ -25,7 +25,9 @@
> >   #define __INTEL_GT_PM_H__
> >   
> >   struct drm_i915_private;
> > +struct i915_gem_context;
> >   struct i915_request;
> > +struct intel_engine_cs;
> >   struct intel_rps_client;
> >   
> >   void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
> > @@ -47,6 +49,8 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv);
> >   
> >   void intel_gt_pm_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir);
> >   
> > +void intel_rps_update_engine(const struct intel_engine_cs *engine,
> > +                          const struct i915_gem_context *ctx);
> >   void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
> >   
> >   int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
> > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> > index 8a8ad2fe158d..d8eaae683186 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> > @@ -26,9 +26,12 @@
> >   #include <trace/events/dma_fence.h>
> >   
> >   #include "intel_guc_submission.h"
> > -#include "intel_lrc_reg.h"
> > +
> >   #include "i915_drv.h"
> >   
> > +#include "intel_gt_pm.h"
> > +#include "intel_lrc_reg.h"
> > +
> >   #define GUC_PREEMPT_FINISHED                0x1
> >   #define GUC_PREEMPT_BREADCRUMB_DWORDS       0x8
> >   #define GUC_PREEMPT_BREADCRUMB_BYTES        \
> > @@ -650,6 +653,12 @@ static void guc_submit(struct intel_engine_cs *engine)
> >       }
> >   }
> >   
> > +static void update_rps(struct intel_engine_cs *engine)
> > +{
> > +     intel_rps_update_engine(engine,
> > +                             port_request(engine->execlists.port)->ctx);
> > +}
> > +
> >   static void port_assign(struct execlist_port *port, struct i915_request *rq)
> >   {
> >       GEM_BUG_ON(port_isset(port));
> > @@ -728,6 +737,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
> >       execlists->first = rb;
> >       if (submit) {
> >               port_assign(port, last);
> > +             update_rps(engine);
> >               execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
> >               guc_submit(engine);
> >       }
> > @@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
> >   
> >               rq = port_request(&port[0]);
> >       }
> > -     if (!rq)
> > +     if (!rq) {
> >               execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
> > +             intel_rps_update_engine(engine, NULL);
> > +     }
> >   
> >       if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
> >           intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index 3a69b367e565..518f7b3db857 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -138,6 +138,7 @@
> >   #include "i915_drv.h"
> >   #include "i915_gem_render_state.h"
> >   #include "intel_lrc_reg.h"
> > +#include "intel_gt_pm.h"
> >   #include "intel_mocs.h"
> >   
> >   #define RING_EXECLIST_QFULL         (1 << 0x2)
> > @@ -535,6 +536,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> >       execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
> >   }
> >   
> > +static void update_rps(struct intel_engine_cs *engine)
> > +{
> > +     intel_rps_update_engine(engine,
> > +                             port_request(engine->execlists.port)->ctx);
> > +}
> > +
> >   static void execlists_dequeue(struct intel_engine_cs *engine)
> >   {
> >       struct intel_engine_execlists * const execlists = &engine->execlists;
> > @@ -708,6 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> >       spin_unlock_irq(&engine->timeline->lock);
> >   
> >       if (submit) {
> > +             update_rps(engine);
> >               execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
> >               execlists_submit_ports(engine);
> >       }
> > @@ -982,6 +990,11 @@ static void execlists_submission_tasklet(unsigned long data)
> >                                         engine->name, port->context_id);
> >   
> >                               execlists_port_complete(execlists, port);
> > +
> > +                             /* Switch to the next request/context */
> > +                             rq = port_request(port);
> > +                             intel_rps_update_engine(engine,
> > +                                                     rq ? rq->ctx : NULL);
> >                       } else {
> >                               port_set(port, port_pack(rq, count));
> >                       }
> > @@ -1717,6 +1730,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
> >       __unwind_incomplete_requests(engine);
> >       spin_unlock(&engine->timeline->lock);
> >   
> > +     intel_rps_update_engine(engine, NULL);
> > +
> >       /* Mark all CS interrupts as complete */
> >       execlists->active = 0;
> >   
> > diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> > index 9a48aa441743..85b6e6d020b7 100644
> > --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> > +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> > @@ -14,6 +14,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
> >   selftest(scatterlist, scatterlist_mock_selftests)
> >   selftest(syncmap, i915_syncmap_mock_selftests)
> >   selftest(uncore, intel_uncore_mock_selftests)
> > +selftest(gt_pm, intel_gt_pm_mock_selftests)
> >   selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
> >   selftest(timelines, i915_gem_timeline_mock_selftests)
> >   selftest(requests, i915_request_mock_selftests)
> > diff --git a/drivers/gpu/drm/i915/selftests/intel_gt_pm.c b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> > new file mode 100644
> > index 000000000000..c3871eb9eabb
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
> > @@ -0,0 +1,130 @@
> > +/*
> > + * SPDX-License-Identifier: MIT
> > + *
> > + * Copyright © 2018 Intel Corporation
> > + */
> > +
> > +#include "../i915_selftest.h"
> > +#include "i915_random.h"
> > +
> > +#include "mock_gem_device.h"
> > +
> > +static void mock_rps_init(struct drm_i915_private *i915)
> > +{
> > +     struct intel_rps *rps = &i915->gt_pm.rps;
> > +
> > +     /* Disable the register writes */
> > +     mkwrite_device_info(i915)->gen = 0;
> > +     mkwrite_device_info(i915)->has_rps = true;
> > +
> > +     intel_rps_init(rps);
> > +
> > +     rps->min_freq_hw = 0;
> > +     rps->max_freq_hw = 255;
> > +
> > +     rps->min_freq_user = rps->min_freq_hw;
> > +     rps->max_freq_user = rps->max_freq_hw;
> > +
> > +     intel_rps_init__frequencies(rps);
> > +}
> > +
> > +static void mock_rps_fini(struct drm_i915_private *i915)
> > +{
> > +     struct intel_rps *rps = &i915->gt_pm.rps;
> > +
> > +     cancel_work_sync(&rps->work);
> > +}
> > +
> > +static int igt_rps_engine(void *arg)
> > +{
> > +     struct drm_i915_private *i915 = arg;
> > +     struct intel_rps *rps = &i915->gt_pm.rps;
> > +     I915_RND_STATE(prng);
> > +     int err;
> > +     int i;
> > +
> > +     intel_gt_pm_busy(i915); /* Activate RPS */
> > +
> > +     /*
> > +      * Minimum unit tests for intel_rps_update_engine().
> > +      *
> > +      * Whenever we call intel_rps_update_engine, it will
> > +      * replace the context min/max frequency request for a particular
> > +      * engine and then recompute the global max(min)/min(max) over all
> > +      * engines. In this mockup, we are limited to checking those
> > +      * max(min)/min(max) calculations and then seeing if the rps
> > +      * worker uses those bounds.
> > +      */
> > +
> > +     for (i = 0; i < 256 * 256; i++) {
> > +             u8 freq = prandom_u32_state(&prng);
> > +
> > +             __rps_update_engine(rps, 0, freq, freq);
> > +             if (rps->min_freq_context != freq ||
> > +                 rps->max_freq_context != freq) {
> > +                     pr_err("Context min/max frequencies not restricted to %d, found [%d, %d]\n",
> > +                            freq, rps->min_freq_context, rps->max_freq_context);
> > +                     err = -EINVAL;
> > +                     goto out;
> > +             }
> > +             flush_work(&rps->work);
> > +
> > +             if (rps->freq != freq) {
> > +                     pr_err("Tried to restrict frequency to %d, found %d\n",
> > +                            freq, rps->freq);
> > +                     err = -EINVAL;
> > +                     goto out;
> > +             }
> > +     }
> > +
> > +     __rps_update_engine(rps, 0, rps->min_freq_hw, rps->max_freq_hw);
> > +     if (rps->min_freq_context != rps->min_freq_hw ||
> > +         rps->max_freq_context != rps->max_freq_hw) {
> > +             pr_err("Context frequency not restored to [%d, %d], found [%d, %d]\n",
> > +                    rps->min_freq_hw, rps->min_freq_hw,
> > +                    rps->min_freq_context, rps->max_freq_context);
> > +             err = -EINVAL;
> > +             goto out;
> > +     }
> > +
> > +     for (i = 0; i < I915_NUM_ENGINES; i++)
> > +             __rps_update_engine(rps, i, i, 255 - i);
> > +     i--;
> > +     if (rps->min_freq_context != i) {
> > +             pr_err("Minimum context frequency across all engines not raised to %d, found %d\n", i, rps->min_freq_context);
> > +             err = -EINVAL;
> > +             goto out;
> > +     }
> > +     if (rps->max_freq_context != 255 - i) {
> > +             pr_err("Maxmimum context frequency across all engines not lowered to %d, found %d\n", 255 - i, rps->max_freq_context);
> > +             err = -EINVAL;
> > +             goto out;
> > +     }
> > +
> > +     err = 0;
> > +out:
> > +     intel_gt_pm_idle(i915);
> > +     return err;
> > +}
> > +
> > +int intel_gt_pm_mock_selftests(void)
> > +{
> > +     static const struct i915_subtest tests[] = {
> > +             SUBTEST(igt_rps_engine),
> > +     };
> > +     struct drm_i915_private *i915;
> > +     int err;
> > +
> > +     i915 = mock_gem_device();
> > +     if (!i915)
> > +             return -ENOMEM;
> > +
> > +     mock_rps_init(i915);
> > +
> > +     err = i915_subtests(tests, i915);
> > +
> > +     mock_rps_fini(i915);
> > +     drm_dev_unref(&i915->drm);
> > +
> > +     return err;
> > +}
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 7f5634ce8e88..64c6377df769 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -1456,6 +1456,26 @@ struct drm_i915_gem_context_param {
> >   #define   I915_CONTEXT_MAX_USER_PRIORITY    1023 /* inclusive */
> >   #define   I915_CONTEXT_DEFAULT_PRIORITY             0
> >   #define   I915_CONTEXT_MIN_USER_PRIORITY    -1023 /* inclusive */
> > +
> > +/*
> > + * I915_CONTEXT_PARAM_FREQUENCY:
> > + *
> > + * Request that when this context runs, the GPU is restricted to run
> > + * in this frequency range; but still constrained by the global user
> > + * restriction specified via sysfs.
> > + *
> > + * The minimum / maximum frequencies are specified in MHz. Each context
> > + * starts in the default unrestricted state, where the range is taken from
> > + * the hardware, and so may be queried.
> > + *
> > + * Note the frequency is only changed on a context switch; if the
> > + * context's frequency is updated whilst the context is currently executing
> > + * the request will not take effect until the next time the context is run.
> > + */
> > +#define I915_CONTEXT_PARAM_FREQUENCY 0x7
> > +#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
> > +#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
> > +#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
> >       __u64 value;
> >   };
> >   
> 
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx