[Intel-gfx] [PATCH 8/8] drm/i915: Expose RPCS (SSEU) configuration to userspace

Lionel Landwerlin lionel.g.landwerlin at intel.com
Thu Apr 26 10:22:30 UTC 2018


On 26/04/18 11:00, Joonas Lahtinen wrote:
> Quoting Lionel Landwerlin (2018-04-25 14:45:21)
>> From: Chris Wilson <chris at chris-wilson.co.uk>
>>
>> We want to allow userspace to reconfigure the subslice configuration for
>> its own use case. To do so, we expose a context parameter to allow
>> adjustment of the RPCS register stored within the context image (and
>> currently not accessible via LRI). If the context is adjusted before
>> first use, the adjustment is for "free"; otherwise if the context is
>> active we flush the context off the GPU (stalling all users) and forcing
>> the GPU to save the context to memory where we can modify it and so
>> ensure that the register is reloaded on next execution.
>>
>> The overhead of managing additional EU subslices can be significant,
>> especially in multi-context workloads. Non-GPGPU contexts should
>> preferably disable the subslices it is not using, and others should
>> fine-tune the number to match their workload.
> This hit a dead end last time due to the system wide policy needed to
> avoid two parties fighting over the slice count (and going back and
> forth between two slice counts would counter the benefits received from
> this).
>
> Do we now have a solution for the contention? I don't see code to
> negotiate a global value, just raw setter.
>
> Regards, Joonas

I've tried to come up with some numbers about the cost of the back & 
forth (see igt series).

Other than that, I don't think we can expect the kernel to workaround 
the inefficient use of the hardware by userspace.

>
>> We expose complete control over the RPCS register, allowing
>> configuration of slice/subslice, via masks packed into a u64 for
>> simplicity. For example,
>>
>>          struct drm_i915_gem_context_param arg;
>>          struct drm_i915_gem_context_param_sseu sseu = { .flags = I915_EXEC_RENDER };
>>
>>          memset(&arg, 0, sizeof(arg));
>>          arg.ctx_id = ctx;
>>          arg.param = I915_CONTEXT_PARAM_SSEU;
>>          arg.value = (uintptr_t) &sseu;
>>          if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg) == 0) {
>>                  sseu.packed.subslice_mask = 0;
>>
>>                  drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
>>          }
>>
>> could be used to disable all subslices where supported.
>>
>> v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)
>>
>> v3: Add ability to program this per engine (Chris)
>>
>> v4: Move most get_sseu() into i915_gem_context.c (Lionel)
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> c: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
>> CC: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> CC: Zhipeng Gong <zhipeng.gong at intel.com>
>> CC: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem_context.c | 82 ++++++++++++++++++++++++-
>>   drivers/gpu/drm/i915/intel_lrc.c        | 55 +++++++++++++++++
>>   drivers/gpu/drm/i915/intel_lrc.h        |  4 ++
>>   include/uapi/drm/i915_drm.h             | 28 +++++++++
>>   4 files changed, 168 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>> index bdf050beeb94..b97ddcf47514 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -740,6 +740,26 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>>          return 0;
>>   }
>>   
>> +static struct i915_gem_context_sseu
>> +i915_gem_context_sseu_from_user_sseu(const struct sseu_dev_info *sseu,
>> +                                    const struct drm_i915_gem_context_param_sseu *user_sseu)
>> +{
>> +       struct i915_gem_context_sseu value = {
>> +               .slice_mask = user_sseu->packed.slice_mask == 0 ?
>> +                             sseu->slice_mask :
>> +                             (user_sseu->packed.slice_mask & sseu->slice_mask),
>> +               .subslice_mask = user_sseu->packed.subslice_mask == 0 ?
>> +                                sseu->subslice_mask[0] :
>> +                                (user_sseu->packed.subslice_mask & sseu->subslice_mask[0]),
>> +               .min_eus_per_subslice = min(user_sseu->packed.min_eus_per_subslice,
>> +                                           sseu->max_eus_per_subslice),
>> +               .max_eus_per_subslice = min(user_sseu->packed.max_eus_per_subslice,
>> +                                           sseu->max_eus_per_subslice),
>> +       };
>> +
>> +       return value;
>> +}
>> +
>>   int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>>                                      struct drm_file *file)
>>   {
>> @@ -777,6 +797,37 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>>          case I915_CONTEXT_PARAM_PRIORITY:
>>                  args->value = ctx->sched.priority;
>>                  break;
>> +       case I915_CONTEXT_PARAM_SSEU: {
>> +               struct drm_i915_gem_context_param_sseu param_sseu;
>> +               struct intel_engine_cs *engine;
>> +
>> +               if (copy_from_user(&param_sseu, u64_to_user_ptr(args->value),
>> +                                  sizeof(param_sseu))) {
>> +                       ret = -EFAULT;
>> +                       break;
>> +               }
>> +
>> +               engine = i915_gem_engine_from_flags(to_i915(dev), file,
>> +                                                   param_sseu.flags);
>> +               if (!engine) {
>> +                       ret = -EINVAL;
>> +                       break;
>> +               }
>> +
>> +               param_sseu.packed.slice_mask =
>> +                       ctx->engine[engine->id].sseu.slice_mask;
>> +               param_sseu.packed.subslice_mask =
>> +                       ctx->engine[engine->id].sseu.subslice_mask;
>> +               param_sseu.packed.min_eus_per_subslice =
>> +                       ctx->engine[engine->id].sseu.min_eus_per_subslice;
>> +               param_sseu.packed.max_eus_per_subslice =
>> +                       ctx->engine[engine->id].sseu.max_eus_per_subslice;
>> +
>> +               if (copy_to_user(u64_to_user_ptr(args->value), &param_sseu,
>> +                                sizeof(param_sseu)))
>> +                       ret = -EFAULT;
>> +               break;
>> +       }
>>          default:
>>                  ret = -EINVAL;
>>                  break;
>> @@ -832,7 +883,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>>                  else
>>                          i915_gem_context_clear_bannable(ctx);
>>                  break;
>> -
>>          case I915_CONTEXT_PARAM_PRIORITY:
>>                  {
>>                          s64 priority = args->value;
>> @@ -851,7 +901,37 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>>                                  ctx->sched.priority = priority;
>>                  }
>>                  break;
>> +       case I915_CONTEXT_PARAM_SSEU:
>> +               if (args->size)
>> +                       ret = -EINVAL;
>> +               else if (!HAS_EXECLISTS(ctx->i915))
>> +                       ret = -ENODEV;
>> +               else {
>> +                       struct drm_i915_private *dev_priv = to_i915(dev);
>> +                       struct drm_i915_gem_context_param_sseu user_sseu;
>> +                       struct i915_gem_context_sseu ctx_sseu;
>> +                       struct intel_engine_cs *engine;
>> +
>> +                       if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
>> +                                          sizeof(user_sseu))) {
>> +                               ret = -EFAULT;
>> +                               break;
>> +                       }
>> +
>> +                       engine = i915_gem_engine_from_flags(dev_priv, file,
>> +                                                           user_sseu.flags);
>> +                       if (!engine) {
>> +                               ret = -EINVAL;
>> +                               break;
>> +                       }
>> +
>> +                       ctx_sseu =
>> +                               i915_gem_context_sseu_from_user_sseu(&INTEL_INFO(dev_priv)->sseu,
>> +                                                                    &user_sseu);
>>   
>> +                       ret = intel_lr_context_set_sseu(ctx, engine, &ctx_sseu);
>> +               }
>> +               break;
>>          default:
>>                  ret = -EINVAL;
>>                  break;
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index dca17ef24de5..9caddcb1f234 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2711,6 +2711,61 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
>>          }
>>   }
>>   
>> +int intel_lr_context_set_sseu(struct i915_gem_context *ctx,
>> +                             struct intel_engine_cs *engine,
>> +                             struct i915_gem_context_sseu *sseu)
>> +{
>> +       struct drm_i915_private *dev_priv = ctx->i915;
>> +       struct intel_context *ce;
>> +       enum intel_engine_id id;
>> +       int ret;
>> +
>> +       lockdep_assert_held(&dev_priv->drm.struct_mutex);
>> +
>> +       if (memcmp(sseu, &ctx->engine[engine->id].sseu, sizeof(*sseu)) == 0)
>> +               return 0;
>> +
>> +       /*
>> +        * We can only program this on render ring.
>> +        */
>> +       ce = &ctx->engine[RCS];
>> +
>> +       if (ce->pin_count) { /* Assume that the context is active! */
>> +               ret = i915_gem_switch_to_kernel_context(dev_priv);
>> +               if (ret)
>> +                       return ret;
>> +
>> +               ret = i915_gem_wait_for_idle(dev_priv,
>> +                                            I915_WAIT_INTERRUPTIBLE |
>> +                                            I915_WAIT_LOCKED);
>> +               if (ret)
>> +                       return ret;
>> +       }
>> +
>> +       if (ce->state) {
>> +               void *vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
>> +               u32 *regs;
>> +
>> +               if (IS_ERR(vaddr))
>> +                       return PTR_ERR(vaddr);
>> +
>> +               regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
>> +
>> +               regs[CTX_R_PWR_CLK_STATE + 1] =
>> +                       make_rpcs(&INTEL_INFO(dev_priv)->sseu, sseu);
>> +               i915_gem_object_unpin_map(ce->state->obj);
>> +       }
>> +
>> +       /*
>> +        * Apply the configuration to all engine. Our hardware doesn't
>> +        * currently support different configurations for each engine.
>> +        */
>> +       for_each_engine(engine, dev_priv, id)
>> +               ctx->engine[id].sseu = *sseu;
>> +
>> +       return 0;
>> +}
>> +
>>   #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>>   #include "selftests/intel_lrc.c"
>>   #endif
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
>> index d91d69a17206..4c84266814fa 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>> @@ -112,4 +112,8 @@ intel_lr_context_descriptor(struct i915_gem_context *ctx,
>>          return ctx->engine[engine->id].lrc_desc;
>>   }
>>   
>> +int intel_lr_context_set_sseu(struct i915_gem_context *ctx,
>> +                             struct intel_engine_cs *engine,
>> +                             struct i915_gem_context_sseu *sseu);
>> +
>>   #endif /* _INTEL_LRC_H_ */
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 96eda8176030..75749ce11c03 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -1456,9 +1456,37 @@ struct drm_i915_gem_context_param {
>>   #define   I915_CONTEXT_MAX_USER_PRIORITY       1023 /* inclusive */
>>   #define   I915_CONTEXT_DEFAULT_PRIORITY                0
>>   #define   I915_CONTEXT_MIN_USER_PRIORITY       -1023 /* inclusive */
>> +       /*
>> +        * When using the following param, value should be a pointer to
>> +        * drm_i915_gem_context_param_sseu.
>> +        */
>> +#define I915_CONTEXT_PARAM_SSEU                0x7
>>          __u64 value;
>>   };
>>   
>> +struct drm_i915_gem_context_param_sseu {
>> +       /*
>> +        * Engine to be configured or queried. Same value you would use with
>> +        * drm_i915_gem_execbuffer2.
>> +        */
>> +       __u64 flags;
>> +
>> +       /*
>> +        * Setting slice_mask or subslice_mask to 0 will make the context use
>> +        * masks reported respectively by I915_PARAM_SLICE_MASK or
>> +        * I915_PARAM_SUBSLICE_MASK.
>> +        */
>> +       union {
>> +               struct {
>> +                       __u8 slice_mask;
>> +                       __u8 subslice_mask;
>> +                       __u8 min_eus_per_subslice;
>> +                       __u8 max_eus_per_subslice;
>> +               } packed;
>> +               __u64 value;
>> +       };
>> +};
>> +
>>   enum drm_i915_oa_format {
>>          I915_OA_FORMAT_A13 = 1,     /* HSW only */
>>          I915_OA_FORMAT_A29,         /* HSW only */
>> -- 
>> 2.17.0
>>



More information about the Intel-gfx mailing list