[Intel-gfx] [PATCH 8/8] drm/i915: Expose RPCS (SSEU) configuration to userspace
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Thu Apr 26 10:22:30 UTC 2018
On 26/04/18 11:00, Joonas Lahtinen wrote:
> Quoting Lionel Landwerlin (2018-04-25 14:45:21)
>> From: Chris Wilson <chris at chris-wilson.co.uk>
>>
>> We want to allow userspace to reconfigure the subslice configuration for
>> its own use case. To do so, we expose a context parameter to allow
>> adjustment of the RPCS register stored within the context image (and
>> currently not accessible via LRI). If the context is adjusted before
>> first use, the adjustment is for "free"; otherwise if the context is
>> active we flush the context off the GPU (stalling all users) and forcing
>> the GPU to save the context to memory where we can modify it and so
>> ensure that the register is reloaded on next execution.
>>
>> The overhead of managing additional EU subslices can be significant,
>> especially in multi-context workloads. Non-GPGPU contexts should
>> preferably disable the subslices it is not using, and others should
>> fine-tune the number to match their workload.
> This hit a dead end last time due to the system wide policy needed to
> avoid two parties fighting over the slice count (and going back and
> forth between two slice counts would counter the benefits received from
> this).
>
> Do we now have a solution for the contention? I don't see code to
> negotiate a global value, just raw setter.
>
> Regards, Joonas
I've tried to come up with some numbers about the cost of the back &
forth (see igt series).
Other than that, I don't think we can expect the kernel to workaround
the inefficient use of the hardware by userspace.
>
>> We expose complete control over the RPCS register, allowing
>> configuration of slice/subslice, via masks packed into a u64 for
>> simplicity. For example,
>>
>> struct drm_i915_gem_context_param arg;
>> struct drm_i915_gem_context_param_sseu sseu = { .flags = I915_EXEC_RENDER };
>>
>> memset(&arg, 0, sizeof(arg));
>> arg.ctx_id = ctx;
>> arg.param = I915_CONTEXT_PARAM_SSEU;
>> arg.value = (uintptr_t) &sseu;
>> if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg) == 0) {
>> sseu.packed.subslice_mask = 0;
>>
>> drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
>> }
>>
>> could be used to disable all subslices where supported.
>>
>> v2: Fix offset of CTX_R_PWR_CLK_STATE in intel_lr_context_set_sseu() (Lionel)
>>
>> v3: Add ability to program this per engine (Chris)
>>
>> v4: Move most get_sseu() into i915_gem_context.c (Lionel)
>>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100899
>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> c: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
>> CC: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> CC: Zhipeng Gong <zhipeng.gong at intel.com>
>> CC: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_gem_context.c | 82 ++++++++++++++++++++++++-
>> drivers/gpu/drm/i915/intel_lrc.c | 55 +++++++++++++++++
>> drivers/gpu/drm/i915/intel_lrc.h | 4 ++
>> include/uapi/drm/i915_drm.h | 28 +++++++++
>> 4 files changed, 168 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>> index bdf050beeb94..b97ddcf47514 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -740,6 +740,26 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>> return 0;
>> }
>>
>> +static struct i915_gem_context_sseu
>> +i915_gem_context_sseu_from_user_sseu(const struct sseu_dev_info *sseu,
>> + const struct drm_i915_gem_context_param_sseu *user_sseu)
>> +{
>> + struct i915_gem_context_sseu value = {
>> + .slice_mask = user_sseu->packed.slice_mask == 0 ?
>> + sseu->slice_mask :
>> + (user_sseu->packed.slice_mask & sseu->slice_mask),
>> + .subslice_mask = user_sseu->packed.subslice_mask == 0 ?
>> + sseu->subslice_mask[0] :
>> + (user_sseu->packed.subslice_mask & sseu->subslice_mask[0]),
>> + .min_eus_per_subslice = min(user_sseu->packed.min_eus_per_subslice,
>> + sseu->max_eus_per_subslice),
>> + .max_eus_per_subslice = min(user_sseu->packed.max_eus_per_subslice,
>> + sseu->max_eus_per_subslice),
>> + };
>> +
>> + return value;
>> +}
>> +
>> int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>> struct drm_file *file)
>> {
>> @@ -777,6 +797,37 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>> case I915_CONTEXT_PARAM_PRIORITY:
>> args->value = ctx->sched.priority;
>> break;
>> + case I915_CONTEXT_PARAM_SSEU: {
>> + struct drm_i915_gem_context_param_sseu param_sseu;
>> + struct intel_engine_cs *engine;
>> +
>> + if (copy_from_user(¶m_sseu, u64_to_user_ptr(args->value),
>> + sizeof(param_sseu))) {
>> + ret = -EFAULT;
>> + break;
>> + }
>> +
>> + engine = i915_gem_engine_from_flags(to_i915(dev), file,
>> + param_sseu.flags);
>> + if (!engine) {
>> + ret = -EINVAL;
>> + break;
>> + }
>> +
>> + param_sseu.packed.slice_mask =
>> + ctx->engine[engine->id].sseu.slice_mask;
>> + param_sseu.packed.subslice_mask =
>> + ctx->engine[engine->id].sseu.subslice_mask;
>> + param_sseu.packed.min_eus_per_subslice =
>> + ctx->engine[engine->id].sseu.min_eus_per_subslice;
>> + param_sseu.packed.max_eus_per_subslice =
>> + ctx->engine[engine->id].sseu.max_eus_per_subslice;
>> +
>> + if (copy_to_user(u64_to_user_ptr(args->value), ¶m_sseu,
>> + sizeof(param_sseu)))
>> + ret = -EFAULT;
>> + break;
>> + }
>> default:
>> ret = -EINVAL;
>> break;
>> @@ -832,7 +883,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>> else
>> i915_gem_context_clear_bannable(ctx);
>> break;
>> -
>> case I915_CONTEXT_PARAM_PRIORITY:
>> {
>> s64 priority = args->value;
>> @@ -851,7 +901,37 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
>> ctx->sched.priority = priority;
>> }
>> break;
>> + case I915_CONTEXT_PARAM_SSEU:
>> + if (args->size)
>> + ret = -EINVAL;
>> + else if (!HAS_EXECLISTS(ctx->i915))
>> + ret = -ENODEV;
>> + else {
>> + struct drm_i915_private *dev_priv = to_i915(dev);
>> + struct drm_i915_gem_context_param_sseu user_sseu;
>> + struct i915_gem_context_sseu ctx_sseu;
>> + struct intel_engine_cs *engine;
>> +
>> + if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
>> + sizeof(user_sseu))) {
>> + ret = -EFAULT;
>> + break;
>> + }
>> +
>> + engine = i915_gem_engine_from_flags(dev_priv, file,
>> + user_sseu.flags);
>> + if (!engine) {
>> + ret = -EINVAL;
>> + break;
>> + }
>> +
>> + ctx_sseu =
>> + i915_gem_context_sseu_from_user_sseu(&INTEL_INFO(dev_priv)->sseu,
>> + &user_sseu);
>>
>> + ret = intel_lr_context_set_sseu(ctx, engine, &ctx_sseu);
>> + }
>> + break;
>> default:
>> ret = -EINVAL;
>> break;
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index dca17ef24de5..9caddcb1f234 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2711,6 +2711,61 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
>> }
>> }
>>
>> +int intel_lr_context_set_sseu(struct i915_gem_context *ctx,
>> + struct intel_engine_cs *engine,
>> + struct i915_gem_context_sseu *sseu)
>> +{
>> + struct drm_i915_private *dev_priv = ctx->i915;
>> + struct intel_context *ce;
>> + enum intel_engine_id id;
>> + int ret;
>> +
>> + lockdep_assert_held(&dev_priv->drm.struct_mutex);
>> +
>> + if (memcmp(sseu, &ctx->engine[engine->id].sseu, sizeof(*sseu)) == 0)
>> + return 0;
>> +
>> + /*
>> + * We can only program this on render ring.
>> + */
>> + ce = &ctx->engine[RCS];
>> +
>> + if (ce->pin_count) { /* Assume that the context is active! */
>> + ret = i915_gem_switch_to_kernel_context(dev_priv);
>> + if (ret)
>> + return ret;
>> +
>> + ret = i915_gem_wait_for_idle(dev_priv,
>> + I915_WAIT_INTERRUPTIBLE |
>> + I915_WAIT_LOCKED);
>> + if (ret)
>> + return ret;
>> + }
>> +
>> + if (ce->state) {
>> + void *vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
>> + u32 *regs;
>> +
>> + if (IS_ERR(vaddr))
>> + return PTR_ERR(vaddr);
>> +
>> + regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
>> +
>> + regs[CTX_R_PWR_CLK_STATE + 1] =
>> + make_rpcs(&INTEL_INFO(dev_priv)->sseu, sseu);
>> + i915_gem_object_unpin_map(ce->state->obj);
>> + }
>> +
>> + /*
>> + * Apply the configuration to all engine. Our hardware doesn't
>> + * currently support different configurations for each engine.
>> + */
>> + for_each_engine(engine, dev_priv, id)
>> + ctx->engine[id].sseu = *sseu;
>> +
>> + return 0;
>> +}
>> +
>> #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>> #include "selftests/intel_lrc.c"
>> #endif
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
>> index d91d69a17206..4c84266814fa 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.h
>> +++ b/drivers/gpu/drm/i915/intel_lrc.h
>> @@ -112,4 +112,8 @@ intel_lr_context_descriptor(struct i915_gem_context *ctx,
>> return ctx->engine[engine->id].lrc_desc;
>> }
>>
>> +int intel_lr_context_set_sseu(struct i915_gem_context *ctx,
>> + struct intel_engine_cs *engine,
>> + struct i915_gem_context_sseu *sseu);
>> +
>> #endif /* _INTEL_LRC_H_ */
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 96eda8176030..75749ce11c03 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -1456,9 +1456,37 @@ struct drm_i915_gem_context_param {
>> #define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
>> #define I915_CONTEXT_DEFAULT_PRIORITY 0
>> #define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
>> + /*
>> + * When using the following param, value should be a pointer to
>> + * drm_i915_gem_context_param_sseu.
>> + */
>> +#define I915_CONTEXT_PARAM_SSEU 0x7
>> __u64 value;
>> };
>>
>> +struct drm_i915_gem_context_param_sseu {
>> + /*
>> + * Engine to be configured or queried. Same value you would use with
>> + * drm_i915_gem_execbuffer2.
>> + */
>> + __u64 flags;
>> +
>> + /*
>> + * Setting slice_mask or subslice_mask to 0 will make the context use
>> + * masks reported respectively by I915_PARAM_SLICE_MASK or
>> + * I915_PARAM_SUBSLICE_MASK.
>> + */
>> + union {
>> + struct {
>> + __u8 slice_mask;
>> + __u8 subslice_mask;
>> + __u8 min_eus_per_subslice;
>> + __u8 max_eus_per_subslice;
>> + } packed;
>> + __u64 value;
>> + };
>> +};
>> +
>> enum drm_i915_oa_format {
>> I915_OA_FORMAT_A13 = 1, /* HSW only */
>> I915_OA_FORMAT_A29, /* HSW only */
>> --
>> 2.17.0
>>
More information about the Intel-gfx
mailing list