[Intel-gfx] [PATCH 7/7] drm/i915/selftests: Context SSEU reconfiguration tests

Chris Wilson chris at chris-wilson.co.uk
Fri Dec 14 13:22:19 UTC 2018


Quoting Tvrtko Ursulin (2018-12-14 12:34:49)
> +static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
> +{
> +       struct drm_i915_gem_object *obj;
> +       u32 *cmd;
> +       int err;
> +
> +       if (INTEL_GEN(vma->vm->i915) < 8)
> +               return ERR_PTR(-EINVAL);
> +
> +       obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
> +       if (IS_ERR(obj))
> +               return ERR_CAST(obj);
> +
> +       cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +       if (IS_ERR(cmd)) {
> +               err = PTR_ERR(cmd);
> +               goto err;
> +       }
> +
> +       *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
> +       *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
> +       *cmd++ = lower_32_bits(vma->node.start);
> +       *cmd++ = upper_32_bits(vma->node.start);
> +       *cmd = MI_BATCH_BUFFER_END;
> +
> +       i915_gem_object_unpin_map(obj);
> +
> +       err = i915_gem_object_set_to_gtt_domain(obj, false);
> +       if (err)
> +               goto err;
> +
> +       vma = i915_vma_instance(obj, vma->vm, NULL);
> +       if (IS_ERR(vma)) {
> +               err = PTR_ERR(vma);
> +               goto err;
> +       }
> +
> +       err = i915_vma_pin(vma, 0, 0, PIN_USER);
> +       if (err)
> +               goto err;
> +
> +       return vma;
> +
> +err:
> +       i915_gem_object_put(obj);
> +       return ERR_PTR(err);
> +}
> +
> +static int
> +emit_rpcs_query(struct drm_i915_gem_object *obj,
> +               struct i915_gem_context *ctx,
> +               struct intel_engine_cs *engine,
> +               struct i915_request **rq_out)
> +{
> +       struct i915_address_space *vm;
> +       struct i915_request *rq;
> +       struct i915_vma *batch;
> +       struct i915_vma *vma;
> +       int err;
> +
> +       GEM_BUG_ON(!ctx->ppgtt);
> +       GEM_BUG_ON(!intel_engine_can_store_dword(engine));
> +
> +       vm = &ctx->ppgtt->vm;
> +
> +       vma = i915_vma_instance(obj, vm, NULL);
> +       if (IS_ERR(vma))
> +               return PTR_ERR(vma);
> +
> +       err = i915_gem_object_set_to_gtt_domain(obj, false);
> +       if (err)
> +               return err;
> +
> +       err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
> +       if (err)
> +               return err;
> +
> +       batch = rpcs_query_batch(vma);
> +       if (IS_ERR(batch)) {
> +               err = PTR_ERR(batch);
> +               goto err_vma;
> +       }

Simpler would be to just emit the SRM into the ring. One less object+vma
to handle.

> +
> +       rq = i915_request_alloc(engine, ctx);
> +       if (IS_ERR(rq)) {
> +               err = PTR_ERR(rq);
> +               goto err_batch;
> +       }
> +
> +       err = engine->emit_bb_start(rq, batch->node.start, batch->node.size, 0);
> +       if (err)
> +               goto err_request;
> +
> +       err = i915_vma_move_to_active(batch, rq, 0);
> +       if (err)
> +               goto skip_request;
> +
> +       err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
> +       if (err)
> +               goto skip_request;
> +
> +       i915_gem_object_set_active_reference(batch->obj);
> +       i915_vma_unpin(batch);
> +       i915_vma_close(batch);
> +
> +       i915_vma_unpin(vma);
> +
> +       *rq_out = i915_request_get(rq);
> +
> +       i915_request_add(rq);
> +
> +       return 0;
> +
> +skip_request:
> +       i915_request_skip(rq, err);
> +err_request:
> +       i915_request_add(rq);
> +err_batch:
> +       i915_vma_unpin(batch);
> +err_vma:
> +       i915_vma_unpin(vma);
> +
> +       return err;
> +}
> +
> +#define TEST_IDLE      (1 << 0)
> +#define TEST_BUSY      (1 << 1)
> +#define TEST_RESET     (1 << 2)
> +
> +int
> +__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
> +                                   struct intel_engine_cs *engine,
> +                                   struct intel_sseu sseu);
> +
> +static int
> +__pre_set(struct drm_i915_private *i915,
> +         unsigned int flags,
> +         struct i915_gem_context *ctx,
> +         struct intel_engine_cs *engine,
> +         struct igt_spinner **spin_out)
> +{
> +       int ret = 0;
> +
> +       if (flags & (TEST_BUSY | TEST_RESET)) {
> +               struct igt_spinner *spin;
> +               struct i915_request *rq;
> +
> +               spin = kzalloc(sizeof(*spin), GFP_KERNEL);
> +               if (!spin) {
> +                       ret = -ENOMEM;
> +                       goto out;
> +               }
> +
> +               ret = igt_spinner_init(spin, i915);
> +               if (ret)
> +                       return ret;
> +
> +               rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP);
> +               if (IS_ERR(rq)) {
> +                       ret = PTR_ERR(rq);
> +                       igt_spinner_fini(spin);
> +                       kfree(spin);
> +                       goto out;
> +               }
> +
> +               i915_request_add(rq);
> +
> +               if (!igt_wait_for_spinner(spin, rq)) {
> +                       pr_err("Spinner failed to start!\n");
> +                       igt_spinner_end(spin);
> +                       igt_spinner_fini(spin);
> +                       kfree(spin);
> +                       ret = -ETIMEDOUT;
> +                       goto out;
> +               }
> +
> +               *spin_out = spin;
> +       }
> +
> +out:
> +       return ret;
> +}
> +
> +static int
> +__read_slice_count(struct drm_i915_private *i915,
> +                  struct i915_gem_context *ctx,
> +                  struct intel_engine_cs *engine,
> +                  struct drm_i915_gem_object *obj,
> +                  struct igt_spinner *spin,
> +                  u32 *rpcs)
> +{
> +       struct i915_request *rq = NULL;
> +       u32 s_mask, s_shift;
> +       unsigned int cnt;
> +       u32 *buf, val;
> +       long ret;
> +
> +       ret = emit_rpcs_query(obj, ctx, engine, &rq);
> +       if (ret)
> +               return ret;
> +
> +       if (spin)
> +               igt_spinner_end(spin);
> +
> +       ret = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
> +       if (ret <= 0)
> +               return ret < 0 ? ret : -ETIME;
> +
> +       i915_request_put(rq);
> +
> +       buf = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +       if (IS_ERR(buf)) {
> +               ret = PTR_ERR(buf);
> +               return ret;
> +       }

You can replace the get + request_wait + put with
i915_gem_obj_prepare_shmem_read. Yes the _shmem_ there is misleading.

> +
> +       if (INTEL_GEN(i915) >= 11) {
> +               s_mask = GEN11_RPCS_S_CNT_MASK;
> +               s_shift = GEN11_RPCS_S_CNT_SHIFT;
> +       } else {
> +               s_mask = GEN8_RPCS_S_CNT_MASK;
> +               s_shift = GEN8_RPCS_S_CNT_SHIFT;
> +       }
> +
> +       val = *buf;
> +       cnt = (val & s_mask) >> s_shift;
> +       *rpcs = val;
> +
> +       i915_gem_object_unpin_map(obj);
> +
> +       return cnt;
> +}
> +
> +static void __err_rpcs(u32 rpcs, unsigned int slices)
> +{
> +       pr_info("RPCS=0x%x; %u%sx%u%s\n",
> +               rpcs, slices,
> +               (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
> +               (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
> +               (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
> +}
> +
> +static int
> +__post_set(struct drm_i915_private *i915,
> +          unsigned int flags,
> +          struct i915_gem_context *ctx,
> +          struct i915_gem_context *kctx,
> +          struct intel_engine_cs *engine,
> +          struct drm_i915_gem_object *obj,
> +          unsigned int expected,
> +          struct igt_spinner *spin)
> +{
> +       unsigned int slices =
> +               hweight32(intel_device_default_sseu(i915).slice_mask);
> +       u32 rpcs = 0;
> +       int ret = 0;
> +
> +       if (flags & TEST_RESET) {
> +               ret = i915_reset_engine(engine, "sseu");
> +               if (ret)
> +                       goto out;
> +       }
> +
> +       ret = __read_slice_count(i915, ctx, engine, obj,
> +                                flags & TEST_RESET ? NULL : spin, &rpcs);
> +       if (ret < 0) {
> +               goto out;
> +       } else if (ret != expected) {
> +               pr_err("Context slice count %d is not %u!\n",
> +                      ret, expected);

Include stage + phase name (idle, busy, reset, etc) in the error message.

> +               __err_rpcs(rpcs, ret);
> +               ret = -EINVAL;
> +               goto out;
> +       } else {
> +               ret = 0;
> +       }
> +
> +       ret = __read_slice_count(i915, kctx, engine, obj, NULL, &rpcs);
> +       if (ret < 0) {
> +               goto out;
> +       } else if (ret != slices) {
> +               pr_err("Kernel context slice count %d is not %u!\n",
> +                      ret, slices);
> +               __err_rpcs(rpcs, ret);
> +               ret = -EINVAL;
> +               goto out;
> +       } else {
> +               ret = 0;
> +       }
> +
> +out:
> +       if (spin)
> +               igt_spinner_end(spin);
> +
> +       if (flags & TEST_IDLE) {
> +               ret = i915_gem_wait_for_idle(i915,
> +                                            I915_WAIT_LOCKED,
> +                                            MAX_SCHEDULE_TIMEOUT);
> +               if (ret)
> +                       return ret;

Do we want to go full idle (rpm) here? That shouldn't matter for ctx
images, so forget that.

Hmm, but we would like to kickstart the powercontext I guess. I wonder
if we can call __i915_gem_park directly...

> +
> +               ret = __read_slice_count(i915, ctx, engine, obj, NULL, &rpcs);
> +               if (ret < 0) {
> +                       return ret;
> +               } else if (ret != expected) {
> +                       pr_err("Context slice count %d is not %u after idle!\n",
> +                              ret, expected);
> +                       __err_rpcs(rpcs, ret);
> +                       return -EINVAL;
> +               } else {
> +                       ret = 0;
> +               }
> +       }
> +
> +       return ret;

Ok, that should check across context switches and restoration from
reset. And reset should work so long as the RPCS SRM is submitted in
separate requests from the user batches.

> +
> +}
> +
> +static int
> +__sseu_test(struct drm_i915_private *i915,
> +           unsigned int flags,
> +           struct i915_gem_context *ctx,
> +           struct intel_engine_cs *engine,
> +           struct drm_i915_gem_object *obj,
> +           struct intel_sseu sseu)
> +{
> +       struct igt_spinner *spin = NULL;
> +       struct i915_gem_context *kctx;
> +       int ret;
> +
> +       kctx = kernel_context(i915);
> +       if (IS_ERR(kctx))
> +               return PTR_ERR(kctx);
> +
> +       ret = __pre_set(i915, flags, ctx, engine, &spin);

__sseu_prepare & __sseu_finish

Just thinking that pre_set is a little generic and you'll wish you
called it something else next time you add a selftest.

> +       if (ret)
> +               goto out;
> +
> +       ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
> +       if (ret)
> +               goto out;
> +

> +       ret = __post_set(i915, flags, ctx, kctx, engine, obj,
> +                        hweight32(sseu.slice_mask), spin);
> +
> +out:
> +       if (spin) {
> +               igt_spinner_end(spin);
> +               igt_spinner_fini(spin);
> +               kfree(spin);
> +       }
> +
> +       kernel_context_close(kctx);
> +
> +       return ret;
> +}
> +
> +static int __igt_ctx_sseu(struct drm_i915_private *i915, unsigned int flags)
> +{
> +       struct intel_sseu default_sseu = intel_device_default_sseu(i915);
> +       struct intel_engine_cs *engine = i915->engine[RCS];
> +       struct drm_i915_gem_object *obj;
> +       struct i915_gem_context *ctx;
> +       struct intel_sseu pg_sseu;
> +       struct drm_file *file;
> +       int ret;
> +
> +       if (INTEL_GEN(i915) < 9)
> +               return 0;
> +
> +       if (!INTEL_INFO(i915)->sseu.has_slice_pg)
> +               return 0;
> +
> +       if (hweight32(default_sseu.slice_mask) < 2)
> +               return 0;
> +
> +       /*
> +        * Gen11 VME friendly power-gated configuration with half enabled
> +        * sub-slices.
> +        */
> +       pg_sseu = default_sseu;
> +       pg_sseu.slice_mask = 1;
> +       pg_sseu.subslice_mask =
> +               ~(~0 << (hweight32(default_sseu.subslice_mask) / 2));
> +
> +       pr_info("SSE subtest flags=%x, def_slices=%u, pg_slices=%u\n",
> +               flags, hweight32(default_sseu.slice_mask),
> +               hweight32(pg_sseu.slice_mask));
> +
> +       file = mock_file(i915);
> +       if (IS_ERR(file))
> +               return PTR_ERR(file);
> +
> +       if (flags & TEST_RESET)
> +               igt_global_reset_lock(i915);
> +
> +       mutex_lock(&i915->drm.struct_mutex);
> +
> +       ctx = i915_gem_create_context(i915, file->driver_priv);
> +       if (IS_ERR(ctx)) {
> +               ret = PTR_ERR(ctx);
> +               goto out_unlock;
> +       }
> +
> +       obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
> +       if (IS_ERR(obj)) {
> +               ret = PTR_ERR(obj);
> +               goto out_unlock;
> +       }
> +
> +       intel_runtime_pm_get(i915);
> +
> +       /* First set the default mask. */
> +       ret = __sseu_test(i915, flags, ctx, engine, obj, default_sseu);
> +       if (ret)
> +               goto out_fail;
> +
> +       /* Then set a power-gated configuration. */
> +       ret = __sseu_test(i915, flags, ctx, engine, obj, pg_sseu);
> +       if (ret)
> +               goto out_fail;
> +
> +       /* Back to defaults. */
> +       ret = __sseu_test(i915, flags, ctx, engine, obj, default_sseu);
> +       if (ret)
> +               goto out_fail;
> +
> +       /* One last power-gated configuration for the road. */
> +       ret = __sseu_test(i915, flags, ctx, engine, obj, pg_sseu);
> +       if (ret)
> +               goto out_fail;
> +
> +out_fail:
> +       if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +               ret = -EIO;
> +
> +       i915_gem_object_put(obj);
> +
> +       intel_runtime_pm_put(i915);
> +
> +out_unlock:
> +       mutex_unlock(&i915->drm.struct_mutex);
> +
> +       if (flags & TEST_RESET)
> +               igt_global_reset_unlock(i915);
> +
> +       mock_file_free(i915, file);
> +
> +       return ret;
> +}
> +
> +static int igt_ctx_sseu(void *arg)
> +{
> +       return __igt_ctx_sseu(arg, 0);
> +}
> +
> +static int igt_ctx_sseu_idle(void *arg)
> +{
> +       return __igt_ctx_sseu(arg, TEST_IDLE);
> +}
> +
> +static int igt_ctx_sseu_busy(void *arg)
> +{
> +       return __igt_ctx_sseu(arg, TEST_BUSY);
> +}
> +
> +static int igt_ctx_sseu_busy_reset(void *arg)
> +{
> +       return __igt_ctx_sseu(arg, TEST_BUSY | TEST_RESET);
> +}
> +
> +static int igt_ctx_sseu_busy_idle(void *arg)
> +{
> +       return __igt_ctx_sseu(arg, TEST_BUSY | TEST_IDLE);
> +}
> +
> +static int igt_ctx_sseu_reset_idle(void *arg)
> +{
> +       return __igt_ctx_sseu(arg, TEST_RESET | TEST_IDLE);
> +}

Looks ok. The only point I would like to reiterate is to pass down the
subtest names so that we can include them in the error messages for
easier comprehension.
-Chris


More information about the Intel-gfx mailing list