[PATCH 10/10] sseu test
Tvrtko Ursulin
tursulin at ursulin.net
Wed Aug 29 13:53:50 UTC 2018
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/i915_gem_context.c | 2 +-
drivers/gpu/drm/i915/intel_lrc.c | 9 +-
.../gpu/drm/i915/selftests/i915_gem_context.c | 271 +++++++++++++++++-
3 files changed, 275 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 2885f3f83982..21ba52077ae9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -976,7 +976,7 @@ gen8_modify_rpcs_gpu(struct intel_context *ce,
return ret;
}
-static int
+int
i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
struct intel_sseu sseu)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3d3cb7c00e20..ef5d1d1a2d97 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2492,9 +2492,10 @@ u32 gen8_make_rpcs(struct drm_i915_private *dev_priv,
*/
if (sseu->has_slice_pg) {
rpcs = hweight8(ctx_sseu->slice_mask);
-
+#if 0
if (IS_GEN11(dev_priv) /*FIXME LP */ && rpcs == 1)
en_subslice_pg = false;
+#endif
if (INTEL_GEN(dev_priv) >= 11) {
rpcs <<= GEN11_RPCS_S_CNT_SHIFT;
@@ -2505,9 +2506,12 @@ u32 gen8_make_rpcs(struct drm_i915_private *dev_priv,
}
rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE;
+
+ printk("make_rpcs: slice_mask=%x -> rpcs=%x\n",
+ ctx_sseu->slice_mask, rpcs);
}
- if (en_subslice_pg) {
+ if (en_subslice_pg && ctx_sseu->subslice_mask != 0xff) {
u32 val = hweight8(ctx_sseu->subslice_mask);
val <<= GEN8_RPCS_SS_CNT_SHIFT;
@@ -2522,6 +2526,7 @@ u32 gen8_make_rpcs(struct drm_i915_private *dev_priv,
rpcs |= ctx_sseu->max_eus_per_subslice <<
GEN8_RPCS_EU_MAX_SHIFT;
rpcs |= GEN8_RPCS_ENABLE;
+ printk("make_rpcs: eus -> rpcs=%x\n", rpcs);
}
return rpcs;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 1c92560d35da..090d163eb421 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -340,7 +340,7 @@ static int igt_ctx_exec(void *arg)
unsigned long ncontexts, ndwords, dw;
bool first_shared_gtt = true;
int err = -ENODEV;
-
+return 0;
/*
* Create a few different contexts (with different mm) and write
* through each ctx/mm using the GPU making sure those writes end
@@ -433,6 +433,268 @@ static int igt_ctx_exec(void *arg)
return err;
}
/*
 * rpcs_query_batch - build a one-shot batch that samples the RPCS register
 *
 * Creates an internal GEM object holding a tiny batch buffer which performs
 * an MI_STORE_REGISTER_MEM of GEN8_R_PWR_CLK_STATE into the start of @vma's
 * backing storage, then maps and pins that batch into the same address
 * space as @vma.
 *
 * @vma: destination buffer for the register readback; also supplies the vm
 *       the batch is instantiated in.
 *
 * Returns the pinned batch vma on success, or an ERR_PTR on failure.  On
 * failure the internal batch object is released; @vma itself is untouched.
 */
static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj;
	u32 *cmd;
	int err;

	/* The 64-bit SRM address layout emitted below requires gen8+. */
	if (INTEL_GEN(vma->vm->i915) < 8)
		return ERR_PTR(-EINVAL);

	obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	/* CPU-write the commands through a cacheable (WB) mapping. */
	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	/* SRM: store the RPCS register value into dword 0 of @vma. */
	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
	*cmd++ = lower_32_bits(vma->node.start);
	*cmd++ = upper_32_bits(vma->node.start);
	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_unpin_map(obj);

	err = i915_gem_object_set_to_gtt_domain(obj, false);
	if (err)
		goto err;

	/* Reuse @vma to return the batch mapped into the same vm. */
	vma = i915_vma_instance(obj, vma->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
+
+static struct i915_request *
+emit_rpcs_query(struct drm_i915_gem_object *obj,
+ struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
+ struct i915_request *prev)
+{
+ struct i915_address_space *vm;
+ struct i915_request *rq;
+ struct i915_vma *batch;
+ struct i915_vma *vma;
+ int err;
+
+ GEM_BUG_ON(!ctx->ppgtt);
+ GEM_BUG_ON(!intel_engine_can_store_dword(engine));
+
+ vm = &ctx->ppgtt->vm;
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma))
+ return ERR_CAST(vma);
+
+ err = i915_gem_object_set_to_gtt_domain(obj, false);
+ if (err)
+ return ERR_PTR(err);
+
+ err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
+ if (err)
+ return ERR_PTR(err);
+
+ batch = rpcs_query_batch(vma);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto err_vma;
+ }
+
+ rq = i915_request_alloc(engine, ctx);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_batch;
+ }
+
+ err = engine->emit_bb_start(rq, batch->node.start, batch->node.size, 0);
+ if (err)
+ goto err_request;
+
+ err = i915_vma_move_to_active(batch, rq, 0);
+ if (err)
+ goto skip_request;
+
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ if (err)
+ goto skip_request;
+
+ if (prev) {
+ i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+ &prev->submit,
+ I915_FENCE_GFP);
+ i915_request_put(prev);
+ }
+
+ i915_gem_object_set_active_reference(batch->obj);
+ i915_vma_unpin(batch);
+ i915_vma_close(batch);
+
+ i915_vma_unpin(vma);
+
+ i915_request_add(rq);
+
+ return i915_request_get(rq);
+
+skip_request:
+ i915_request_skip(rq, err);
+err_request:
+ i915_request_add(rq);
+err_batch:
+ i915_vma_unpin(batch);
+err_vma:
+ i915_vma_unpin(vma);
+ return ERR_PTR(err);
+}
+
+int
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
+ struct intel_sseu sseu);
+
+static int igt_ctx_sseu(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj[2] = { };
+ struct i915_gem_context *ctx[2] = { };
+ struct drm_file *file;
+ bool xtra_run = IS_GEN11(i915);
+ u8 masks[] = { 0x3f, 0x3f, 0x3f, 0xff, 0xff, 0x3f, 0x1f, 0xf, 0x7, 0x3, 0x1, 0x1 };
+ const unsigned int runs = 2 + (xtra_run ? ARRAY_SIZE(masks) : 0);
+ unsigned int tgt = 0;
+ int err = 0;
+ unsigned long iter;
+ unsigned int i, pass;
+ u32 *buf;
+
+ if (!USES_FULL_PPGTT(i915) || !IS_GEN11(i915))
+ return 0;
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ mutex_lock(&i915->drm.struct_mutex);
+
+ for (i = 0; i < ARRAY_SIZE(ctx); i++) {
+ ctx[i] = i915_gem_create_context(i915, file->driver_priv);
+ if (IS_ERR(ctx[i])) {
+ err = PTR_ERR(ctx[i]);
+ goto out_unlock;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(obj); i++) {
+ obj[i] = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj[i])) {
+ err = PTR_ERR(obj[i]);
+ goto out_unlock;
+ }
+ }
+
+ intel_runtime_pm_get(i915);
+
+ for (pass = 0; pass < runs; pass++) {
+ struct intel_engine_cs *engine = i915->engine[RCS];
+ IGT_TIMEOUT(end_time);
+
+ if (pass > 0) {
+ struct intel_sseu sseu =
+ intel_device_default_sseu(i915);
+
+ tgt ^= 1;
+
+ if (pass == 1)
+ sseu.slice_mask = 1;
+ else
+ sseu.subslice_mask = masks[pass - 2];
+
+ printk("pass=%u ctx=%u slices=%x subslices=%x\n",
+ pass, tgt, sseu.slice_mask, sseu.subslice_mask);
+
+ err = i915_gem_context_reconfigure_sseu(ctx[tgt],
+ engine,
+ sseu);
+ if (err) {
+ pr_err("%u: Failed to configure SSEU! (%d)\n",
+ pass, err);
+ goto out_rpm_put;
+ }
+ }
+
+ iter = 0;
+
+ while (!time_after(jiffies, end_time)) {
+ struct i915_request *prev = NULL;
+
+ for (i = 0; i < ARRAY_SIZE(ctx); i++) {
+ prev = emit_rpcs_query(obj[i], ctx[i], engine, prev);
+ if (IS_ERR(prev)) {
+ pr_err("Failed to emit rpcs query @%lu/%u, err=%ld\n",
+ iter, i, PTR_ERR(prev));
+ goto out_rpm_put;
+ }
+ }
+
+ if (prev)
+ i915_request_put(prev);
+
+ iter++;
+ }
+
+ pr_info("%u: Submitted %lu queries across %lu contexts\n",
+ pass, iter, ARRAY_SIZE(ctx));
+
+ for (i = 0; i < ARRAY_SIZE(ctx); i++) {
+ buf = i915_gem_object_pin_map(obj[i], I915_MAP_WB);
+ if (IS_ERR(buf)) {
+ err = PTR_ERR(buf);
+ continue;
+ }
+
+ pr_info("%u: Context %u: RPCS=0x%x; %u%sx%u%s\n",
+ pass, i, *buf,
+ (*buf & GEN8_RPCS_S_CNT_MASK) >> GEN8_RPCS_S_CNT_SHIFT,
+ (*buf & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
+ (*buf & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
+ (*buf & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
+
+ i915_gem_object_unpin_map(obj[i]);
+ }
+ }
+
+out_rpm_put:
+ intel_runtime_pm_put(i915);
+
+out_unlock:
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+
+ for (i = 0; i < ARRAY_SIZE(obj); i++)
+ if (!IS_ERR_OR_NULL(obj[i]))
+ i915_gem_object_put(obj[i]);
+
+ mutex_unlock(&i915->drm.struct_mutex);
+
+ mock_file_free(i915, file);
+ return err;
+}
+
static int igt_ctx_readonly(void *arg)
{
struct drm_i915_private *i915 = arg;
@@ -445,7 +707,7 @@ static int igt_ctx_readonly(void *arg)
struct i915_hw_ppgtt *ppgtt;
unsigned long ndwords, dw;
int err = -ENODEV;
-
+return 0;
/*
* Create a few read-only objects (with the occasional writable object)
* and try to write into these object checking that the GPU discards
@@ -627,7 +889,7 @@ static int igt_switch_to_kernel_context(void *arg)
struct i915_gem_context *ctx;
enum intel_engine_id id;
int err;
-
+return 0;
/*
* A core premise of switching to the kernel context is that
* if an engine is already idling in the kernel context, we
@@ -715,6 +977,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
SUBTEST(igt_switch_to_kernel_context),
SUBTEST(igt_ctx_exec),
SUBTEST(igt_ctx_readonly),
+ SUBTEST(igt_ctx_sseu),
};
bool fake_alias = false;
int err;
@@ -723,7 +986,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
return 0;
/* Install a fake aliasing gtt for exercise */
- if (USES_PPGTT(dev_priv) && !dev_priv->mm.aliasing_ppgtt) {
+ if (0 && USES_PPGTT(dev_priv) && !dev_priv->mm.aliasing_ppgtt) {
mutex_lock(&dev_priv->drm.struct_mutex);
err = fake_aliasing_ppgtt_enable(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
--
2.17.1
More information about the Intel-gfx-trybot
mailing list