[Intel-gfx] [PATCH i-g-t v7] tests/perf_pmu: Improve accuracy by waiting on spinner to start
Chris Wilson
chris at chris-wilson.co.uk
Mon Mar 19 15:33:56 UTC 2018
Quoting Chris Wilson (2018-03-19 15:29:21)
> Quoting Tvrtko Ursulin (2018-03-19 13:56:05)
> > @@ -443,15 +501,12 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
> > if (!gem_has_engine(gem_fd, e_->class, e_->instance))
> > continue;
> >
> > - if (e == e_) {
> > + if (e == e_)
> > idle_idx = i;
> > - } else if (spin) {
> > - __submit_spin_batch(gem_fd, &obj, e_);
> > - } else {
> > - spin = igt_spin_batch_new(gem_fd, 0,
> > - e2ring(gem_fd, e_), 0);
> > - obj.handle = spin->handle;
> > - }
> > + else if (spin)
> > + __submit_spin_batch(gem_fd, spin, e_);
> > + else
> > + spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e_));
>
> So this is what is killing snb. We resubmit the spin-batch, with its
> MI_STORE_DWORD_IMM intact, onto each ring. Instant machine death for snb
> when we reach vcs.
>
> If we tweak the spinner to jump to a location 64 bytes past the start, we
> can opt out of the MI_STORE_DW when not required. Let me go and cook up a
> delta.
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
index b7a89fd..2a3c3b5 100644
--- a/lib/igt_dummyload.c
+++ b/lib/igt_dummyload.c
@@ -84,7 +84,7 @@ emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
#define SCRATCH 0
#define BATCH 1
const int gen = intel_gen(intel_get_drm_devid(fd));
- struct drm_i915_gem_relocation_entry relocs[2];
+ struct drm_i915_gem_relocation_entry relocs[2], *r;
struct drm_i915_gem_execbuffer2 *execbuf;
struct drm_i915_gem_exec_object2 *obj;
unsigned int engines[16];
@@ -182,7 +182,7 @@ emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
execbuf->buffer_count++;
}
- spin->batch = batch;
+ spin->batch = batch_start + 64/sizeof(*batch);
spin->handle = obj[BATCH].handle;
/* Allow ourselves to be preempted */
@@ -202,26 +202,25 @@ emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
batch += 1000;
/* recurse */
- fill_reloc(&relocs[obj[BATCH].relocation_count],
- obj[BATCH].handle, (batch - batch_start) + 1,
- I915_GEM_DOMAIN_COMMAND, 0);
+ r = &relocs[obj[BATCH].relocation_count++];
+ r->target_handle = obj[BATCH].handle;
+ r->offset = (batch + 1 - batch_start) * sizeof(*batch);
+ r->read_domains = I915_GEM_DOMAIN_COMMAND;
+ r->delta = 64;
if (gen >= 8) {
*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
- *batch++ = 0;
+ *batch++ = r->delta;
*batch++ = 0;
} else if (gen >= 6) {
*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
- *batch++ = 0;
+ *batch++ = r->delta;
} else {
*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
- *batch = 0;
- if (gen < 4) {
- *batch |= 1;
- relocs[obj[BATCH].relocation_count].delta = 1;
- }
+ if (gen < 4)
+ r->delta |= 1;
+ *batch = r->delta;
batch++;
}
- obj[BATCH].relocation_count++;
obj[BATCH].relocs_ptr = to_user_pointer(relocs);
execbuf->buffers_ptr = to_user_pointer(obj +
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 87875fb..469b9be 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -474,12 +474,14 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
static void
__submit_spin_batch(int gem_fd, igt_spin_t *spin,
- const struct intel_execution_engine2 *e)
+ const struct intel_execution_engine2 *e,
+ int offset)
{
struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
eb.flags |= e2ring(gem_fd, e) | I915_EXEC_NO_RELOC;
+ eb.batch_start_offset += offset;
gem_execbuf(gem_fd, &eb);
}
@@ -504,7 +506,7 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
if (e == e_)
idle_idx = i;
else if (spin)
- __submit_spin_batch(gem_fd, spin, e_);
+ __submit_spin_batch(gem_fd, spin, e_, 64);
else
spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e_));
@@ -561,7 +563,7 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
continue;
if (spin)
- __submit_spin_batch(gem_fd, spin, e);
+ __submit_spin_batch(gem_fd, spin, e, 64);
else
spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e));
@@ -1613,7 +1615,7 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
/* Restart the spinbatch. */
igt_nsec_elapsed(&start);
__rearm_spin_batch(spin);
- __submit_spin_batch(gem_fd, spin, e);
+ __submit_spin_batch(gem_fd, spin, e, 0);
/* Wait for batch to start executing. */
__spin_wait(gem_fd, spin);
More information about the Intel-gfx
mailing list