[igt-dev] [Intel-gfx] [PATCH i-g-t v7] tests/perf_pmu: Improve accuracy by waiting on spinner to start

Mon Mar 19 15:33:56 UTC 2018

Quoting Chris Wilson (2018-03-19 15:29:21)
> Quoting Tvrtko Ursulin (2018-03-19 13:56:05)
> > @@ -443,15 +501,12 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
> >                 if (!gem_has_engine(gem_fd, e_->class, e_->instance))
> >                         continue;
> >  
> > -               if (e == e_) {
> > +               if (e == e_)
> >                         idle_idx = i;
> > -               } else if (spin) {
> > -                       __submit_spin_batch(gem_fd, &obj, e_);
> > -               } else {
> > -                       spin = igt_spin_batch_new(gem_fd, 0,
> > -                                                 e2ring(gem_fd, e_), 0);
> > -                       obj.handle = spin->handle;
> > -               }
> > +               else if (spin)
> > +                       __submit_spin_batch(gem_fd, spin, e_);
> > +               else
> > +                       spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e_));
> 
> So this is what is killing snb. We resubmit the spin-batch, with its
> MI_STORE_DWORD_IMM intact, onto each ring. Instant machine death for snb
> when we reach vcs.
> 
> If we tweak the spinner to jump to a location 64 bytes past the start, we
> can opt out of the MI_STORE_DW when not required. Let me go and cook up a
> delta.

diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
index b7a89fd..2a3c3b5 100644
--- a/lib/igt_dummyload.c
+++ b/lib/igt_dummyload.c
@@ -84,7 +84,7 @@ emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
 #define SCRATCH 0
 #define BATCH 1
 	const int gen = intel_gen(intel_get_drm_devid(fd));
-	struct drm_i915_gem_relocation_entry relocs[2];
+	struct drm_i915_gem_relocation_entry relocs[2], *r;
 	struct drm_i915_gem_execbuffer2 *execbuf;
 	struct drm_i915_gem_exec_object2 *obj;
 	unsigned int engines[16];
@@ -182,7 +182,7 @@ emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
 		execbuf->buffer_count++;
 	}
 
-	spin->batch = batch;
+	spin->batch = batch_start + 64/sizeof(*batch);
 	spin->handle = obj[BATCH].handle;
 
 	/* Allow ourselves to be preempted */
@@ -202,26 +202,25 @@ emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
 	batch += 1000;
 
 	/* recurse */
-	fill_reloc(&relocs[obj[BATCH].relocation_count],
-		   obj[BATCH].handle, (batch - batch_start) + 1,
-		   I915_GEM_DOMAIN_COMMAND, 0);
+	r = &relocs[obj[BATCH].relocation_count++];
+	r->target_handle = obj[BATCH].handle;
+	r->offset = (batch + 1 - batch_start) * sizeof(*batch);
+	r->read_domains = I915_GEM_DOMAIN_COMMAND;
+	r->delta = 64;
 	if (gen >= 8) {
 		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
-		*batch++ = 0;
+		*batch++ = r->delta;
 		*batch++ = 0;
 	} else if (gen >= 6) {
 		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
-		*batch++ = 0;
+		*batch++ = r->delta;
 	} else {
 		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
-		*batch = 0;
-		if (gen < 4) {
-			*batch |= 1;
-			relocs[obj[BATCH].relocation_count].delta = 1;
-		}
+		if (gen < 4)
+			r->delta |= 1;
+		*batch = r->delta;
 		batch++;
 	}
-	obj[BATCH].relocation_count++;
 	obj[BATCH].relocs_ptr = to_user_pointer(relocs);
 
 	execbuf->buffers_ptr = to_user_pointer(obj +
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 87875fb..469b9be 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -474,12 +474,14 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
 
 static void
 __submit_spin_batch(int gem_fd, igt_spin_t *spin,
-		    const struct intel_execution_engine2 *e)
+		    const struct intel_execution_engine2 *e,
+		    int offset)
 {
 	struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
 
 	eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
 	eb.flags |= e2ring(gem_fd, e) | I915_EXEC_NO_RELOC;
+	eb.batch_start_offset += offset;
 
 	gem_execbuf(gem_fd, &eb);
 }
@@ -504,7 +506,7 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
 		if (e == e_)
 			idle_idx = i;
 		else if (spin)
-			__submit_spin_batch(gem_fd, spin, e_);
+			__submit_spin_batch(gem_fd, spin, e_, 64);
 		else
 			spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e_));
 
@@ -561,7 +563,7 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
 			continue;
 
 		if (spin)
-			__submit_spin_batch(gem_fd, spin, e);
+			__submit_spin_batch(gem_fd, spin, e, 64);
 		else
 			spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e));
 
@@ -1613,7 +1615,7 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
 				/* Restart the spinbatch. */
 				igt_nsec_elapsed(&start);
 				__rearm_spin_batch(spin);
-				__submit_spin_batch(gem_fd, spin, e);
+				__submit_spin_batch(gem_fd, spin, e, 0);
 
 				 /* Wait for batch to start executing. */
 				__spin_wait(gem_fd, spin);