[PATCH i-g-t 46/47] tests/perf_pmu: Adapt to use no-reloc
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Wed Jun 30 08:40:02 UTC 2021
Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
tests/i915/perf_pmu.c | 135 +++++++++++++++++++++++++++++++-----------
1 file changed, 99 insertions(+), 36 deletions(-)
diff --git a/tests/i915/perf_pmu.c b/tests/i915/perf_pmu.c
index aa297bf19..af845f353 100644
--- a/tests/i915/perf_pmu.c
+++ b/tests/i915/perf_pmu.c
@@ -172,10 +172,11 @@ static unsigned int measured_usleep(unsigned int usec)
#define FLAG_HANG (32)
#define TEST_S3 (64)
-static igt_spin_t * __spin_poll(int fd, uint32_t ctx,
+static igt_spin_t * __spin_poll(int fd, uint64_t ahnd, uint32_t ctx,
const struct intel_execution_engine2 *e)
{
struct igt_spin_factory opts = {
+ .ahnd = ahnd,
.ctx_id = ctx,
.engine = e->flags,
};
@@ -215,25 +216,26 @@ static unsigned long __spin_wait(int fd, igt_spin_t *spin)
return igt_nsec_elapsed(&start);
}
-static igt_spin_t * __spin_sync(int fd, uint32_t ctx,
+static igt_spin_t * __spin_sync(int fd, uint64_t ahnd, uint32_t ctx,
const struct intel_execution_engine2 *e)
{
- igt_spin_t *spin = __spin_poll(fd, ctx, e);
+ igt_spin_t *spin = __spin_poll(fd, ahnd, ctx, e);
__spin_wait(fd, spin);
return spin;
}
-static igt_spin_t * spin_sync(int fd, uint32_t ctx,
+static igt_spin_t * spin_sync(int fd, uint64_t ahnd, uint32_t ctx,
const struct intel_execution_engine2 *e)
{
igt_require_gem(fd);
- return __spin_sync(fd, ctx, e);
+ return __spin_sync(fd, ahnd, ctx, e);
}
-static igt_spin_t * spin_sync_flags(int fd, uint32_t ctx, unsigned int flags)
+static igt_spin_t * spin_sync_flags(int fd, uint64_t ahnd,
+ uint32_t ctx, unsigned int flags)
{
struct intel_execution_engine2 e = { };
@@ -242,7 +244,7 @@ static igt_spin_t * spin_sync_flags(int fd, uint32_t ctx, unsigned int flags)
(I915_EXEC_BSD | I915_EXEC_BSD_RING2) ? 1 : 0;
e.flags = flags;
- return spin_sync(fd, ctx, &e);
+ return spin_sync(fd, ahnd, ctx, &e);
}
static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
@@ -283,11 +285,12 @@ single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
igt_spin_t *spin;
uint64_t val;
int fd;
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
fd = open_pmu(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance));
if (flags & TEST_BUSY)
- spin = spin_sync(gem_fd, 0, e);
+ spin = spin_sync(gem_fd, ahnd, 0, e);
else
spin = NULL;
@@ -318,6 +321,7 @@ single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
igt_spin_free(gem_fd, spin);
close(fd);
+ put_ahnd(ahnd);
gem_quiescent_gpu(gem_fd);
}
@@ -329,6 +333,7 @@ busy_start(int gem_fd, const struct intel_execution_engine2 *e)
uint64_t val, ts[2];
igt_spin_t *spin;
int fd;
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
/*
* Defeat the busy stats delayed disable, we need to guarantee we are
@@ -336,7 +341,7 @@ busy_start(int gem_fd, const struct intel_execution_engine2 *e)
*/
sleep(2);
- spin = __spin_sync(gem_fd, 0, e);
+ spin = __spin_sync(gem_fd, ahnd, 0, e);
fd = open_pmu(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance));
@@ -347,6 +352,7 @@ busy_start(int gem_fd, const struct intel_execution_engine2 *e)
igt_spin_free(gem_fd, spin);
close(fd);
+ put_ahnd(ahnd);
assert_within_epsilon(val, ts[1] - ts[0], tolerance);
gem_quiescent_gpu(gem_fd);
@@ -365,8 +371,10 @@ busy_double_start(int gem_fd, const struct intel_execution_engine2 *e)
igt_spin_t *spin[2];
uint32_t ctx;
int fd;
+ uint64_t ahnd0 = get_reloc_ahnd(gem_fd, 0), ahnd1;
ctx = gem_context_clone_with_engines(gem_fd, 0);
+ ahnd1 = get_reloc_ahnd(gem_fd, ctx);
/*
* Defeat the busy stats delayed disable, we need to guarantee we are
@@ -379,9 +387,10 @@ busy_double_start(int gem_fd, const struct intel_execution_engine2 *e)
* re-submission in execlists mode. Make sure busyness is correctly
* reported with the engine busy, and after the engine went idle.
*/
- spin[0] = __spin_sync(gem_fd, 0, e);
+ spin[0] = __spin_sync(gem_fd, ahnd0, 0, e);
usleep(500e3);
spin[1] = __igt_spin_new(gem_fd,
+ .ahnd = ahnd1,
.ctx_id = ctx,
.engine = e->flags);
@@ -414,6 +423,8 @@ busy_double_start(int gem_fd, const struct intel_execution_engine2 *e)
close(fd);
gem_context_destroy(gem_fd, ctx);
+ put_ahnd(ahnd0);
+ put_ahnd(ahnd1);
assert_within_epsilon(val, ts[1] - ts[0], tolerance);
igt_assert_eq(val2, 0);
@@ -451,6 +462,7 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
int fd[num_engines];
unsigned long slept;
igt_spin_t *spin;
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
i = 0;
fd[0] = -1;
@@ -466,7 +478,7 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
igt_assert_eq(i, num_engines);
- spin = spin_sync(gem_fd, 0, e);
+ spin = spin_sync(gem_fd, ahnd, 0, e);
pmu_read_multi(fd[0], num_engines, tval[0]);
slept = measured_usleep(batch_duration_ns / 1000);
if (flags & TEST_TRAILING_IDLE)
@@ -477,6 +489,7 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
igt_spin_free(gem_fd, spin);
for (i = 0; i < num_engines; i++)
close(fd[i]);
+ put_ahnd(ahnd);
for (i = 0; i < num_engines; i++)
val[i] = tval[1][i] - tval[0][i];
@@ -517,6 +530,7 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
unsigned long slept;
igt_spin_t *spin = NULL;
unsigned int idle_idx, i;
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
i = 0;
__for_each_physical_engine(gem_fd, e_) {
@@ -525,7 +539,7 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
else if (spin)
__submit_spin(gem_fd, spin, e_, 64);
else
- spin = __spin_poll(gem_fd, 0, e_);
+ spin = __spin_poll(gem_fd, ahnd, 0, e_);
val[i++] = I915_PMU_ENGINE_BUSY(e_->class, e_->instance);
}
@@ -549,6 +563,7 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
igt_spin_free(gem_fd, spin);
for (i = 0; i < num_engines; i++)
close(fd[i]);
+ put_ahnd(ahnd);
for (i = 0; i < num_engines; i++)
val[i] = tval[1][i] - tval[0][i];
@@ -575,13 +590,14 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
unsigned long slept;
igt_spin_t *spin = NULL;
unsigned int i;
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
i = 0;
__for_each_physical_engine(gem_fd, e) {
if (spin)
__submit_spin(gem_fd, spin, e, 64);
else
- spin = __spin_poll(gem_fd, 0, e);
+ spin = __spin_poll(gem_fd, ahnd, 0, e);
val[i++] = I915_PMU_ENGINE_BUSY(e->class, e->instance);
}
@@ -604,6 +620,7 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
igt_spin_free(gem_fd, spin);
for (i = 0; i < num_engines; i++)
close(fd[i]);
+ put_ahnd(ahnd);
for (i = 0; i < num_engines; i++)
val[i] = tval[1][i] - tval[0][i];
@@ -621,6 +638,7 @@ no_sema(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
igt_spin_t *spin;
uint64_t val[2][2];
int fd[2];
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
fd[0] = open_group(gem_fd, I915_PMU_ENGINE_SEMA(e->class, e->instance),
-1);
@@ -628,7 +646,7 @@ no_sema(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
fd[0]);
if (flags & TEST_BUSY)
- spin = spin_sync(gem_fd, 0, e);
+ spin = spin_sync(gem_fd, ahnd, 0, e);
else
spin = NULL;
@@ -647,6 +665,7 @@ no_sema(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
}
close(fd[0]);
close(fd[1]);
+ put_ahnd(ahnd);
assert_within_epsilon(val[0][0], 0.0f, tolerance);
assert_within_epsilon(val[0][1], 0.0f, tolerance);
@@ -671,6 +690,8 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
uint32_t batch[16];
uint64_t val[2], ts[2];
int fd;
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
+ uint64_t obj_offset, bb_offset;
igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
@@ -682,23 +703,23 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
bb_handle = gem_create(gem_fd, 4096);
obj_handle = gem_create(gem_fd, 4096);
+ bb_offset = get_offset(ahnd, bb_handle, 4096, 0);
+ obj_offset = get_offset(ahnd, obj_handle, 4096, 0);
obj_ptr = gem_mmap__wc(gem_fd, obj_handle, 0, 4096, PROT_WRITE);
batch[0] = MI_STORE_DWORD_IMM;
- batch[1] = sizeof(*obj_ptr);
- batch[2] = 0;
+ batch[1] = obj_offset + sizeof(*obj_ptr);
+ batch[2] = (obj_offset + sizeof(*obj_ptr)) >> 32;
batch[3] = 1;
batch[4] = MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_GTE_SDD;
batch[5] = 1;
- batch[6] = 0x0;
- batch[7] = 0x0;
+ batch[6] = obj_offset;
+ batch[7] = obj_offset >> 32;
batch[8] = MI_BATCH_BUFFER_END;
- gem_write(gem_fd, bb_handle, 0, batch, sizeof(batch));
-
reloc[0].target_handle = obj_handle;
reloc[0].offset = 1 * sizeof(uint32_t);
reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
@@ -709,16 +730,25 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
reloc[1].offset = 6 * sizeof(uint32_t);
reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+ gem_write(gem_fd, bb_handle, 0, batch, sizeof(batch));
+
obj[0].handle = obj_handle;
obj[1].handle = bb_handle;
- obj[1].relocation_count = 2;
+ obj[1].relocation_count = !ahnd ? 2 : 0;
obj[1].relocs_ptr = to_user_pointer(reloc);
eb.buffer_count = 2;
eb.buffers_ptr = to_user_pointer(obj);
eb.flags = e->flags;
+ if (ahnd) {
+ obj[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+ obj[0].offset = obj_offset;
+ obj[1].flags |= EXEC_OBJECT_PINNED;
+ obj[1].offset = bb_offset;
+ }
+
/**
* Start the semaphore wait PMU and after some known time let the above
* semaphore wait command finish. Then check that the PMU is reporting
@@ -730,6 +760,7 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
val[0] = pmu_read_single(fd);
gem_execbuf(gem_fd, &eb);
+
do { /* wait for the batch to start executing */
usleep(5e3);
} while (!obj_ptr[1]);
@@ -759,7 +790,8 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
}
static uint32_t
-create_sema(int gem_fd, struct drm_i915_gem_relocation_entry *reloc)
+create_sema(int gem_fd, uint64_t ahnd,
+ struct drm_i915_gem_relocation_entry *reloc, __u64 *poffset)
{
uint32_t cs[] = {
/* Reset our semaphore wait */
@@ -776,7 +808,12 @@ create_sema(int gem_fd, struct drm_i915_gem_relocation_entry *reloc)
MI_BATCH_BUFFER_END
};
- uint32_t handle = gem_create(gem_fd, 4096);
+ uint32_t handle;
+
+ igt_assert(poffset);
+
+ handle = gem_create(gem_fd, 4096);
+ *poffset = get_offset(ahnd, handle, 4096, 0);
memset(reloc, 0, 2 * sizeof(*reloc));
reloc[0].target_handle = handle;
@@ -784,12 +821,19 @@ create_sema(int gem_fd, struct drm_i915_gem_relocation_entry *reloc)
reloc[1].target_handle = handle;
reloc[1].offset = 64 + 6 * sizeof(uint32_t);
+ if (ahnd) {
+ cs[1] = *poffset;
+ cs[2] = *poffset >> 32;
+ cs[6] = *poffset;
+ cs[7] = *poffset >> 32;
+ }
+
gem_write(gem_fd, handle, 64, cs, sizeof(cs));
return handle;
}
static void
-__sema_busy(int gem_fd, int pmu,
+__sema_busy(int gem_fd, uint64_t ahnd, int pmu,
const struct intel_execution_engine2 *e,
int sema_pct,
int busy_pct)
@@ -802,8 +846,8 @@ __sema_busy(int gem_fd, int pmu,
uint64_t start[2], val[2];
struct drm_i915_gem_relocation_entry reloc[2];
struct drm_i915_gem_exec_object2 obj = {
- .handle = create_sema(gem_fd, reloc),
- .relocation_count = 2,
+ .handle = create_sema(gem_fd, ahnd, reloc, &obj.offset),
+ .relocation_count = !ahnd ? 2 : 0,
.relocs_ptr = to_user_pointer(reloc),
};
struct drm_i915_gem_execbuffer2 eb = {
@@ -822,7 +866,7 @@ __sema_busy(int gem_fd, int pmu,
map = gem_mmap__wc(gem_fd, obj.handle, 0, 4096, PROT_WRITE);
gem_execbuf(gem_fd, &eb);
- spin = igt_spin_new(gem_fd, .engine = e->flags);
+ spin = igt_spin_new(gem_fd, .ahnd = ahnd, .engine = e->flags);
/* Wait until the batch is executed and the semaphore is busy-waiting */
while (!READ_ONCE(*map) && gem_bo_busy(gem_fd, obj.handle))
@@ -867,6 +911,7 @@ sema_busy(int gem_fd,
unsigned int flags)
{
int fd[2];
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
@@ -875,12 +920,13 @@ sema_busy(int gem_fd,
fd[1] = open_group(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance),
fd[0]);
- __sema_busy(gem_fd, fd[0], e, 50, 100);
- __sema_busy(gem_fd, fd[0], e, 25, 50);
- __sema_busy(gem_fd, fd[0], e, 75, 75);
+ __sema_busy(gem_fd, ahnd, fd[0], e, 50, 100);
+ __sema_busy(gem_fd, ahnd, fd[0], e, 25, 50);
+ __sema_busy(gem_fd, ahnd, fd[0], e, 75, 75);
close(fd[0]);
close(fd[1]);
+ put_ahnd(ahnd);
}
static void test_awake(int i915)
@@ -889,13 +935,14 @@ static void test_awake(int i915)
unsigned long slept;
uint64_t val;
int fd;
+ uint64_t ahnd = get_reloc_ahnd(i915, 0);
fd = perf_i915_open(i915, I915_PMU_SOFTWARE_GT_AWAKE_TIME);
igt_skip_on(fd < 0);
/* Check that each engine is captured by the GT wakeref */
__for_each_physical_engine(i915, e) {
- igt_spin_new(i915, .engine = e->flags);
+ igt_spin_new(i915, .ahnd = ahnd, .engine = e->flags);
val = pmu_read_single(fd);
slept = measured_usleep(batch_duration_ns / 1000);
@@ -907,7 +954,7 @@ static void test_awake(int i915)
/* And that the total GT wakeref matches walltime not summation */
__for_each_physical_engine(i915, e)
- igt_spin_new(i915, .engine = e->flags);
+ igt_spin_new(i915, .ahnd = ahnd, .engine = e->flags);
val = pmu_read_single(fd);
slept = measured_usleep(batch_duration_ns / 1000);
@@ -918,6 +965,7 @@ static void test_awake(int i915)
igt_free_spins(i915);
close(fd);
+ put_ahnd(ahnd);
}
#define MI_WAIT_FOR_PIPE_C_VBLANK (1<<21)
@@ -1131,6 +1179,7 @@ multi_client(int gem_fd, const struct intel_execution_engine2 *e)
uint64_t val[2], ts[2], perf_slept[2];
igt_spin_t *spin;
int fd[2];
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
gem_quiescent_gpu(gem_fd);
@@ -1143,7 +1192,7 @@ multi_client(int gem_fd, const struct intel_execution_engine2 *e)
*/
fd[1] = open_pmu(gem_fd, config);
- spin = spin_sync(gem_fd, 0, e);
+ spin = spin_sync(gem_fd, ahnd, 0, e);
val[0] = val[1] = __pmu_read_single(fd[0], &ts[0]);
slept[1] = measured_usleep(batch_duration_ns / 1000);
@@ -1161,6 +1210,7 @@ multi_client(int gem_fd, const struct intel_execution_engine2 *e)
gem_sync(gem_fd, spin->handle);
igt_spin_free(gem_fd, spin);
close(fd[0]);
+ put_ahnd(ahnd);
assert_within_epsilon(val[0], perf_slept[0], tolerance);
assert_within_epsilon(val[1], perf_slept[1], tolerance);
@@ -1354,6 +1404,7 @@ test_interrupts(int gem_fd)
uint64_t idle, busy;
int fence_fd;
int fd;
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
gem_quiescent_gpu(gem_fd);
@@ -1362,6 +1413,7 @@ test_interrupts(int gem_fd)
/* Queue spinning batches. */
for (int i = 0; i < target; i++) {
spin[i] = __igt_spin_new(gem_fd,
+ .ahnd = ahnd,
.engine = I915_EXEC_DEFAULT,
.flags = IGT_SPIN_FENCE_OUT);
if (i == 0) {
@@ -1400,6 +1452,7 @@ test_interrupts(int gem_fd)
/* Free batches. */
for (int i = 0; i < target; i++)
igt_spin_free(gem_fd, spin[i]);
+ put_ahnd(ahnd);
/* Check at least as many interrupts has been generated. */
busy = pmu_read_single(fd) - idle;
@@ -1417,6 +1470,7 @@ test_interrupts_sync(int gem_fd)
struct pollfd pfd;
uint64_t idle, busy;
int fd;
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
gem_quiescent_gpu(gem_fd);
@@ -1425,6 +1479,7 @@ test_interrupts_sync(int gem_fd)
/* Queue spinning batches. */
for (int i = 0; i < target; i++)
spin[i] = __igt_spin_new(gem_fd,
+ .ahnd = ahnd,
.flags = IGT_SPIN_FENCE_OUT);
/* Wait for idle state. */
@@ -1449,6 +1504,7 @@ test_interrupts_sync(int gem_fd)
/* Check at least as many interrupts has been generated. */
busy = pmu_read_single(fd) - idle;
close(fd);
+ put_ahnd(ahnd);
igt_assert_lte(target, busy);
}
@@ -1461,6 +1517,7 @@ test_frequency(int gem_fd)
double min[2], max[2];
igt_spin_t *spin;
int fd[2], sysfs;
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
sysfs = igt_sysfs_open(gem_fd);
igt_require(sysfs >= 0);
@@ -1488,7 +1545,7 @@ test_frequency(int gem_fd)
igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == min_freq);
gem_quiescent_gpu(gem_fd); /* Idle to be sure the change takes effect */
- spin = spin_sync_flags(gem_fd, 0, I915_EXEC_DEFAULT);
+ spin = spin_sync_flags(gem_fd, ahnd, 0, I915_EXEC_DEFAULT);
slept = pmu_read_multi(fd[0], 2, start);
measured_usleep(batch_duration_ns / 1000);
@@ -1514,7 +1571,7 @@ test_frequency(int gem_fd)
igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == max_freq);
gem_quiescent_gpu(gem_fd);
- spin = spin_sync_flags(gem_fd, 0, I915_EXEC_DEFAULT);
+ spin = spin_sync_flags(gem_fd, ahnd, 0, I915_EXEC_DEFAULT);
slept = pmu_read_multi(fd[0], 2, start);
measured_usleep(batch_duration_ns / 1000);
@@ -1535,6 +1592,7 @@ test_frequency(int gem_fd)
min_freq, igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz"));
close(fd[0]);
close(fd[1]);
+ put_ahnd(ahnd);
igt_info("Min frequency: requested %.1f, actual %.1f\n",
min[0], min[1]);
@@ -1818,9 +1876,13 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
};
uint64_t total_busy_ns = 0, total_ns = 0;
igt_spin_t *spin;
+ uint64_t ahnd;
+
+ intel_allocator_init();
+ ahnd = get_reloc_ahnd(gem_fd, 0);
/* Allocate our spin batch and idle it. */
- spin = igt_spin_new(gem_fd, .engine = e->flags);
+ spin = igt_spin_new(gem_fd, .ahnd = ahnd, .engine = e->flags);
igt_spin_end(spin);
gem_sync(gem_fd, spin->handle);
@@ -1891,6 +1953,7 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
}
igt_spin_free(gem_fd, spin);
+ put_ahnd(ahnd);
}
fd = open_pmu(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance));
--
2.26.0
More information about the Intel-gfx-trybot
mailing list