[PATCH i-g-t 29/54] tests/gem_softpin: Exercise eviction with softpinning
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Fri Jul 16 11:49:35 UTC 2021
From: Andrzej Turko <andrzej.turko at linux.intel.com>
Exercise eviction of many gem objects. The added tests are analogous
to gem_exec_gttfill, but they use softpin and do not require relocation
support.
Signed-off-by: Andrzej Turko <andrzej.turko at linux.intel.com>
Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
---
tests/i915/gem_softpin.c | 213 ++++++++++++++++++++++++++++++++++++++-
1 file changed, 212 insertions(+), 1 deletion(-)
diff --git a/tests/i915/gem_softpin.c b/tests/i915/gem_softpin.c
index bdb04821d..82d8a2861 100644
--- a/tests/i915/gem_softpin.c
+++ b/tests/i915/gem_softpin.c
@@ -29,6 +29,7 @@
#include "i915/gem.h"
#include "i915/gem_create.h"
#include "igt.h"
+#include "igt_rand.h"
#include "intel_allocator.h"
#define EXEC_OBJECT_PINNED (1<<4)
@@ -877,9 +878,209 @@ static void test_allocator_fork(int fd)
intel_allocator_multiprocess_stop();
}
+#define BATCH_SIZE (4096<<10) /* size in bytes of every batch object: 4 MiB */
+/* We don't have alignment detection yet, so assume the worst-case scenario. */
+#define BATCH_ALIGNMENT (1 << 21) /* 2 MiB, passed to intel_allocator_alloc() */
+
+struct batch { /* one batch object tracked by the eviction test */
+ uint32_t handle; /* GEM handle, set from gem_create() */
+ void *ptr; /* CPU mapping, set from gem_mmap__device_coherent() */
+};
+
+/*
+ * Element-swap callback handed to igt_permute_array(): exchanges the
+ * struct batch entries found at indices i and j of array.
+ */
+static void xchg_batch(void *array, unsigned int i, unsigned int j)
+{
+	struct batch *lhs = (struct batch *)array + i;
+	struct batch *rhs = (struct batch *)array + j;
+	const struct batch saved = *lhs;
+
+	*lhs = *rhs;
+	*rhs = saved;
+}
+
+/*
+ * Submit every object in batches[] once, softpinned at an address handed
+ * out by the allocator.
+ *
+ * For each object a small MI_STORE_DWORD_IMM batch is written through the
+ * CPU mapping at eb->batch_start_offset. When executed it stores the
+ * object's own GPU offset into the 8 bytes located at
+ * BATCH_SIZE - eb->batch_start_offset - 8 inside that same object, so a
+ * caller can compare the immediate value in the batch with what was stored.
+ *
+ * @fd: i915 device fd
+ * @gen: hardware generation, selects the MI_STORE_DWORD_IMM layout
+ * @eb: execbuf to submit; its buffers_ptr is overwritten and points at a
+ *      local object that is no longer valid once this function returns
+ * @batches: objects to submit (GEM handle + CPU mapping)
+ * @count: number of entries in batches
+ * @ahnd: allocator handle used to pick the offset of each object
+ */
+static void submit(int fd, int gen,
+		   struct drm_i915_gem_execbuffer2 *eb,
+		   struct batch *batches, unsigned int count,
+		   uint64_t ahnd)
+{
+	struct drm_i915_gem_exec_object2 obj;
+	/*
+	 * Zero-filled so that the dwords after MI_BATCH_BUFFER_END, which
+	 * are copied into the object as well, are never indeterminate
+	 * stack contents.
+	 */
+	uint32_t batch[16] = { 0 };
+	uint64_t address;
+	unsigned n;
+
+	memset(&obj, 0, sizeof(obj));
+	obj.flags = EXEC_OBJECT_PINNED;
+	/* The same single exec object is reused for every submission */
+	eb->buffers_ptr = to_user_pointer(&obj);
+
+	for (unsigned i = 0; i < count; i++) {
+		obj.handle = batches[i].handle;
+		obj.offset = intel_allocator_alloc(ahnd, obj.handle,
+						   BATCH_SIZE,
+						   BATCH_ALIGNMENT);
+		/* GPU address of the 8-byte result slot near the end of the object */
+		address = obj.offset + BATCH_SIZE - eb->batch_start_offset - 8;
+
+		n = 0;
+		batch[n] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		if (gen >= 8) {
+			/* 64-bit address, value at dword 3 */
+			batch[n] |= 1 << 21;
+			batch[n]++;
+			batch[++n] = address;
+			batch[++n] = address >> 32;
+		} else if (gen >= 4) {
+			/* zero dword, then 32-bit address, value at dword 3 */
+			batch[++n] = 0;
+			batch[++n] = address;
+		} else {
+			/* 32-bit address only, value at dword 2 */
+			batch[n]--;
+			batch[++n] = address;
+		}
+		batch[++n] = obj.offset; /* lower_32_bits(value) */
+		batch[++n] = obj.offset >> 32; /* upper_32_bits(value) / nop */
+		batch[++n] = MI_BATCH_BUFFER_END;
+
+		memcpy((uint8_t *)batches[i].ptr + eb->batch_start_offset,
+		       batch, sizeof(batch));
+
+		gem_execbuf(fd, eb);
+	}
+	/*
+	 * The objects are never flagged as written by the GPU, so sync on
+	 * the last submitted one explicitly before the caller inspects
+	 * the results.
+	 */
+	gem_sync(fd, obj.handle);
+}
+
+/*
+ * Exercise eviction with softpinned objects.
+ *
+ * Creates one more BATCH_SIZE object than fits into the usable aperture
+ * (capped at 4GiB), then forks one child per selected engine. Each child
+ * shuffles the object list and, until timeout expires, resubmits every
+ * object through submit() and verifies that the value stored by the GPU
+ * matches the immediate value written into the batch.
+ *
+ * @fd: i915 device fd
+ * @ctx: context whose engine map is enumerated and used for submission
+ * @ring: engine selector from ctx's engine map, or ALL_ENGINES to use
+ *        every engine that can store a dword
+ * @timeout: passed to igt_until_timeout() for each child's submit loop
+ */
+static void test_allocator_evict(int fd, const intel_ctx_t *ctx,
+				 unsigned ring, int timeout)
+{
+	const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
+	struct drm_i915_gem_execbuffer2 execbuf;
+	unsigned engines[I915_EXEC_RING_MASK + 1];
+	volatile uint64_t *shared;
+	struct timespec tv = {};
+	struct batch *batches;
+	unsigned nengine;
+	unsigned count;
+	uint64_t size, ahnd;
+
+	/*
+	 * Page shared with the children: slot [child] receives that
+	 * child's cycle count, slot [nengine] the total. The parent reads
+	 * it back, so map it readable as well as writable.
+	 */
+	shared = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+		      MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(shared != MAP_FAILED);
+
+	nengine = 0;
+	if (ring == ALL_ENGINES) {
+		struct intel_execution_engine2 *e;
+
+		for_each_ctx_engine(fd, ctx, e) {
+			if (!gem_class_can_store_dword(fd, e->class))
+				continue;
+
+			engines[nengine++] = e->flags;
+		}
+	} else {
+		engines[nengine++] = ring;
+	}
+	igt_require(nengine);
+	/* Every child owns a private 64-byte slot inside each object */
+	igt_assert(nengine * 64 <= BATCH_SIZE);
+
+	size = gem_aperture_size(fd);
+	if (!gem_uses_full_ppgtt(fd))
+		size /= 2;
+	if (size > 1ull<<32) /* Limit to 4GiB as we do not use allow-48b */
+		size = 1ull << 32;
+	igt_require(size < (1ull<<32) * BATCH_SIZE);
+
+	/* One object more than fits, so addresses have to be reused */
+	count = size / BATCH_SIZE + 1;
+	igt_debug("Using %'u batches to fill %'llu aperture on %u engines\n",
+		  count, (unsigned long long)size, nengine);
+
+	/*
+	 * Check for memory before starting the allocator in multiprocess
+	 * mode, so that a skip here does not leave it started.
+	 */
+	intel_require_memory(count, BATCH_SIZE, CHECK_RAM);
+
+	intel_allocator_multiprocess_start();
+	/* Avoid allocating on the last page */
+	ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
+					 INTEL_ALLOCATOR_RELOC,
+					 ALLOC_STRATEGY_HIGH_TO_LOW);
+
+	intel_detect_and_clear_missed_interrupts(fd);
+
+	igt_nsec_elapsed(&tv);
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffer_count = 1;
+	if (gen < 6)
+		execbuf.flags |= I915_EXEC_SECURE;
+	/*
+	 * engines[] holds selectors taken from ctx's engine map, so the
+	 * batches must be submitted on that very context.
+	 */
+	execbuf.rsvd1 = ctx->id;
+
+	batches = calloc(count, sizeof(*batches));
+	igt_assert(batches);
+	for (unsigned i = 0; i < count; i++) {
+		batches[i].handle = gem_create(fd, BATCH_SIZE);
+		batches[i].ptr =
+			gem_mmap__device_coherent(fd, batches[i].handle,
+						  0, BATCH_SIZE, PROT_WRITE);
+	}
+
+	/* Flush all memory before we start the timer */
+	submit(fd, gen, &execbuf, batches, count, ahnd);
+
+	igt_info("Setup %u batches in %.2fms\n",
+		 count, 1e-6 * igt_nsec_elapsed(&tv));
+
+	igt_fork(child, nengine) {
+		uint64_t dst, src, dst_offset, src_offset;
+		uint64_t cycles = 0;
+
+		hars_petruska_f54_1_random_perturb(child);
+		igt_permute_array(batches, count, xchg_batch);
+		execbuf.batch_start_offset = child * 64;
+		execbuf.flags |= engines[child];
+
+		/* Where the GPU stores the value, relative to the object */
+		dst_offset = BATCH_SIZE - child * 64 - 8;
+		/*
+		 * Where submit() places the immediate value inside the
+		 * batch: dword 3 for gen4+ (both the gen8+ and the gen4-7
+		 * layouts put two dwords between the command and the
+		 * value), dword 2 before gen4.
+		 */
+		src_offset = child * 64 +
+			     (gen >= 4 ? 3 : 2) * sizeof(uint32_t);
+
+		/* We need to open the allocator again in the new process */
+		ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
+						 INTEL_ALLOCATOR_RELOC,
+						 ALLOC_STRATEGY_HIGH_TO_LOW);
+
+		igt_until_timeout(timeout) {
+			submit(fd, gen, &execbuf, batches, count, ahnd);
+			for (unsigned i = 0; i < count; i++) {
+				dst = *(uint64_t *)(batches[i].ptr + dst_offset);
+				src = *(uint64_t *)(batches[i].ptr + src_offset);
+				igt_assert_eq_u64(dst, src);
+			}
+			cycles++;
+		}
+		shared[child] = cycles;
+		igt_info("engine[%d]: %llu cycles\n",
+			 child, (unsigned long long)cycles);
+		intel_allocator_close(ahnd);
+	}
+	igt_waitchildren();
+
+	intel_allocator_close(ahnd);
+	intel_allocator_multiprocess_stop();
+
+	for (unsigned i = 0; i < count; i++) {
+		munmap(batches[i].ptr, BATCH_SIZE);
+		gem_close(fd, batches[i].handle);
+	}
+	free(batches);
+
+	shared[nengine] = 0;
+	for (unsigned i = 0; i < nengine; i++)
+		shared[nengine] += shared[i];
+	igt_info("Total: %llu cycles\n",
+		 (unsigned long long)shared[nengine]);
+
+	/* Done with the results page; do not leak it across subtests */
+	munmap((void *)shared, 4096);
+
+	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+#define test_each_engine(T, i915, ctx, e) /* statement prefix: subtest T, one dynamic subtest per ctx engine */ \
+ igt_subtest_with_dynamic(T) for_each_ctx_engine(i915, ctx, e) \
+ igt_dynamic_f("%s", e->name) /* dynamic subtest is named after the engine */
+
igt_main
{
+ const struct intel_execution_engine2 *e;
int fd = -1;
+ const intel_ctx_t *ctx;
igt_fixture {
fd = drm_open_driver_master(DRIVER_INTEL);
@@ -887,6 +1088,8 @@ igt_main
gem_require_blitter(fd);
igt_require(gem_has_softpin(fd));
igt_require(gem_can_store_dword(fd, 0));
+
+ ctx = intel_ctx_create_all_physical(fd);
}
igt_subtest("invalid")
@@ -922,6 +1125,12 @@ igt_main
igt_subtest("allocator-fork")
test_allocator_fork(fd);
+
+ test_each_engine("allocator-evict", fd, ctx, e)
+ test_allocator_evict(fd, ctx, e->flags, 20);
+
+ igt_subtest("allocator-evict-all-engines")
+ test_allocator_evict(fd, ctx, ALL_ENGINES, 20);
}
igt_subtest("softpin")
@@ -949,6 +1158,8 @@ igt_main
igt_subtest("evict-hang")
test_evict_hang(fd);
- igt_fixture
+ igt_fixture {
+ intel_ctx_destroy(fd, ctx);
close(fd);
+ }
}
--
2.26.0
More information about the Intel-gfx-trybot
mailing list