[Intel-gfx] [PATCH i-g-t 2/2] tests/prime_vgem: Examine blitter access path
Chris Wilson
chris at chris-wilson.co.uk
Thu Jan 9 15:03:30 UTC 2020
Quoting Janusz Krzysztofik (2020-01-09 14:01:25)
> On future hardware with a missing GGTT BAR we won't be able to exercise
> dma-buf access via that path. An alternative to the basic-gtt subtest
> for testing dma-buf access is required, as well as alternatives to the
> basic-fence-mmap and coherency-gtt subtests for testing WC coherency.
>
> Access to the DMA sg list exposed by dma-buf can be tested through the
> blitter. Unfortunately we don't have any equivalently simple tests
> that use the blitter. Provide them.
>
> The blitter XY_SRC_COPY method, implemented by the IGT library
> function igt_blitter_src_copy__raw(), has been chosen.
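For reference, the helper's parameter list (as inferred from the call
sites below; see intel_batchbuffer.h for the authoritative prototype)
is roughly:

	igt_blitter_src_copy__raw(fd,
				  src_handle, src_delta, src_stride,
				  src_tiling, src_x, src_y,
				  width, height, bpp,
				  dst_handle, dst_delta, dst_stride,
				  dst_tiling, dst_x, dst_y);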
>
> v2: As fast copy is not supported on platforms older than Gen 9,
> use XY_SRC_COPY instead (Chris),
> - add subtest descriptions.
>
> Suggested-by: Chris Wilson <chris at chris-wilson.co.uk>
> Suggested-by: Daniel Vetter <daniel.vetter at ffwll.ch>
> Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
> ---
> tests/prime_vgem.c | 192 +++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 192 insertions(+)
>
> diff --git a/tests/prime_vgem.c b/tests/prime_vgem.c
> index 69ae8c9b..9ceee47a 100644
> --- a/tests/prime_vgem.c
> +++ b/tests/prime_vgem.c
> @@ -23,6 +23,7 @@
>
> #include "igt.h"
> #include "igt_vgem.h"
> +#include "intel_batchbuffer.h"
>
> #include <sys/ioctl.h>
> #include <sys/poll.h>
> @@ -171,6 +172,77 @@ static void test_fence_mmap(int i915, int vgem)
> close(slave[1]);
> }
>
> +static void test_fence_blt(int i915, int vgem)
> +{
> + struct vgem_bo scratch;
> + uint32_t prime;
> + uint32_t *ptr;
> + uint32_t fence;
> + int dmabuf, i;
> + int master[2], slave[2];
> +
> + igt_assert(pipe(master) == 0);
> + igt_assert(pipe(slave) == 0);
> +
> + scratch.width = 1024;
> + scratch.height = 1024;
> + scratch.bpp = 32;
> + vgem_create(vgem, &scratch);
Hmm. The pitch is returned by the dumb create; did we not report it in
vgem_bo? If so, use scratch.pitch here rather than recomputing it.
> +
> + dmabuf = prime_handle_to_fd(vgem, scratch.handle);
> + prime = prime_fd_to_handle(i915, dmabuf);
> + close(dmabuf);
> +
> + igt_fork(child, 1) {
> + uint32_t native;
> +
> + close(master[0]);
> + close(slave[1]);
> +
> + native = gem_create(i915, scratch.size);
> +
> + ptr = gem_mmap__wc(i915, native, 0, scratch.size, PROT_READ);
> + for (i = 0; i < 1024; i++)
for (i = 0; i < scratch.height; i++)
> + igt_assert_eq(ptr[1024 * i], 0);
igt_assert_eq_u32(ptr[scratch.pitch * i / sizeof(*ptr)], 0);
The "%08x" will be easier to read when you have ~i later on.
> +
> + write(master[1], &child, sizeof(child));
> + read(slave[0], &child, sizeof(child));
> +
> + igt_blitter_src_copy__raw(i915, prime, 0,
> + scratch.width * scratch.bpp / 8,
scratch.pitch,
> + I915_TILING_NONE, 0, 0,
> + scratch.width, scratch.height,
> + scratch.bpp, native, 0,
> + scratch.width * scratch.bpp / 8,
> + I915_TILING_NONE, 0, 0);
> + gem_sync(i915, native);
> +
> + for (i = 0; i < 1024; i++)
> + igt_assert_eq(ptr[1024 * i], i);
> +
> + munmap(ptr, scratch.size);
> + gem_close(i915, native);
> + gem_close(i915, prime);
> + }
> +
> + close(master[1]);
> + close(slave[0]);
> + read(master[0], &i, sizeof(i));
> + fence = vgem_fence_attach(vgem, &scratch, VGEM_FENCE_WRITE);
> + write(slave[1], &i, sizeof(i));
Throw in a usleep(50*1000); to emphasize that the only thing stopping
the blitter is the fence.
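Something like this in the parent (a sketch; the 50ms is arbitrary,
just long enough for the child to have submitted its copy):

	read(master[0], &i, sizeof(i));
	fence = vgem_fence_attach(vgem, &scratch, VGEM_FENCE_WRITE);
	write(slave[1], &i, sizeof(i));
	/* give the child ample time to submit its blit, so that the
	 * only thing left holding the blitter back is the fence */
	usleep(50 * 1000);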
> +
> + ptr = vgem_mmap(vgem, &scratch, PROT_WRITE);
> + for (i = 0; i < 1024; i++)
> + ptr[1024 * i] = i;
> + munmap(ptr, scratch.size);
> + vgem_fence_signal(vgem, fence);
> + gem_close(vgem, scratch.handle);
> +
> + igt_waitchildren();
> + close(master[0]);
> + close(slave[1]);
> +}
> +
> static void test_write(int vgem, int i915)
> {
> struct vgem_bo scratch;
> @@ -236,6 +308,62 @@ static void test_gtt(int vgem, int i915)
> gem_close(vgem, scratch.handle);
> }
>
> +static void test_blt(int vgem, int i915)
> +{
> + struct vgem_bo scratch;
> + uint32_t prime, native;
> + uint32_t *ptr;
> + int dmabuf, i;
> +
> + scratch.width = 1024;
> + scratch.height = 1024;
> + scratch.bpp = 32;
> + vgem_create(vgem, &scratch);
> +
> + dmabuf = prime_handle_to_fd(vgem, scratch.handle);
> + prime = prime_fd_to_handle(i915, dmabuf);
> + close(dmabuf);
> +
> + native = gem_create(i915, scratch.size);
> +
> + ptr = gem_mmap__wc(i915, native, 0, scratch.size, PROT_WRITE);
> + for (i = 0; i < 1024; i++)
> + ptr[1024 * i] = i;
> + munmap(ptr, scratch.size);
> +
> + igt_blitter_src_copy__raw(i915,
> + native, 0, scratch.width * scratch.bpp / 8,
> + I915_TILING_NONE, 0, 0,
> + scratch.width, scratch.height, scratch.bpp,
> + prime, 0, scratch.width * scratch.bpp / 8,
> + I915_TILING_NONE, 0, 0);
> + gem_sync(i915, prime);
Would this be better with prime_sync_start(dmabuf, true)?
gem_sync(prime) is nice in its own way (checking sync on the imported
handle provides correct serialisation for the vgem_mmap), but I think
the recommended practice would be to use the dmabuf for the
inter-device sync?
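With the dmabuf fd kept open after the import (instead of the
close(dmabuf) above), that would look something like this sketch,
assuming the DMA_BUF_IOCTL_SYNC wrappers prime_sync_start() and
prime_sync_end():

	/* bracket the CPU access so begin_cpu_access waits on the blit */
	prime_sync_start(dmabuf, true);
	ptr = vgem_mmap(vgem, &scratch, PROT_READ | PROT_WRITE);
	for (i = 0; i < 1024; i++) {
		igt_assert_eq_u32(ptr[1024 * i], i);
		ptr[1024 * i] = ~i;
	}
	munmap(ptr, scratch.size);
	prime_sync_end(dmabuf, true);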
> + ptr = vgem_mmap(vgem, &scratch, PROT_READ | PROT_WRITE);
> + for (i = 0; i < 1024; i++) {
> + igt_assert_eq(ptr[1024 * i], i);
> + ptr[1024 * i] = ~i;
> + }
> + munmap(ptr, scratch.size);
> +
> + igt_blitter_src_copy__raw(i915,
> + prime, 0, scratch.width * scratch.bpp / 8,
> + I915_TILING_NONE, 0, 0,
> + scratch.width, scratch.height, scratch.bpp,
> + native, 0, scratch.width * scratch.bpp / 8,
> + I915_TILING_NONE, 0, 0);
> + gem_sync(i915, native);
> +
> + ptr = gem_mmap__wc(i915, native, 0, scratch.size, PROT_READ);
> + for (i = 0; i < 1024; i++)
> + igt_assert_eq(ptr[1024 * i], ~i);
> + munmap(ptr, scratch.size);
> +
> + gem_close(i915, native);
> + gem_close(i915, prime);
> + gem_close(vgem, scratch.handle);
> +}
> +
> static void test_shrink(int vgem, int i915)
> {
> struct vgem_bo scratch = {
> @@ -319,6 +447,59 @@ static void test_gtt_interleaved(int vgem, int i915)
> gem_close(vgem, scratch.handle);
> }
>
> +static void test_blt_interleaved(int vgem, int i915)
> +{
> + struct vgem_bo scratch;
> + uint32_t prime, native;
> + uint32_t *foreign, *local;
> + int dmabuf, i;
> +
> + scratch.width = 1024;
> + scratch.height = 1024;
> + scratch.bpp = 32;
> + vgem_create(vgem, &scratch);
> +
> + dmabuf = prime_handle_to_fd(vgem, scratch.handle);
> + prime = prime_fd_to_handle(i915, dmabuf);
> + close(dmabuf);
> +
> + native = gem_create(i915, scratch.size);
> +
> + foreign = vgem_mmap(vgem, &scratch, PROT_WRITE);
> + local = gem_mmap__wc(i915, native, 0, scratch.size, PROT_WRITE);
> +
> + for (i = 0; i < 1024; i++) {
> + local[1024 * i] = i;
> + igt_blitter_src_copy__raw(i915, native, 0,
> + scratch.width * scratch.bpp / 8,
> + I915_TILING_NONE, 0, i,
> + scratch.width, 1, scratch.bpp,
> + prime, 0,
> + scratch.width * scratch.bpp / 8,
> + I915_TILING_NONE, 0, i);
> + gem_sync(i915, prime);
> + igt_assert_eq(foreign[1024 * i], i);
> +
> + foreign[1024 * i] = ~i;
> + igt_blitter_src_copy__raw(i915, prime, 0,
> + scratch.width * scratch.bpp / 8,
> + I915_TILING_NONE, 0, i,
> + scratch.width, 1, scratch.bpp,
> + native, 0,
> + scratch.width * scratch.bpp / 8,
> + I915_TILING_NONE, 0, i);
> + gem_sync(i915, native);
> + igt_assert_eq(local[1024 * i], ~i);
> + }
> +
> + munmap(local, scratch.size);
> + munmap(foreign, scratch.size);
> +
> + gem_close(i915, native);
> + gem_close(i915, prime);
> + gem_close(vgem, scratch.handle);
> +}
So, other than using the scratch.* constants where possible, the tests
make sense and look interesting: asynchronous fence scheduling,
back-and-forth coherency, and serialisation.
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
-Chris