[Intel-gfx] [PATCH i-g-t v2] igt/gem_userptr: Check read-only mappings

Fri Jun 29 09:44:36 UTC 2018

Quoting Tvrtko Ursulin (2018-06-29 10:31:23)
> 
> On 29/06/2018 08:44, Chris Wilson wrote:
> > Setup a userptr object that only has a read-only mapping back to a file
> > store (memfd). Then attempt to write into that mapping using the GPU and
> > assert that those writes do not land (while also writing via a writable
> > userptr mapping into the same memfd to verify that the GPU is working!)
> > 
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> > ---
> > Minor commentary additions
> > ---
> >   configure.ac              |   1 +
> >   lib/ioctl_wrappers.c      |   4 +-
> >   lib/ioctl_wrappers.h      |   4 +-
> >   lib/meson.build           |   1 +
> >   meson.build               |   1 +
> >   tests/Makefile.am         |   4 +-
> >   tests/gem_userptr_blits.c | 372 +++++++++++++++++++++++++++++++++++++-
> >   7 files changed, 377 insertions(+), 10 deletions(-)
> > 
> > diff --git a/configure.ac b/configure.ac
> > index 1ee4e90e9..195963d4f 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -125,6 +125,7 @@ PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10])
> >   PKG_CHECK_MODULES(KMOD, [libkmod])
> >   PKG_CHECK_MODULES(PROCPS, [libprocps])
> >   PKG_CHECK_MODULES(LIBUNWIND, [libunwind])
> > +PKG_CHECK_MODULES(SSL, [openssl])
> >   PKG_CHECK_MODULES(VALGRIND, [valgrind], [have_valgrind=yes], [have_valgrind=no])
> >   
> >   if test x$have_valgrind = xyes; then
> > diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c
> > index 79db44a8c..d5d2a4e4c 100644
> > --- a/lib/ioctl_wrappers.c
> > +++ b/lib/ioctl_wrappers.c
> > @@ -869,7 +869,7 @@ int gem_madvise(int fd, uint32_t handle, int state)
> >       return madv.retained;
> >   }
> >   
> > -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle)
> > +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle)
> >   {
> >       struct drm_i915_gem_userptr userptr;
> >   
> > @@ -898,7 +898,7 @@ int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, ui
> >    *
> >    * Returns userptr handle for the GEM object.
> >    */
> > -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle)
> > +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle)
> >   {
> >       igt_assert_eq(__gem_userptr(fd, ptr, size, read_only, flags, handle), 0);
> >   }
> > diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h
> > index b966f72c9..8e2cd380b 100644
> > --- a/lib/ioctl_wrappers.h
> > +++ b/lib/ioctl_wrappers.h
> > @@ -133,8 +133,8 @@ struct local_i915_gem_userptr {
> >   #define LOCAL_I915_USERPTR_UNSYNCHRONIZED (1<<31)
> >       uint32_t handle;
> >   };
> > -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle);
> > -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle);
> > +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle);
> > +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle);
> >   
> >   void gem_sw_finish(int fd, uint32_t handle);
> >   
> > diff --git a/lib/meson.build b/lib/meson.build
> > index 1a355414e..939167f91 100644
> > --- a/lib/meson.build
> > +++ b/lib/meson.build
> > @@ -62,6 +62,7 @@ lib_deps = [
> >       pthreads,
> >       math,
> >       realtime,
> > +     ssl,
> >   ]
> >   
> >   if libdrm_intel.found()
> > diff --git a/meson.build b/meson.build
> > index 4d15d6238..638c01066 100644
> > --- a/meson.build
> > +++ b/meson.build
> > @@ -98,6 +98,7 @@ pciaccess = dependency('pciaccess', version : '>=0.10')
> >   libkmod = dependency('libkmod')
> >   libprocps = dependency('libprocps', required : true)
> >   libunwind = dependency('libunwind', required : true)
> > +ssl = dependency('openssl', required : true)
> >   
> >   valgrind = null_dep
> >   valgrindinfo = 'No'
> > diff --git a/tests/Makefile.am b/tests/Makefile.am
> > index f41ad5096..ba307b220 100644
> > --- a/tests/Makefile.am
> > +++ b/tests/Makefile.am
> > @@ -126,8 +126,8 @@ gem_tiled_swapping_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> >   gem_tiled_swapping_LDADD = $(LDADD) -lpthread
> >   prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> >   prime_self_import_LDADD = $(LDADD) -lpthread
> > -gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> > -gem_userptr_blits_LDADD = $(LDADD) -lpthread
> > +gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) $(SSL_CFLAGS)
> > +gem_userptr_blits_LDADD = $(LDADD) $(SSL_LIBS) -lpthread
> >   perf_pmu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
> >   
> >   gem_eio_LDADD = $(LDADD) -lrt
> > diff --git a/tests/gem_userptr_blits.c b/tests/gem_userptr_blits.c
> > index 7e3b6ef38..0fb7eba04 100644
> > --- a/tests/gem_userptr_blits.c
> > +++ b/tests/gem_userptr_blits.c
> > @@ -43,13 +43,17 @@
> >   #include <fcntl.h>
> >   #include <inttypes.h>
> >   #include <errno.h>
> > +#include <setjmp.h>
> >   #include <sys/stat.h>
> >   #include <sys/time.h>
> >   #include <sys/mman.h>
> > +#include <openssl/sha.h>
> >   #include <signal.h>
> >   #include <pthread.h>
> >   #include <time.h>
> >   
> > +#include <linux/memfd.h>
> > +
> >   #include "drm.h"
> >   #include "i915_drm.h"
> >   
> > @@ -238,6 +242,57 @@ blit(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo)
> >       return ret;
> >   }
> >   
> > +static void store_dword(int fd, uint32_t target,
> > +                     uint32_t offset, uint32_t value)
> > +{
> > +     const int gen = intel_gen(intel_get_drm_devid(fd));
> > +     struct drm_i915_gem_exec_object2 obj[2];
> > +     struct drm_i915_gem_relocation_entry reloc;
> > +     struct drm_i915_gem_execbuffer2 execbuf;
> > +     uint32_t batch[16];
> > +     int i;
> > +
> > +     memset(&execbuf, 0, sizeof(execbuf));
> > +     execbuf.buffers_ptr = to_user_pointer(obj);
> > +     execbuf.buffer_count = ARRAY_SIZE(obj);
> > +     execbuf.flags = 0;
> > +     if (gen < 6)
> > +             execbuf.flags |= I915_EXEC_SECURE;
> > +
> > +     memset(obj, 0, sizeof(obj));
> > +     obj[0].handle = target;
> > +     obj[1].handle = gem_create(fd, 4096);
> > +
> > +     memset(&reloc, 0, sizeof(reloc));
> > +     reloc.target_handle = obj[0].handle;
> > +     reloc.presumed_offset = 0;
> > +     reloc.offset = sizeof(uint32_t);
> > +     reloc.delta = offset;
> > +     reloc.read_domains = I915_GEM_DOMAIN_RENDER;
> > +     reloc.write_domain = I915_GEM_DOMAIN_RENDER;
> > +     obj[1].relocs_ptr = to_user_pointer(&reloc);
> > +     obj[1].relocation_count = 1;
> > +
> > +     i = 0;
> > +     batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> > +     if (gen >= 8) {
> > +             batch[++i] = offset;
> > +             batch[++i] = 0;
> > +     } else if (gen >= 4) {
> > +             batch[++i] = 0;
> > +             batch[++i] = offset;
> > +             reloc.offset += sizeof(uint32_t);
> > +     } else {
> > +             batch[i]--;
> > +             batch[++i] = offset;
> > +     } > +   batch[++i] = value;
> 
> I still think to avoid too much code duplication:
> 
> batch = __emit_store_dword(batch, gen, offset, value, &reloc.offset)
> 
> Then from there see if something more could be extracted form the three 
> call sites.
> 
> > +     batch[++i] = MI_BATCH_BUFFER_END;
> > +     gem_write(fd, obj[1].handle, 0, batch, sizeof(batch));
> > +     gem_execbuf(fd, &execbuf);
> > +     gem_close(fd, obj[1].handle);
> > +}
> > +
> >   static uint32_t
> >   create_userptr(int fd, uint32_t val, uint32_t *ptr)
> >   {
> > @@ -941,6 +996,310 @@ static int test_dmabuf(void)
> >       return 0;
> >   }
> >   
> > +static void test_readonly(int i915)
> > +{
> > +     unsigned char orig[SHA_DIGEST_LENGTH];
> > +     uint64_t aperture_size;
> > +     uint32_t whandle, rhandle;
> > +     size_t sz, total;
> > +     void *pages, *space;
> > +     int memfd;
> > +
> > +     /*
> > +      * A small batch of pages; small enough to cheaply check for stray
> > +      * writes but large enough that we don't create too many VMA pointing
> > +      * back to this set from the large arena. The limit on total number
> > +      * of VMA for a process is 65,536 (at least on this kernel).
> > +      *
> > +      * We then write from the GPU through the large arena into the smaller
> > +      * backing storage, which we can cheaply check to see if those writes
> > +      * have landed (using a SHA1sum). Repeating the same random GPU writes
> > +      * though a read-only handle to confirm that this time the writes are
> 
> through
> 
> > +      * discarded and the backing store unchanged.
> > +      */
> > +     sz = 16 << 12;
> 
> Here you aim not to exceed the above mentioned per-process VMA limit but 
> please just express that in the code. Maybe re-order the code a bit:
> 
> total = 2Gib
> sz = 2Gib / max_vmas_per_process * 2
> aperture_size = ...
> total = round_down
> 
> Then proceed with allocation etc.
> 
> > +     memfd = memfd_create("pages", 0);
> > +     igt_require(memfd != -1);
> > +     igt_require(ftruncate(memfd, sz) == 0);
> > +
> > +     pages = mmap(NULL, sz, PROT_WRITE, MAP_SHARED, memfd, 0);
> > +     igt_assert(pages != MAP_FAILED);
> > +
> > +     igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &rhandle) == 0);
> > +     gem_close(i915, rhandle);
> > +
> > +     gem_userptr(i915, pages, sz, false, userptr_flags, &whandle);
> > +
> 
> /*
>  From your reply:
> """
> the largest offset we can use is 4G, and we can't use the full range
> as we need some extra room for batches, and we can't use the full VMA
> limit without serious slow down and risk of exhaustion.
> Then sticking to a pot.
> """
> */
> 
> > +     total = 2048ull << 20;
> > +     aperture_size = gem_aperture_size(i915) / 2;
> > +     if (aperture_size < total)
> > +             total = aperture_size;
> > +     total = total / sz * sz;
> > +     igt_info("Using a %'zuB (%'zu pages) arena onto %zu pages\n",
> > +              total, total >> 12, sz >> 12);
> > +
> > +     /* Create an arena all pointing to the same set of pages */
> > +     space = mmap(NULL, total, PROT_READ, MAP_ANON | MAP_SHARED, -1, 0);
> 
> Why MAP_SHARED?

fork.

> > +     igt_require(space != MAP_FAILED);
> > +     for (size_t offset = 0; offset < total; offset += sz) {
> > +             igt_assert(mmap(space + offset, sz,
> > +                             PROT_WRITE, MAP_SHARED | MAP_FIXED,
> > +                             memfd, 0) != MAP_FAILED);
> > +             *(uint32_t *)(space + offset) = offset;
> 
> AFAIU:
> 
> First write instantiates the backing store, well, one page of it I 
> guess. Depending how memfd works I guess. But mlock later will do all of it.
> 
> > +     }
> > +     igt_assert_eq_u32(*(uint32_t *)pages, (uint32_t)(total - sz));
> 
> ... and this checks that the arena is made up from repeating chunks. 
> (Checking that the signature written into the last chunk is mirrored in 
> the first one.)
> 
> > +     igt_assert(mlock(space, total) == 0);
> 
> So this allocates all 64KiB definitely.
> 
> > +     close(memfd);
> > +
> > +     /* Check we can create a normal userptr bo wrapping the wrapper */
> > +     gem_userptr(i915, space, total, false, userptr_flags, &rhandle);
> 
> This is not read-only so rhandle is a bit misleading. Why do you btw 
> create the whandle so early on and not just here? Hmm... whandle is 
> chunk size, rhandle is arena size.. so the two loops below are different 
> in that respect. Why is that?

Because the arena will be readonly, the backing store is writeable.

> > +     gem_set_domain(i915, rhandle, I915_GEM_DOMAIN_CPU, 0);
> > +     for (size_t offset = 0; offset < total; offset += sz)
> > +             store_dword(i915, rhandle, offset + 4, offset / sz);
> > +     gem_sync(i915, rhandle);
> 
> I did not get your last reply here - once store dwords have completed 
> and you proceed to check the memory via CPU PTEs - do you need to move 
> the userptr bo back to the CPU domain so any flushes would happen?

rhandle, it's a new userptr that we want to verify we can populate.

> > +     igt_assert_eq_u32(*(uint32_t *)(pages + 0), (uint32_t)(total - sz));
> > +     igt_assert_eq_u32(*(uint32_t *)(pages + 4), (uint32_t)(total / sz - 1));
> 
> I really think comment explaining the layout and which side writes at 
> which offset would be beneficial.

It's literally explained in the code in this block and never used again.

> > +     gem_close(i915, rhandle);
> > +
> > +     /* Now enforce read-only henceforth */
> > +     igt_assert(mprotect(space, total, PROT_READ) == 0);
> > +
> > +     SHA1(pages, sz, orig);
> > +     igt_fork(child, 1) {
> > +             const int gen = intel_gen(intel_get_drm_devid(i915));
> > +             const int nreloc = 1024;
> > +             struct drm_i915_gem_relocation_entry *reloc;
> > +             struct drm_i915_gem_exec_object2 obj[2];
> > +             struct drm_i915_gem_execbuffer2 exec;
> > +             unsigned char ref[SHA_DIGEST_LENGTH], result[SHA_DIGEST_LENGTH];
> > +             uint32_t *batch;
> > +             int i;
> > +
> > +             reloc = calloc(sizeof(*reloc), nreloc);
> > +             gem_userptr(i915, space, total, true, userptr_flags, &rhandle);
> > +
> > +             memset(obj, 0, sizeof(obj));
> > +             obj[0].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B;
> > +             obj[1].handle = gem_create(i915, sz);
> 
> I didn't get your previous reply. This is the batch buffer right? So the 
> size needed is relating to the number of store dword + bbend you need to 
> emit, rather than sz, no? And nreloc is arbitrary subset of sz / 
> sizeof(uint32_t), right?
> 
> So maybe:
> 
> const int nreloc = sz / sizeof(uint32_t) / 16; /* arbitrary sub-size */
> ...
> obj[1].handle = gem_create(i915, sizeof(uint32_t) + nreloc * 
> sizeof(one_store_dword_sz));
> 
> Or I am missing something?

Just pointless.

> > +             obj[1].relocation_count = nreloc;
> > +             obj[1].relocs_ptr = to_user_pointer(reloc);
> > +
> > +             batch = gem_mmap__wc(i915, obj[1].handle, 0, sz, PROT_WRITE);
> > +
> > +             memset(&exec, 0, sizeof(exec));
> > +             exec.buffer_count = 2;
> > +             exec.buffers_ptr = to_user_pointer(obj);
> > +             if (gen < 6)
> > +                     exec.flags |= I915_EXEC_SECURE;
> > +
> > +             for_each_engine(i915, exec.flags) {
> > +                     /* First tweak the backing store through the write */
> > +                     i = 0;
> > +                     obj[0].handle = whandle;
> > +                     for (int n = 0; n < nreloc; n++) {
> > +                             uint64_t offset;
> > +
> > +                             reloc[n].target_handle = obj[0].handle;
> > +                             reloc[n].delta = rand() % (sz / 4) * 4;
> > +                             reloc[n].offset = (i + 1) * sizeof(uint32_t);
> > +                             reloc[n].presumed_offset = obj[0].offset;
> > +                             reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
> > +                             reloc[n].write_domain = I915_GEM_DOMAIN_RENDER;
> 
> How about:
> 
> __fill_reloc(&reloc, &obj[0], delta, offset) ?

No. I really dislike functions whose only purpose is to obfuscate
copying their arguments into the struct passed in. Because it just makes
it harder to adapt in future, whereas I think this is quite clear as to
how the batch is constructed.

> > +
> > +                             offset = reloc[n].presumed_offset + reloc[n].delta;
> > +
> > +                             batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> > +                             if (gen >= 8) {
> > +                                     batch[++i] = offset;
> > +                                     batch[++i] = offset >> 32;
> > +                             } else if (gen >= 4) {
> > +                                     batch[++i] = 0;
> > +                                     batch[++i] = offset;
> > +                                     reloc[n].offset += sizeof(uint32_t);
> > +                             } else {
> > +                                     batch[i]--;
> > +                                     batch[++i] = offset;
> > +                             }
> > +                             batch[++i] = rand();
> > +                             i++;
> > +                     }
> > +                     batch[i] = MI_BATCH_BUFFER_END;
> > +                     igt_assert(i * sizeof(uint32_t) < sz);
> > +
> > +                     gem_execbuf(i915, &exec);
> > +                     gem_sync(i915, obj[0].handle);
> > +                     SHA1(pages, sz, ref);
> > +
> > +                     igt_assert(memcmp(ref, orig, sizeof(ref)));
> > +                     memcpy(orig, ref, sizeof(orig));
> > +
> > +                     /* Now try the same through the read-only handle */
> > +                     i = 0;
> > +                     obj[0].handle = rhandle;
> > +                     for (int n = 0; n < nreloc; n++) {
> > +                             uint64_t offset;
> > +
> > +                             reloc[n].target_handle = obj[0].handle;
> > +                             reloc[n].delta = rand() % (total / 4) * 4;
> > +                             reloc[n].offset = (i + 1) * sizeof(uint32_t);
> > +                             reloc[n].presumed_offset = obj[0].offset;
> > +                             reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
> > +                             reloc[n].write_domain = I915_GEM_DOMAIN_RENDER;
> > +
> > +                             offset = reloc[n].presumed_offset + reloc[n].delta;
> > +
> > +                             batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
> > +                             if (gen >= 8) {
> > +                                     batch[++i] = offset;
> > +                                     batch[++i] = offset >> 32;
> > +                             } else if (gen >= 4) {
> > +                                     batch[++i] = 0;
> > +                                     batch[++i] = offset;
> > +                                     reloc[n].offset += sizeof(uint32_t);
> > +                             } else {
> > +                                     batch[i]--;
> > +                                     batch[++i] = offset;
> > +                             }
> > +                             batch[++i] = rand();
> > +                             i++;
> > +                     }
> > +                     batch[i] = MI_BATCH_BUFFER_END;
> > +
> > +                     gem_execbuf(i915, &exec);
> > +                     gem_sync(i915, obj[0].handle);
> > +                     SHA1(pages, sz, result);
> > +
> > +                     /*
> > +                      * As the writes into the read-only GPU bo should fail,
> > +                      * the SHA1 hash of the backing store should be
> > +                      * unaffected.
> > +                      */
> > +                     igt_assert(memcmp(ref, result, SHA_DIGEST_LENGTH) == 0);
> > +             }
> > +
> > +             munmap(batch, sz);
> > +             gem_close(i915, obj[1].handle);
> > +             gem_close(i915, rhandle);
> > +     }
> > +     igt_waitchildren();
> > +
> > +     munmap(space, total);
> > +     munmap(pages, sz);
> > +}
> > +
> > +static jmp_buf sigjmp;
> > +static void sigjmp_handler(int sig)
> > +{
> > +     siglongjmp(sigjmp, sig);
> > +}
> > +
> > +static void test_readonly_mmap(int i915)
> > +{
> > +     unsigned char original[SHA_DIGEST_LENGTH];
> > +     unsigned char result[SHA_DIGEST_LENGTH];
> > +     uint32_t handle;
> > +     uint32_t sz;
> > +     void *pages;
> > +     void *ptr;
> > +     int sig;
> > +
> > +     /*
> > +      * A quick check to ensure that we cannot circumvent the
> > +      * read-only nature of our memory by creating a GTT mmap into
> > +      * the pages. Imagine receiving a readonly SHM segment from
> > +      * another process, or a readonly file mmap, it must remain readonly
> > +      * on the GPU as well.
> > +      */
> > +
> > +     igt_require(igt_setup_clflush());
> > +
> > +     sz = 16 << 12;
> > +     pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
> > +     igt_assert(pages != MAP_FAILED);
> > +
> > +     igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0);
> > +     gem_set_caching(i915, handle, 0);
> > +
> > +     memset(pages, 0xa5, sz);
> > +     igt_clflush_range(pages, sz);
> 
> Please add comment saying why it is needed.

Hmm, is it not obvious from context?

> > +     SHA1(pages, sz, original);
> > +
> > +     ptr = __gem_mmap__gtt(i915, handle, sz, PROT_WRITE);
> > +     igt_assert(ptr == NULL);
> > +
> > +     ptr = gem_mmap__gtt(i915, handle, sz, PROT_READ);
> > +     gem_close(i915, handle);
> > +
> > +     /* Check that a write into the GTT readonly map fails */
> > +     if (!(sig = sigsetjmp(sigjmp, 1))) {
> > +             signal(SIGBUS, sigjmp_handler);
> > +             signal(SIGSEGV, sigjmp_handler);
> > +             memset(ptr, 0x5a, sz);
> > +             igt_assert(0);
> > +     }
> > +     igt_assert_eq(sig, SIGSEGV);
> > +
> > +     /* Check that we disallow removing the readonly protection */
> > +     igt_assert(mprotect(ptr, sz, PROT_WRITE));
> > +     if (!(sig = sigsetjmp(sigjmp, 1))) {
> 
> Continuing from previous reply - there is no longjmp so I don't know who 
> will jump here. Maybe it is just me since I am not familiar with the 
> facility but I still have a feeling comment on high level setup here is 
> warranted.

Look at sigjmp_handler.
-Chris