[Intel-gfx] [PATCH 2/3] igt/gem_pread: Support to verify pread/pwrite for non-shmem backed obj

Tue Jul 21 05:38:26 PDT 2015

On Fri, 2015-07-03 at 10:37 +0100, Tvrtko Ursulin wrote:
> 
> On 07/01/2015 10:26 AM, ankitprasad.r.sharma at intel.com wrote:
> > From: Ankitprasad Sharma <ankitprasad.r.sharma at intel.com>
> >
> > This patch adds support to verify pread/pwrite for non-shmem backed
> > objects. It also shows the pread/pwrite speed.
> > It also tests speeds for pread with and without user side page faults
> >
> > v2: Rebased to the latest (Ankit)
> >
> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma at intel.com>
> > ---
> >   tests/gem_pread.c  | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >   tests/gem_pwrite.c |  45 ++++++++++++++++++++++++
> >   2 files changed, 145 insertions(+)
> >
> > diff --git a/tests/gem_pread.c b/tests/gem_pread.c
> > index cc83948..95162d5 100644
> > --- a/tests/gem_pread.c
> > +++ b/tests/gem_pread.c
> > @@ -41,6 +41,10 @@
> >   #include "drmtest.h"
> >
> >   #define OBJECT_SIZE 16384
> > +#define LARGE_OBJECT_SIZE 1024 * 1024
> > +#define KGRN "\x1B[32m"
> > +#define KRED "\x1B[31m"
> > +#define KNRM "\x1B[0m"
> >
> >   static void do_gem_read(int fd, uint32_t handle, void *buf, int len, int loops)
> >   {
> > @@ -76,11 +80,14 @@ static const char *bytes_per_sec(char *buf, double v)
> >
> >
> >   uint32_t *src, dst;
> > +uint32_t *dst_user, src_stolen, large_stolen;
> > +uint32_t *stolen_pf_user, *stolen_nopf_user;
> >   int fd, count;
> >
> >   int main(int argc, char **argv)
> >   {
> >   	int object_size = 0;
> > +	int large_obj_size = LARGE_OBJECT_SIZE;
> 
> Why have both a define and a variable which does not get modified for 
> the same thing?
> 
> >   	uint32_t buf[20];
> >   	const struct {
> >   		int level;
> > @@ -90,6 +97,9 @@ int main(int argc, char **argv)
> >   		{ 1, "snoop" },
> >   		{ 2, "display" },
> >   		{ -1 },
> > +		{ -1, "stolen-uncached"},
> > +		{ -1, "stolen-snoop"},
> > +		{ -1, "stolen-display"},
> 
> Ugh why so hacky? You only used the new three entries for their names, 
> right?
> 
> What about instead of "igt_subtest((c + 4)->name)" do 
> "igt_subtest_f("stolen-%s", c->name)" and then you don't need to add 
> these hacky entries?
> 
> >   	}, *c;
> >
> >   	igt_subtest_init(argc, argv);
> > @@ -106,6 +116,8 @@ int main(int argc, char **argv)
> >
> >   		dst = gem_create(fd, object_size);
> >   		src = malloc(object_size);
> > +		src_stolen = gem_create_stolen(fd, object_size);
> > +		dst_user = malloc(object_size);
> >   	}
> >
> >   	igt_subtest("normal") {
> > @@ -142,9 +154,97 @@ int main(int argc, char **argv)
> >   		}
> >   	}
> >
> > +	igt_subtest("stolen-normal") {
> > +		for (count = 1; count <= 1<<17; count <<= 1) {
> > +			struct timeval start, end;
> > +
> > +			gettimeofday(&start, NULL);
> > +			do_gem_read(fd, src_stolen, dst_user, object_size, count);
> > +			gettimeofday(&end, NULL);
> > +			igt_info("Time to pread %d bytes x %6d:	%7.3fµs, %s\n",
> > +				 object_size, count,
> > +				 elapsed(&start, &end, count),
> > +				 bytes_per_sec((char *)buf,
> > +				 object_size/elapsed(&start, &end, count)*1e6));
> 
> There is no checking that bytes_per_sec won't overflow buf. Which is 
> also declared as unit32_t just so we can cast here. :) Suggest fixing if 
> you feel like it, won't mandate it since it is existing code.
> 
> > +			fflush(stdout);
> > +		}
> > +	}
> > +	for (c = cache; c->level != -1; c++) {
> > +		igt_subtest((c + 4)->name) {
> > +			gem_set_caching(fd, src_stolen, c->level);
> > +
> > +			for (count = 1; count <= 1<<17; count <<= 1) {
> > +				struct timeval start, end;
> > +
> > +				gettimeofday(&start, NULL);
> > +				do_gem_read(fd, src_stolen, dst_user,
> > +					    object_size, count);
> > +				gettimeofday(&end, NULL);
> > +				igt_info("Time to %s pread %d bytes x %6d:      %7.3fµs, %s\n",
> > +					 (c + 4)->name, object_size, count,
> > +					 elapsed(&start, &end, count),
> > +					 bytes_per_sec((char *)buf,
> > +					 object_size/elapsed(&start, &end, count)*1e6));
> > +				fflush(stdout);
> > +			}
> > +		}
> > +	}
> > +
> > +	/* List pread times for stolen area with 1 page fault (page fault only
> > +	 * first time) and multiple page faults (unmapping the pages at the
> > +	 * end of each iteration)
> > +	 */
> 
> Can you explain more the idea behind this test? It benchmarks minor 
> faulting (well could be major as well but we can't say), but it is 
> nothing in the i915 path as far as I know.
> 
> > +	igt_subtest("pagefault-pread") {
> > +		large_stolen = gem_create_stolen(fd, large_obj_size);
> > +		stolen_nopf_user = (uint32_t *) mmap(NULL, large_obj_size,
> > +						PROT_WRITE,
> > +						MAP_ANONYMOUS|MAP_PRIVATE,
> > +						-1, 0);
> 
> Some asserts for object creationg and mmap?
> 
> > +
> > +		for (count = 1; count <= 10; count ++) {
> > +			struct timeval start, end;
> > +			uint32_t t_elapsed = 0;
> > +
> > +			gettimeofday(&start, NULL);
> > +			do_gem_read(fd, large_stolen, stolen_nopf_user,
> > +				    large_obj_size, 1);
> > +			gettimeofday(&end, NULL);
> > +			t_elapsed = elapsed(&start, &end, 1);
> > +			igt_info("Pagefault-N - Time to pread %d bytes: %7.3fµs, %s\n",
> > +				 large_obj_size,
> > +				 elapsed(&start, &end, 1),
> > +				 bytes_per_sec((char *)buf,
> > +				 large_obj_size/elapsed(&start, &end, 1)*1e6));
> > +
> > +			stolen_pf_user = (uint32_t *) mmap(NULL, large_obj_size,
> > +						      PROT_WRITE,
> > +						      MAP_ANONYMOUS|MAP_PRIVATE,
> > +						      -1, 0);
> > +
> > +			gettimeofday(&start, NULL);
> > +			do_gem_read(fd, large_stolen, stolen_pf_user,
> > +				    large_obj_size, 1);
> > +			gettimeofday(&end, NULL);
> > +			igt_info("Pagefault-Y - Time to pread %d bytes: %7.3fµs, %s%s%s\n",
> > +				 large_obj_size,
> > +				 elapsed(&start, &end, 1),
> > +				 t_elapsed < elapsed(&start, &end, 1) ? KGRN : KRED,
> > +				 bytes_per_sec((char *)buf,
> > +				 large_obj_size/elapsed(&start, &end, 1)*1e6),
> > +				 KNRM);
> > +			fflush(stdout);
> > +			munmap(stolen_pf_user, large_obj_size);
> > +		}
> > +			munmap(stolen_nopf_user, large_obj_size);
> > +			gem_close(fd, large_stolen);
> 
> Indentation is broken here.
> 
> > +	}
> > +
> > +
> >   	igt_fixture {
> >   		free(src);
> >   		gem_close(fd, dst);
> > +		free(dst_user);
> > +		gem_close(fd, src_stolen);
> >
> >   		close(fd);
> >   	}
> > diff --git a/tests/gem_pwrite.c b/tests/gem_pwrite.c
> > index 5b6a77f..c7e3edf 100644
> > --- a/tests/gem_pwrite.c
> > +++ b/tests/gem_pwrite.c
> > @@ -135,6 +135,7 @@ static void test_big_gtt(int fd, int scale)
> >   }
> >
> >   uint32_t *src, dst;
> > +uint32_t *src_user, dst_stolen;
> >   int fd;
> >
> >   int main(int argc, char **argv)
> > @@ -150,6 +151,9 @@ int main(int argc, char **argv)
> >   		{ 1, "snoop" },
> >   		{ 2, "display" },
> >   		{ -1 },
> > +		{ -1, "stolen-uncached"},
> > +		{ -1, "stolen-snoop"},
> > +		{ -1, "stolen-display"},
> 
> Same comment as for pread.
> 
> Hm, should the page faulting benchmark go in gem_pwrite rather than 
> gem_pread? Although not sure - is stolen pageable at all?
Sorry, but I do not think it is of much use in case of pwrite, as here
we are talking about pagefaults in userspace. In case of pwrites, user
would have already filled the buffer and so a rare chance of pagefault
while doing a pwrite.
The concept of pread is reading an object to a userspace buffer, in
which pages may not be yet allocated when the kernel is trying to write
to that buffer and hence need to handle pagefaults here, resulting in
slow read. 
Hence, demonstrating here if there are multiple pagefaults, how slow
pread would be.

Thanks,
Ankit
>