[Intel-gfx] [PATCH igt] igt/gem_fence_thrash: Use streaming reads for verify
Chris Wilson
chris at chris-wilson.co.uk
Thu Sep 7 18:14:48 UTC 2017
Quoting Chris Wilson (2017-08-23 13:55:55)
> At the moment, the verify tests use an extremely brutal write-read of
> every dword, degrading performance to UC. If we break those up into
> cachelines, we can do a wcb write/read at a time instead, roughly 8x
> faster. We lose the accuracy of the forced wcb flushes around every dword,
> but we are retaining the overall behaviour of checking reads following
> writes instead. To compensate, we do check a single dword write/read
> before using wcb aligned accesses.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
-> Tumbleweed ->
> ---
> tests/gem_fence_thrash.c | 116 +++++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 101 insertions(+), 15 deletions(-)
>
> diff --git a/tests/gem_fence_thrash.c b/tests/gem_fence_thrash.c
> index 52095f26..3e1edb73 100644
> --- a/tests/gem_fence_thrash.c
> +++ b/tests/gem_fence_thrash.c
> @@ -30,7 +30,6 @@
> #include "config.h"
> #endif
>
> -#include "igt.h"
> #include <unistd.h>
> #include <stdlib.h>
> #include <stdio.h>
> @@ -43,6 +42,12 @@
> #include <pthread.h>
> #include "drm.h"
>
> +#include "igt.h"
> +#include "igt_x86.h"
> +
> +#define PAGE_SIZE 4096
> +#define CACHELINE 64
> +
> #define OBJECT_SIZE (128*1024) /* restricted to 1MiB alignment on i915 fences */
>
> /* Before introduction of the LRU list for fences, allocation of a fence for a page
> @@ -104,15 +109,78 @@ bo_copy (void *_arg)
> return NULL;
> }
>
> +#if defined(__x86_64__) && !defined(__clang__)
> +#define MOVNT 512
> +
> +#pragma GCC push_options
> +#pragma GCC target("sse4.1")
> +
> +#include <smmintrin.h>
> +__attribute__((noinline))
> +static void copy_wc_page(void *dst, void *src)
> +{
> + if (igt_x86_features() & SSE4_1) {
> + __m128i *S = (__m128i *)src;
> + __m128i *D = (__m128i *)dst;
> +
> + for (int i = 0; i < PAGE_SIZE/CACHELINE; i++) {
> + __m128i tmp[4];
> +
> + tmp[0] = _mm_stream_load_si128(S++);
> + tmp[1] = _mm_stream_load_si128(S++);
> + tmp[2] = _mm_stream_load_si128(S++);
> + tmp[3] = _mm_stream_load_si128(S++);
> +
> + _mm_store_si128(D++, tmp[0]);
> + _mm_store_si128(D++, tmp[1]);
> + _mm_store_si128(D++, tmp[2]);
> + _mm_store_si128(D++, tmp[3]);
> + }
> + } else
> + memcpy(dst, src, PAGE_SIZE);
> +}
> +static void copy_wc_cacheline(void *dst, void *src)
> +{
> + if (igt_x86_features() & SSE4_1) {
> + __m128i *S = (__m128i *)src;
> + __m128i *D = (__m128i *)dst;
> + __m128i tmp[4];
> +
> + tmp[0] = _mm_stream_load_si128(S++);
> + tmp[1] = _mm_stream_load_si128(S++);
> + tmp[2] = _mm_stream_load_si128(S++);
> + tmp[3] = _mm_stream_load_si128(S++);
> +
> + _mm_store_si128(D++, tmp[0]);
> + _mm_store_si128(D++, tmp[1]);
> + _mm_store_si128(D++, tmp[2]);
> + _mm_store_si128(D++, tmp[3]);
> + } else
> + memcpy(dst, src, CACHELINE);
> +}
> +
> +#pragma GCC pop_options
> +
> +#else
> +static void copy_wc_page(void *dst, const void *src)
> +{
> + memcpy(dst, src, PAGE_SIZE);
> +}
> +static void copy_wc_cacheline(void *dst, const void *src)
> +{
> + memcpy(dst, src, CACHELINE);
> +}
> +#endif
> +
> static void
> _bo_write_verify(struct test *t)
> {
> int fd = t->fd;
> int i, k;
> uint32_t **s;
> - uint32_t v;
> unsigned int dwords = OBJECT_SIZE >> 2;
> const char *tile_str[] = { "none", "x", "y" };
> + uint32_t tmp[PAGE_SIZE/sizeof(uint32_t)];
>
> igt_assert(t->tiling >= 0 && t->tiling <= I915_TILING_Y);
> igt_assert_lt(0, t->num_surfaces);
> @@ -124,21 +192,39 @@ _bo_write_verify(struct test *t)
> s[k] = bo_create(fd, t->tiling);
>
> for (k = 0; k < t->num_surfaces; k++) {
> - volatile uint32_t *a = s[k];
> -
> - for (i = 0; i < dwords; i++) {
> - a[i] = i;
> - v = a[i];
> - igt_assert_f(v == i,
> - "tiling %s: write failed at %d (%x)\n",
> - tile_str[t->tiling], i, v);
> + uint32_t *a = s[k];
> +
> + a[0] = 0xdeadbeef;
> + igt_assert_f(a[0] == 0xdeadbeef,
> + "tiling %s: write failed at start (%x)\n",
> + tile_str[t->tiling], a[0]);
> +
> + a[dwords - 1] = 0xc0ffee;
> + igt_assert_f(a[dwords - 1] == 0xc0ffee,
> + "tiling %s: write failed at end (%x)\n",
> + tile_str[t->tiling], a[dwords - 1]);
> +
> + for (i = 0; i < dwords; i += CACHELINE/sizeof(uint32_t)) {
> + for (int j = 0; j < CACHELINE/sizeof(uint32_t); j++)
> + a[i + j] = ~(i + j);
> +
> + copy_wc_cacheline(tmp, a + i);
> + for (int j = 0; j < CACHELINE/sizeof(uint32_t); j++)
> + igt_assert_f(tmp[j] == ~(i+ j),
> + "tiling %s: write failed at %d (%x)\n",
> + tile_str[t->tiling], i + j, tmp[j]);
> +
> + for (int j = 0; j < CACHELINE/sizeof(uint32_t); j++)
> + a[i + j] = i + j;
> }
>
> - for (i = 0; i < dwords; i++) {
> - v = a[i];
> - igt_assert_f(v == i,
> - "tiling %s: verify failed at %d (%x)\n",
> - tile_str[t->tiling], i, v);
> + for (i = 0; i < dwords; i += PAGE_SIZE/sizeof(uint32_t)) {
> + copy_wc_page(tmp, a + i);
> + for (int j = 0; j < PAGE_SIZE/sizeof(uint32_t); j++) {
> + igt_assert_f(tmp[j] == i + j,
> + "tiling %s: verify failed at %d (%x)\n",
> + tile_str[t->tiling], i + j, tmp[j]);
> + }
> }
> }
>
> --
> 2.14.1
>
More information about the Intel-gfx
mailing list