[Intel-gfx] [PATCH igt] igt/gem_fence_thrash: Use streaming reads for verify

Chris Wilson chris at chris-wilson.co.uk
Thu Sep 7 18:14:48 UTC 2017


Quoting Chris Wilson (2017-08-23 13:55:55)
> At the moment, the verify tests use an extremely brutal write-read of
> every dword, degrading performance to UC. If we break those up into
> cachelines, we can do a wcb write/read at a time instead, roughly 8x
> faster. We lose the accuracy of the forced wcb flushes around every dword,
> but we are retaining the overall behaviour of checking reads following
> writes instead. To compensate, we do check a single dword write/read
> before using wcb aligned accesses.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

-> Tumbleweed ->

> ---
>  tests/gem_fence_thrash.c | 116 +++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 101 insertions(+), 15 deletions(-)
> 
> diff --git a/tests/gem_fence_thrash.c b/tests/gem_fence_thrash.c
> index 52095f26..3e1edb73 100644
> --- a/tests/gem_fence_thrash.c
> +++ b/tests/gem_fence_thrash.c
> @@ -30,7 +30,6 @@
>  #include "config.h"
>  #endif
>  
> -#include "igt.h"
>  #include <unistd.h>
>  #include <stdlib.h>
>  #include <stdio.h>
> @@ -43,6 +42,12 @@
>  #include <pthread.h>
>  #include "drm.h"
>  
> +#include "igt.h"
> +#include "igt_x86.h"
> +
> +#define PAGE_SIZE 4096
> +#define CACHELINE 64
> +
>  #define OBJECT_SIZE (128*1024) /* restricted to 1MiB alignment on i915 fences */
>  
>  /* Before introduction of the LRU list for fences, allocation of a fence for a page
> @@ -104,15 +109,78 @@ bo_copy (void *_arg)
>         return NULL;
>  }
>  
> +#if defined(__x86_64__) && !defined(__clang__)
> +#define MOVNT 512
> +
> +#pragma GCC push_options
> +#pragma GCC target("sse4.1")
> +
> +#include <smmintrin.h>
> +__attribute__((noinline))
> +static void copy_wc_page(void *dst, void *src)
> +{
> +       if (igt_x86_features() & SSE4_1) {
> +               __m128i *S = (__m128i *)src;
> +               __m128i *D = (__m128i *)dst;
> +
> +               for (int i = 0; i < PAGE_SIZE/CACHELINE; i++) {
> +                       __m128i tmp[4];
> +
> +                       tmp[0] = _mm_stream_load_si128(S++);
> +                       tmp[1] = _mm_stream_load_si128(S++);
> +                       tmp[2] = _mm_stream_load_si128(S++);
> +                       tmp[3] = _mm_stream_load_si128(S++);
> +
> +                       _mm_store_si128(D++, tmp[0]);
> +                       _mm_store_si128(D++, tmp[1]);
> +                       _mm_store_si128(D++, tmp[2]);
> +                       _mm_store_si128(D++, tmp[3]);
> +               }
> +       } else
> +               memcpy(dst, src, PAGE_SIZE);
> +}
> +static void copy_wc_cacheline(void *dst, void *src)
> +{
> +       if (igt_x86_features() & SSE4_1) {
> +               __m128i *S = (__m128i *)src;
> +               __m128i *D = (__m128i *)dst;
> +               __m128i tmp[4];
> +
> +               tmp[0] = _mm_stream_load_si128(S++);
> +               tmp[1] = _mm_stream_load_si128(S++);
> +               tmp[2] = _mm_stream_load_si128(S++);
> +               tmp[3] = _mm_stream_load_si128(S++);
> +
> +               _mm_store_si128(D++, tmp[0]);
> +               _mm_store_si128(D++, tmp[1]);
> +               _mm_store_si128(D++, tmp[2]);
> +               _mm_store_si128(D++, tmp[3]);
> +       } else
> +               memcpy(dst, src, CACHELINE);
> +}
> +
> +#pragma GCC pop_options
> +
> +#else
> +static void copy_wc_page(void *dst, const void *src)
> +{
> +       memcpy(dst, src, PAGE_SIZE);
> +}
> +static void copy_wc_cacheline(void *dst, const void *src)
> +{
> +       memcpy(dst, src, CACHELINE);
> +}
> +#endif
> +
>  static void
>  _bo_write_verify(struct test *t)
>  {
>         int fd = t->fd;
>         int i, k;
>         uint32_t **s;
> -       uint32_t v;
>         unsigned int dwords = OBJECT_SIZE >> 2;
>         const char *tile_str[] = { "none", "x", "y" };
> +       uint32_t tmp[PAGE_SIZE/sizeof(uint32_t)];
>  
>         igt_assert(t->tiling >= 0 && t->tiling <= I915_TILING_Y);
>         igt_assert_lt(0, t->num_surfaces);
> @@ -124,21 +192,39 @@ _bo_write_verify(struct test *t)
>                 s[k] = bo_create(fd, t->tiling);
>  
>         for (k = 0; k < t->num_surfaces; k++) {
> -               volatile uint32_t *a = s[k];
> -
> -               for (i = 0; i < dwords; i++) {
> -                       a[i] = i;
> -                       v = a[i];
> -                       igt_assert_f(v == i,
> -                                    "tiling %s: write failed at %d (%x)\n",
> -                                    tile_str[t->tiling], i, v);
> +               uint32_t *a = s[k];
> +
> +               a[0] = 0xdeadbeef;
> +               igt_assert_f(a[0] == 0xdeadbeef,
> +                            "tiling %s: write failed at start (%x)\n",
> +                            tile_str[t->tiling], a[0]);
> +
> +               a[dwords - 1] = 0xc0ffee;
> +               igt_assert_f(a[dwords - 1] == 0xc0ffee,
> +                            "tiling %s: write failed at end (%x)\n",
> +                            tile_str[t->tiling], a[dwords - 1]);
> +
> +               for (i = 0; i < dwords; i += CACHELINE/sizeof(uint32_t)) {
> +                       for (int j = 0; j < CACHELINE/sizeof(uint32_t); j++)
> +                               a[i + j] = ~(i + j);
> +
> +                       copy_wc_cacheline(tmp, a + i);
> +                       for (int j = 0; j < CACHELINE/sizeof(uint32_t); j++)
> +                               igt_assert_f(tmp[j] == ~(i+ j),
> +                                            "tiling %s: write failed at %d (%x)\n",
> +                                            tile_str[t->tiling], i + j, tmp[j]);
> +
> +                       for (int j = 0; j < CACHELINE/sizeof(uint32_t); j++)
> +                               a[i + j] = i + j;
>                 }
>  
> -               for (i = 0; i < dwords; i++) {
> -                       v = a[i];
> -                       igt_assert_f(v == i,
> -                                    "tiling %s: verify failed at %d (%x)\n",
> -                                    tile_str[t->tiling], i, v);
> +               for (i = 0; i < dwords; i += PAGE_SIZE/sizeof(uint32_t)) {
> +                       copy_wc_page(tmp, a + i);
> +                       for (int j = 0; j < PAGE_SIZE/sizeof(uint32_t); j++) {
> +                               igt_assert_f(tmp[j] == i + j,
> +                                            "tiling %s: verify failed at %d (%x)\n",
> +                                            tile_str[t->tiling], i + j, tmp[j]);
> +                       }
>                 }
>         }
>  
> -- 
> 2.14.1
> 


More information about the Intel-gfx mailing list