Mesa (master): i965: Revert recent tiled memcpy changes.

Kenneth Graunke kwg at kemper.freedesktop.org
Sat May 26 23:27:46 UTC 2018


Module: Mesa
Branch: master
Commit: 58fb613a51994d111ee77a65bc7f3d60b155c687
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=58fb613a51994d111ee77a65bc7f3d60b155c687

Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Sat May 26 16:25:34 2018 -0700

i965: Revert recent tiled memcpy changes.

This reverts commit 79fe00efb474b3f3f0ba4c88826ff67c53a02aef.
This reverts commit f5e8b13f78a085bc95a1c0895e4a38ff6b87b375.
This reverts commit d21c086d819d78fb3f6abcbb14aa492970f442aa.

They broke the Android build and I'd rather not leave it broken
for the long holiday weekend.

---

 src/mesa/drivers/dri/i965/Makefile.am          |   7 --
 src/mesa/drivers/dri/i965/Makefile.sources     |   6 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 102 +------------------------
 src/mesa/drivers/dri/i965/intel_tiled_memcpy.c |  62 ---------------
 src/mesa/drivers/dri/i965/meson.build          |  18 +----
 5 files changed, 9 insertions(+), 186 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am
index ff47add93f..889d4c68a2 100644
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -92,14 +92,8 @@ libi965_gen11_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=110
 
 noinst_LTLIBRARIES = \
 	libi965_dri.la \
-	libintel_tiled_memcpy.la \
 	$(I965_PERGEN_LIBS)
 
-libintel_tiled_memcpy_la_SOURCES = \
-	$(intel_tiled_memcpy_FILES)
-libintel_tiled_memcpy_la_CFLAGS = \
-	$(AM_CFLAGS) $(SSE41_CFLAGS)
-
 libi965_dri_la_SOURCES = \
 	$(i965_FILES) \
 	$(i965_oa_GENERATED_FILES)
@@ -110,7 +104,6 @@ libi965_dri_la_LIBADD = \
 	$(top_builddir)/src/intel/compiler/libintel_compiler.la \
 	$(top_builddir)/src/intel/blorp/libblorp.la \
 	$(I965_PERGEN_LIBS) \
-	libintel_tiled_memcpy.la
 	$(LIBDRM_LIBS)
 
 BUILT_SOURCES = $(i965_oa_GENERATED_FILES)
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index ce7633c53c..db6591ab90 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -110,13 +110,11 @@ i965_FILES = \
 	intel_tex_image.c \
 	intel_tex_obj.h \
 	intel_tex_validate.c \
+	intel_tiled_memcpy.c \
+	intel_tiled_memcpy.h \
 	intel_upload.c \
 	libdrm_macros.h
 
-intel_tiled_memcpy_FILES = \
-	intel_tiled_memcpy.c \
-	intel_tiled_memcpy.h
-
 i965_gen4_FILES = \
 	genX_blorp_exec.c \
 	genX_state_upload.c
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 269bd70677..7d1fa96b91 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -31,7 +31,6 @@
 #include "intel_image.h"
 #include "intel_mipmap_tree.h"
 #include "intel_tex.h"
-#include "intel_tiled_memcpy.h"
 #include "intel_blit.h"
 #include "intel_fbo.h"
 
@@ -3024,7 +3023,7 @@ intel_miptree_unmap_raw(struct intel_mipmap_tree *mt)
 }
 
 static void
-intel_miptree_unmap_map(struct brw_context *brw,
+intel_miptree_unmap_gtt(struct brw_context *brw,
                         struct intel_mipmap_tree *mt,
                         struct intel_miptree_map *map,
                         unsigned int level, unsigned int slice)
@@ -3033,7 +3032,7 @@ intel_miptree_unmap_map(struct brw_context *brw,
 }
 
 static void
-intel_miptree_map_map(struct brw_context *brw,
+intel_miptree_map_gtt(struct brw_context *brw,
 		      struct intel_mipmap_tree *mt,
 		      struct intel_miptree_map *map,
 		      unsigned int level, unsigned int slice)
@@ -3081,7 +3080,7 @@ intel_miptree_map_map(struct brw_context *brw,
        mt, _mesa_get_format_name(mt->format),
        x, y, map->ptr, map->stride);
 
-   map->unmap = intel_miptree_unmap_map;
+   map->unmap = intel_miptree_unmap_gtt;
 }
 
 static void
@@ -3113,94 +3112,6 @@ intel_miptree_unmap_blit(struct brw_context *brw,
    intel_miptree_release(&map->linear_mt);
 }
 
-/* Compute extent parameters for use with tiled_memcpy functions.
- * xs are in units of bytes and ys are in units of strides.
- */
-static inline void
-tile_extents(struct intel_mipmap_tree *mt, struct intel_miptree_map *map,
-             unsigned int level, unsigned int slice, unsigned int *x1_B,
-             unsigned int *x2_B, unsigned int *y1_el, unsigned int *y2_el)
-{
-   unsigned int block_width, block_height;
-   unsigned int x0_el, y0_el;
-
-   _mesa_get_format_block_size(mt->format, &block_width, &block_height);
-
-   assert(map->x % block_width == 0);
-   assert(map->y % block_height == 0);
-
-   intel_miptree_get_image_offset(mt, level, slice, &x0_el, &y0_el);
-   *x1_B = (map->x / block_width + x0_el) * mt->cpp;
-   *y1_el = map->y / block_height + y0_el;
-   *x2_B = (DIV_ROUND_UP(map->x + map->w, block_width) + x0_el) * mt->cpp;
-   *y2_el = DIV_ROUND_UP(map->y + map->h, block_height) + y0_el;
-}
-
-static void
-intel_miptree_unmap_tiled_memcpy(struct brw_context *brw,
-                                 struct intel_mipmap_tree *mt,
-                                 struct intel_miptree_map *map,
-                                 unsigned int level,
-                                 unsigned int slice)
-{
-   if (map->mode & GL_MAP_WRITE_BIT) {
-      unsigned int x1, x2, y1, y2;
-      tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
-
-      char *dst = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
-      dst += mt->offset;
-
-      linear_to_tiled(x1, x2, y1, y2, dst, map->ptr, mt->surf.row_pitch,
-                      map->stride, brw->has_swizzling, mt->surf.tiling, memcpy);
-
-      intel_miptree_unmap_raw(mt);
-   }
-   _mesa_align_free(map->buffer);
-   map->buffer = map->ptr = NULL;
-}
-
-static void
-intel_miptree_map_tiled_memcpy(struct brw_context *brw,
-                               struct intel_mipmap_tree *mt,
-                               struct intel_miptree_map *map,
-                               unsigned int level, unsigned int slice)
-{
-   intel_miptree_access_raw(brw, mt, level, slice,
-                            map->mode & GL_MAP_WRITE_BIT);
-
-   unsigned int x1, x2, y1, y2;
-   tile_extents(mt, map, level, slice, &x1, &x2, &y1, &y2);
-   map->stride = ALIGN(_mesa_format_row_stride(mt->format, map->w), 16);
-
-   /* The tiling and detiling functions require that the linear buffer
-    * has proper 16-byte alignment (that is, its `x0` is 16-byte
-    * aligned). Here we over-allocate the linear buffer by enough
-    * bytes to get the proper alignment.
-    */
-   map->buffer = _mesa_align_malloc(map->stride * (y2 - y1) + (x1 & 0xf), 16);
-   map->ptr = (char *)map->buffer + (x1 & 0xf);
-   assert(map->buffer);
-
-   if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
-      char *src = intel_miptree_map_raw(brw, mt, map->mode | MAP_RAW);
-      src += mt->offset;
-
-      const mem_copy_fn fn =
-#if defined(USE_SSE41)
-         cpu_has_sse4_1 ? (mem_copy_fn)_mesa_streaming_load_memcpy :
-#endif
-         memcpy;
-
-      tiled_to_linear(x1, x2, y1, y2, map->ptr, src, map->stride,
-                      mt->surf.row_pitch, brw->has_swizzling, mt->surf.tiling,
-                      fn);
-
-      intel_miptree_unmap_raw(mt);
-   }
-
-   map->unmap = intel_miptree_unmap_tiled_memcpy;
-}
-
 static void
 intel_miptree_map_blit(struct brw_context *brw,
 		       struct intel_mipmap_tree *mt,
@@ -3732,7 +3643,6 @@ intel_miptree_map(struct brw_context *brw,
                   void **out_ptr,
                   ptrdiff_t *out_stride)
 {
-   const struct gen_device_info *devinfo = &brw->screen->devinfo;
    struct intel_miptree_map *map;
 
    assert(mt->surf.samples == 1);
@@ -3753,8 +3663,6 @@ intel_miptree_map(struct brw_context *brw,
       intel_miptree_map_depthstencil(brw, mt, map, level, slice);
    } else if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
       intel_miptree_map_blit(brw, mt, map, level, slice);
-   } else if (mt->surf.tiling != ISL_TILING_LINEAR && devinfo->gen > 4) {
-      intel_miptree_map_tiled_memcpy(brw, mt, map, level, slice);
 #if defined(USE_SSE41)
    } else if (!(mode & GL_MAP_WRITE_BIT) &&
               !mt->compressed && cpu_has_sse4_1 &&
@@ -3762,9 +3670,7 @@ intel_miptree_map(struct brw_context *brw,
       intel_miptree_map_movntdqa(brw, mt, map, level, slice);
 #endif
    } else {
-      if (mt->surf.tiling != ISL_TILING_LINEAR)
-         perf_debug("intel_miptree_map: mapping via gtt");
-      intel_miptree_map_map(brw, mt, map, level, slice);
+      intel_miptree_map_gtt(brw, mt, map, level, slice);
    }
 
    *out_ptr = map->ptr;
diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
index 6440dceac3..7c6bde990d 100644
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -36,10 +36,6 @@
 #include "brw_context.h"
 #include "intel_tiled_memcpy.h"
 
-#if defined(USE_SSE41)
-#include "main/streaming-load-memcpy.h"
-#include <smmintrin.h>
-#endif
 #if defined(__SSSE3__)
 #include <tmmintrin.h>
 #elif defined(__SSE2__)
@@ -217,31 +213,6 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
    return dst;
 }
 
-#if defined(USE_SSE41)
-static ALWAYS_INLINE void *
-_memcpy_streaming_load(void *dest, const void *src, size_t count)
-{
-   if (count == 16) {
-      __m128i val = _mm_stream_load_si128((__m128i *)src);
-      _mm_storeu_si128((__m128i *)dest, val);
-      return dest;
-   } else if (count == 64) {
-      __m128i val0 = _mm_stream_load_si128(((__m128i *)src) + 0);
-      __m128i val1 = _mm_stream_load_si128(((__m128i *)src) + 1);
-      __m128i val2 = _mm_stream_load_si128(((__m128i *)src) + 2);
-      __m128i val3 = _mm_stream_load_si128(((__m128i *)src) + 3);
-      _mm_storeu_si128(((__m128i *)dest) + 0, val0);
-      _mm_storeu_si128(((__m128i *)dest) + 1, val1);
-      _mm_storeu_si128(((__m128i *)dest) + 2, val2);
-      _mm_storeu_si128(((__m128i *)dest) + 3, val3);
-      return dest;
-   } else {
-      assert(count < 64); /* and (count < 16) for ytiled */
-      return memcpy(dest, src, count);
-   }
-}
-#endif
-
 /**
  * Each row from y0 to y1 is copied in three parts: [x0,x1), [x1,x2), [x2,x3).
  * These ranges are in bytes, i.e. pixels * bytes-per-pixel.
@@ -706,12 +677,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
          return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
                                  dst, src, dst_pitch, swizzle_bit,
                                  rgba8_copy, rgba8_copy_aligned_src);
-#if defined(USE_SSE41)
-      else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
-         return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
-                                 dst, src, dst_pitch, swizzle_bit,
-                                 memcpy, _memcpy_streaming_load);
-#endif
       else
          unreachable("not reached");
    } else {
@@ -722,12 +687,6 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
          return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
                                  dst, src, dst_pitch, swizzle_bit,
                                  rgba8_copy, rgba8_copy_aligned_src);
-#if defined(USE_SSE41)
-      else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
-         return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
-                                 dst, src, dst_pitch, swizzle_bit,
-                                 memcpy, _memcpy_streaming_load);
-#endif
       else
          unreachable("not reached");
    }
@@ -760,12 +719,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
          return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
                                  dst, src, dst_pitch, swizzle_bit,
                                  rgba8_copy, rgba8_copy_aligned_src);
-#if defined(USE_SSE41)
-      else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
-         return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
-                                 dst, src, dst_pitch, swizzle_bit,
-                                 memcpy, _memcpy_streaming_load);
-#endif
       else
          unreachable("not reached");
    } else {
@@ -776,12 +729,6 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
          return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
                                  dst, src, dst_pitch, swizzle_bit,
                                  rgba8_copy, rgba8_copy_aligned_src);
-#if defined(USE_SSE41)
-      else if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy)
-         return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
-                                 dst, src, dst_pitch, swizzle_bit,
-                                 memcpy, _memcpy_streaming_load);
-#endif
       else
          unreachable("not reached");
    }
@@ -921,15 +868,6 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
       unreachable("unsupported tiling");
    }
 
-#if defined(USE_SSE41)
-   if (mem_copy == (mem_copy_fn)_mesa_streaming_load_memcpy) {
-      /* The hidden cacheline sized register used by movntdqa can apparently
-       * give you stale data, so do an mfence to invalidate it.
-       */
-      _mm_mfence();
-   }
-#endif
-
    /* Round out to tile boundaries. */
    xt0 = ALIGN_DOWN(xt1, tw);
    xt3 = ALIGN_UP  (xt2, tw);
diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build
index 1eac329f49..20404d5b05 100644
--- a/src/mesa/drivers/dri/i965/meson.build
+++ b/src/mesa/drivers/dri/i965/meson.build
@@ -129,13 +129,10 @@ files_i965 = files(
   'intel_tex_image.c',
   'intel_tex_obj.h',
   'intel_tex_validate.c',
-  'intel_upload.c',
-  'libdrm_macros.h',
-)
-
-files_intel_tiled_memcpy = files(
   'intel_tiled_memcpy.c',
   'intel_tiled_memcpy.h',
+  'intel_upload.c',
+  'libdrm_macros.h',
 )
 
 i965_gen_libs = []
@@ -179,15 +176,6 @@ i965_oa_sources = custom_target(
   ],
 )
 
-intel_tiled_memcpy = static_library(
-  'intel_tiled_memcpy',
-  [files_intel_tiled_memcpy],
-  include_directories : [
-    inc_common, inc_intel, inc_dri_common, inc_drm_uapi,
-  ],
-  c_args : [c_vis_args, no_override_init_args, '-msse2', sse41_args],
-)
-
 libi965 = static_library(
   'i965',
   [files_i965, i965_oa_sources, ir_expression_operation_h,
@@ -199,7 +187,7 @@ libi965 = static_library(
   cpp_args : [cpp_vis_args, '-msse2'],
   link_with : [
     i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler,
-    libblorp, intel_tiled_memcpy,
+    libblorp,
   ],
   dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
 )




More information about the mesa-commit mailing list