[Mesa-dev] [PATCH] Avoid mixing #ifdef USE_SSE41 and control flow

Kristian Høgsberg krh at bitplanet.net
Tue Dec 30 15:04:07 PST 2014


We avoid using the USE_SSE41 #ifdef by #defining cpu_has_sse4_1 to 0
when USE_SSE41 is undefined.  This way, we only need to test if
cpu_has_sse4_1 is true before calling into SSE 4.1 code.  If USE_SSE41
is undefined, we typically end up with if (0), and the compiler will
optimize the SSE4.1 code away.

The main benefit is that we avoid #ifdef chopping up control flow across
basic blocks, but also that we always compile all the code (and let the
compiler discard unused code) instead of sometimes compiling some of the code
depending on which #defines are active.

Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  8 ++------
 src/mesa/main/sse_minmax.h                    | 15 +++++++++++++++
 src/mesa/main/streaming-load-memcpy.h         | 13 +++++++++++++
 src/mesa/vbo/vbo_exec_array.c                 |  2 --
 src/mesa/x86/common_x86_features.h            |  4 ++++
 5 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index f815fbe..c471a76 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1859,7 +1859,6 @@ intel_miptree_unmap_blit(struct brw_context *brw,
 /**
  * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
  */
-#if defined(USE_SSE41)
 static void
 intel_miptree_map_movntdqa(struct brw_context *brw,
                            struct intel_mipmap_tree *mt,
@@ -1868,6 +1867,7 @@ intel_miptree_map_movntdqa(struct brw_context *brw,
 {
    assert(map->mode & GL_MAP_READ_BIT);
    assert(!(map->mode & GL_MAP_WRITE_BIT));
+   assert(cpu_has_sse4_1);
 
    DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
        map->x, map->y, map->w, map->h,
@@ -1923,11 +1923,11 @@ intel_miptree_unmap_movntdqa(struct brw_context *brw,
                              unsigned int level,
                              unsigned int slice)
 {
+   assert(cpu_has_sse4_1);
    _mesa_align_free(map->buffer);
    map->buffer = NULL;
    map->ptr = NULL;
 }
-#endif
 
 static void
 intel_miptree_map_s8(struct brw_context *brw,
@@ -2319,10 +2319,8 @@ intel_miptree_map(struct brw_context *brw,
               mt->bo->size >= brw->max_gtt_map_object_size) {
       assert(can_blit_slice(mt, level, slice));
       intel_miptree_map_blit(brw, mt, map, level, slice);
-#if defined(USE_SSE41)
    } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed && cpu_has_sse4_1) {
       intel_miptree_map_movntdqa(brw, mt, map, level, slice);
-#endif
    } else {
       intel_miptree_map_gtt(brw, mt, map, level, slice);
    }
@@ -2359,10 +2357,8 @@ intel_miptree_unmap(struct brw_context *brw,
       intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
    } else if (map->mt) {
       intel_miptree_unmap_blit(brw, mt, map, level, slice);
-#if defined(USE_SSE41)
    } else if (map->buffer && cpu_has_sse4_1) {
       intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
-#endif
    } else {
       intel_miptree_unmap_gtt(brw, mt, map, level, slice);
    }
diff --git a/src/mesa/main/sse_minmax.h b/src/mesa/main/sse_minmax.h
index 953c4e9..d890a79 100644
--- a/src/mesa/main/sse_minmax.h
+++ b/src/mesa/main/sse_minmax.h
@@ -25,6 +25,21 @@
  *
  */
 
+#if defined(USE_SSE41)
+
 void
 _mesa_uint_array_min_max(const unsigned *ui_indices, unsigned *min_index,
                          unsigned *max_index, const unsigned count);
+
+
+#else
+
+static inline void
+_mesa_uint_array_min_max(const unsigned *ui_indices, unsigned *min_index,
+                         unsigned *max_index, const unsigned count);
+{
+   unreachable("calling sse4.1 function without checking cpu_has_sse4_1.");
+}
+
+
+#endif
diff --git a/src/mesa/main/streaming-load-memcpy.h b/src/mesa/main/streaming-load-memcpy.h
index 41eeeec..7cfd114 100644
--- a/src/mesa/main/streaming-load-memcpy.h
+++ b/src/mesa/main/streaming-load-memcpy.h
@@ -29,5 +29,18 @@
 /* Copies memory from src to dst, using SSE 4.1's MOVNTDQA to get streaming
  * read performance from uncached memory.
  */
+#if defined(USE_SSE41)
+
 void
 _mesa_streaming_load_memcpy(void *restrict dst, void *restrict src, size_t len);
+
+#else
+
+static inline void
+_mesa_streaming_load_memcpy(void *restrict dst, void *restrict src, size_t len)
+{
+   unreachable("calling sse4.1 function without checking cpu_has_sse4_1.");
+}
+
+
+#endif
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 6eac841..e6efa78 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -121,12 +121,10 @@ vbo_get_minmax_index(struct gl_context *ctx,
          }
       }
       else {
-#if defined(USE_SSE41)
          if (cpu_has_sse4_1) {
             _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
          }
          else
-#endif
             for (i = 0; i < count; i++) {
                if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
                if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
diff --git a/src/mesa/x86/common_x86_features.h b/src/mesa/x86/common_x86_features.h
index 65634aa..960de0e 100644
--- a/src/mesa/x86/common_x86_features.h
+++ b/src/mesa/x86/common_x86_features.h
@@ -87,11 +87,15 @@
 
 #define cpu_has_3dnowext	(_mesa_x86_cpu_features & X86_FEATURE_3DNOWEXT)
 
+#ifdef USE_SSE41
 #ifdef __SSE4_1__
 #define cpu_has_sse4_1		1
 #else
 #define cpu_has_sse4_1		(_mesa_x86_cpu_features & X86_FEATURE_SSE4_1)
 #endif
+#else
+#define cpu_has_sse4_1		0
+#endif
 
 #endif
 
-- 
2.1.0



More information about the mesa-dev mailing list