<div dir="ltr"><div>Yeah, I like this; it's much cleaner. #ifdef's are always liable to break the build with different compile options. I'm CC'ing matt because I'd like his quick eye as he knows far more about the build system than I do. As far as I'm concerned, it looks good.<br><br></div>Reviewed-by: Jason Ekstrand <<a href="mailto:jason.ekstrand@intel.com">jason.ekstrand@intel.com</a>><br></div><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Dec 30, 2014 at 3:04 PM, Kristian Høgsberg <span dir="ltr"><<a href="mailto:krh@bitplanet.net" target="_blank">krh@bitplanet.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">We avoid using the USE_SSE41 #ifdef by #defining cpu_has_sse4_1 to 0<br>
when USE_SSE41 is undefined. This way, we only need to test if<br>
cpu_has_sse4_1 is true before calling into SSE 4.1 code. If USE_SSE41<br>
is undefined, we typically end up with if (0), and the compiler will<br>
optimize the SSE4.1 code away.<br>
<br>
The main benefit is that we avoid #ifdef chopping up control flow across<br>
basic blocks, but also that we always compile all the code (and let the<br>
compiler discard unused code) instead of sometimes compiling some of the code<br>
depending on which #defines are active.<br>
<br>
Signed-off-by: Kristian Høgsberg <<a href="mailto:krh@bitplanet.net">krh@bitplanet.net</a>><br>
---<br>
src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 8 ++------<br>
src/mesa/main/sse_minmax.h | 15 +++++++++++++++<br>
src/mesa/main/streaming-load-memcpy.h | 13 +++++++++++++<br>
src/mesa/vbo/vbo_exec_array.c | 2 --<br>
src/mesa/x86/common_x86_features.h | 4 ++++<br>
5 files changed, 34 insertions(+), 8 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c<br>
index f815fbe..c471a76 100644<br>
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c<br>
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c<br>
@@ -1859,7 +1859,6 @@ intel_miptree_unmap_blit(struct brw_context *brw,<br>
/**<br>
* "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.<br>
*/<br>
-#if defined(USE_SSE41)<br>
static void<br>
intel_miptree_map_movntdqa(struct brw_context *brw,<br>
struct intel_mipmap_tree *mt,<br>
@@ -1868,6 +1867,7 @@ intel_miptree_map_movntdqa(struct brw_context *brw,<br>
{<br>
assert(map->mode & GL_MAP_READ_BIT);<br>
assert(!(map->mode & GL_MAP_WRITE_BIT));<br>
+ assert(cpu_has_sse4_1);<br>
<br>
DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,<br>
map->x, map->y, map->w, map->h,<br>
@@ -1923,11 +1923,11 @@ intel_miptree_unmap_movntdqa(struct brw_context *brw,<br>
unsigned int level,<br>
unsigned int slice)<br>
{<br>
+ assert(cpu_has_sse4_1);<br>
_mesa_align_free(map->buffer);<br>
map->buffer = NULL;<br>
map->ptr = NULL;<br>
}<br>
-#endif<br>
<br>
static void<br>
intel_miptree_map_s8(struct brw_context *brw,<br>
@@ -2319,10 +2319,8 @@ intel_miptree_map(struct brw_context *brw,<br>
mt->bo->size >= brw->max_gtt_map_object_size) {<br>
assert(can_blit_slice(mt, level, slice));<br>
intel_miptree_map_blit(brw, mt, map, level, slice);<br>
-#if defined(USE_SSE41)<br>
} else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed && cpu_has_sse4_1) {<br>
intel_miptree_map_movntdqa(brw, mt, map, level, slice);<br>
-#endif<br>
} else {<br>
intel_miptree_map_gtt(brw, mt, map, level, slice);<br>
}<br>
@@ -2359,10 +2357,8 @@ intel_miptree_unmap(struct brw_context *brw,<br>
intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);<br>
} else if (map->mt) {<br>
intel_miptree_unmap_blit(brw, mt, map, level, slice);<br>
-#if defined(USE_SSE41)<br>
} else if (map->buffer && cpu_has_sse4_1) {<br>
intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);<br>
-#endif<br>
} else {<br>
intel_miptree_unmap_gtt(brw, mt, map, level, slice);<br>
}<br>
diff --git a/src/mesa/main/sse_minmax.h b/src/mesa/main/sse_minmax.h<br>
index 953c4e9..d890a79 100644<br>
--- a/src/mesa/main/sse_minmax.h<br>
+++ b/src/mesa/main/sse_minmax.h<br>
@@ -25,6 +25,21 @@<br>
*<br>
*/<br>
<br>
+#if defined(USE_SSE41)<br>
+<br>
void<br>
_mesa_uint_array_min_max(const unsigned *ui_indices, unsigned *min_index,<br>
unsigned *max_index, const unsigned count);<br>
+<br>
+<br>
+#else<br>
+<br>
+static inline void<br>
+_mesa_uint_array_min_max(const unsigned *ui_indices, unsigned *min_index,<br>
+ unsigned *max_index, const unsigned count);<br>
+{<br>
+ unreachable("calling sse4.1 function without checking cpu_has_sse4_1.");<br>
+}<br>
+<br>
+<br>
+#endif<br>
diff --git a/src/mesa/main/streaming-load-memcpy.h b/src/mesa/main/streaming-load-memcpy.h<br>
index 41eeeec..7cfd114 100644<br>
--- a/src/mesa/main/streaming-load-memcpy.h<br>
+++ b/src/mesa/main/streaming-load-memcpy.h<br>
@@ -29,5 +29,18 @@<br>
/* Copies memory from src to dst, using SSE 4.1's MOVNTDQA to get streaming<br>
* read performance from uncached memory.<br>
*/<br>
+#if defined(USE_SSE41)<br>
+<br>
void<br>
_mesa_streaming_load_memcpy(void *restrict dst, void *restrict src, size_t len);<br>
+<br>
+#else<br>
+<br>
+static inline void<br>
+_mesa_streaming_load_memcpy(void *restrict dst, void *restrict src, size_t len)<br>
+{<br>
+ unreachable("calling sse4.1 function without checking cpu_has_sse4_1.");<br>
+}<br>
+<br>
+<br>
+#endif<br>
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c<br>
index 6eac841..e6efa78 100644<br>
--- a/src/mesa/vbo/vbo_exec_array.c<br>
+++ b/src/mesa/vbo/vbo_exec_array.c<br>
@@ -121,12 +121,10 @@ vbo_get_minmax_index(struct gl_context *ctx,<br>
}<br>
}<br>
else {<br>
-#if defined(USE_SSE41)<br>
if (cpu_has_sse4_1) {<br>
_mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);<br>
}<br>
else<br>
-#endif<br>
for (i = 0; i < count; i++) {<br>
if (ui_indices[i] > max_ui) max_ui = ui_indices[i];<br>
if (ui_indices[i] < min_ui) min_ui = ui_indices[i];<br>
diff --git a/src/mesa/x86/common_x86_features.h b/src/mesa/x86/common_x86_features.h<br>
index 65634aa..960de0e 100644<br>
--- a/src/mesa/x86/common_x86_features.h<br>
+++ b/src/mesa/x86/common_x86_features.h<br>
@@ -87,11 +87,15 @@<br>
<br>
#define cpu_has_3dnowext (_mesa_x86_cpu_features & X86_FEATURE_3DNOWEXT)<br>
<br>
+#ifdef USE_SSE41<br>
#ifdef __SSE4_1__<br>
#define cpu_has_sse4_1 1<br>
#else<br>
#define cpu_has_sse4_1 (_mesa_x86_cpu_features & X86_FEATURE_SSE4_1)<br>
#endif<br>
+#else<br>
+#define cpu_has_sse4_1 0<br>
+#endif<br>
<br>
#endif<br>
<span class="HOEnZb"><font color="#888888"><br>
--<br>
2.1.0<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div>