[Mesa-dev] [PATCH] Avoid mixing #ifdef USE_SSE41 and control flow

Jason Ekstrand jason at jlekstrand.net
Tue Dec 30 15:24:13 PST 2014


Yeah, I like this; it's much cleaner.  #ifdef's are always liable to break
the build with different compile options.  I'm CC'ing matt because I'd like
his quick eye as he knows far more about the build system than I do.  As
far as I'm concerned, it looks good.

Reviewed-by: Jason Ekstrand <jason.ekstrand at intel.com>

On Tue, Dec 30, 2014 at 3:04 PM, Kristian Høgsberg <krh at bitplanet.net>
wrote:

> We avoid using the USE_SSE41 #ifdef by #defining cpu_has_sse4_1 to 0
> when USE_SSE41 is undefined.  This way, we only need to test if
> cpu_has_sse4_1 is true before calling into SSE 4.1 code.  If USE_SSE41
> is undefined, we typically end up with if (0), and the compiler will
> optimize the SSE4.1 code away.
>
> The main benefit is that we avoid #ifdef chopping up control flow across
> basic blocks, but also that we always compile all the code (and let the
> compiler discard unused code) instead of sometimes compiling some of the
> code
> depending on which #defines are active.
>
> Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  8 ++------
>  src/mesa/main/sse_minmax.h                    | 15 +++++++++++++++
>  src/mesa/main/streaming-load-memcpy.h         | 13 +++++++++++++
>  src/mesa/vbo/vbo_exec_array.c                 |  2 --
>  src/mesa/x86/common_x86_features.h            |  4 ++++
>  5 files changed, 34 insertions(+), 8 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index f815fbe..c471a76 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -1859,7 +1859,6 @@ intel_miptree_unmap_blit(struct brw_context *brw,
>  /**
>   * "Map" a buffer by copying it to an untiled temporary using MOVNTDQA.
>   */
> -#if defined(USE_SSE41)
>  static void
>  intel_miptree_map_movntdqa(struct brw_context *brw,
>                             struct intel_mipmap_tree *mt,
> @@ -1868,6 +1867,7 @@ intel_miptree_map_movntdqa(struct brw_context *brw,
>  {
>     assert(map->mode & GL_MAP_READ_BIT);
>     assert(!(map->mode & GL_MAP_WRITE_BIT));
> +   assert(cpu_has_sse4_1);
>
>     DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
>         map->x, map->y, map->w, map->h,
> @@ -1923,11 +1923,11 @@ intel_miptree_unmap_movntdqa(struct brw_context
> *brw,
>                               unsigned int level,
>                               unsigned int slice)
>  {
> +   assert(cpu_has_sse4_1);
>     _mesa_align_free(map->buffer);
>     map->buffer = NULL;
>     map->ptr = NULL;
>  }
> -#endif
>
>  static void
>  intel_miptree_map_s8(struct brw_context *brw,
> @@ -2319,10 +2319,8 @@ intel_miptree_map(struct brw_context *brw,
>                mt->bo->size >= brw->max_gtt_map_object_size) {
>        assert(can_blit_slice(mt, level, slice));
>        intel_miptree_map_blit(brw, mt, map, level, slice);
> -#if defined(USE_SSE41)
>     } else if (!(mode & GL_MAP_WRITE_BIT) && !mt->compressed &&
> cpu_has_sse4_1) {
>        intel_miptree_map_movntdqa(brw, mt, map, level, slice);
> -#endif
>     } else {
>        intel_miptree_map_gtt(brw, mt, map, level, slice);
>     }
> @@ -2359,10 +2357,8 @@ intel_miptree_unmap(struct brw_context *brw,
>        intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
>     } else if (map->mt) {
>        intel_miptree_unmap_blit(brw, mt, map, level, slice);
> -#if defined(USE_SSE41)
>     } else if (map->buffer && cpu_has_sse4_1) {
>        intel_miptree_unmap_movntdqa(brw, mt, map, level, slice);
> -#endif
>     } else {
>        intel_miptree_unmap_gtt(brw, mt, map, level, slice);
>     }
> diff --git a/src/mesa/main/sse_minmax.h b/src/mesa/main/sse_minmax.h
> index 953c4e9..d890a79 100644
> --- a/src/mesa/main/sse_minmax.h
> +++ b/src/mesa/main/sse_minmax.h
> @@ -25,6 +25,21 @@
>   *
>   */
>
> +#if defined(USE_SSE41)
> +
>  void
>  _mesa_uint_array_min_max(const unsigned *ui_indices, unsigned *min_index,
>                           unsigned *max_index, const unsigned count);
> +
> +
> +#else
> +
> +static inline void
> +_mesa_uint_array_min_max(const unsigned *ui_indices, unsigned *min_index,
> +                         unsigned *max_index, const unsigned count);
> +{
> +   unreachable("calling sse4.1 function without checking
> cpu_has_sse4_1.");
> +}
> +
> +
> +#endif
> diff --git a/src/mesa/main/streaming-load-memcpy.h
> b/src/mesa/main/streaming-load-memcpy.h
> index 41eeeec..7cfd114 100644
> --- a/src/mesa/main/streaming-load-memcpy.h
> +++ b/src/mesa/main/streaming-load-memcpy.h
> @@ -29,5 +29,18 @@
>  /* Copies memory from src to dst, using SSE 4.1's MOVNTDQA to get
> streaming
>   * read performance from uncached memory.
>   */
> +#if defined(USE_SSE41)
> +
>  void
>  _mesa_streaming_load_memcpy(void *restrict dst, void *restrict src,
> size_t len);
> +
> +#else
> +
> +static inline void
> +_mesa_streaming_load_memcpy(void *restrict dst, void *restrict src,
> size_t len)
> +{
> +   unreachable("calling sse4.1 function without checking
> cpu_has_sse4_1.");
> +}
> +
> +
> +#endif
> diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
> index 6eac841..e6efa78 100644
> --- a/src/mesa/vbo/vbo_exec_array.c
> +++ b/src/mesa/vbo/vbo_exec_array.c
> @@ -121,12 +121,10 @@ vbo_get_minmax_index(struct gl_context *ctx,
>           }
>        }
>        else {
> -#if defined(USE_SSE41)
>           if (cpu_has_sse4_1) {
>              _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
>           }
>           else
> -#endif
>              for (i = 0; i < count; i++) {
>                 if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
>                 if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
> diff --git a/src/mesa/x86/common_x86_features.h
> b/src/mesa/x86/common_x86_features.h
> index 65634aa..960de0e 100644
> --- a/src/mesa/x86/common_x86_features.h
> +++ b/src/mesa/x86/common_x86_features.h
> @@ -87,11 +87,15 @@
>
>  #define cpu_has_3dnowext       (_mesa_x86_cpu_features &
> X86_FEATURE_3DNOWEXT)
>
> +#ifdef USE_SSE41
>  #ifdef __SSE4_1__
>  #define cpu_has_sse4_1         1
>  #else
>  #define cpu_has_sse4_1         (_mesa_x86_cpu_features &
> X86_FEATURE_SSE4_1)
>  #endif
> +#else
> +#define cpu_has_sse4_1         0
> +#endif
>
>  #endif
>
> --
> 2.1.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20141230/26a3dffc/attachment.html>


More information about the mesa-dev mailing list