[Mesa-dev] [PATCH] i965: Make intel_texsubimage_tiled_memcpy compile with Clang.

Ilia Mirkin imirkin at alum.mit.edu
Tue Apr 29 08:46:00 PDT 2014


On Tue, Apr 29, 2014 at 11:38 AM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> Apparently Clang needs an extra #include, and doesn't understand
> __attribute__((flatten)).  Marking the helper functions as
> always_inline works just as well, and actually compiles with Clang.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/intel_tex_subimage.c | 17 ++++++-----------
>  1 file changed, 6 insertions(+), 11 deletions(-)
>
> Seems to work with GCC 4.8 and Clang 3.4.  The _mm_loadu_ps change seems
> to be harmless enough, but I'm sure it'll break somebody's build somehow...
>
> diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
> index 891a46d..69c808f 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
> +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
> @@ -26,6 +26,7 @@
>   *
>   **************************************************************************/
>
> +#include <xmmintrin.h>

FYI, this file starts with

#ifndef __SSE__
# error "SSE instruction set not enabled"

Not sure if that's an issue or not (e.g. could someone be building
mesa with i965 support with, e.g., -march=i586, or -mno-sse or
something?)

The usual thing is to include <immintrin.h>. However as I found out
the hard way, that file is not included at all on, e.g., ppc (nor is
xmmintrin & co). Of course the chances of an i965 chip on ppc is
considerably lower than it is for nouveau.

>  #include "main/bufferobj.h"
>  #include "main/image.h"
>  #include "main/macros.h"
> @@ -179,7 +180,7 @@ typedef char v16 __attribute__((vector_size(16)));
>  /* NOTE: dst must be 16 byte aligned */
>  #define rgba8_copy_16(dst, src)                     \
>     *(v16*)(dst) = __builtin_ia32_pshufb128(         \

Why not switch this one over to _mm_something as well? I note that
this whole section is under ifdef __SSSE3__, which means if you
include immintrin.h (instead of xmmintrin.h), you will get all the
right definitions.

> -       (v16) __builtin_ia32_loadups((float*)(src)), \
> +       (v16) _mm_loadu_ps((float*)(src)), \
>        *(v16*) rgba8_permutation                     \
>     )
>  #endif
> @@ -233,7 +234,7 @@ rgba8_copy(void *dst, const void *src, size_t bytes)
>   *
>   * \copydoc tile_copy_fn
>   */
> -static inline void
> +static __attribute__((always_inline)) inline void
>  xtile_copy(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
>             uint32_t y0, uint32_t y1,
>             char *dst, const char *src,
> @@ -274,7 +275,7 @@ xtile_copy(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
>   *
>   * \copydoc tile_copy_fn
>   */
> -static inline void
> +static __attribute__((always_inline)) inline void
>  ytile_copy(
>     uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
>     uint32_t y0, uint32_t y1,
> @@ -331,12 +332,6 @@ ytile_copy(
>     }
>  }
>
> -#ifdef __GNUC__
> -#define FLATTEN __attribute__((flatten))
> -#else
> -#define FLATTEN
> -#endif
> -
>  /**
>   * Copy texture data from linear to X tile layout, faster.
>   *
> @@ -346,7 +341,7 @@ ytile_copy(
>   *
>   * \copydoc tile_copy_fn
>   */
> -static FLATTEN void
> +static void
>  xtile_copy_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
>                    uint32_t y0, uint32_t y1,
>                    char *dst, const char *src,
> @@ -382,7 +377,7 @@ xtile_copy_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
>   *
>   * \copydoc tile_copy_fn
>   */
> -static FLATTEN void
> +static void
>  ytile_copy_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
>                    uint32_t y0, uint32_t y1,
>                    char *dst, const char *src,
> --
> 1.9.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list