[Mesa-dev] [PATCH] util: use GCC atomic intrinsics with explicit memory model

Jan Vesely jan.vesely at rutgers.edu
Tue Oct 4 15:50:55 UTC 2016


On Tue, 2016-10-04 at 16:14 +0200, Nicolai Hähnle wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
> 
> This is motivated by the fact that p_atomic_read and p_atomic_set may
> somewhat surprisingly not do the right thing in the old version:
> while
> stores and loads are de facto atomic at least on x86,

afaik, this is only true for naturally aligned loads/stores (even for
x86).

Jan

>  the compiler may
> apply re-ordering and speculation quite liberally. Basically, the old
> version uses the "relaxed" memory ordering.
> 
> The new ordering always uses acquire/release ordering. This is the
> strongest possible memory ordering that doesn't require additional
> fence instructions on x86. (And the only stronger ordering is
> "sequentially consistent", which is usually more than you need
> anyway.)
> 
> I would feel more comfortable if p_atomic_set/read in the old
> implementation were at least using volatile loads and stores, but I
> don't see a way to get there without typeof (which we cannot use here
> since the code is compiled with -std=c99).
> 
> Eventually, we should really just move to something that is based on
> the atomics in C11 / C++11.
> ---
>  configure.ac        | 11 +++++++++++
>  src/util/u_atomic.h | 21 +++++++++++++++++++++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/configure.ac b/configure.ac
> index 1bfac3b..421f4f3 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -380,20 +380,31 @@ int main () {
>      c = _mm_max_epu32(a, b);
>      return _mm_cvtsi128_si32(c);
>  }]])], SSE41_SUPPORTED=1)
>  CFLAGS="$save_CFLAGS"
>  if test "x$SSE41_SUPPORTED" = x1; then
>      DEFINES="$DEFINES -DUSE_SSE41"
>  fi
>  AM_CONDITIONAL([SSE41_SUPPORTED], [test x$SSE41_SUPPORTED = x1])
>  AC_SUBST([SSE41_CFLAGS], $SSE41_CFLAGS)
>  
> +dnl Check for new-style atomic builtins
> +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
> +int main() {
> +    int n;
> +    return __atomic_load_n(&n, __ATOMIC_ACQUIRE);
> +}]])], GCC_ATOMIC_BUILTINS_SUPPORTED=1)
> +if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then
> +    DEFINES="$DEFINES -DUSE_GCC_ATOMIC_BUILTINS"
> +fi
> +AM_CONDITIONAL([GCC_ATOMIC_BUILTINS_SUPPORTED], [test
> x$GCC_ATOMIC_BUILTINS_SUPPORTED = x1])
> +
>  dnl Check for Endianness
>  AC_C_BIGENDIAN(
>     little_endian=no,
>     little_endian=yes,
>     little_endian=no,
>     little_endian=no
>  )
>  
>  dnl Check for POWER8 Architecture
>  PWR8_CFLAGS="-mpower8-vector"
> diff --git a/src/util/u_atomic.h b/src/util/u_atomic.h
> index 8675903..2a5bbae 100644
> --- a/src/util/u_atomic.h
> +++ b/src/util/u_atomic.h
> @@ -29,28 +29,49 @@
>  #error "Unsupported platform"
>  #endif
>  
>  
>  /* Implementation using GCC-provided synchronization intrinsics
>   */
>  #if defined(PIPE_ATOMIC_GCC_INTRINSIC)
>  
>  #define PIPE_ATOMIC "GCC Sync Intrinsics"
>  
> +#if defined(USE_GCC_ATOMIC_BUILTINS)
> +
> +/* The builtins with explicit memory model are available since GCC
> 4.7. */
> +#define p_atomic_set(_v, _i) __atomic_store_n((_v), (_i),
> __ATOMIC_RELEASE)
> +#define p_atomic_read(_v) __atomic_load_n((_v), __ATOMIC_ACQUIRE)
> +#define p_atomic_dec_zero(v) (__atomic_sub_fetch((v), 1,
> __ATOMIC_ACQ_REL) == 0)
> +#define p_atomic_inc(v) (void) __atomic_add_fetch((v), 1,
> __ATOMIC_ACQ_REL)
> +#define p_atomic_dec(v) (void) __atomic_sub_fetch((v), 1,
> __ATOMIC_ACQ_REL)
> +#define p_atomic_add(v, i) (void) __atomic_add_fetch((v), (i),
> __ATOMIC_ACQ_REL)
> +#define p_atomic_inc_return(v) __atomic_add_fetch((v), 1,
> __ATOMIC_ACQ_REL)
> +#define p_atomic_dec_return(v) __atomic_sub_fetch((v), 1,
> __ATOMIC_ACQ_REL)
> +
> +#else
> +
>  #define p_atomic_set(_v, _i) (*(_v) = (_i))
>  #define p_atomic_read(_v) (*(_v))
>  #define p_atomic_dec_zero(v) (__sync_sub_and_fetch((v), 1) == 0)
>  #define p_atomic_inc(v) (void) __sync_add_and_fetch((v), 1)
>  #define p_atomic_dec(v) (void) __sync_sub_and_fetch((v), 1)
>  #define p_atomic_add(v, i) (void) __sync_add_and_fetch((v), (i))
>  #define p_atomic_inc_return(v) __sync_add_and_fetch((v), 1)
>  #define p_atomic_dec_return(v) __sync_sub_and_fetch((v), 1)
> +
> +#endif
> +
> +/* There is no __atomic_* compare and exchange that returns the
> current value.
> + * Also, GCC 5.4 seems unable to optimize a compound statement
> expression that
> + * uses an additional stack variable with
> __atomic_compare_exchange[_n].
> + */
>  #define p_atomic_cmpxchg(v, old, _new) \
>     __sync_val_compare_and_swap((v), (old), (_new))
>  
>  #endif
>  
>  
>  
>  /* Unlocked version for single threaded environments, such as some
>   * windows kernel modules.
>   */
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20161004/25c7100e/attachment.sig>


More information about the mesa-dev mailing list