[Mesa-dev] [PATCH] util: use GCC atomic intrinsics with explicit memory model
Marek Olšák
maraeo at gmail.com
Tue Oct 4 14:21:45 UTC 2016
Acked-by: Marek Olšák <marek.olsak at amd.com>
Somebody else should review the configure.ac change.
Marek
On Tue, Oct 4, 2016 at 4:14 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> This is motivated by the fact that p_atomic_read and p_atomic_set may
> somewhat surprisingly not do the right thing in the old version: while
> stores and loads are de facto atomic at least on x86, the compiler may
> apply re-ordering and speculation quite liberally. Basically, the old
> version uses the "relaxed" memory ordering.
>
> The new ordering always uses acquire/release ordering. This is the
> strongest possible memory ordering that doesn't require additional
> fence instructions on x86. (And the only stronger ordering is
> "sequentially consistent", which is usually more than you need anyway.)
>
> I would feel more comfortable if p_atomic_set/read in the old
> implementation were at least using volatile loads and stores, but I
> don't see a way to get there without typeof (which we cannot use here
> since the code is compiled with -std=c99).
>
> Eventually, we should really just move to something that is based on
> the atomics in C11 / C++11.
> ---
> configure.ac | 11 +++++++++++
> src/util/u_atomic.h | 21 +++++++++++++++++++++
> 2 files changed, 32 insertions(+)
>
> diff --git a/configure.ac b/configure.ac
> index 1bfac3b..421f4f3 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -380,20 +380,31 @@ int main () {
> c = _mm_max_epu32(a, b);
> return _mm_cvtsi128_si32(c);
> }]])], SSE41_SUPPORTED=1)
> CFLAGS="$save_CFLAGS"
> if test "x$SSE41_SUPPORTED" = x1; then
> DEFINES="$DEFINES -DUSE_SSE41"
> fi
> AM_CONDITIONAL([SSE41_SUPPORTED], [test x$SSE41_SUPPORTED = x1])
> AC_SUBST([SSE41_CFLAGS], $SSE41_CFLAGS)
>
> +dnl Check for new-style atomic builtins
> +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
> +int main() {
> + int n;
> + return __atomic_load_n(&n, __ATOMIC_ACQUIRE);
> +}]])], GCC_ATOMIC_BUILTINS_SUPPORTED=1)
> +if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then
> + DEFINES="$DEFINES -DUSE_GCC_ATOMIC_BUILTINS"
> +fi
> +AM_CONDITIONAL([GCC_ATOMIC_BUILTINS_SUPPORTED], [test x$GCC_ATOMIC_BUILTINS_SUPPORTED = x1])
> +
> dnl Check for Endianness
> AC_C_BIGENDIAN(
> little_endian=no,
> little_endian=yes,
> little_endian=no,
> little_endian=no
> )
>
> dnl Check for POWER8 Architecture
> PWR8_CFLAGS="-mpower8-vector"
> diff --git a/src/util/u_atomic.h b/src/util/u_atomic.h
> index 8675903..2a5bbae 100644
> --- a/src/util/u_atomic.h
> +++ b/src/util/u_atomic.h
> @@ -29,28 +29,49 @@
> #error "Unsupported platform"
> #endif
>
>
> /* Implementation using GCC-provided synchronization intrinsics
> */
> #if defined(PIPE_ATOMIC_GCC_INTRINSIC)
>
> #define PIPE_ATOMIC "GCC Sync Intrinsics"
>
> +#if defined(USE_GCC_ATOMIC_BUILTINS)
> +
> +/* The builtins with explicit memory model are available since GCC 4.7. */
> +#define p_atomic_set(_v, _i) __atomic_store_n((_v), (_i), __ATOMIC_RELEASE)
> +#define p_atomic_read(_v) __atomic_load_n((_v), __ATOMIC_ACQUIRE)
> +#define p_atomic_dec_zero(v) (__atomic_sub_fetch((v), 1, __ATOMIC_ACQ_REL) == 0)
> +#define p_atomic_inc(v) (void) __atomic_add_fetch((v), 1, __ATOMIC_ACQ_REL)
> +#define p_atomic_dec(v) (void) __atomic_sub_fetch((v), 1, __ATOMIC_ACQ_REL)
> +#define p_atomic_add(v, i) (void) __atomic_add_fetch((v), (i), __ATOMIC_ACQ_REL)
> +#define p_atomic_inc_return(v) __atomic_add_fetch((v), 1, __ATOMIC_ACQ_REL)
> +#define p_atomic_dec_return(v) __atomic_sub_fetch((v), 1, __ATOMIC_ACQ_REL)
> +
> +#else
> +
> #define p_atomic_set(_v, _i) (*(_v) = (_i))
> #define p_atomic_read(_v) (*(_v))
> #define p_atomic_dec_zero(v) (__sync_sub_and_fetch((v), 1) == 0)
> #define p_atomic_inc(v) (void) __sync_add_and_fetch((v), 1)
> #define p_atomic_dec(v) (void) __sync_sub_and_fetch((v), 1)
> #define p_atomic_add(v, i) (void) __sync_add_and_fetch((v), (i))
> #define p_atomic_inc_return(v) __sync_add_and_fetch((v), 1)
> #define p_atomic_dec_return(v) __sync_sub_and_fetch((v), 1)
> +
> +#endif
> +
> +/* There is no __atomic_* compare and exchange that returns the current value.
> + * Also, GCC 5.4 seems unable to optimize a compound statement expression that
> + * uses an additional stack variable with __atomic_compare_exchange[_n].
> + */
> #define p_atomic_cmpxchg(v, old, _new) \
> __sync_val_compare_and_swap((v), (old), (_new))
>
> #endif
>
>
>
> /* Unlocked version for single threaded environments, such as some
> * windows kernel modules.
> */
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list