[Mesa-dev] [PATCH] ac, ac/nir: use a better sync scope for shared atomics

Samuel Pitoiset samuel.pitoiset at gmail.com
Fri Apr 26 14:10:23 UTC 2019


Thanks for your investigations Rhys. As discussed over IRC, this makes 
sense to me.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

On 4/26/19 3:48 PM, Rhys Perry wrote:
> https://reviews.llvm.org/rL356946 (present in LLVM 9 and later) changed
> the meaning of the "system" sync scope, making it no longer restricted to
> the memory operation's address space. So a single address space sync scope
> is needed for shared atomic operations (such as "system-one-as" or
> "workgroup-one-as") otherwise buffer_wbinvl1 and s_waitcnt instructions
> can be created at each shared atomic operation.
>
> This mostly reimplements LLVMBuildAtomicRMW and LLVMBuildAtomicCmpXchg
> to allow for more sync scopes and uses the new functions in ac->nir with
> the "workgroup-one-as" or "workgroup" sync scopes.
>
>        F1 2017 (4K, Ultra High settings, TAA), avg FPS : 59 -> 59.67 (+1.14%)
>       Strange Brigade (4K, ~highest settings), avg FPS : 51.5 -> 51.6 (+0.19%)
> RotTR/mountain (4K, VeryHigh settings, FXAA), avg FPS : 57.2 -> 57.2 (+0.0%)
>      RotTR/tomb (4K, VeryHigh settings, FXAA), avg FPS : 42.5 -> 43.0 (+1.17%)
>    RotTR/valley (4K, VeryHigh settings, FXAA), avg FPS : 40.7 -> 41.6 (+2.21%)
>                           Warhammer II/fallen, avg FPS : 31.63 -> 31.83 (+0.63%)
>                           Warhammer II/skaven, avg FPS : 37.77 -> 38.07 (+0.79%)
>
> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
> ---
>   src/amd/common/ac_llvm_build.h    | 10 +++++-
>   src/amd/common/ac_llvm_helper.cpp | 59 +++++++++++++++++++++++++++++++
>   src/amd/common/ac_nir_to_llvm.c   | 12 +++----
>   3 files changed, 72 insertions(+), 9 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index f4cee667153..98f856106d6 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -26,7 +26,7 @@
>   #define AC_LLVM_BUILD_H
>   
>   #include <stdbool.h>
> -#include <llvm-c/TargetMachine.h>
> +#include <llvm-c/Core.h>
>   #include "compiler/nir/nir.h"
>   #include "amd_family.h"
>   
> @@ -694,6 +694,14 @@ ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
>   LLVMValueRef
>   ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
>   
> +LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
> +				 LLVMValueRef ptr, LLVMValueRef val,
> +				 const char *sync_scope);
> +
> +LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
> +				      LLVMValueRef cmp, LLVMValueRef val,
> +				      const char *sync_scope);
> +
>   #ifdef __cplusplus
>   }
>   #endif
> diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp
> index dcfb8008546..e5030c6f472 100644
> --- a/src/amd/common/ac_llvm_helper.cpp
> +++ b/src/amd/common/ac_llvm_helper.cpp
> @@ -31,6 +31,7 @@
>   
>   #include "ac_binary.h"
>   #include "ac_llvm_util.h"
> +#include "ac_llvm_build.h"
>   
>   #include <llvm-c/Core.h>
>   #include <llvm/Target/TargetMachine.h>
> @@ -167,3 +168,61 @@ void ac_enable_global_isel(LLVMTargetMachineRef tm)
>   {
>     reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
>   }
> +
> +LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
> +				 LLVMValueRef ptr, LLVMValueRef val,
> +				 const char *sync_scope) {
> +	llvm::AtomicRMWInst::BinOp binop;
> +	switch (op) {
> +	case LLVMAtomicRMWBinOpXchg:
> +		binop = llvm::AtomicRMWInst::Xchg;
> +		break;
> +	case LLVMAtomicRMWBinOpAdd:
> +		binop = llvm::AtomicRMWInst::Add;
> +		break;
> +	case LLVMAtomicRMWBinOpSub:
> +		binop = llvm::AtomicRMWInst::Sub;
> +		break;
> +	case LLVMAtomicRMWBinOpAnd:
> +		binop = llvm::AtomicRMWInst::And;
> +		break;
> +	case LLVMAtomicRMWBinOpNand:
> +		binop = llvm::AtomicRMWInst::Nand;
> +		break;
> +	case LLVMAtomicRMWBinOpOr:
> +		binop = llvm::AtomicRMWInst::Or;
> +		break;
> +	case LLVMAtomicRMWBinOpXor:
> +		binop = llvm::AtomicRMWInst::Xor;
> +		break;
> +	case LLVMAtomicRMWBinOpMax:
> +		binop = llvm::AtomicRMWInst::Max;
> +		break;
> +	case LLVMAtomicRMWBinOpMin:
> +		binop = llvm::AtomicRMWInst::Min;
> +		break;
> +	case LLVMAtomicRMWBinOpUMax:
> +		binop = llvm::AtomicRMWInst::UMax;
> +		break;
> +	case LLVMAtomicRMWBinOpUMin:
> +		binop = llvm::AtomicRMWInst::UMin;
> +		break;
> +	default:
> +		unreachable(!"invalid LLVMAtomicRMWBinOp");
> +	   break;
> +	}
> +	unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
> +	return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
> +		binop, llvm::unwrap(ptr), llvm::unwrap(val),
> +		llvm::AtomicOrdering::SequentiallyConsistent, SSID));
> +}
> +
> +LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
> +				      LLVMValueRef cmp, LLVMValueRef val,
> +				      const char *sync_scope) {
> +	unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
> +	return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
> +			  llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
> +			  llvm::AtomicOrdering::SequentiallyConsistent,
> +			  llvm::AtomicOrdering::SequentiallyConsistent, SSID));
> +}
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 78f25b8e742..7253ecd61da 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2897,14 +2897,12 @@ static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx,
>   	LLVMValueRef result;
>   	LLVMValueRef src = get_src(ctx, instr->src[src_idx]);
>   
> +	const char *sync_scope = HAVE_LLVM >= 0x0900 ? "workgroup-one-as" : "workgroup";
> +
>   	if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap ||
>   	    instr->intrinsic == nir_intrinsic_deref_atomic_comp_swap) {
>   		LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
> -		result = LLVMBuildAtomicCmpXchg(ctx->ac.builder,
> -						ptr, src, src1,
> -						LLVMAtomicOrderingSequentiallyConsistent,
> -						LLVMAtomicOrderingSequentiallyConsistent,
> -						false);
> +		result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope);
>   		result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
>   	} else {
>   		LLVMAtomicRMWBinOp op;
> @@ -2949,9 +2947,7 @@ static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx,
>   			return NULL;
>   		}
>   
> -		result = LLVMBuildAtomicRMW(ctx->ac.builder, op, ptr, ac_to_integer(&ctx->ac, src),
> -					    LLVMAtomicOrderingSequentiallyConsistent,
> -					    false);
> +		result = ac_build_atomic_rmw(&ctx->ac, op, ptr, ac_to_integer(&ctx->ac, src), sync_scope);
>   	}
>   	return result;
>   }


More information about the mesa-dev mailing list