[Mesa-dev] [PATCH v3 5/7] ac: add LLVM build functions for subgroup instrinsics

Nicolai Hähnle nhaehnle at gmail.com
Sun Apr 15 19:13:37 UTC 2018


I'm a bit late, but just some stylistic nitpicks for the future:

As a general stylistic note, prefer `false' as the last argument to 
LLVMConstInt (since it's a boolean, whether to sign-extend).

[snip]
> +{
> +	ac_build_optimization_barrier(ctx, &src);
> +	LLVMValueRef result;
> +	LLVMValueRef identity = get_reduction_identity(ctx, op,
> +								ac_get_type_size(LLVMTypeOf(src)));
> +	result = LLVMBuildBitCast(ctx->builder,
> +								ac_build_set_inactive(ctx, src, identity),
> +								LLVMTypeOf(identity), "");

Weird whitespace, are you using a non-default tab width?


> +	result = ac_build_scan(ctx, op, result, identity);
> +
> +	return ac_build_wwm(ctx, result);
> +}
> +
> +LLVMValueRef
> +ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
> +{
> +	ac_build_optimization_barrier(ctx, &src);
> +	LLVMValueRef result;
> +	LLVMValueRef identity = get_reduction_identity(ctx, op,
> +								ac_get_type_size(LLVMTypeOf(src)));
> +	result = LLVMBuildBitCast(ctx->builder,
> +								ac_build_set_inactive(ctx, src, identity),
> +								LLVMTypeOf(identity), "");
> +	result = ac_build_dpp(ctx, identity, result, dpp_wf_sr1, 0xf, 0xf, false);
> +	result = ac_build_scan(ctx, op, result, identity);
> +
> +	return ac_build_wwm(ctx, result);
> +}
> +
> +LLVMValueRef
> +ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size)
> +{
> +	if (cluster_size == 1) return src;

Prefer to put the return on a new line, please.

Thanks,
Nicolai


> +	ac_build_optimization_barrier(ctx, &src);
> +	LLVMValueRef result, swap;
> +	LLVMValueRef identity = get_reduction_identity(ctx, op,
> +								ac_get_type_size(LLVMTypeOf(src)));
> +	result = LLVMBuildBitCast(ctx->builder,
> +								ac_build_set_inactive(ctx, src, identity),
> +								LLVMTypeOf(identity), "");

Weird whitespace again.

With those style nitpicks fixed,

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>


> +	swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2);
> +	result = ac_build_alu_op(ctx, result, swap, op);
> +	if (cluster_size == 2) return ac_build_wwm(ctx, result);
> +
> +	swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1);
> +	result = ac_build_alu_op(ctx, result, swap, op);
> +	if (cluster_size == 4) return ac_build_wwm(ctx, result);
> +
> +	if (ctx->chip_class >= VI)
> +		swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false);
> +	else
> +		swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04));
> +	result = ac_build_alu_op(ctx, result, swap, op);
> +	if (cluster_size == 8) return ac_build_wwm(ctx, result);
> +
> +	if (ctx->chip_class >= VI)
> +		swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false);
> +	else
> +		swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08));
> +	result = ac_build_alu_op(ctx, result, swap, op);
> +	if (cluster_size == 16) return ac_build_wwm(ctx, result);
> +
> +	if (ctx->chip_class >= VI && cluster_size != 32)
> +		swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
> +	else
> +		swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10));
> +	result = ac_build_alu_op(ctx, result, swap, op);
> +	if (cluster_size == 32) return ac_build_wwm(ctx, result);
> +
> +	if (ctx->chip_class >= VI) {
> +		swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
> +		result = ac_build_alu_op(ctx, result, swap, op);
> +		result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
> +		return ac_build_wwm(ctx, result);
> +	} else {
> +		swap = ac_build_readlane(ctx, result, ctx->i32_0);
> +		result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0));
> +		result = ac_build_alu_op(ctx, result, swap, op);
> +		return ac_build_wwm(ctx, result);
> +	}
> +}
> +
> +LLVMValueRef
> +ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
> +		unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
> +{
> +	unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3);
> +	if (ctx->chip_class >= VI && HAVE_LLVM >= 0x0600) {
> +		return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false);
> +	} else {
> +		return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask);
> +	}
> +}
> +
> +LLVMValueRef
> +ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index)
> +{
> +	index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
> +	return ac_build_intrinsic(ctx,
> +		  "llvm.amdgcn.ds.bpermute", ctx->i32,
> +		  (LLVMValueRef []) {index, src}, 2,
> +		  AC_FUNC_ATTR_READNONE |
> +		  AC_FUNC_ATTR_CONVERGENT);
> +}
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 8b35028a31..44811ce6d9 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -27,7 +27,7 @@
>   
>   #include <stdbool.h>
>   #include <llvm-c/TargetMachine.h>
> -
> +#include <nir/nir.h>
>   #include "amd_family.h"
>   
>   #ifdef __cplusplus
> @@ -417,6 +417,34 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
>   void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
>   			      LLVMValueRef *addr, bool is_array_tex);
>   
> +LLVMValueRef
> +ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
> +
> +LLVMValueRef
> +ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane);
> +
> +LLVMValueRef
> +ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane);
> +
> +LLVMValueRef
> +ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
> +
> +LLVMValueRef
> +ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
> +
> +LLVMValueRef
> +ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
> +
> +LLVMValueRef
> +ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
> +
> +LLVMValueRef
> +ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
> +		unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
> +
> +LLVMValueRef
> +ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index);
> +
>   #ifdef __cplusplus
>   }
>   #endif
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list