[Mesa-dev] [PATCH] radeon/ac: use ds_swizzle for derivs on si/cik.

Andres Gomez agomez at igalia.com
Thu Aug 17 12:46:50 UTC 2017


Hi Dave,

This patch has been picked for 17.2.0-rc3. For 17.1.x, however, the
conflicts are not trivial, and the landed commit was tagged for
stable without specifying which branch it was meant for.

Therefore, unless you also intended this for 17.1.x and would like to
provide a backport, I would prefer to drop it for that branch.

WDYT?

Br.

On Tue, 2017-08-01 at 14:14 +1000, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
> 
> This looks like it has been supported since at least LLVM 3.9,
> so switch radeonsi and radv over to using it; -pro also uses this.
> We can now drop creating LDS for these operations, as the
> ds_swizzle operation doesn't actually write to LDS at all.
> 
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/amd/common/ac_llvm_build.c           | 57 +++++++++++++++++++++++---------
>  src/amd/common/ac_llvm_build.h           |  1 -
>  src/amd/common/ac_nir_to_llvm.c          |  9 +----
>  src/gallium/drivers/radeonsi/si_shader.c | 16 +--------
>  4 files changed, 44 insertions(+), 39 deletions(-)
> 
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 9b939c1..a38aad6 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -796,21 +796,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
>  	      bool has_ds_bpermute,
>  	      uint32_t mask,
>  	      int idx,
> -	      LLVMValueRef lds,
>  	      LLVMValueRef val)
>  {
> -	LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
> +	LLVMValueRef tl, trbl, args[2];
>  	LLVMValueRef result;
>  
> -	thread_id = ac_get_thread_id(ctx);
> +	if (has_ds_bpermute) {
> +		LLVMValueRef thread_id, tl_tid, trbl_tid;
> +		thread_id = ac_get_thread_id(ctx);
>  
> -	tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
> -			      LLVMConstInt(ctx->i32, mask, false), "");
> +		tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
> +				      LLVMConstInt(ctx->i32, mask, false), "");
>  
> -	trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
> -				LLVMConstInt(ctx->i32, idx, false), "");
> +		trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
> +					LLVMConstInt(ctx->i32, idx, false), "");
>  
> -	if (has_ds_bpermute) {
>  		args[0] = LLVMBuildMul(ctx->builder, tl_tid,
>  				       LLVMConstInt(ctx->i32, 4, false), "");
>  		args[1] = val;
> @@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
>  					  AC_FUNC_ATTR_READNONE |
>  					  AC_FUNC_ATTR_CONVERGENT);
>  	} else {
> -		LLVMValueRef store_ptr, load_ptr0, load_ptr1;
> +		uint32_t masks[2];
> +
> +		switch (mask) {
> +		case AC_TID_MASK_TOP_LEFT:
> +			masks[0] = 0x8000;
> +			if (idx == 1)
> +				masks[1] = 0x8055;
> +			else
> +				masks[1] = 0x80aa;
> +
> +			break;
> +		case AC_TID_MASK_TOP:
> +			masks[0] = 0x8044;
> +			masks[1] = 0x80ee;
> +			break;
> +		case AC_TID_MASK_LEFT:
> +			masks[0] = 0x80a0;
> +			masks[1] = 0x80f5;
> +			break;
> +		}
>  
> -		store_ptr = ac_build_gep0(ctx, lds, thread_id);
> -		load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
> -		load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
> +		args[0] = val;
> +		args[1] = LLVMConstInt(ctx->i32, masks[0], false);
>  
> -		LLVMBuildStore(ctx->builder, val, store_ptr);
> -		tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
> -		trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
> +		tl = ac_build_intrinsic(ctx,
> +					"llvm.amdgcn.ds.swizzle", ctx->i32,
> +					args, 2,
> +					AC_FUNC_ATTR_READNONE |
> +					AC_FUNC_ATTR_CONVERGENT);
> +
> +		args[1] = LLVMConstInt(ctx->i32, masks[1], false);
> +		trbl = ac_build_intrinsic(ctx,
> +					"llvm.amdgcn.ds.swizzle", ctx->i32,
> +					args, 2,
> +					AC_FUNC_ATTR_READNONE |
> +					AC_FUNC_ATTR_CONVERGENT);
>  	}
>  
>  	tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 09fd585..ee27d3c 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
>  	      bool has_ds_bpermute,
>  	      uint32_t mask,
>  	      int idx,
> -	      LLVMValueRef lds,
>  	      LLVMValueRef val);
>  
>  #define AC_SENDMSG_GS 2
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 530b581..dc765fe 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -68,8 +68,6 @@ struct ac_nir_context {
>  	int num_locals;
>  	LLVMValueRef *locals;
>  
> -	LLVMValueRef ddxy_lds;
> -
>  	struct nir_to_llvm_context *nctx; /* TODO get rid of this */
>  };
>  
> @@ -1453,11 +1451,6 @@ static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
>  	LLVMValueRef result;
>  	bool has_ds_bpermute = ctx->abi->chip_class >= VI;
>  
> -	if (!ctx->ddxy_lds && !has_ds_bpermute)
> -		ctx->ddxy_lds = LLVMAddGlobalInAddressSpace(ctx->ac.module,
> -						       LLVMArrayType(ctx->ac.i32, 64),
> -						       "ddxy_lds", LOCAL_ADDR_SPACE);
> -
>  	if (op == nir_op_fddx_fine || op == nir_op_fddx)
>  		mask = AC_TID_MASK_LEFT;
>  	else if (op == nir_op_fddy_fine || op == nir_op_fddy)
> @@ -1474,7 +1467,7 @@ static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
>  		idx = 2;
>  
>  	result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
> -			      mask, idx, ctx->ddxy_lds,
> +			      mask, idx,
>  			      src0);
>  	return result;
>  }
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 628e6f8..09053c3 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -3591,7 +3591,7 @@ static void si_llvm_emit_ddxy(
>  
>  	val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
>  	val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
> -			    mask, idx, ctx->lds, val);
> +			    mask, idx, val);
>  	emit_data->output[emit_data->chan] = val;
>  }
>  
> @@ -4635,20 +4635,6 @@ static void create_function(struct si_shader_context *ctx)
>  	assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
>  	shader->info.num_input_vgprs -= num_prolog_vgprs;
>  
> -	if (!ctx->screen->has_ds_bpermute &&
> -	    bld_base->info &&
> -	    (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
> -	     bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
> -	     bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
> -	     bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
> -	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
> -	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
> -		ctx->lds =
> -			LLVMAddGlobalInAddressSpace(gallivm->module,
> -						    LLVMArrayType(ctx->i32, 64),
> -						    "ddxy_lds",
> -						    LOCAL_ADDR_SPACE);
> -
>  	if (shader->key.as_ls ||
>  	    ctx->type == PIPE_SHADER_TESS_CTRL ||
>  	    /* GFX9 has the ESGS ring buffer in LDS. */
-- 
Br,

Andres


More information about the mesa-dev mailing list