[Mesa-dev] [PATCH 07/10] radeonsi: don't use the AMDGPU intrinsic for CMP

Tom Stellard tom at stellard.net
Thu Oct 15 08:01:31 PDT 2015


On Sun, Oct 11, 2015 at 03:29:47AM +0200, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

> The increase in VGPRs is unfortunate, but the decrease in the scratch size
> is always welcome.
> 
> Totals:
> SGPRS: 344552 -> 344368 (-0.05 %)
> VGPRS: 197132 -> 197552 (0.21 %)
> Code Size: 7375376 -> 7366304 (-0.12 %) bytes
> LDS: 91 -> 91 (0.00 %) blocks
> Scratch: 1679360 -> 1615872 (-3.78 %) bytes per wave
> 
> Totals from affected shaders:
> SGPRS: 47736 -> 47552 (-0.39 %)
> VGPRS: 27952 -> 28372 (1.50 %)
> Code Size: 1392724 -> 1383652 (-0.65 %) bytes
> LDS: 39 -> 39 (0.00 %) blocks
> Scratch: 513024 -> 449536 (-12.38 %) bytes per wave
> ---
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c        | 31 +++++++++++++++-------
>  1 file changed, 22 insertions(+), 9 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index c22ea7c..ac99e73 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -919,7 +919,21 @@ static void emit_ucmp(
>  		LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
>  }
>  
> -static void emit_cmp(
> +static void emit_cmp(const struct lp_build_tgsi_action *action,
> +		     struct lp_build_tgsi_context *bld_base,
> +		     struct lp_build_emit_data *emit_data)
> +{
> +	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> +	LLVMValueRef cond, *args = emit_data->args;
> +
> +	cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
> +			     bld_base->base.zero, "");
> +
> +	emit_data->output[emit_data->chan] =
> +		LLVMBuildSelect(builder, cond, args[1], args[2], "");
> +}
> +
> +static void emit_set_cond(
>  		const struct lp_build_tgsi_action *action,
>  		struct lp_build_tgsi_context * bld_base,
>  		struct lp_build_emit_data * emit_data)
> @@ -1503,8 +1517,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>  	bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
>  	bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
>  	bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
> -	bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
> -	bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt";
> +	bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
>  	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
>  	bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
>  	bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
> @@ -1573,13 +1586,13 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>  	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
>  	bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name = "llvm.AMDGPU.rsq.clamped.f32";
>  	bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
> -	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
> -	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
> +	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
> +	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
>  	bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
> -	bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_cmp;
> -	bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_cmp;
> -	bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_cmp;
> -	bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_cmp;
> +	bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
> +	bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
> +	bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
> +	bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
>  	bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
>  	bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
>  	bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
> -- 
> 2.1.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list