[Mesa-dev] [PATCH] radeonsi: keep using v_rcp_f32 for division in future LLVM
Nicolai Hähnle
nhaehnle at gmail.com
Fri Jun 24 07:41:50 UTC 2016
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
On 23.06.2016 00:03, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/gallium/drivers/radeon/radeon_llvm.h | 3 +++
> .../drivers/radeon/radeon_setup_tgsi_llvm.c | 28 ++++++++++++++++++++--
> 2 files changed, 29 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
> index ec16def..61afa7a 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm.h
> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
> @@ -101,6 +101,9 @@ struct radeon_llvm_context {
> LLVMValueRef main_fn;
> LLVMTypeRef return_type;
>
> + unsigned fpmath_md_kind;
> + LLVMValueRef fpmath_md_2p5_ulp;
> +
> struct gallivm_state gallivm;
> };
>
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index d395208..79ab4ef 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -1523,19 +1523,34 @@ static void emit_up2h(const struct lp_build_tgsi_action *action,
> }
> }
>
> +static void emit_fdiv(const struct lp_build_tgsi_action *action,
> + struct lp_build_tgsi_context *bld_base,
> + struct lp_build_emit_data *emit_data)
> +{
> + struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
> +
> + emit_data->output[emit_data->chan] =
> + LLVMBuildFDiv(bld_base->base.gallivm->builder,
> + emit_data->args[0], emit_data->args[1], "");
> +
> + /* Use v_rcp_f32 instead of precise division. */
> + LLVMSetMetadata(emit_data->output[emit_data->chan],
> + ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
> +}
> +
> /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
> * the target machine. f64 needs global unsafe math flags to get rsq. */
> static void emit_rsq(const struct lp_build_tgsi_action *action,
> struct lp_build_tgsi_context *bld_base,
> struct lp_build_emit_data *emit_data)
> {
> - LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> LLVMValueRef sqrt =
> lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
> emit_data->args[0]);
>
> emit_data->output[emit_data->chan] =
> - LLVMBuildFDiv(builder, bld_base->base.one, sqrt, "");
> + lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
> + bld_base->base.one, sqrt);
> }
>
> void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *triple)
> @@ -1586,6 +1601,14 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *trip
> bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
> bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
>
> + /* metadata allowing 2.5 ULP */
> + ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
> + "fpmath", 6);
> + LLVMValueRef arg = LLVMMDStringInContext(ctx->gallivm.context,
> + "float 2.5", 9);
> + ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
> + &arg, 1);
> +
> /* Allocate outputs */
> ctx->soa.outputs = ctx->outputs;
>
> @@ -1615,6 +1638,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *trip
> bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
> bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
> bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
> + bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
> bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
> bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
> bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
>
More information about the mesa-dev mailing list