[Mesa-dev] [PATCH] gallivm: fix [IU]MUL_HI regression
Roland Scheidegger
sroland at vmware.com
Tue Nov 8 13:44:49 UTC 2016
Sorry for breaking radeonsi, I somehow thought this way only used for
cpu only already, without actually checking...
And thanks for fixing that typo, apparently you can pass piglits
umul_hi/imul_hi tests (at least those from the shader_integer_mix group)
even with the square of argument a...
btw as I didn't consider this, I don't know if you want to change the
shift/trunc to shuffle in the end - feel free to change it back if it
doesn't generate good code on radeonsi...
Reviewed-by: Roland Scheidegger <sroland at vmware.com>
Am 08.11.2016 um 10:15 schrieb Nicolai Hähnle:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> This patch does two things:
>
> 1. It separates the host-CPU code generation from the generic code
> generation. This guards against accidently breaking things for
> radeonsi in the future.
>
> 2. It makes sure we actually use both arguments and don't just compute
> a square :-p
>
> Fixes a regression introduced by commit 29279f44b3172ef3b84d470e70fc7684695ced4b
>
> Cc: Roland Scheidegger <sroland at vmware.com>
> ---
> src/gallium/auxiliary/gallivm/lp_bld_arit.c | 72 ++++++++++++++--------
> src/gallium/auxiliary/gallivm/lp_bld_arit.h | 6 ++
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 40 +++++++++++-
> 3 files changed, 90 insertions(+), 28 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> index 3de4628..43ad238 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> @@ -1087,26 +1087,28 @@ lp_build_mul(struct lp_build_context *bld,
> res = LLVMBuildLShr(builder, res, shift, "");
> }
> }
>
> return res;
> }
>
> /*
> * Widening mul, valid for 32x32 bit -> 64bit only.
> * Result is low 32bits, high bits returned in res_hi.
> + *
> + * Emits code that is meant to be compiled for the host CPU.
> */
> LLVMValueRef
> -lp_build_mul_32_lohi(struct lp_build_context *bld,
> - LLVMValueRef a,
> - LLVMValueRef b,
> - LLVMValueRef *res_hi)
> +lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
> + LLVMValueRef a,
> + LLVMValueRef b,
> + LLVMValueRef *res_hi)
> {
> struct gallivm_state *gallivm = bld->gallivm;
> LLVMBuilderRef builder = gallivm->builder;
>
> assert(bld->type.width == 32);
> assert(bld->type.floating == 0);
> assert(bld->type.fixed == 0);
> assert(bld->type.norm == 0);
>
> /*
> @@ -1209,43 +1211,61 @@ lp_build_mul_32_lohi(struct lp_build_context *bld,
> *res_hi = LLVMBuildShuffleVector(builder, muleven, mulodd, shuf_vec, "");
>
> for (i = 0; i < bld->type.length; i += 2) {
> shuf[i] = lp_build_const_int32(gallivm, i);
> shuf[i+1] = lp_build_const_int32(gallivm, i + bld->type.length);
> }
> shuf_vec = LLVMConstVector(shuf, bld->type.length);
> return LLVMBuildShuffleVector(builder, muleven, mulodd, shuf_vec, "");
> }
> else {
> - LLVMValueRef tmp;
> - struct lp_type type_tmp;
> - LLVMTypeRef wide_type, cast_type;
> -
> - type_tmp = bld->type;
> - type_tmp.width *= 2;
> - wide_type = lp_build_vec_type(gallivm, type_tmp);
> - type_tmp = bld->type;
> - type_tmp.length *= 2;
> - cast_type = lp_build_vec_type(gallivm, type_tmp);
> -
> - if (bld->type.sign) {
> - a = LLVMBuildSExt(builder, a, wide_type, "");
> - b = LLVMBuildSExt(builder, b, wide_type, "");
> - } else {
> - a = LLVMBuildZExt(builder, a, wide_type, "");
> - b = LLVMBuildZExt(builder, b, wide_type, "");
> - }
> - tmp = LLVMBuildMul(builder, a, b, "");
> - tmp = LLVMBuildBitCast(builder, tmp, cast_type, "");
> - *res_hi = lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 1);
> - return lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 0);
> + return lp_build_mul_32_lohi(bld, a, b, res_hi);
> + }
> +}
> +
> +
> +/*
> + * Widening mul, valid for 32x32 bit -> 64bit only.
> + * Result is low 32bits, high bits returned in res_hi.
> + *
> + * Emits generic code.
> + */
> +LLVMValueRef
> +lp_build_mul_32_lohi(struct lp_build_context *bld,
> + LLVMValueRef a,
> + LLVMValueRef b,
> + LLVMValueRef *res_hi)
> +{
> + struct gallivm_state *gallivm = bld->gallivm;
> + LLVMBuilderRef builder = gallivm->builder;
> + LLVMValueRef tmp;
> + struct lp_type type_tmp;
> + LLVMTypeRef wide_type, cast_type;
> +
> + type_tmp = bld->type;
> + type_tmp.width *= 2;
> + wide_type = lp_build_vec_type(gallivm, type_tmp);
> + type_tmp = bld->type;
> + type_tmp.length *= 2;
> + cast_type = lp_build_vec_type(gallivm, type_tmp);
> +
> + if (bld->type.sign) {
> + a = LLVMBuildSExt(builder, a, wide_type, "");
> + b = LLVMBuildSExt(builder, b, wide_type, "");
> + } else {
> + a = LLVMBuildZExt(builder, a, wide_type, "");
> + b = LLVMBuildZExt(builder, b, wide_type, "");
> }
> + tmp = LLVMBuildMul(builder, a, b, "");
> + tmp = LLVMBuildBitCast(builder, tmp, cast_type, "");
> + *res_hi = lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 1);
> + return lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 0);
> }
>
>
> /* a * b + c */
> LLVMValueRef
> lp_build_mad(struct lp_build_context *bld,
> LLVMValueRef a,
> LLVMValueRef b,
> LLVMValueRef c)
> {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> index 5d48b1c..2a4137a 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> @@ -70,20 +70,26 @@ LLVMValueRef
> lp_build_sub(struct lp_build_context *bld,
> LLVMValueRef a,
> LLVMValueRef b);
>
> LLVMValueRef
> lp_build_mul(struct lp_build_context *bld,
> LLVMValueRef a,
> LLVMValueRef b);
>
> LLVMValueRef
> +lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
> + LLVMValueRef a,
> + LLVMValueRef b,
> + LLVMValueRef *res_hi);
> +
> +LLVMValueRef
> lp_build_mul_32_lohi(struct lp_build_context *bld,
> LLVMValueRef a,
> LLVMValueRef b,
> LLVMValueRef *res_hi);
>
> LLVMValueRef
> lp_build_mul_imm(struct lp_build_context *bld,
> LLVMValueRef a,
> int b);
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index 72d4579..9c6fc4b 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -842,39 +842,73 @@ imul_hi_emit(
> struct lp_build_tgsi_context * bld_base,
> struct lp_build_emit_data * emit_data)
> {
> struct lp_build_context *int_bld = &bld_base->int_bld;
> LLVMValueRef hi_bits;
>
> assert(int_bld->type.width == 32);
>
> /* low result bits are tossed away */
> lp_build_mul_32_lohi(int_bld, emit_data->args[0],
> - emit_data->args[0], &hi_bits);
> + emit_data->args[1], &hi_bits);
> + emit_data->output[emit_data->chan] = hi_bits;
> +}
> +
> +static void
> +imul_hi_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_context *int_bld = &bld_base->int_bld;
> + LLVMValueRef hi_bits;
> +
> + assert(int_bld->type.width == 32);
> +
> + /* low result bits are tossed away */
> + lp_build_mul_32_lohi_cpu(int_bld, emit_data->args[0],
> + emit_data->args[1], &hi_bits);
> emit_data->output[emit_data->chan] = hi_bits;
> }
>
> /* TGSI_OPCODE_UMUL_HI */
> static void
> umul_hi_emit(
> const struct lp_build_tgsi_action * action,
> struct lp_build_tgsi_context * bld_base,
> struct lp_build_emit_data * emit_data)
> {
> struct lp_build_context *uint_bld = &bld_base->uint_bld;
> LLVMValueRef hi_bits;
>
> assert(uint_bld->type.width == 32);
>
> /* low result bits are tossed away */
> lp_build_mul_32_lohi(uint_bld, emit_data->args[0],
> - emit_data->args[0], &hi_bits);
> + emit_data->args[1], &hi_bits);
> + emit_data->output[emit_data->chan] = hi_bits;
> +}
> +
> +static void
> +umul_hi_emit_cpu(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + struct lp_build_context *uint_bld = &bld_base->uint_bld;
> + LLVMValueRef hi_bits;
> +
> + assert(uint_bld->type.width == 32);
> +
> + /* low result bits are tossed away */
> + lp_build_mul_32_lohi_cpu(uint_bld, emit_data->args[0],
> + emit_data->args[1], &hi_bits);
> emit_data->output[emit_data->chan] = hi_bits;
> }
>
> /* TGSI_OPCODE_MAX */
> static void fmax_emit(
> const struct lp_build_tgsi_action * action,
> struct lp_build_tgsi_context * bld_base,
> struct lp_build_emit_data * emit_data)
> {
> LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> @@ -2574,20 +2608,22 @@ lp_set_default_actions_cpu(
> bld_base->op_actions[TGSI_OPCODE_I2F].emit = i2f_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_IABS].emit = iabs_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_IDIV].emit = idiv_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_INEG].emit = ineg_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_IMAX].emit = imax_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_IMIN].emit = imin_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_ISGE].emit = isge_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_ISHR].emit = ishr_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_ISLT].emit = islt_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_ISSG].emit = issg_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit_cpu;
> + bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit_cpu;
>
> bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_MAD].emit = mad_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_MAX].emit = max_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_MIN].emit = min_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_MOD].emit = mod_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_NOT].emit = not_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_OR].emit = or_emit_cpu;
> bld_base->op_actions[TGSI_OPCODE_POW].emit = pow_emit_cpu;
>
More information about the mesa-dev
mailing list