[Mesa-dev] [PATCH 07/12] nir: Allow [iu]mul_high on non-32-bit types

Mon Oct 8 20:46:55 UTC 2018

On 10/05/2018 09:10 PM, Jason Ekstrand wrote:
> ---
>  src/compiler/nir/nir_constant_expressions.py |  1 +
>  src/compiler/nir/nir_opcodes.py              | 43 ++++++++++++++++++--
>  2 files changed, 40 insertions(+), 4 deletions(-)
> 
> diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py
> index 118af9f7818..afc0739e8b2 100644
> --- a/src/compiler/nir/nir_constant_expressions.py
> +++ b/src/compiler/nir/nir_constant_expressions.py
> @@ -79,6 +79,7 @@ template = """\
>  #include <math.h>
>  #include "util/rounding.h" /* for _mesa_roundeven */
>  #include "util/half_float.h"
> +#include "util/bigmath.h"
>  #include "nir_constant_expressions.h"
>  
>  /**
> diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
> index 4ef4ecc6f22..209f0c5509b 100644
> --- a/src/compiler/nir/nir_opcodes.py
> +++ b/src/compiler/nir/nir_opcodes.py
> @@ -443,12 +443,47 @@ binop("isub", tint, "", "src0 - src1")
>  binop("fmul", tfloat, commutative + associative, "src0 * src1")
>  # low 32-bits of signed/unsigned integer multiply
>  binop("imul", tint, commutative + associative, "src0 * src1")
> +
>  # high 32-bits of signed integer multiply
> -binop("imul_high", tint32, commutative,
> -      "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
> +binop("imul_high", tint, commutative, """

This will enable imul_high for all integer types (ditto for umul_high
below).  A later patch adds lowering for the 64-bit integer type.  Will the
backend do the right thing for [iu]mul_high of 16- or 8-bit types?

> +if (bit_size == 64) {
> +   /* We need to do a full 128-bit x 128-bit multiply in order for the sign
> +    * extension to work properly.  The casts are kind-of annoying but needed
> +    * to prevent compiler warnings.
> +    */
> +   uint32_t src0_u32[4] = {
> +      src0,
> +      (int64_t)src0 >> 32,
> +      (int64_t)src0 >> 63,
> +      (int64_t)src0 >> 63,
> +   };
> +   uint32_t src1_u32[4] = {
> +      src1,
> +      (int64_t)src1 >> 32,
> +      (int64_t)src1 >> 63,
> +      (int64_t)src1 >> 63,
> +   };
> +   uint32_t prod_u32[4];
> +   ubm_mul_u32arr(prod_u32, src0_u32, src1_u32);
> +   dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32);
> +} else {
> +   dst = ((int64_t)src0 * (int64_t)src1) >> bit_size;
> +}
> +""")
> +
>  # high 32-bits of unsigned integer multiply
> -binop("umul_high", tuint32, commutative,
> -      "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
> +binop("umul_high", tuint, commutative, """
> +if (bit_size == 64) {
> +   /* The casts are kind-of annoying but needed to prevent compiler warnings. */
> +   uint32_t src0_u32[2] = { src0, (uint64_t)src0 >> 32 };
> +   uint32_t src1_u32[2] = { src1, (uint64_t)src1 >> 32 };
> +   uint32_t prod_u32[4];
> +   ubm_mul_u32arr(prod_u32, src0_u32, src1_u32);
> +   dst = (uint64_t)prod_u32[2] | ((uint64_t)prod_u32[3] << 32);
> +} else {
> +   dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size;
> +}
> +""")
>  
>  binop("fdiv", tfloat, "", "src0 / src1")
>  binop("idiv", tint, "", "src1 == 0 ? 0 : (src0 / src1)")