[Beignet] [PATCH 6/8] Backend: Add half float ASM output support.
Matt Turner
mattst88 at gmail.com
Thu May 21 14:59:33 PDT 2015
On Thu, May 21, 2015 at 1:25 AM, <junyan.he at inbox.com> wrote:
> From: Junyan He <junyan.he at linux.intel.com>
>
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
> backend/src/backend/gen/gen_mesa_disasm.c | 83 +++++++++++++++++++++++++++++--
> 1 file changed, 78 insertions(+), 5 deletions(-)
>
> diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
> index 705f5e2..a8a3aa0 100644
> --- a/backend/src/backend/gen/gen_mesa_disasm.c
> +++ b/backend/src/backend/gen/gen_mesa_disasm.c
> @@ -257,7 +257,7 @@ static const char *access_mode[2] = {
> [1] = "align16",
> };
>
> -static const char *reg_encoding[10] = {
> +static const char *reg_encoding[11] = {
> [0] = ":UD",
> [1] = ":D",
> [2] = ":UW",
> @@ -267,10 +267,11 @@ static const char *reg_encoding[10] = {
> [6] = ":DF",
> [7] = ":F",
> [8] = ":UQ",
> - [9] = ":Q"
> + [9] = ":Q",
> + [10] = ":HF"
> };
>
> -int reg_type_size[10] = {
> +int reg_type_size[11] = {
> [0] = 4,
> [1] = 4,
> [2] = 2,
> @@ -280,7 +281,8 @@ int reg_type_size[10] = {
> [6] = 8,
> [7] = 4,
> [8] = 8,
> - [9] = 8
> + [9] = 8,
> + [10] = 2,
> };
>
> static const char *reg_file[4] = {
> @@ -463,6 +465,17 @@ static int gen_version;
> bits; \
> })
>
> +#define GEN_BITS_FIELD_WITH_TYPE(inst, gen, TYPE) \
> + ({ \
> + TYPE bits; \
> + if (gen_version < 80) \
> + bits = ((const union Gen7NativeInstruction *)inst)->gen; \
> + else \
> + bits = ((const union Gen8NativeInstruction *)inst)->gen; \
> + bits; \
> + })
> +
> +
> #define GEN_BITS_FIELD2(inst, gen7, gen8) \
> ({ \
> int bits; \
> @@ -954,6 +967,57 @@ static int src2_3src(FILE *file, const void* inst)
> return err;
> }
>
> +static uint32_t __conv_half_to_float(uint16_t h)
> +{
> + struct __FP32 {
> + uint32_t mantissa:23;
> + uint32_t exponent:8;
> + uint32_t sign:1;
> + };
> + struct __FP16 {
> + uint32_t mantissa:10;
> + uint32_t exponent:5;
> + uint32_t sign:1;
> + };
> + uint32_t f;
> + struct __FP32 o;
> + memset(&o, 0, sizeof(o));
> + struct __FP16 i;
> + memcpy(&i, &h, sizeof(uint16_t));
> +
> + if (i.exponent == 0 && i.mantissa == 0) // (Signed) zero
> + o.sign = i.sign;
> + else {
> + if (i.exponent == 0) { // Denormal (converts to normalized)
> + // Adjust mantissa so it's normalized (and keep
> + // track of exponent adjustment)
> + int e = -1;
> + uint m = i.mantissa;
> + do {
> + e++;
> + m <<= 1;
> + } while ((m & 0x400) == 0);
> +
> + o.mantissa = (m & 0x3ff) << 13;
> + o.exponent = 127 - 15 - e;
> + o.sign = i.sign;
> + } else if (i.exponent == 0x1f) { // Inf/NaN
> + // NOTE: Both can be handled with same code path
> + // since we just pass through mantissa bits.
> + o.mantissa = i.mantissa << 13;
> + o.exponent = 255;
> + o.sign = i.sign;
> + } else { // Normalized number
> + o.mantissa = i.mantissa << 13;
> + o.exponent = 127 - 15 + i.exponent;
> + o.sign = i.sign;
> + }
> + }
Using the F16C intrinsics here might really be worth it, at least from
a code-saving perspective. See the f16cintrin.h header shipped with
gcc.
More information about the Beignet
mailing list