[Mesa-dev] [RFC 10/16] nir: Introduce half float opcodes

Fri May 15 02:39:37 PDT 2015

Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
 src/glsl/nir/nir.h                       |  2 +
 src/glsl/nir/nir_constant_expressions.py |  8 +++-
 src/glsl/nir/nir_opcodes.py              | 78 +++++++++++++++++++++++++++++++-
 3 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 697d37e..3c9d5ba 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -642,6 +642,7 @@ void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
 typedef enum {
    nir_type_invalid = 0, /* Not a valid type */
    nir_type_float,
+   nir_type_hfloat,
    nir_type_int,
    nir_type_unsigned,
    nir_type_bool
@@ -1064,6 +1065,7 @@ nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
 typedef struct {
    union {
       float f[4];
+      float h[4];
       int32_t i[4];
       uint32_t u[4];
    };
diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py
index bf82fe5..6e570d1 100644
--- a/src/glsl/nir/nir_constant_expressions.py
+++ b/src/glsl/nir/nir_constant_expressions.py
@@ -31,6 +31,12 @@ template = """\
 #include "util/rounding.h" /* for _mesa_roundeven */
 #include "nir_constant_expressions.h"
 
+/**
+ * Constant values for half floats are treated as normal single precision
+ * floats at compile time.
+ */
+#define hfloat float
+
 #if defined(_MSC_VER) && (_MSC_VER < 1800)
 static int isnormal(double x)
 {
@@ -224,7 +230,7 @@ unpack_half_1x16(uint16_t u)
 }
 
 /* Some typed vector structures to make things like src0.y work */
-% for type in ["float", "int", "unsigned", "bool"]:
+% for type in ["float", "hfloat", "int", "unsigned", "bool"]:
 struct ${type}_vec {
    ${type} x;
    ${type} y;
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index 56e96d9..766bfd9 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -89,6 +89,7 @@ class Opcode(object):
 
 # helper variables for strings
 tfloat = "float"
+thalf = "hfloat"
 tint = "int"
 tbool = "bool"
 tunsigned = "unsigned"
@@ -136,70 +137,106 @@ def unop_reduce(name, output_size, output_type, input_type, prereduce_expr,
               final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
 
 
-# These two move instructions differ in what modifiers they support and what
+# These three move instructions differ in what modifiers they support and what
 # the negate modifier means. Otherwise, they are identical.
 unop("fmov", tfloat, "src0")
+unop("hmov", thalf, "src0")
 unop("imov", tint, "src0")
 
 unop("ineg", tint, "-src0")
 unop("fneg", tfloat, "-src0")
+unop("hneg", thalf, "-src0")
 unop("inot", tint, "~src0") # invert every bit of the integer
 unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f")
+unop("hnot", thalf, "(src0 == 0.0f) ? 1.0f : 0.0f")
 unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)")
+unop("hsign", thalf, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)")
 unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
 unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
 unop("fabs", tfloat, "fabsf(src0)")
+unop("habs", thalf, "fabsf(src0)")
 unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)")
+unop("hsat", thalf, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)")
 unop("frcp", tfloat, "1.0f / src0")
+unop("hrcp", thalf, "1.0f / src0")
 unop("frsq", tfloat, "1.0f / sqrtf(src0)")
+unop("hrsq", thalf, "1.0f / sqrtf(src0)")
 unop("fsqrt", tfloat, "sqrtf(src0)")
+unop("hsqrt", thalf, "sqrtf(src0)")
 unop("fexp2", tfloat, "exp2f(src0)")
+unop("hexp2", thalf, "exp2f(src0)")
 unop("flog2", tfloat, "log2f(src0)")
+unop("hlog2", thalf, "log2f(src0)")
 unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
+unop_convert("h2i", thalf, tint, "src0") # Half-to-integer conversion.
 unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
+unop_convert("h2u", thalf, tunsigned, "src0") # Half-to-unsigned conversion
 unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
+unop_convert("i2h", tint, thalf, "src0") # Integer-to-half conversion.
+unop_convert("h2f", thalf, tfloat, "src0") # Half-to-float conversion.
+unop_convert("f2h", tfloat, thalf, "src0") # Float-to-half conversion.
 # Float-to-boolean conversion
 unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
+unop_convert("h2b", thalf, tbool, "src0 != 0.0f")
 # Boolean-to-float conversion
 unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
+unop_convert("b2h", tbool, thalf, "src0 ? 1.0f : 0.0f")
 # Int-to-boolean conversion
 unop_convert("i2b", tint, tbool, "src0 != 0")
 unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
 unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.
+unop_convert("u2h", tunsigned, thalf, "src0") #Unsigned-to-half conversion.
 
 unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
 unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}")
 unop_reduce("fany", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} || {src1}",
             "{src} ? 1.0f : 0.0f")
+unop_reduce("hany", 1, thalf, thalf, "{src} != 0.0f", "{src0} || {src1}",
+            "{src} ? 1.0f : 0.0f")
 unop_reduce("fall", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} && {src1}",
             "{src} ? 1.0f : 0.0f")
+unop_reduce("hall", 1, thalf, thalf, "{src} != 0.0f", "{src0} && {src1}",
+            "{src} ? 1.0f : 0.0f")
 
 # Unary floating-point rounding operations.
 
 
 unop("ftrunc", tfloat, "truncf(src0)")
+unop("htrunc", thalf, "truncf(src0)")
 unop("fceil", tfloat, "ceilf(src0)")
+unop("hceil", thalf, "ceilf(src0)")
 unop("ffloor", tfloat, "floorf(src0)")
+unop("hfloor", thalf, "floorf(src0)")
 unop("ffract", tfloat, "src0 - floorf(src0)")
+unop("hfract", thalf, "src0 - floorf(src0)")
 unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
+unop("hround_even", thalf, "_mesa_roundevenf(src0)")
 
 
 # Trigonometric operations.
 
 
 unop("fsin", tfloat, "sinf(src0)")
+unop("hsin", thalf, "sinf(src0)")
 unop("fcos", tfloat, "cosf(src0)")
+unop("hcos", thalf, "cosf(src0)")
 
 
 # Partial derivatives.
 
 
 unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
+unop("hddx", thalf, "0.0f")
 unop("fddy", tfloat, "0.0f")
+unop("hddy", thalf, "0.0f")
 unop("fddx_fine", tfloat, "0.0f")
+unop("hddx_fine", thalf, "0.0f")
 unop("fddy_fine", tfloat, "0.0f")
+unop("hddy_fine", thalf, "0.0f")
 unop("fddx_coarse", tfloat, "0.0f")
+unop("hddx_coarse", thalf, "0.0f")
 unop("fddy_coarse", tfloat, "0.0f")
+unop("hddy_coarse", thalf, "0.0f")
 
 
 # Floating point pack and unpack operations.
@@ -310,6 +347,10 @@ for i in xrange(1, 5):
    for j in xrange(1, 5):
       unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f")
 
+for i in xrange(1, 5):
+   for j in xrange(1, 5):
+      unop_horiz("hnoise{0}_{1}".format(i, j), i, thalf, j, thalf, "0.0f")
+
 def binop_convert(name, out_type, in_type, alg_props, const_expr):
    opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr)
 
@@ -347,11 +388,14 @@ def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
           final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
 
 binop("fadd", tfloat, commutative + associative, "src0 + src1")
+binop("hadd", thalf, commutative + associative, "src0 + src1")
 binop("iadd", tint, commutative + associative, "src0 + src1")
 binop("fsub", tfloat, "", "src0 - src1")
+binop("hsub", thalf, "", "src0 - src1")
 binop("isub", tint, "", "src0 - src1")
 
 binop("fmul", tfloat, commutative + associative, "src0 * src1")
+binop("hmul", thalf, commutative + associative, "src0 * src1")
 # low 32-bits of signed/unsigned integer multiply
 binop("imul", tint, commutative + associative, "src0 * src1")
 # high 32-bits of signed integer multiply
@@ -362,6 +406,7 @@ binop("umul_high", tunsigned, commutative,
       "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
 
 binop("fdiv", tfloat, "", "src0 / src1")
+binop("hdiv", thalf, "", "src0 / src1")
 binop("idiv", tint, "", "src0 / src1")
 binop("udiv", tunsigned, "", "src0 / src1")
 
@@ -376,6 +421,7 @@ binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0")
 binop_convert("usub_borrow", tbool, tunsigned, "", "src1 < src0")
 
 binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
+binop("hmod", thalf, "", "src0 - src1 * floorf(src0 / src1)")
 binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1")
 
 #
@@ -386,9 +432,13 @@ binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1")
 # these integer-aware comparisons return a boolean (0 or ~0)
 
 binop_compare("flt", tfloat, "", "src0 < src1")
+binop_compare("hlt", thalf, "", "src0 < src1")
 binop_compare("fge", tfloat, "", "src0 >= src1")
+binop_compare("hge", thalf, "", "src0 >= src1")
 binop_compare("feq", tfloat, commutative, "src0 == src1")
+binop_compare("heq", thalf, commutative, "src0 == src1")
 binop_compare("fne", tfloat, commutative, "src0 != src1")
+binop_compare("hne", thalf, commutative, "src0 != src1")
 binop_compare("ilt", tint, "", "src0 < src1")
 binop_compare("ige", tint, "", "src0 >= src1")
 binop_compare("ieq", tint, commutative, "src0 == src1")
@@ -400,8 +450,12 @@ binop_compare("uge", tunsigned, "", "src0 >= src1")
 
 binop_reduce("ball_fequal",  1, tbool, tfloat, "{src0} == {src1}",
              "{src0} && {src1}", "{src}")
+binop_reduce("ball_hequal",  1, tbool, thalf, "{src0} == {src1}",
+             "{src0} && {src1}", "{src}")
 binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}",
              "{src0} || {src1}", "{src}")
+binop_reduce("bany_hnequal", 1, tbool, thalf, "{src0} != {src1}",
+             "{src0} || {src1}", "{src}")
 binop_reduce("ball_iequal",  1, tbool, tint, "{src0} == {src1}",
              "{src0} && {src1}", "{src}")
 binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
@@ -411,16 +465,24 @@ binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
 
 binop_reduce("fall_equal",  1, tfloat, tfloat, "{src0} == {src1}",
              "{src0} && {src1}", "{src} ? 1.0f : 0.0f")
+binop_reduce("hall_equal",  1, thalf, thalf, "{src0} == {src1}",
+             "{src0} && {src1}", "{src} ? 1.0f : 0.0f")
 binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
              "{src0} || {src1}", "{src} ? 1.0f : 0.0f")
+binop_reduce("hany_nequal", 1, thalf, thalf, "{src0} != {src1}",
+             "{src0} || {src1}", "{src} ? 1.0f : 0.0f")
 
 # These comparisons for integer-less hardware return 1.0 and 0.0 for true
 # and false respectively
 
 binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
+binop("hslt", thalf, "", "(src0 < src1) ? 1.0f : 0.0f")
 binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
+binop("hsge", thalf, "", "(src0 >= src1) ? 1.0f : 0.0f")
 binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
+binop("hseq", thalf, commutative, "(src0 == src1) ? 1.0f : 0.0f")
 binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
+binop("hsne", thalf, commutative, "(src0 != src1) ? 1.0f : 0.0f")
 
 
 binop("ishl", tint, "", "src0 << src1")
@@ -445,22 +507,33 @@ binop("ixor", tunsigned, commutative + associative, "src0 ^ src1")
 
 binop("fand", tfloat, commutative,
       "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
+binop("hand", thalf, commutative,
+      "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
 binop("for", tfloat, commutative,
       "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
+binop("hor", thalf, commutative,
+      "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
 binop("fxor", tfloat, commutative,
       "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
+binop("hxor", thalf, commutative,
+      "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
 
 binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
              "{src}")
+binop_reduce("hdot", 1, thalf, thalf, "{src0} * {src1}", "{src0} + {src1}",
+             "{src}")
 
 binop("fmin", tfloat, "", "fminf(src0, src1)")
+binop("hmin", thalf, "", "fminf(src0, src1)")
 binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
 binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
 binop("fmax", tfloat, "", "fmaxf(src0, src1)")
+binop("hmax", thalf, "", "fmaxf(src0, src1)")
 binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
 binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")
 
 binop("fpow", tfloat, "", "powf(src0, src1)")
+binop("hpow", thalf, "", "powf(src0, src1)")
 
 binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
             "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
@@ -495,8 +568,10 @@ def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
    [tunsigned, tunsigned, tunsigned], "", const_expr)
 
 triop("ffma", tfloat, "src0 * src1 + src2")
+triop("hfma", thalf, "src0 * src1 + src2")
 
 triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
+triop("hlrp", thalf, "src0 * (1 - src2) + src1 * src2")
 
 # Conditional Select
 #
@@ -506,6 +581,7 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
 
 
 triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
+triop("hcsel", thalf, "(src0 != 0.0f) ? src1 : src2")
 opcode("bcsel", 0, tunsigned, [0, 0, 0],
       [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2")
 
-- 
1.9.3