[Mesa-dev] [RFC 10/16] nir: Introduce half float opcodes
Topi Pohjolainen
topi.pohjolainen at intel.com
Fri May 15 02:39:37 PDT 2015
Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
src/glsl/nir/nir.h | 2 +
src/glsl/nir/nir_constant_expressions.py | 8 +++-
src/glsl/nir/nir_opcodes.py | 78 +++++++++++++++++++++++++++++++-
3 files changed, 86 insertions(+), 2 deletions(-)
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 697d37e..3c9d5ba 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -642,6 +642,7 @@ void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
typedef enum {
nir_type_invalid = 0, /* Not a valid type */
nir_type_float,
+ nir_type_hfloat,
nir_type_int,
nir_type_unsigned,
nir_type_bool
@@ -1064,6 +1065,7 @@ nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
typedef struct {
union {
float f[4];
+ float h[4];
int32_t i[4];
uint32_t u[4];
};
diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py
index bf82fe5..6e570d1 100644
--- a/src/glsl/nir/nir_constant_expressions.py
+++ b/src/glsl/nir/nir_constant_expressions.py
@@ -31,6 +31,12 @@ template = """\
#include "util/rounding.h" /* for _mesa_roundeven */
#include "nir_constant_expressions.h"
+/**
+ * Constant values for half floats are treated as normal single precision
+ * floats at compile time.
+ */
+#define hfloat float
+
#if defined(_MSC_VER) && (_MSC_VER < 1800)
static int isnormal(double x)
{
@@ -224,7 +230,7 @@ unpack_half_1x16(uint16_t u)
}
/* Some typed vector structures to make things like src0.y work */
-% for type in ["float", "int", "unsigned", "bool"]:
+% for type in ["float", "hfloat", "int", "unsigned", "bool"]:
struct ${type}_vec {
${type} x;
${type} y;
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index 56e96d9..766bfd9 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -89,6 +89,7 @@ class Opcode(object):
# helper variables for strings
tfloat = "float"
+thalf = "hfloat"
tint = "int"
tbool = "bool"
tunsigned = "unsigned"
@@ -136,70 +137,106 @@ def unop_reduce(name, output_size, output_type, input_type, prereduce_expr,
final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
-# These two move instructions differ in what modifiers they support and what
+# These three move instructions differ in what modifiers they support and what
# the negate modifier means. Otherwise, they are identical.
unop("fmov", tfloat, "src0")
+unop("hmov", thalf, "src0")
unop("imov", tint, "src0")
unop("ineg", tint, "-src0")
unop("fneg", tfloat, "-src0")
+unop("hneg", thalf, "-src0")
unop("inot", tint, "~src0") # invert every bit of the integer
unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f")
+unop("hnot", thalf, "(src0 == 0.0f) ? 1.0f : 0.0f")
unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)")
+unop("hsign", thalf, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)")
unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
unop("fabs", tfloat, "fabsf(src0)")
+unop("habs", thalf, "fabsf(src0)")
unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)")
+unop("hsat", thalf, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)")
unop("frcp", tfloat, "1.0f / src0")
+unop("hrcp", thalf, "1.0f / src0")
unop("frsq", tfloat, "1.0f / sqrtf(src0)")
+unop("hrsq", thalf, "1.0f / sqrtf(src0)")
unop("fsqrt", tfloat, "sqrtf(src0)")
+unop("hsqrt", thalf, "sqrtf(src0)")
unop("fexp2", tfloat, "exp2f(src0)")
+unop("hexp2", thalf, "exp2f(src0)")
unop("flog2", tfloat, "log2f(src0)")
+unop("hlog2", thalf, "log2f(src0)")
unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
+unop_convert("h2i", thalf, tint, "src0") # Half-to-integer conversion.
unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
+unop_convert("h2u", thalf, tunsigned, "src0") # Half-to-unsigned conversion
unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
+unop_convert("i2h", tint, thalf, "src0") # Integer-to-half conversion.
+unop_convert("h2f", thalf, tfloat, "src0") # Half-to-float conversion.
+unop_convert("f2h", tfloat, thalf, "src0") # Float-to-half conversion.
# Float-to-boolean conversion
unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
+unop_convert("h2b", thalf, tbool, "src0 != 0.0f")
# Boolean-to-float conversion
unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
+unop_convert("b2h", tbool, thalf, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
unop_convert("i2b", tint, tbool, "src0 != 0")
unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.
+unop_convert("u2h", tunsigned, thalf, "src0") #Unsigned-to-half conversion.
unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}")
unop_reduce("fany", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} || {src1}",
"{src} ? 1.0f : 0.0f")
+unop_reduce("hany", 1, thalf, thalf, "{src} != 0.0f", "{src0} || {src1}",
+ "{src} ? 1.0f : 0.0f")
unop_reduce("fall", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} && {src1}",
"{src} ? 1.0f : 0.0f")
+unop_reduce("hall", 1, thalf, thalf, "{src} != 0.0f", "{src0} && {src1}",
+ "{src} ? 1.0f : 0.0f")
# Unary floating-point rounding operations.
unop("ftrunc", tfloat, "truncf(src0)")
+unop("htrunc", thalf, "truncf(src0)")
unop("fceil", tfloat, "ceilf(src0)")
+unop("hceil", thalf, "ceilf(src0)")
unop("ffloor", tfloat, "floorf(src0)")
+unop("hfloor", thalf, "floorf(src0)")
unop("ffract", tfloat, "src0 - floorf(src0)")
+unop("hfract", thalf, "src0 - floorf(src0)")
unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
+unop("hround_even", thalf, "_mesa_roundevenf(src0)")
# Trigonometric operations.
unop("fsin", tfloat, "sinf(src0)")
+unop("hsin", thalf, "sinf(src0)")
unop("fcos", tfloat, "cosf(src0)")
+unop("hcos", thalf, "cosf(src0)")
# Partial derivatives.
unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
+unop("hddx", thalf, "0.0f")
unop("fddy", tfloat, "0.0f")
+unop("hddy", thalf, "0.0f")
unop("fddx_fine", tfloat, "0.0f")
+unop("hddx_fine", thalf, "0.0f")
unop("fddy_fine", tfloat, "0.0f")
+unop("hddy_fine", thalf, "0.0f")
unop("fddx_coarse", tfloat, "0.0f")
+unop("hddx_coarse", thalf, "0.0f")
unop("fddy_coarse", tfloat, "0.0f")
+unop("hddy_coarse", thalf, "0.0f")
# Floating point pack and unpack operations.
@@ -310,6 +347,10 @@ for i in xrange(1, 5):
for j in xrange(1, 5):
unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f")
+for i in xrange(1, 5):
+ for j in xrange(1, 5):
+ unop_horiz("hnoise{0}_{1}".format(i, j), i, thalf, j, thalf, "0.0f")
+
def binop_convert(name, out_type, in_type, alg_props, const_expr):
opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr)
@@ -347,11 +388,14 @@ def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
binop("fadd", tfloat, commutative + associative, "src0 + src1")
+binop("hadd", thalf, commutative + associative, "src0 + src1")
binop("iadd", tint, commutative + associative, "src0 + src1")
binop("fsub", tfloat, "", "src0 - src1")
+binop("hsub", thalf, "", "src0 - src1")
binop("isub", tint, "", "src0 - src1")
binop("fmul", tfloat, commutative + associative, "src0 * src1")
+binop("hmul", thalf, commutative + associative, "src0 * src1")
# low 32-bits of signed/unsigned integer multiply
binop("imul", tint, commutative + associative, "src0 * src1")
# high 32-bits of signed integer multiply
@@ -362,6 +406,7 @@ binop("umul_high", tunsigned, commutative,
"(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
binop("fdiv", tfloat, "", "src0 / src1")
+binop("hdiv", thalf, "", "src0 / src1")
binop("idiv", tint, "", "src0 / src1")
binop("udiv", tunsigned, "", "src0 / src1")
@@ -376,6 +421,7 @@ binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0")
binop_convert("usub_borrow", tbool, tunsigned, "", "src1 < src0")
binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
+binop("hmod", thalf, "", "src0 - src1 * floorf(src0 / src1)")
binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1")
#
@@ -386,9 +432,13 @@ binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1")
# these integer-aware comparisons return a boolean (0 or ~0)
binop_compare("flt", tfloat, "", "src0 < src1")
+binop_compare("hlt", thalf, "", "src0 < src1")
binop_compare("fge", tfloat, "", "src0 >= src1")
+binop_compare("hge", thalf, "", "src0 >= src1")
binop_compare("feq", tfloat, commutative, "src0 == src1")
+binop_compare("heq", thalf, commutative, "src0 == src1")
binop_compare("fne", tfloat, commutative, "src0 != src1")
+binop_compare("hne", thalf, commutative, "src0 != src1")
binop_compare("ilt", tint, "", "src0 < src1")
binop_compare("ige", tint, "", "src0 >= src1")
binop_compare("ieq", tint, commutative, "src0 == src1")
@@ -400,8 +450,12 @@ binop_compare("uge", tunsigned, "", "src0 >= src1")
binop_reduce("ball_fequal", 1, tbool, tfloat, "{src0} == {src1}",
"{src0} && {src1}", "{src}")
+binop_reduce("ball_hequal", 1, tbool, thalf, "{src0} == {src1}",
+ "{src0} && {src1}", "{src}")
binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}",
"{src0} || {src1}", "{src}")
+binop_reduce("bany_hnequal", 1, tbool, thalf, "{src0} != {src1}",
+ "{src0} || {src1}", "{src}")
binop_reduce("ball_iequal", 1, tbool, tint, "{src0} == {src1}",
"{src0} && {src1}", "{src}")
binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
@@ -411,16 +465,24 @@ binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}",
"{src0} && {src1}", "{src} ? 1.0f : 0.0f")
+binop_reduce("hall_equal", 1, thalf, thalf, "{src0} == {src1}",
+ "{src0} && {src1}", "{src} ? 1.0f : 0.0f")
binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
"{src0} || {src1}", "{src} ? 1.0f : 0.0f")
+binop_reduce("hany_nequal", 1, thalf, thalf, "{src0} != {src1}",
+ "{src0} || {src1}", "{src} ? 1.0f : 0.0f")
# These comparisons for integer-less hardware return 1.0 and 0.0 for true
# and false respectively
binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
+binop("hslt", thalf, "", "(src0 < src1) ? 1.0f : 0.0f")
binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
+binop("hsge", thalf, "", "(src0 >= src1) ? 1.0f : 0.0f")
binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
+binop("hseq", thalf, commutative, "(src0 == src1) ? 1.0f : 0.0f")
binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
+binop("hsne", thalf, commutative, "(src0 != src1) ? 1.0f : 0.0f")
binop("ishl", tint, "", "src0 << src1")
@@ -445,22 +507,33 @@ binop("ixor", tunsigned, commutative + associative, "src0 ^ src1")
binop("fand", tfloat, commutative,
"((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
+binop("hand", thalf, commutative,
+ "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
binop("for", tfloat, commutative,
"((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
+binop("hor", thalf, commutative,
+ "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
binop("fxor", tfloat, commutative,
"(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
+binop("hxor", thalf, commutative,
+ "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
"{src}")
+binop_reduce("hdot", 1, thalf, thalf, "{src0} * {src1}", "{src0} + {src1}",
+ "{src}")
binop("fmin", tfloat, "", "fminf(src0, src1)")
+binop("hmin", thalf, "", "fminf(src0, src1)")
binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
binop("fmax", tfloat, "", "fmaxf(src0, src1)")
+binop("hmax", thalf, "", "fmaxf(src0, src1)")
binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")
binop("fpow", tfloat, "", "powf(src0, src1)")
+binop("hpow", thalf, "", "powf(src0, src1)")
binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
"pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
@@ -495,8 +568,10 @@ def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
[tunsigned, tunsigned, tunsigned], "", const_expr)
triop("ffma", tfloat, "src0 * src1 + src2")
+triop("hfma", thalf, "src0 * src1 + src2")
triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
+triop("hlrp", thalf, "src0 * (1 - src2) + src1 * src2")
# Conditional Select
#
@@ -506,6 +581,7 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
+triop("hcsel", thalf, "(src0 != 0.0f) ? src1 : src2")
opcode("bcsel", 0, tunsigned, [0, 0, 0],
[tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2")
--
1.9.3
More information about the mesa-dev
mailing list