[Mesa-dev] [PATCH v3 10/44] nir: add rounding mode support to Opcode class in nir_opcodes.py

Wed Feb 6 10:44:39 UTC 2019

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---
 src/compiler/nir/nir.h            |  3 ++
 src/compiler/nir/nir_opcodes.py   | 90 ++++++++++++++++---------------
 src/compiler/nir/nir_opcodes_c.py |  4 +-
 3 files changed, 52 insertions(+), 45 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a84c46507e2..cbc5bcff7d3 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -995,6 +995,9 @@ typedef struct {
 
    /* Whether this represents a numeric conversion opcode */
    bool is_conversion;
+
+   /* Rounding mode of this opcode; nir_rounding_mode_undef if it has none */
+   nir_rounding_mode rounding_mode;
 } nir_op_info;
 
 extern const nir_op_info nir_op_infos[nir_num_opcodes];
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index dc4cd9ac63d..21f6ee6f742 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -33,7 +33,8 @@ class Opcode(object):
    NOTE: this must be kept in sync with nir_op_info
    """
    def __init__(self, name, output_size, output_type, input_sizes,
-                input_types, is_conversion, algebraic_properties, const_expr):
+                input_types, is_conversion, algebraic_properties, const_expr,
+                rounding_mode):
       """Parameters:
 
       - name is the name of the opcode (prepend nir_op_ for the enum name)
@@ -74,6 +75,7 @@ class Opcode(object):
       assert isinstance(is_conversion, bool)
       assert isinstance(algebraic_properties, str)
       assert isinstance(const_expr, str)
+      assert isinstance(rounding_mode, str)
       assert len(input_sizes) == len(input_types)
       assert 0 <= output_size <= 4
       for size in input_sizes:
@@ -88,6 +90,7 @@ class Opcode(object):
       self.input_types = input_types
       self.is_conversion = is_conversion
       self.algebraic_properties = algebraic_properties
+      self.rounding_mode = rounding_mode
       self.const_expr = const_expr
 
 # helper variables for strings
@@ -141,22 +144,22 @@ associative = "associative "
 opcodes = {}
 
 def opcode(name, output_size, output_type, input_sizes, input_types,
-           is_conversion, algebraic_properties, const_expr):
+           is_conversion, algebraic_properties, const_expr, rounding_mode):
    assert name not in opcodes
    opcodes[name] = Opcode(name, output_size, output_type, input_sizes,
                           input_types, is_conversion, algebraic_properties,
-                          const_expr)
+                          const_expr, rounding_mode)
 
-def unop_convert(name, out_type, in_type, const_expr):
-   opcode(name, 0, out_type, [0], [in_type], False, "", const_expr)
+def unop_convert(name, out_type, in_type, const_expr, rounding_mode):
+   opcode(name, 0, out_type, [0], [in_type], False, "", const_expr, rounding_mode)
 
 def unop(name, ty, const_expr):
-   opcode(name, 0, ty, [0], [ty], False, "", const_expr)
+   opcode(name, 0, ty, [0], [ty], False, "", const_expr, "")
 
 def unop_horiz(name, output_size, output_type, input_size, input_type,
                const_expr):
    opcode(name, output_size, output_type, [input_size], [input_type],
-          False, "", const_expr)
+          False, "", const_expr, "")
 
 def unop_reduce(name, output_size, output_type, input_type, prereduce_expr,
                 reduce_expr, final_expr):
@@ -177,8 +180,8 @@ def unop_reduce(name, output_size, output_type, input_type, prereduce_expr,
    unop_horiz(name + "4", output_size, output_type, 4, input_type,
               final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
 
-def unop_numeric_convert(name, out_type, in_type, const_expr):
-   opcode(name, 0, out_type, [0], [in_type], True, "", const_expr)
+def unop_numeric_convert(name, out_type, in_type, const_expr, rnd_mode):
+   opcode(name, 0, out_type, [0], [in_type], True, "", const_expr, rnd_mode)
 
 # These two move instructions differ in what modifiers they support and what
 # the negate modifier means. Otherwise, they are identical.
@@ -223,12 +226,11 @@ for src_t in [tint, tuint, tfloat, tbool]:
               for rnd_mode in rnd_modes:
                   unop_numeric_convert("{0}2{1}{2}{3}".format(src_t[0], dst_t[0],
                                                               bit_size, rnd_mode),
-                                       dst_t + str(bit_size), src_t, "src0")
+                                       dst_t + str(bit_size), src_t, "src0", rnd_mode)
           else:
               conv_expr = "src0 != 0" if dst_t == tbool else "src0"
               unop_numeric_convert("{0}2{1}{2}".format(src_t[0], dst_t[0], bit_size),
-                                   dst_t + str(bit_size), src_t, conv_expr)
-
+                                   dst_t + str(bit_size), src_t, conv_expr, "")
 
 # Unary floating-point rounding operations.
 
@@ -248,8 +250,8 @@ unop("fsin", tfloat, "bit_size == 64 ? sin(src0) : sinf(src0)")
 unop("fcos", tfloat, "bit_size == 64 ? cos(src0) : cosf(src0)")
 
 # dfrexp
-unop_convert("frexp_exp", tint32, tfloat64, "frexp(src0, &dst);")
-unop_convert("frexp_sig", tfloat64, tfloat64, "int n; dst = frexp(src0, &n);")
+unop_convert("frexp_exp", tint32, tfloat64, "frexp(src0, &dst);", "")
+unop_convert("frexp_sig", tfloat64, tfloat64, "int n; dst = frexp(src0, &n);", "")
 
 # Partial derivatives.
 
@@ -337,15 +339,15 @@ unop_horiz("unpack_32_2x16", 2, tuint16, 1, tuint32,
 
 
 unop_convert("unpack_half_2x16_split_x", tfloat32, tuint32,
-             "unpack_half_1x16((uint16_t)(src0 & 0xffff))")
+             "unpack_half_1x16((uint16_t)(src0 & 0xffff))", "")
 unop_convert("unpack_half_2x16_split_y", tfloat32, tuint32,
-             "unpack_half_1x16((uint16_t)(src0 >> 16))")
+             "unpack_half_1x16((uint16_t)(src0 >> 16))", "")
 
-unop_convert("unpack_32_2x16_split_x", tuint16, tuint32, "src0")
-unop_convert("unpack_32_2x16_split_y", tuint16, tuint32, "src0 >> 16")
+unop_convert("unpack_32_2x16_split_x", tuint16, tuint32, "src0", "")
+unop_convert("unpack_32_2x16_split_y", tuint16, tuint32, "src0 >> 16", "")
 
-unop_convert("unpack_64_2x32_split_x", tuint32, tuint64, "src0")
-unop_convert("unpack_64_2x32_split_y", tuint32, tuint64, "src0 >> 32")
+unop_convert("unpack_64_2x32_split_x", tuint32, tuint64, "src0", "")
+unop_convert("unpack_64_2x32_split_y", tuint32, tuint64, "src0 >> 32", "")
 
 # Bit operations, part of ARB_gpu_shader5.
 
@@ -362,7 +364,7 @@ for (unsigned bit = 0; bit < bit_size; bit++) {
    if ((src0 >> bit) & 1)
       dst++;
 }
-""")
+""", "")
 
 unop_convert("ufind_msb", tint32, tuint, """
 dst = -1;
@@ -372,7 +374,7 @@ for (int bit = bit_size - 1; bit >= 0; bit--) {
       break;
    }
 }
-""")
+""", "")
 
 unop("ifind_msb", tint32, """
 dst = -1;
@@ -396,7 +398,7 @@ for (unsigned bit = 0; bit < bit_size; bit++) {
       break;
    }
 }
-""")
+""", "")
 
 
 for i in range(1, 5):
@@ -433,7 +435,7 @@ if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5;
 
 def binop_convert(name, out_type, in_type, alg_props, const_expr):
    opcode(name, 0, out_type, [0, 0], [in_type, in_type],
-          False, alg_props, const_expr)
+          False, alg_props, const_expr, "")
 
 def binop(name, ty, alg_props, const_expr):
    binop_convert(name, ty, ty, alg_props, const_expr)
@@ -447,7 +449,7 @@ def binop_compare32(name, ty, alg_props, const_expr):
 def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size,
                 src2_type, const_expr):
    opcode(name, out_size, out_type, [src1_size, src2_size], [src1_type, src2_type],
-          False, "", const_expr)
+          False, "", const_expr, "")
 
 def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
                  reduce_expr, final_expr):
@@ -463,13 +465,13 @@ def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
    src3 = prereduce("src0.w", "src1.w")
    opcode(name + "2", output_size, output_type,
           [2, 2], [src_type, src_type], False, commutative,
-          final(reduce_(src0, src1)))
+          final(reduce_(src0, src1)), "")
    opcode(name + "3", output_size, output_type,
           [3, 3], [src_type, src_type], False, commutative,
-          final(reduce_(reduce_(src0, src1), src2)))
+          final(reduce_(reduce_(src0, src1), src2)), "")
    opcode(name + "4", output_size, output_type,
           [4, 4], [src_type, src_type], False, commutative,
-          final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
+          final(reduce_(reduce_(src0, src1), reduce_(src2, src3))), "")
 
 binop("fadd", tfloat, commutative + associative, "src0 + src1")
 binop("iadd", tint, commutative + associative, "src0 + src1")
@@ -618,9 +620,9 @@ binop("seq", tfloat32, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equ
 binop("sne", tfloat32, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
 
 
-opcode("ishl", 0, tint, [0, 0], [tint, tuint32], False, "", "src0 << src1")
-opcode("ishr", 0, tint, [0, 0], [tint, tuint32], False, "", "src0 >> src1")
-opcode("ushr", 0, tuint, [0, 0], [tuint, tuint32], False, "", "src0 >> src1")
+opcode("ishl", 0, tint, [0, 0], [tint, tuint32], False, "", "src0 << src1", "")
+opcode("ishr", 0, tint, [0, 0], [tint, tuint32], False, "", "src0 >> src1", "")
+opcode("ushr", 0, tuint, [0, 0], [tuint, tuint32], False, "", "src0 >> src1", "")
 
 # bitwise logic operators
 #
@@ -652,9 +654,9 @@ binop_reduce("fdot_replicated", 4, tfloat, tfloat,
              "{src0} * {src1}", "{src0} + {src1}", "{src}")
 
 opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], False, "",
-       "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+       "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w", "")
 opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], False, "",
-       "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w")
+       "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w", "")
 
 binop("fmin", tfloat, "", "fminf(src0, src1)")
 binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
@@ -735,7 +737,7 @@ dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1);
 /* flush denormals to zero. */
 if (!isnormal(dst))
    dst = copysignf(0.0f, src0);
-""")
+""", "")
 
 # Combines the first component of each input to make a 2-component vector.
 
@@ -754,11 +756,11 @@ binop("extract_i16", tint, "", "(int16_t)(src0 >> (src1 * 16))")
 
 
 def triop(name, ty, const_expr):
-   opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], False, "", const_expr)
+   opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], False, "", const_expr, "")
 def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
    opcode(name, output_size, tuint,
    [src1_size, src2_size, src3_size],
-   [tuint, tuint, tuint], False, "", const_expr)
+   [tuint, tuint, tuint], False, "", const_expr, "")
 
 triop("ffma", tfloat, "src0 * src1 + src2")
 
@@ -787,9 +789,9 @@ triop("imed3", tint, "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
 triop("umed3", tuint, "MAX2(MIN2(MAX2(src0, src1), src2), MIN2(src0, src1))")
 
 opcode("bcsel", 0, tuint, [0, 0, 0],
-      [tbool1, tuint, tuint], False, "", "src0 ? src1 : src2")
+      [tbool1, tuint, tuint], False, "", "src0 ? src1 : src2", "")
 opcode("b32csel", 0, tuint, [0, 0, 0],
-       [tbool32, tuint, tuint], False, "", "src0 ? src1 : src2")
+       [tbool32, tuint, tuint], False, "", "src0 ? src1 : src2", "")
 
 # SM5 bfi assembly
 triop("bfi", tuint32, """
@@ -820,7 +822,7 @@ if (bits == 0) {
 } else {
    dst = base >> offset;
 }
-""")
+""", "")
 opcode("ibfe", 0, tint32,
        [0, 0, 0], [tint32, tint32, tint32], False, "", """
 int base = src0;
@@ -834,7 +836,7 @@ if (bits == 0) {
 } else {
    dst = base >> offset;
 }
-""")
+""", "")
 
 # GLSL bitfieldExtract()
 opcode("ubitfield_extract", 0, tuint32,
@@ -848,7 +850,7 @@ if (bits == 0) {
 } else {
    dst = (base >> offset) & ((1ull << bits) - 1);
 }
-""")
+""", "")
 opcode("ibitfield_extract", 0, tint32,
        [0, 0, 0], [tint32, tint32, tint32], False, "", """
 int base = src0;
@@ -860,7 +862,7 @@ if (bits == 0) {
 } else {
    dst = (base << (32 - offset - bits)) >> offset; /* use sign-extending shift */
 }
-""")
+""", "")
 
 # Combines the first component of each input to make a 3-component vector.
 
@@ -875,7 +877,7 @@ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
    opcode(name, output_size, tuint,
           [src1_size, src2_size, src3_size, src4_size],
           [tuint, tuint, tuint, tuint],
-          False, "", const_expr)
+          False, "", const_expr, "")
 
 opcode("bitfield_insert", 0, tuint32, [0, 0, 0, 0],
        [tuint32, tuint32, tint32, tint32], False, "", """
@@ -889,7 +891,7 @@ if (bits == 0) {
    unsigned mask = ((1ull << bits) - 1) << offset;
    dst = (base & ~mask) | ((insert << offset) & mask);
 }
-""")
+""", "")
 
 quadop_horiz("vec4", 4, 1, 1, 1, 1, """
 dst.x = src0.x;
diff --git a/src/compiler/nir/nir_opcodes_c.py b/src/compiler/nir/nir_opcodes_c.py
index 96c71a1b2c5..c6cab5a5e1a 100644
--- a/src/compiler/nir/nir_opcodes_c.py
+++ b/src/compiler/nir/nir_opcodes_c.py
@@ -121,7 +121,9 @@ const nir_op_info nir_op_infos[nir_num_opcodes] = {
    .algebraic_properties =
       ${ "0" if opcode.algebraic_properties == "" else " | ".join(
             "NIR_OP_IS_" + prop.upper() for prop in
-               opcode.algebraic_properties.strip().split(" ")) }
+               opcode.algebraic_properties.strip().split(" ")) },
+   .rounding_mode =
+      ${ "nir_rounding_mode_undef" if opcode.rounding_mode == "" else "nir_rounding_mode" + opcode.rounding_mode }
 },
 % endfor
 };
-- 
2.19.1