Mesa (master): mesa: implement glUniform for packed FP16 uniforms

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Apr 13 05:32:52 UTC 2021


Module: Mesa
Branch: master
Commit: 6bf18fe036c6e6b14494ecfd2b66674b21d7085a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6bf18fe036c6e6b14494ecfd2b66674b21d7085a

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Fri Feb 12 08:44:31 2021 -0500

mesa: implement glUniform for packed FP16 uniforms

Let's just say that this passes dEQP.

If you think the _mesa_float_to_half conversions are costly, you can
enable FP16 uniforms only if the CPU supports F16C, which is fast.
Drivers will control whether this is used, not common code.

ARM will need something that is equivalent to F16C.

Acked-by: Eric Anholt <eric at anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9050>

---

 src/mesa/main/uniform_query.cpp | 168 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 163 insertions(+), 5 deletions(-)

diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index c3833dcf988..c62816e2d74 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -963,6 +963,9 @@ validate_uniform(GLint location, GLsizei count, const GLvoid *values,
    case GLSL_TYPE_IMAGE:
       match = (basicType == GLSL_TYPE_INT && _mesa_is_desktop_gl(ctx));
       break;
+   case GLSL_TYPE_FLOAT16:
+      match = basicType == GLSL_TYPE_FLOAT;
+      break;
    default:
       match = (basicType == uni->type->base_type);
       break;
@@ -1071,8 +1074,9 @@ copy_uniforms_to_storage(gl_constant_value *storage,
    const gl_constant_value *src = (const gl_constant_value*)values;
    bool copy_as_uint64 = uni->is_bindless &&
                          (uni->type->is_sampler() || uni->type->is_image());
+   bool copy_to_float16 = uni->type->base_type == GLSL_TYPE_FLOAT16;
 
-   if (!uni->type->is_boolean() && !copy_as_uint64) {
+   if (!uni->type->is_boolean() && !copy_as_uint64 && !copy_to_float16) {
       unsigned size = sizeof(storage[0]) * components * count * size_mul;
 
       if (!memcmp(storage, values, size))
@@ -1082,6 +1086,46 @@ copy_uniforms_to_storage(gl_constant_value *storage,
          _mesa_flush_vertices_for_uniforms(ctx, uni);
 
       memcpy(storage, values, size);
+      return true;
+   } else if (copy_to_float16) {
+      assert(ctx->Const.PackedDriverUniformStorage);
+      const unsigned dst_components = align(components, 2);
+      uint16_t *dst = (uint16_t*)storage;
+
+      int i = 0;
+      unsigned c = 0;
+
+      if (flush) {
+         /* Find the first element that's different. */
+         for (; i < count; i++) {
+            for (; c < components; c++) {
+               if (dst[c] != _mesa_float_to_half(src[c].f)) {
+                  _mesa_flush_vertices_for_uniforms(ctx, uni);
+                  flush = false;
+                  goto break_loops;
+               }
+            }
+            c = 0;
+            dst += dst_components;
+            src += components;
+         }
+      break_loops:
+         if (flush)
+            return false; /* No change. */
+      }
+
+      /* Set the remaining elements. We know that at least 1 element is
+       * different and that we have flushed.
+       */
+      for (; i < count; i++) {
+         for (; c < components; c++)
+            dst[c] = _mesa_float_to_half(src[c].f);
+
+         c = 0;
+         dst += dst_components;
+         src += components;
+      }
+
       return true;
    } else if (copy_as_uint64) {
       const unsigned elems = components * count;
@@ -1230,8 +1274,14 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values,
    if (ctx->Const.PackedDriverUniformStorage &&
        (uni->is_bindless || !uni->type->contains_opaque())) {
       for (unsigned s = 0; s < uni->num_driver_storage; s++) {
+         unsigned dword_components = components;
+
+         /* 16-bit uniforms are packed. */
+         if (glsl_base_type_is_16bit(uni->type->base_type))
+            dword_components = DIV_ROUND_UP(dword_components, 2);
+
          storage = (gl_constant_value *)
-            uni->driver_storage[s].data + (size_mul * offset * components);
+            uni->driver_storage[s].data + (size_mul * offset * dword_components);
 
          if (copy_uniforms_to_storage(storage, uni, ctx, count, values, size_mul,
                                       offset, components, basicType, !ctx_flushed))
@@ -1362,7 +1412,106 @@ copy_uniform_matrix_to_storage(struct gl_context *ctx,
    const unsigned elements = components * vectors;
    const unsigned size = sizeof(storage[0]) * elements * count * size_mul;
 
-   if (!transpose) {
+   if (uni->type->base_type == GLSL_TYPE_FLOAT16) {
+      assert(ctx->Const.PackedDriverUniformStorage);
+      const unsigned dst_components = align(components, 2);
+      const unsigned dst_elements = dst_components * vectors;
+
+      if (!transpose) {
+         const float *src = (const float *)values;
+         uint16_t *dst = (uint16_t*)storage;
+
+         unsigned i = 0, r = 0, c = 0;
+
+         if (flush) {
+            /* Find the first element that's different. */
+            for (; i < count; i++) {
+               for (; c < cols; c++) {
+                  for (; r < rows; r++) {
+                     if (dst[(c * dst_components) + r] !=
+                         _mesa_float_to_half(src[(c * components) + r])) {
+                        _mesa_flush_vertices_for_uniforms(ctx, uni);
+                        flush = false;
+                        goto break_loops_16bit;
+                     }
+                  }
+                  r = 0;
+               }
+               c = 0;
+               dst += dst_elements;
+               src += elements;
+            }
+
+         break_loops_16bit:
+            if (flush)
+               return false; /* No change. */
+         }
+
+         /* Set the remaining elements. We know that at least 1 element is
+          * different and that we have flushed.
+          */
+         for (; i < count; i++) {
+            for (; c < cols; c++) {
+               for (; r < rows; r++) {
+                  dst[(c * dst_components) + r] =
+                     _mesa_float_to_half(src[(c * components) + r]);
+               }
+               r = 0;
+            }
+            c = 0;
+            dst += dst_elements;
+            src += elements;
+         }
+         return true;
+      } else {
+         /* Transpose the matrix. */
+         const float *src = (const float *)values;
+         uint16_t *dst = (uint16_t*)storage;
+
+         unsigned i = 0, r = 0, c = 0;
+
+         if (flush) {
+            /* Find the first element that's different. */
+            for (; i < count; i++) {
+               for (; r < rows; r++) {
+                  for (; c < cols; c++) {
+                     if (dst[(c * dst_components) + r] !=
+                         _mesa_float_to_half(src[c + (r * vectors)])) {
+                        _mesa_flush_vertices_for_uniforms(ctx, uni);
+                        flush = false;
+                        goto break_loops_16bit_transpose;
+                     }
+                  }
+                  c = 0;
+               }
+               r = 0;
+               dst += elements;
+               src += elements;
+            }
+
+         break_loops_16bit_transpose:
+            if (flush)
+               return false; /* No change. */
+         }
+
+         /* Set the remaining elements. We know that at least 1 element is
+          * different and that we have flushed.
+          */
+         for (; i < count; i++) {
+            for (; r < rows; r++) {
+               for (; c < cols; c++) {
+                  dst[(c * dst_components) + r] =
+                     _mesa_float_to_half(src[c + (r * vectors)]);
+               }
+               c = 0;
+            }
+            r = 0;
+            dst += elements;
+            src += elements;
+         }
+         return true;
+      }
+   } else if (!transpose) {
       if (!memcmp(storage, values, size))
          return false;
 
@@ -1529,7 +1678,9 @@ _mesa_uniform_matrix(GLint location, GLsizei count,
     * There are no Boolean matrix types, so we do not need to allow
     * GLSL_TYPE_BOOL here (as _mesa_uniform does).
     */
-   if (uni->type->base_type != basicType) {
+   if (uni->type->base_type != basicType &&
+       !(uni->type->base_type == GLSL_TYPE_FLOAT16 &&
+         basicType == GLSL_TYPE_FLOAT)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glUniformMatrix%ux%u(\"%s\"@%d is %s, not %s)",
                   cols, rows, uni->name, location,
@@ -1566,8 +1717,15 @@ _mesa_uniform_matrix(GLint location, GLsizei count,
       bool flushed = false;
 
       for (unsigned s = 0; s < uni->num_driver_storage; s++) {
+         unsigned dword_components = components;
+
+         /* 16-bit uniforms are packed. */
+         if (glsl_base_type_is_16bit(uni->type->base_type))
+            dword_components = DIV_ROUND_UP(dword_components, 2);
+
          storage = (gl_constant_value *)
-            uni->driver_storage[s].data + (size_mul * offset * elements);
+            uni->driver_storage[s].data +
+            (size_mul * offset * dword_components * vectors);
 
          if (copy_uniform_matrix_to_storage(ctx, storage, uni, count, values,
                                             size_mul, offset, components,



More information about the mesa-commit mailing list