[Mesa-dev] [PATCH 2/6] util: Add a uint inverse helper

Thu Sep 13 19:40:52 UTC 2018

This is useful for compiler optimizations that want to convert integer
division by constants into mul+add+shift.  It's put in util because it
may also be useful for other things and because having it split out of
the compiler pass itself makes it much easier to test.
---
 configure.ac                                  |   1 +
 src/util/Makefile.am                          |   3 +-
 src/util/meson.build                          |   1 +
 src/util/tests/uint_inverse/Makefile.am       |  43 ++++
 src/util/tests/uint_inverse/meson.build       |  30 +++
 .../tests/uint_inverse/uint_inverse_test.cpp  |  83 +++++++
 src/util/uint_inverse.h                       | 206 ++++++++++++++++++
 7 files changed, 366 insertions(+), 1 deletion(-)
 create mode 100644 src/util/tests/uint_inverse/Makefile.am
 create mode 100644 src/util/tests/uint_inverse/meson.build
 create mode 100644 src/util/tests/uint_inverse/uint_inverse_test.cpp
 create mode 100644 src/util/uint_inverse.h

diff --git a/configure.ac b/configure.ac
index f8bb131cb63..588a4f40d48 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3180,6 +3180,7 @@ AC_CONFIG_FILES([Makefile
                  src/util/tests/hash_table/Makefile
                  src/util/tests/set/Makefile
                  src/util/tests/string_buffer/Makefile
+                 src/util/tests/uint_inverse/Makefile
                  src/util/tests/vma/Makefile
                  src/util/xmlpool/Makefile
                  src/vulkan/Makefile])
diff --git a/src/util/Makefile.am b/src/util/Makefile.am
index efb94caff71..0e25731a261 100644
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -23,7 +23,8 @@ SUBDIRS = . \
 	xmlpool \
 	tests/hash_table \
 	tests/string_buffer \
-	tests/set
+	tests/set \
+	tests/uint_inverse
 
 if HAVE_STD_CXX11
 SUBDIRS += tests/vma
diff --git a/src/util/meson.build b/src/util/meson.build
index 9a99d60c158..fea2fcf2778 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -172,4 +172,5 @@ if with_tests
   subdir('tests/string_buffer')
   subdir('tests/vma')
   subdir('tests/set')
+  subdir('tests/uint_inverse')
 endif
diff --git a/src/util/tests/uint_inverse/Makefile.am b/src/util/tests/uint_inverse/Makefile.am
new file mode 100644
index 00000000000..ca7b9c1e952
--- /dev/null
+++ b/src/util/tests/uint_inverse/Makefile.am
@@ -0,0 +1,43 @@
+# Copyright © 2018 Intel
+#
+#  Permission is hereby granted, free of charge, to any person obtaining a
+#  copy of this software and associated documentation files (the "Software"),
+#  to deal in the Software without restriction, including without limitation
+#  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+#  and/or sell copies of the Software, and to permit persons to whom the
+#  Software is furnished to do so, subject to the following conditions:
+#
+#  The above copyright notice and this permission notice (including the next
+#  paragraph) shall be included in all copies or substantial portions of the
+#  Software.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+#  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+#  IN THE SOFTWARE.
+
+AM_CPPFLAGS = \
+	-I$(top_srcdir)/src \
+	-I$(top_srcdir)/include \
+	-I$(top_srcdir)/src/gallium/include \
+	-I$(top_srcdir)/src/gtest/include \
+	$(PTHREAD_CFLAGS) \
+	$(DEFINES)
+
+TESTS = uint_inverse_test
+
+check_PROGRAMS = $(TESTS)
+
+uint_inverse_test_SOURCES = \
+	uint_inverse_test.cpp
+
+uint_inverse_test_LDADD = \
+	$(top_builddir)/src/gtest/libgtest.la \
+	$(top_builddir)/src/util/libmesautil.la \
+	$(PTHREAD_LIBS) \
+	$(DLOPEN_LIBS)
+
+EXTRA_DIST = meson.build
diff --git a/src/util/tests/uint_inverse/meson.build b/src/util/tests/uint_inverse/meson.build
new file mode 100644
index 00000000000..c038d253502
--- /dev/null
+++ b/src/util/tests/uint_inverse/meson.build
@@ -0,0 +1,30 @@
+# Copyright © 2018 Intel Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+test(
+  'uint_inverse',
+  executable(
+    'uint_inverse_test',
+    'uint_inverse_test.cpp',
+    dependencies : [dep_thread, dep_dl, idep_gtest],
+    include_directories : inc_common,
+    link_with : [libmesa_util],
+  )
+)
diff --git a/src/util/tests/uint_inverse/uint_inverse_test.cpp b/src/util/tests/uint_inverse/uint_inverse_test.cpp
new file mode 100644
index 00000000000..c4baad37b7d
--- /dev/null
+++ b/src/util/tests/uint_inverse/uint_inverse_test.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <gtest/gtest.h>
+#include "util/uint_inverse.h"
+
+static uint64_t
+rand_uint(unsigned bits)
+{
+   assert(bits <= 64);
+   uint64_t r = 0;
+   for (unsigned i = 0; i < 8; i++)
+      r |= ((uint64_t)rand() & 0xf) << i * 8;
+   return r >> (63 - (rand() % bits));
+}
+
+static uint64_t
+udiv(unsigned bit_size, uint64_t a, uint64_t b)
+{
+   switch (bit_size) {
+   case 64: return (uint64_t)a / (uint64_t)b;
+   case 32: return (uint32_t)a / (uint32_t)b;
+   case 16: return (uint16_t)a / (uint16_t)b;
+   case 8:  return  (uint8_t)a / (uint8_t)b;
+   default:
+      assert(!"Invalid bit size");
+      return 0;
+   }
+}
+
+static void
+random_test(unsigned bits, unsigned count)
+{
+   for (unsigned i = 0; i < count; i++) {
+      uint64_t n = rand_uint(bits);
+      uint64_t d;
+      do { d = rand_uint(bits); } while (d <= 1);
+      EXPECT_EQ(util_uint_divide_using_inverse(bits, n, d), udiv(bits, n, d));
+   }
+}
+
+TEST(set, uint8)
+{
+   /* 8-bit is small enough we can brute-force the entire space */
+   for (unsigned n = 0; n < 256; n++)
+      for (unsigned d = 1; d < 256; d++)
+         EXPECT_EQ(util_uint_divide_using_inverse(8, n, d), udiv(8, n, d));
+}
+
+TEST(set, uint16)
+{
+   random_test(16, 10000);
+}
+
+TEST(set, uint32)
+{
+   random_test(32, 10000);
+}
+
+TEST(set, uint64)
+{
+   random_test(64, 10000);
+}
diff --git a/src/util/uint_inverse.h b/src/util/uint_inverse.h
new file mode 100644
index 00000000000..86c0d230a49
--- /dev/null
+++ b/src/util/uint_inverse.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "bigmath.h"
+#include "u_math.h"
+#include "bitscan.h"
+
+#ifndef UTIL_UINT_INVERSE_H
+#define UTIL_UINT_INVERSE_H
+
+static inline bool
+util_is_uint_inverse(unsigned bit_size, unsigned l,
+                     uint64_t m_low, bool m_high, uint64_t d)
+{
+   if (bit_size <= 32) {
+      assert(d < (1ull << 33));
+      assert(!m_high && m_low < (1ull << 34));
+      uint64_t m = m_low;
+
+      return (m * d >= (1ull << (bit_size + l))) &&
+             (m * d <  (1ull << (bit_size + l)) + (1ull << l));
+   } else {
+      /* The code below works for all bit sizes but we can do it more
+       * efficiently if we know we're less than 32 bits.
+       */
+      uint32_t m32[3] = { (uint32_t)m_low, (uint32_t)(m_low >> 32), m_high };
+      uint32_t d32[2] = { (uint32_t)d, (uint32_t)(d >> 32) };
+
+      uint32_t md32[5];
+      ubm_mul_u32arr(md32, m32, d32);
+
+      /* Set cmp to 2^(N+l) */
+      uint32_t cmp[5] = { 0, };
+      unsigned Nl = bit_size + l;
+      cmp[Nl / 32] |= 1 << (Nl % 32);
+
+      if (ubm_cmp_u32arr(md32, cmp) < 0)
+         return false;
+
+      /* Set cmp to 2^(N+l) + 2^l */
+      cmp[l / 32] |= 1 << (l % 32);
+      if (ubm_cmp_u32arr(md32, cmp) >= 0)
+         return false;
+
+      return true;
+   }
+}
+
+/** Compute the fixed-point inverse of d
+ *
+ * Theorem 4.2 in "Division by Invariant Integers using Multiplication" by
+ * Granlund and Montgomery states:
+ *
+ *    "Suppose m, d, and l are nonnegative integers such that d != 0 and
+ *
+ *                   2^(N+l) <= m * d <= 2^(N+l) + 2^l.
+ *
+ *    Then floor(n/d) = floor(m * n / 2^(N+l)) for every integer n with
+ *    0 <= n < 2^N.
+ *
+ * We already have d given.  Our objective is to find such integers l and m
+ * that satisfy 2^(N+l) <= m * d <= 2^(N+l) + 2^l where N is the bit size
+ * of our division operation.
+ *
+ * We are guaranteed that, if 2^l >= d, then such an m exists.  However, we
+ * would ideally like m to be as small as possible to make the multiply
+ * faster.  In particular, we want to have m < 2^N so that we don't have
+ * any overflow problems.  Unfortunately, this isn't always achievable.
+ */
+static inline void
+util_uint_inverse(unsigned bit_size, uint64_t d,
+                  uint64_t *m_low_out, bool *m_high_out, unsigned *l_out)
+{
+   /* We don't handle denominators of zero */
+   assert(d > 0);
+
+   /* We don't handle powers of two */
+   assert(!util_is_power_of_two_or_zero64(d));
+
+   /* Start with the first power of two higher than d */
+   unsigned bits = util_logbase2_64(d) + 1;
+   assert(bits > 1 && bits <= 64);
+
+   uint64_t n_low = bits < 64 ? (1ull << bits) : 0;
+   bool n_high = (bits == 64);
+
+   /* Compute the quotient q = 2^(bits + q_len) / d */
+   uint64_t q = 0;
+   unsigned q_len = 0;
+   while (1) {
+      assert(q_len <= bit_size);
+      q <<= 1;
+      q_len++;
+      if (n_high || n_low >= d) {
+         q |= 1;
+         n_low -= d;
+      }
+      n_high = (n_low >> 63);
+      n_low <<= 1;
+
+      /* Figure out an l such that m * d >= 2^(N+l).  If there is none, then
+       * it's not even worth checking if it's an inverse.  See the comment
+       * below for details about rounding error.
+       */
+      int l = q_len + bits - bit_size - 1;
+      if (l < 0)
+         continue;
+
+      /* The division algorithm here is the normal division algorithm which
+       * rounds down but we want to round up.  Since the dividend is a power
+       * of two and the divisor is not, we know that the divisor cannot evenly
+       * divide the dividend so we can round up by simply adding 1.
+       *
+       * Furthermore, we know that q + 1 will not overflow 64 bits.  If it
+       * did, that would imply that q_len >= 64 and q.low == 2^64 - 1.  This
+       * would imply
+       *
+       *    2^(bits + q_len - 1) == (2^64 - 1) * d + r       where 0 <= r < d
+       *                         == 2^64 * d - d + r
+       *
+       * Since bits >= 2 and q_len >= 64, we can divide through by 2^64 and we
+       * get that
+       *
+       *    2^(bits + q_len - 65) == d + (r - d) / 2^64
+       *
+       * which implies that (r - d) / 2^64 is an integer.  However,
+       *
+       *    0 <= d - r < 2^64
+       *
+       * so we have a contradiction and q.low != 2^64 - 1. QED
+       */
+      uint64_t m_low = q + 1;
+      bool m_high = (q_len == 65);
+
+      if (util_is_uint_inverse(bit_size, l, m_low, m_high, d)) {
+         *m_low_out = m_low;
+         *m_high_out = m_high;
+         *l_out = l;
+         return;
+      }
+   }
+}
+
+/* The use of this function in real code is not recommended.  It exists to
+ * showcase the inverse function above and to be used in tests.
+ */
+static inline uint64_t
+util_uint_divide_using_inverse(unsigned bit_size, uint64_t n, uint64_t d)
+{
+   assert(d != 0);
+   assert(bit_size <= 64);
+
+   if (util_is_power_of_two_or_zero64(d)) {
+      return n >> util_logbase2_64(d);
+   } else {
+      uint64_t m_low;
+      bool m_high;
+      unsigned l;
+      util_uint_inverse(bit_size, d, &m_low, &m_high, &l);
+
+      uint64_t mask = (bit_size < 64) ? ((1ull << bit_size) - 1) : ~0ull;
+
+      /* Compute umul_high(n, m_low) */
+      uint64_t t;
+      if (bit_size <= 32) {
+         t = (n * (m_low & mask)) >> bit_size;
+      } else {
+         uint32_t n32[2] = { (uint32_t)n, (uint32_t)(n >> 32) };
+         uint32_t m_low32[2] = { (uint32_t)m_low, (uint32_t)(m_low >> 32) };
+         uint32_t res32[4] = { 0, 0, 0, 0 };
+         ubm_mul_u32arr(res32, n32, m_low32);
+         t = res32[2] | ((uint64_t)res32[3] << 32);
+      }
+
+      if (m_high || (bit_size < 64 && m_low >= (1ull << bit_size))) {
+         assert(l > 0);
+         unsigned l1 = l ? 1 : 0;
+         unsigned l2 = l - l1;
+         return ((t + (((n - t) & mask) >> l1)) & mask) >> l2;
+      } else {
+         return t >> l;
+      }
+   }
+}
+
+#endif /* UTIL_UINT_INVERSE_H */
-- 
2.17.1