[Mesa-dev] [PATCH 07/23] i965: Import array utils for the surface message builder.

Tue Apr 28 11:44:18 PDT 2015

Define a few transformations on register arrays which will be used
frequently during the construction of typed and untyped surface
message payloads.  Their purpose is simple but the implementation is
rather messy, so it makes a lot of sense to factor them out as
separate functions.
---
 src/mesa/drivers/dri/i965/Makefile.sources         |   1 +
 src/mesa/drivers/dri/i965/brw_ir_surface_builder.h | 188 +++++++++++++++++++++
 2 files changed, 189 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_ir_surface_builder.h

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 5bb6f06..4e8b25c 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -67,6 +67,7 @@ i965_FILES = \
 	brw_interpolation_map.c \
 	brw_ir_allocator.h \
 	brw_ir_fs.h \
+	brw_ir_surface_builder.h \
 	brw_ir_svec4.h \
 	brw_ir_vec4.h \
 	brw_lower_texture_gradients.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h b/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
new file mode 100644
index 0000000..b2c0043
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
@@ -0,0 +1,188 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2013-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_IR_SURFACE_BUILDER_H
+#define BRW_IR_SURFACE_BUILDER_H
+
+#include "brw_fs_builder.h"
+#include "brw_vec4_builder.h"
+
+namespace brw {
+   namespace array_utils {
+      namespace detail {
+         /**
+          * Return logical component \p i of the register array \p reg as a
+          * natural-width register for the current SIMD mode.
+          *
+          * A logical component may itself be a vector of per-channel values
+          * whose length depends on the dispatch width and SIMD mode, e.g. one
+          * physical 32B register contains 4, 1, or 0.5 logical 32-bit
+          * components in SIMD4x2, SIMD8 or SIMD16 code respectively.  \p i is
+          * therefore split into a register offset and a component within it.
+          */
+         template<typename B>
+         typename B::scalar_builder::dst_reg
+         index(const B &bld, const array_reg &reg, unsigned i)
+         {
+            const unsigned per_reg = B::dst_reg::traits::chan_size;
+            const unsigned reg_idx = i / per_reg;
+            const unsigned comp_idx = i % per_reg;
+            return component(offset(bld.scalar().natural_reg(reg), reg_idx),
+                             comp_idx);
+         }
+      }
+
+      /**
+       * Copy the first \p size components of \p src into a freshly allocated
+       * array of registers using plain write-masked MOVs, getting rid of
+       * swizzles, strides and funky regioning modes.  If \p size is less
+       * than the chan_size of \p S the components left out are zeroed.
+       * Returns an empty array_reg if \p src is BAD_FILE or \p size is 0.
+       */
+      template<typename B, typename S>
+      array_reg
+      emit_flatten(const B &bld, const S &src, unsigned size)
+      {
+         if (src.file == BAD_FILE || size == 0)
+            return array_reg();
+
+         const unsigned per_reg = S::traits::chan_size;
+         const unsigned nregs = DIV_ROUND_UP(size, per_reg);
+         const unsigned enabled = (1 << size) - 1;
+         const array_reg flat =
+            bld.array_reg(src.type, nregs * bld.dispatch_width() / 8);
+
+         bld.MOV(writemask(bld.natural_reg(flat), enabled), src);
+         /* Zero the components that the copy above didn't write. */
+         if (size < per_reg)
+            bld.MOV(writemask(bld.natural_reg(flat), ~enabled), 0);
+         return flat;
+      }
+
+      /**
+       * Copy one out of every \p src_stride logical components of \p src into
+       * one out of every \p dst_stride logical components of the result, for
+       * \p size components in total.  Returns an empty array_reg if \p src is
+       * BAD_FILE or \p size is 0, and \p src itself if both strides are 1.
+       */
+      template<typename B>
+      array_reg
+      emit_stride(const B &bld, const array_reg &src, unsigned size,
+                  unsigned dst_stride, unsigned src_stride)
+      {
+         using detail::index;
+         if (src.file == BAD_FILE || size == 0)
+            return array_reg();
+
+         if (dst_stride == 1 && src_stride == 1)
+            return src;
+
+         const unsigned per_reg = B::src_reg::traits::chan_size;
+         const unsigned nregs = DIV_ROUND_UP(size * dst_stride, per_reg);
+         const array_reg strided =
+            bld.array_reg(src.type, nregs * bld.dispatch_width() / 8);
+
+         for (unsigned k = 0; k != size; ++k)
+            bld.scalar().MOV(index(bld, strided, k * dst_stride),
+                             index(bld, src, k * src_stride));
+
+         return strided;
+      }
+
+      /**
+       * Interleave logical components from the given arguments.  If two
+       * arguments are provided \p size components will be copied from each to
+       * the even and odd components of the result respectively; if only one is
+       * (the other being BAD_FILE) its components are copied as they are.  May
+       * be safely used to merge two halves of a value calculated separately.
+       */
+      template<typename B>
+      array_reg
+      emit_zip(const B &bld, const array_reg &src0, const array_reg &src1,
+               unsigned size)
+      {
+         using detail::index;
+         const unsigned chan_size = B::src_reg::traits::chan_size;
+         const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+         /* Put the valid source first: srcs[0..n-1] must never be BAD_FILE. */
+         const array_reg srcs[] = {
+            src0.file != BAD_FILE ? src0 : src1, src1 };
+         const array_reg dst = (size * n == 0 ? array_reg() :
+                                bld.array_reg(srcs[0].type,
+                                              DIV_ROUND_UP(size * n, chan_size)
+                                              * bld.dispatch_width() / 8));
+
+         for (unsigned i = 0; i < size; ++i)
+            for (unsigned j = 0; j < n; ++j)
+               exec_all(bld.scalar().MOV(index(bld, dst, j + i * n),
+                                         index(bld, srcs[j], i)));
+
+         return dst;
+      }
+
+      /**
+       * Concatenate a number of register arrays passed in as arguments into a
+       * single array of BRW_REGISTER_TYPE_UD registers, assembled with one
+       * LOAD_PAYLOAD emitted through the scalar builder of \p vbld.
+       */
+      template<typename B>
+      array_reg
+      emit_collect(const B &vbld,
+                   const array_reg &src0 = array_reg(),
+                   const array_reg &src1 = array_reg(),
+                   const array_reg &src2 = array_reg(),
+                   const array_reg &src3 = array_reg())
+      {
+         typedef typename B::scalar_builder::src_reg src_reg;
+         const typename B::scalar_builder bld = vbld.scalar();
+         const array_reg inputs[] = { src0, src1, src2, src3 };
+         const unsigned total = src0.size + src1.size + src2.size + src3.size;
+         const array_reg out = (total == 0 ? array_reg() :
+                                bld.array_reg(BRW_REGISTER_TYPE_UD, total));
+         src_reg *const parts = new src_reg[total];
+         unsigned count = 0;
+
+         for (unsigned s = 0; s < ARRAY_SIZE(inputs); ++s) {
+            /* Split the array in elements of the full dispatch width if its
+             * size is a multiple of it, of half the dispatch width if not. */
+            const bool full = inputs[s].size * 8 % bld.dispatch_width() == 0;
+            const unsigned width =
+               (full ? bld.dispatch_width() : bld.dispatch_width() / 2);
+            const typename B::scalar_builder ubld = (full ? bld : bld.half(0));
+            const unsigned pieces = inputs[s].size * 8 / width;
+
+            for (unsigned p = 0; p != pieces; ++p)
+               parts[count++] = retype(offset(ubld.natural_reg(inputs[s]), p),
+                                       BRW_REGISTER_TYPE_UD);
+         }
+
+         bld.LOAD_PAYLOAD(bld.natural_reg(out), parts, count);
+
+         delete[] parts;
+         return out;
+      }
+   }
+}
+
+#endif
-- 
2.3.5