[Mesa-dev] [PATCH 07/25] i965/fs: Import array utils for the surface message builder.

Wed May 13 12:07:35 PDT 2015

On Tue, May 5, 2015 at 2:48 PM, Francisco Jerez <currojerez at riseup.net> wrote:
> Define a few transformations on register arrays which will be used
> frequently during the construction of typed and untyped surface
> message payloads.  Their purpose is simple but the implementation is
> rather messy, so it makes a lot of sense to factor them out as
> separate functions.
>
> v2: Drop VEC4 suport.
> ---
>  src/mesa/drivers/dri/i965/Makefile.sources         |   2 +
>  .../drivers/dri/i965/brw_fs_surface_builder.cpp    | 218 +++++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_fs_surface_builder.h |  31 +++
>  3 files changed, 251 insertions(+)
>  create mode 100644 src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>  create mode 100644 src/mesa/drivers/dri/i965/brw_fs_surface_builder.h
>
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
> index 260e448..f299913 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -59,6 +59,8 @@ i965_FILES = \
>         brw_fs_register_coalesce.cpp \
>         brw_fs_saturate_propagation.cpp \
>         brw_fs_sel_peephole.cpp \
> +       brw_fs_surface_builder.cpp \
> +       brw_fs_surface_builder.h \
>         brw_fs_vector_splitting.cpp \
>         brw_fs_visitor.cpp \
>         brw_gs.c \
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> new file mode 100644
> index 0000000..007d5f4
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> @@ -0,0 +1,218 @@
> +/*
> + * Copyright © 2013-2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "brw_fs_surface_builder.h"
> +
> +using namespace brw;
> +
> +namespace {
> +   namespace array_utils {
> +      /**
> +       * A plain contiguous region of memory in your register file,
> +       * with well-defined size and no fancy addressing modes,
> +       * swizzling or striding.
> +       */
> +      struct array_reg : public backend_reg {
> +         array_reg() : backend_reg(), size(0)
> +         {
> +         }
> +
> +         explicit
> +         array_reg(const backend_reg &reg, unsigned size = 1) :
> +            backend_reg(reg), size(size)
> +         {
> +         }
> +
> +         /** Size of the region in 32B registers. */
> +         unsigned size;
> +      };
> +
> +      /**
> +       * Increase the register base offset by the specified amount
> +       * given in 32B registers.
> +       */
> +      array_reg
> +      offset(array_reg reg, unsigned delta)
> +      {
> +         assert(delta == 0 || (reg.file != HW_REG && reg.file != IMM));
> +         reg.reg_offset += delta;
> +         return reg;
> +      }
> +
> +      /**
> +       * Create a register of natural vector size and SIMD width
> +       * using array \p reg as storage.
> +       */
> +      fs_reg
> +      natural_reg(const fs_builder &bld, const array_reg &reg)
> +      {
> +         return fs_reg(reg, bld.dispatch_width());
> +      }
> +
> +      /**
> +       * Allocate a raw chunk of memory from the virtual GRF file
> +       * with no special vector size or SIMD width.  \p n is given
> +       * in units of 32B registers.
> +       */
> +      array_reg
> +      alloc_array_reg(const fs_builder &bld, enum brw_reg_type type, unsigned n)
> +      {
> +         return array_reg(
> +            bld.vgrf(type,
> +                     DIV_ROUND_UP(n * REG_SIZE,
> +                                  type_sz(type) * bld.dispatch_width())),
> +            n);
> +      }
> +
> +      /**
> +       * Fetch the i-th logical component of an array of registers and return
> +       * it as a natural-width register according to the current SIMD mode.
> +       *
> +       * Each logical component may be in fact a vector with a number of
> +       * per-channel values depending on the dispatch width and SIMD mode.
> +       * E.g. a single physical 32B register contains 4, 1, or 0.5 logical
> +       * 32-bit components depending on whether we're building SIMD4x2, SIMD8
> +       * or SIMD16 code respectively.
> +       */
> +      fs_reg
> +      index(const fs_builder &bld, const array_reg &reg, unsigned i)
> +      {
> +         return offset(natural_reg(bld, reg), i);
> +      }
> +
> +      /**
> +       * "Flatten" a vector of \p size components into a simple array of
> +       * registers, getting rid of funky regioning modes.
> +       */
> +      array_reg
> +      emit_flatten(const fs_builder &bld, const fs_reg &src, unsigned size)
> +      {
> +         if (src.file == BAD_FILE || size == 0) {
> +            return array_reg();
> +
> +         } else {
> +            const array_reg dst =
> +               alloc_array_reg(bld, src.type, size * bld.dispatch_width() / 8);
> +
> +            for (unsigned c = 0; c < size; ++c)
> +               bld.MOV(index(bld, dst, c), offset(src, c));
> +
> +            return dst;
> +         }
> +      }
> +
> +      /**
> +       * Copy one every \p src_stride logical components of the argument into
> +       * one every \p dst_stride logical components of the result.
> +       */
> +      array_reg
> +      emit_stride(const fs_builder &bld, const array_reg &src, unsigned size,
> +                  unsigned dst_stride, unsigned src_stride)
> +      {
> +         if (src.file == BAD_FILE || size == 0) {
> +            return array_reg();
> +
> +         } else if (dst_stride == 1 && src_stride == 1) {
> +            return src;
> +
> +         } else {
> +            const array_reg dst = alloc_array_reg(
> +               bld, src.type,
> +               size * dst_stride * bld.dispatch_width() / 8);
> +
> +            for (unsigned i = 0; i < size; ++i)
> +               bld.MOV(index(bld, dst, i * dst_stride),
> +                       index(bld, src, i * src_stride));
> +
> +            return dst;
> +         }
> +      }
> +
> +      /**
> +       * Interleave logical components from the given arguments.  If two
> +       * arguments are provided \p size components will be copied from each to
> +       * the even and odd components of the result respectively.
> +       *
> +       * It may be safely used to merge the two halves of a value calculated
> +       * separately.
> +       */
> +      array_reg
> +      emit_zip(const fs_builder &bld,
> +               const array_reg &src0, const array_reg &src1,
> +               unsigned size)
> +      {
> +         const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
> +         const array_reg srcs[] = { src0, src1 };
> +         const array_reg dst = size * n == 0 ? array_reg() :
> +            alloc_array_reg(bld, src0.type,
> +                             size * n * bld.dispatch_width() / 8);
> +
> +         for (unsigned i = 0; i < size; ++i) {
> +            for (unsigned j = 0; j < n; ++j)
> +               exec_all(bld.MOV(index(bld, dst, j + i * n),
> +                                index(bld, srcs[j], i)));
> +         }
> +
> +         return dst;
> +      }
> +
> +      /**
> +       * Concatenate a number of register arrays passed in as arguments.
> +       */
> +      array_reg
> +      emit_collect(const fs_builder &bld,
> +                   const array_reg &src0 = array_reg(),
> +                   const array_reg &src1 = array_reg(),
> +                   const array_reg &src2 = array_reg(),
> +                   const array_reg &src3 = array_reg())

I promised you a comment on emit_collect and here it is.  I *really*
like the idea of having a helper that creates the payload register
array, fills it out, emits the LOAD_PAYLOAD and returns the
destination register.  I do, however, have two comments.  First is the
name.  Calling it emit_collect makes it sound like another "collect"
instruction is being emitted.  I'd like to see it have a different
name.  Maybe it would be better just as a second LOAD_PAYLOAD helper?

Second, I think it would work better if the first source were
explicitly a header source and the others were not.  If the header
source is a BAD_FILE, this would indicate that there is no header.
The other three sources would be regular non-header payload sources.
This would make it match much better with LOAD_PAYLOAD.  I know that
you and I disagreed on how LOAD_PAYLOAD works and this function
reflects that difference.  However, for the sake of consistency, I'd
like to see them match up better.  I don't know how that change would
interact with the rest of the code, but let's table that until we get
some of these other discussions out of the way.
--Jason

> +      {
> +         const array_reg srcs[] = { src0, src1, src2, src3 };
> +         const unsigned size = src0.size + src1.size + src2.size + src3.size;
> +         const array_reg dst = size == 0 ? array_reg() :
> +            alloc_array_reg(bld, BRW_REGISTER_TYPE_UD, size);
> +         fs_reg *const components = new fs_reg[size];
> +         unsigned n = 0;
> +
> +         for (unsigned i = 0; i < ARRAY_SIZE(srcs); ++i) {
> +            /* Split the array in m elements of maximal width. */
> +            const unsigned width =
> +               (srcs[i].size * 8 % bld.dispatch_width() == 0 ?
> +                bld.dispatch_width() : bld.dispatch_width() / 2);
> +            const unsigned m = srcs[i].size * 8 / width;
> +
> +            /* Get a builder of maximal width. */
> +            const fs_builder ubld =
> +               (width == bld.dispatch_width() ? bld : bld.half(0));
> +
> +            for (unsigned j = 0; j < m; ++j)
> +               components[n++] = retype(index(ubld, srcs[i], j),
> +                                        BRW_REGISTER_TYPE_UD);
> +         }
> +
> +         bld.LOAD_PAYLOAD(natural_reg(bld, dst), components, n);
> +
> +         delete[] components;
> +         return dst;
> +      }
> +   }
> +}
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h
> new file mode 100644
> index 0000000..ad79e76
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h
> @@ -0,0 +1,31 @@
> +/* -*- c++ -*- */
> +/*
> + * Copyright © 2013-2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#ifndef BRW_FS_SURFACE_BUILDER_H
> +#define BRW_FS_SURFACE_BUILDER_H
> +
> +#include "brw_fs_builder.h"
> +#include "brw_context.h"
> +
> +#endif
> --
> 2.3.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev