[Mesa-dev] [PATCH 07/25] i965/fs: Import array utils for the surface message builder.

Thu May 14 08:29:57 PDT 2015

Jason Ekstrand <jason at jlekstrand.net> writes:

> On Tue, May 5, 2015 at 2:48 PM, Francisco Jerez <currojerez at riseup.net> wrote:
>> Define a few transformations on register arrays which will be used
>> frequently during the construction of typed and untyped surface
>> message payloads.  Their purpose is simple but the implementation is
>> rather messy, so it makes a lot of sense to factor them out as
>> separate functions.
>>
>> v2: Drop VEC4 suport.
>> ---
>>  src/mesa/drivers/dri/i965/Makefile.sources         |   2 +
>>  .../drivers/dri/i965/brw_fs_surface_builder.cpp    | 218 +++++++++++++++++++++
>>  src/mesa/drivers/dri/i965/brw_fs_surface_builder.h |  31 +++
>>  3 files changed, 251 insertions(+)
>>  create mode 100644 src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>>  create mode 100644 src/mesa/drivers/dri/i965/brw_fs_surface_builder.h
>>
>> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
>> index 260e448..f299913 100644
>> --- a/src/mesa/drivers/dri/i965/Makefile.sources
>> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
>> @@ -59,6 +59,8 @@ i965_FILES = \
>>         brw_fs_register_coalesce.cpp \
>>         brw_fs_saturate_propagation.cpp \
>>         brw_fs_sel_peephole.cpp \
>> +       brw_fs_surface_builder.cpp \
>> +       brw_fs_surface_builder.h \
>>         brw_fs_vector_splitting.cpp \
>>         brw_fs_visitor.cpp \
>>         brw_gs.c \
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>> new file mode 100644
>> index 0000000..007d5f4
>> --- /dev/null
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
>> @@ -0,0 +1,218 @@
>> +/*
>> + * Copyright © 2013-2015 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#include "brw_fs_surface_builder.h"
>> +
>> +using namespace brw;
>> +
>> +namespace {
>> +   namespace array_utils {
>> +      /**
>> +       * A plain contiguous region of memory in your register file,
>> +       * with well-defined size and no fancy addressing modes,
>> +       * swizzling or striding.
>> +       */
>> +      struct array_reg : public backend_reg {
>> +         array_reg() : backend_reg(), size(0)
>> +         {
>> +         }
>> +
>> +         explicit
>> +         array_reg(const backend_reg &reg, unsigned size = 1) :
>> +            backend_reg(reg), size(size)
>> +         {
>> +         }
>> +
>> +         /** Size of the region in 32B registers. */
>> +         unsigned size;
>> +      };
>> +
>> +      /**
>> +       * Increase the register base offset by the specified amount
>> +       * given in 32B registers.
>> +       */
>> +      array_reg
>> +      offset(array_reg reg, unsigned delta)
>> +      {
>> +         assert(delta == 0 || (reg.file != HW_REG && reg.file != IMM));
>> +         reg.reg_offset += delta;
>> +         return reg;
>> +      }
>> +
>> +      /**
>> +       * Create a register of natural vector size and SIMD width
>> +       * using array \p reg as storage.
>> +       */
>> +      fs_reg
>> +      natural_reg(const fs_builder &bld, const array_reg &reg)
>> +      {
>> +         return fs_reg(reg, bld.dispatch_width());
>> +      }
>> +
>> +      /**
>> +       * Allocate a raw chunk of memory from the virtual GRF file
>> +       * with no special vector size or SIMD width.  \p n is given
>> +       * in units of 32B registers.
>> +       */
>> +      array_reg
>> +      alloc_array_reg(const fs_builder &bld, enum brw_reg_type type, unsigned n)
>> +      {
>> +         return array_reg(
>> +            bld.vgrf(type,
>> +                     DIV_ROUND_UP(n * REG_SIZE,
>> +                                  type_sz(type) * bld.dispatch_width())),
>> +            n);
>> +      }
>> +
>> +      /**
>> +       * Fetch the i-th logical component of an array of registers and return
>> +       * it as a natural-width register according to the current SIMD mode.
>> +       *
>> +       * Each logical component may be in fact a vector with a number of
>> +       * per-channel values depending on the dispatch width and SIMD mode.
>> +       * E.g. a single physical 32B register contains 4, 1, or 0.5 logical
>> +       * 32-bit components depending on whether we're building SIMD4x2, SIMD8
>> +       * or SIMD16 code respectively.
>> +       */
>> +      fs_reg
>> +      index(const fs_builder &bld, const array_reg &reg, unsigned i)
>> +      {
>> +         return offset(natural_reg(bld, reg), i);
>> +      }
>> +
>> +      /**
>> +       * "Flatten" a vector of \p size components into a simple array of
>> +       * registers, getting rid of funky regioning modes.
>> +       */
>> +      array_reg
>> +      emit_flatten(const fs_builder &bld, const fs_reg &src, unsigned size)
>> +      {
>> +         if (src.file == BAD_FILE || size == 0) {
>> +            return array_reg();
>> +
>> +         } else {
>> +            const array_reg dst =
>> +               alloc_array_reg(bld, src.type, size * bld.dispatch_width() / 8);
>> +
>> +            for (unsigned c = 0; c < size; ++c)
>> +               bld.MOV(index(bld, dst, c), offset(src, c));
>> +
>> +            return dst;
>> +         }
>> +      }
>> +
>> +      /**
>> +       * Copy one every \p src_stride logical components of the argument into
>> +       * one every \p dst_stride logical components of the result.
>> +       */
>> +      array_reg
>> +      emit_stride(const fs_builder &bld, const array_reg &src, unsigned size,
>> +                  unsigned dst_stride, unsigned src_stride)
>> +      {
>> +         if (src.file == BAD_FILE || size == 0) {
>> +            return array_reg();
>> +
>> +         } else if (dst_stride == 1 && src_stride == 1) {
>> +            return src;
>> +
>> +         } else {
>> +            const array_reg dst = alloc_array_reg(
>> +               bld, src.type,
>> +               size * dst_stride * bld.dispatch_width() / 8);
>> +
>> +            for (unsigned i = 0; i < size; ++i)
>> +               bld.MOV(index(bld, dst, i * dst_stride),
>> +                       index(bld, src, i * src_stride));
>> +
>> +            return dst;
>> +         }
>> +      }
>> +
>> +      /**
>> +       * Interleave logical components from the given arguments.  If two
>> +       * arguments are provided \p size components will be copied from each to
>> +       * the even and odd components of the result respectively.
>> +       *
>> +       * It may be safely used to merge the two halves of a value calculated
>> +       * separately.
>> +       */
>> +      array_reg
>> +      emit_zip(const fs_builder &bld,
>> +               const array_reg &src0, const array_reg &src1,
>> +               unsigned size)
>> +      {
>> +         const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
>> +         const array_reg srcs[] = { src0, src1 };
>> +         const array_reg dst = size * n == 0 ? array_reg() :
>> +            alloc_array_reg(bld, src0.type,
>> +                             size * n * bld.dispatch_width() / 8);
>> +
>> +         for (unsigned i = 0; i < size; ++i) {
>> +            for (unsigned j = 0; j < n; ++j)
>> +               exec_all(bld.MOV(index(bld, dst, j + i * n),
>> +                                index(bld, srcs[j], i)));
>> +         }
>> +
>> +         return dst;
>> +      }
>> +
>> +      /**
>> +       * Concatenate a number of register arrays passed in as arguments.
>> +       */
>> +      array_reg
>> +      emit_collect(const fs_builder &bld,
>> +                   const array_reg &src0 = array_reg(),
>> +                   const array_reg &src1 = array_reg(),
>> +                   const array_reg &src2 = array_reg(),
>> +                   const array_reg &src3 = array_reg())
>
> I promised you a comment on emit_collect and here it is.  I *really*
> like the idea of having a helper that creates the payload register
> array, fills it out, emits the LOAD_PAYLOAD and returns the
> destination register.

I'm glad I'm not the only one to feel this need. :)

> I do, however, have two comments.  First is the name.  Calling it
> emit_collect makes it sound like another "collect" instruction is
> being emitted.  I'd like to see it have a different name.  Maybe it
> would be better just as a second LOAD_PAYLOAD helper?
>
Maybe, I'm not sure.  Back when I wrote this code "collect" made a lot
of sense because the arguments were symmetric, the only thing you had to
be aware about the helper was that it concatenated the provided register
arrays after each other, the fact that it was implemented in terms of
LOAD_PAYLOAD was just an implementation detail.  After your LOAD_PAYLOAD
rework the first argument gained special semantics which really only
make sense if you're building an actual message payload, so you're right
that it's probably somewhat dishonest to pretend that the helper is
doing anything more general than loading a payload.  I'd be fine with
calling it LOAD_PAYLOAD, or we could call it emit_payload() to stress
that it does something slightly higher-level than emitting a
LOAD_PAYLOAD instruction with the sources provided as arguments, as is
usually the case with other all-caps helpers which typically match the
semantics of the underlying instruction very closely.  I don't know, I'm
somewhat undecided.

> Second, I think it would work better if the first source were
> explicitly a header source and the others were not.  If the header
> source is a BAD_FILE, this would indicate that there is no header.
> The other three sources would be regular non-header payload sources.
> This would make it match much better with LOAD_PAYLOAD.  I know that
> you and I disagreed on how LOAD_PAYLOAD works and this function
> reflects that difference.  However, for the sake of consistency, I'd
> like to see them match up better.  I don't know how that change would
> interact with the rest of the code, but let's table that until we get
> some of these other discussions out of the way.

Yeah, I completely agree, in fact that's what I did after you landed
your LOAD_PAYLOAD rework, as I pointed out on IRC there's a more
up-to-date branch based on master in my mesa tree:
http://cgit.freedesktop.org/~currojerez/mesa/log/?h=image-load-store-nir

> --Jason
>
>> +      {
>> +         const array_reg srcs[] = { src0, src1, src2, src3 };
>> +         const unsigned size = src0.size + src1.size + src2.size + src3.size;
>> +         const array_reg dst = size == 0 ? array_reg() :
>> +            alloc_array_reg(bld, BRW_REGISTER_TYPE_UD, size);
>> +         fs_reg *const components = new fs_reg[size];
>> +         unsigned n = 0;
>> +
>> +         for (unsigned i = 0; i < ARRAY_SIZE(srcs); ++i) {
>> +            /* Split the array in m elements of maximal width. */
>> +            const unsigned width =
>> +               (srcs[i].size * 8 % bld.dispatch_width() == 0 ?
>> +                bld.dispatch_width() : bld.dispatch_width() / 2);
>> +            const unsigned m = srcs[i].size * 8 / width;
>> +
>> +            /* Get a builder of maximal width. */
>> +            const fs_builder ubld =
>> +               (width == bld.dispatch_width() ? bld : bld.half(0));
>> +
>> +            for (unsigned j = 0; j < m; ++j)
>> +               components[n++] = retype(index(ubld, srcs[i], j),
>> +                                        BRW_REGISTER_TYPE_UD);
>> +         }
>> +
>> +         bld.LOAD_PAYLOAD(natural_reg(bld, dst), components, n);
>> +
>> +         delete[] components;
>> +         return dst;
>> +      }
>> +   }
>> +}
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h
>> new file mode 100644
>> index 0000000..ad79e76
>> --- /dev/null
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h
>> @@ -0,0 +1,31 @@
>> +/* -*- c++ -*- */
>> +/*
>> + * Copyright © 2013-2015 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#ifndef BRW_FS_SURFACE_BUILDER_H
>> +#define BRW_FS_SURFACE_BUILDER_H
>> +
>> +#include "brw_fs_builder.h"
>> +#include "brw_context.h"
>> +
>> +#endif
>> --
>> 2.3.5
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150514/55b4e28d/attachment.sig>