[Mesa-dev] [PATCH 07/25] i965/fs: Import array utils for the surface message builder.
Jason Ekstrand
jason at jlekstrand.net
Wed May 13 12:07:35 PDT 2015
On Tue, May 5, 2015 at 2:48 PM, Francisco Jerez <currojerez at riseup.net> wrote:
> Define a few transformations on register arrays which will be used
> frequently during the construction of typed and untyped surface
> message payloads. Their purpose is simple but the implementation is
> rather messy, so it makes a lot of sense to factor them out as
> separate functions.
>
> v2: Drop VEC4 suport.
> ---
> src/mesa/drivers/dri/i965/Makefile.sources | 2 +
> .../drivers/dri/i965/brw_fs_surface_builder.cpp | 218 +++++++++++++++++++++
> src/mesa/drivers/dri/i965/brw_fs_surface_builder.h | 31 +++
> 3 files changed, 251 insertions(+)
> create mode 100644 src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> create mode 100644 src/mesa/drivers/dri/i965/brw_fs_surface_builder.h
>
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
> index 260e448..f299913 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -59,6 +59,8 @@ i965_FILES = \
> brw_fs_register_coalesce.cpp \
> brw_fs_saturate_propagation.cpp \
> brw_fs_sel_peephole.cpp \
> + brw_fs_surface_builder.cpp \
> + brw_fs_surface_builder.h \
> brw_fs_vector_splitting.cpp \
> brw_fs_visitor.cpp \
> brw_gs.c \
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> new file mode 100644
> index 0000000..007d5f4
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
> @@ -0,0 +1,218 @@
> +/*
> + * Copyright © 2013-2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "brw_fs_surface_builder.h"
> +
> +using namespace brw;
> +
> +namespace {
> + namespace array_utils {
> + /**
> + * A plain contiguous region of memory in your register file,
> + * with well-defined size and no fancy addressing modes,
> + * swizzling or striding.
> + */
> + struct array_reg : public backend_reg {
> + array_reg() : backend_reg(), size(0)
> + {
> + }
> +
> + explicit
> + array_reg(const backend_reg ®, unsigned size = 1) :
> + backend_reg(reg), size(size)
> + {
> + }
> +
> + /** Size of the region in 32B registers. */
> + unsigned size;
> + };
> +
> + /**
> + * Increase the register base offset by the specified amount
> + * given in 32B registers.
> + */
> + array_reg
> + offset(array_reg reg, unsigned delta)
> + {
> + assert(delta == 0 || (reg.file != HW_REG && reg.file != IMM));
> + reg.reg_offset += delta;
> + return reg;
> + }
> +
> + /**
> + * Create a register of natural vector size and SIMD width
> + * using array \p reg as storage.
> + */
> + fs_reg
> + natural_reg(const fs_builder &bld, const array_reg ®)
> + {
> + return fs_reg(reg, bld.dispatch_width());
> + }
> +
> + /**
> + * Allocate a raw chunk of memory from the virtual GRF file
> + * with no special vector size or SIMD width. \p n is given
> + * in units of 32B registers.
> + */
> + array_reg
> + alloc_array_reg(const fs_builder &bld, enum brw_reg_type type, unsigned n)
> + {
> + return array_reg(
> + bld.vgrf(type,
> + DIV_ROUND_UP(n * REG_SIZE,
> + type_sz(type) * bld.dispatch_width())),
> + n);
> + }
> +
> + /**
> + * Fetch the i-th logical component of an array of registers and return
> + * it as a natural-width register according to the current SIMD mode.
> + *
> + * Each logical component may be in fact a vector with a number of
> + * per-channel values depending on the dispatch width and SIMD mode.
> + * E.g. a single physical 32B register contains 4, 1, or 0.5 logical
> + * 32-bit components depending on whether we're building SIMD4x2, SIMD8
> + * or SIMD16 code respectively.
> + */
> + fs_reg
> + index(const fs_builder &bld, const array_reg ®, unsigned i)
> + {
> + return offset(natural_reg(bld, reg), i);
> + }
> +
> + /**
> + * "Flatten" a vector of \p size components into a simple array of
> + * registers, getting rid of funky regioning modes.
> + */
> + array_reg
> + emit_flatten(const fs_builder &bld, const fs_reg &src, unsigned size)
> + {
> + if (src.file == BAD_FILE || size == 0) {
> + return array_reg();
> +
> + } else {
> + const array_reg dst =
> + alloc_array_reg(bld, src.type, size * bld.dispatch_width() / 8);
> +
> + for (unsigned c = 0; c < size; ++c)
> + bld.MOV(index(bld, dst, c), offset(src, c));
> +
> + return dst;
> + }
> + }
> +
> + /**
> + * Copy one every \p src_stride logical components of the argument into
> + * one every \p dst_stride logical components of the result.
> + */
> + array_reg
> + emit_stride(const fs_builder &bld, const array_reg &src, unsigned size,
> + unsigned dst_stride, unsigned src_stride)
> + {
> + if (src.file == BAD_FILE || size == 0) {
> + return array_reg();
> +
> + } else if (dst_stride == 1 && src_stride == 1) {
> + return src;
> +
> + } else {
> + const array_reg dst = alloc_array_reg(
> + bld, src.type,
> + size * dst_stride * bld.dispatch_width() / 8);
> +
> + for (unsigned i = 0; i < size; ++i)
> + bld.MOV(index(bld, dst, i * dst_stride),
> + index(bld, src, i * src_stride));
> +
> + return dst;
> + }
> + }
> +
> + /**
> + * Interleave logical components from the given arguments. If two
> + * arguments are provided \p size components will be copied from each to
> + * the even and odd components of the result respectively.
> + *
> + * It may be safely used to merge the two halves of a value calculated
> + * separately.
> + */
> + array_reg
> + emit_zip(const fs_builder &bld,
> + const array_reg &src0, const array_reg &src1,
> + unsigned size)
> + {
> + const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
> + const array_reg srcs[] = { src0, src1 };
> + const array_reg dst = size * n == 0 ? array_reg() :
> + alloc_array_reg(bld, src0.type,
> + size * n * bld.dispatch_width() / 8);
> +
> + for (unsigned i = 0; i < size; ++i) {
> + for (unsigned j = 0; j < n; ++j)
> + exec_all(bld.MOV(index(bld, dst, j + i * n),
> + index(bld, srcs[j], i)));
> + }
> +
> + return dst;
> + }
> +
> + /**
> + * Concatenate a number of register arrays passed in as arguments.
> + */
> + array_reg
> + emit_collect(const fs_builder &bld,
> + const array_reg &src0 = array_reg(),
> + const array_reg &src1 = array_reg(),
> + const array_reg &src2 = array_reg(),
> + const array_reg &src3 = array_reg())
I promised you a comment on emit_collect and here it is. I *really*
like the idea of having a helper that creates the payload register
array, fills it out, emits the LOAD_PAYLOAD and returns the
destination register. I do, however, have two comments. First is the
name. Calling it emit_collect makes it sound like another "collect"
instruction is being emitted. I'd like to see it have a different
name. Maybe it would be better just as a second LOAD_PAYLOAD helper?
Second, I think it would work better if the first source were
explicitly a header source and the others were not. If the header
source is a BAD_FILE, this would indicate that there is no header.
The other three sources would be regular non-header payload sources.
This would make it match much better with LOAD_PAYLOAD. I know that
you and I disagreed on how LOAD_PAYLOAD works and this function
reflects that difference. However, for the sake of consistency, I'd
like to see them match up better. I don't know how that change would
interact with the rest of the code, but let's table that until we get
some of these other discussions out of the way.
--Jason
> + {
> + const array_reg srcs[] = { src0, src1, src2, src3 };
> + const unsigned size = src0.size + src1.size + src2.size + src3.size;
> + const array_reg dst = size == 0 ? array_reg() :
> + alloc_array_reg(bld, BRW_REGISTER_TYPE_UD, size);
> + fs_reg *const components = new fs_reg[size];
> + unsigned n = 0;
> +
> + for (unsigned i = 0; i < ARRAY_SIZE(srcs); ++i) {
> + /* Split the array in m elements of maximal width. */
> + const unsigned width =
> + (srcs[i].size * 8 % bld.dispatch_width() == 0 ?
> + bld.dispatch_width() : bld.dispatch_width() / 2);
> + const unsigned m = srcs[i].size * 8 / width;
> +
> + /* Get a builder of maximal width. */
> + const fs_builder ubld =
> + (width == bld.dispatch_width() ? bld : bld.half(0));
> +
> + for (unsigned j = 0; j < m; ++j)
> + components[n++] = retype(index(ubld, srcs[i], j),
> + BRW_REGISTER_TYPE_UD);
> + }
> +
> + bld.LOAD_PAYLOAD(natural_reg(bld, dst), components, n);
> +
> + delete[] components;
> + return dst;
> + }
> + }
> +}
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h
> new file mode 100644
> index 0000000..ad79e76
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.h
> @@ -0,0 +1,31 @@
> +/* -*- c++ -*- */
> +/*
> + * Copyright © 2013-2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#ifndef BRW_FS_SURFACE_BUILDER_H
> +#define BRW_FS_SURFACE_BUILDER_H
> +
> +#include "brw_fs_builder.h"
> +#include "brw_context.h"
> +
> +#endif
> --
> 2.3.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list