[Mesa-dev] [PATCH 5/9] ir3/nir: Add a new pass 'ir3_nir_lower_io_offsets'

Rob Clark robdclark at gmail.com
Mon Feb 25 17:54:30 UTC 2019


On Wed, Feb 13, 2019 at 4:30 PM Eduardo Lima Mitev <elima at igalia.com> wrote:
>
> This pass moves some offset computations that are currently
> implemented in the IR3 backend compiler into NIR, so that NIR gets a
> chance to optimize them.
>
> For now, it only supports lowering byte-offset computation for image
> store and atomics.
> ---
>  src/freedreno/Makefile.sources               |   1 +
>  src/freedreno/ir3/ir3_nir.h                  |   1 +
>  src/freedreno/ir3/ir3_nir_lower_io_offsets.c | 334 +++++++++++++++++++
>  3 files changed, 336 insertions(+)
>  create mode 100644 src/freedreno/ir3/ir3_nir_lower_io_offsets.c
>
> diff --git a/src/freedreno/Makefile.sources b/src/freedreno/Makefile.sources
> index 7fea9de39ef..235fec1c4f2 100644
> --- a/src/freedreno/Makefile.sources
> +++ b/src/freedreno/Makefile.sources
> @@ -31,6 +31,7 @@ ir3_SOURCES := \
>         ir3/ir3_legalize.c \
>         ir3/ir3_nir.c \
>         ir3/ir3_nir.h \
> +       ir3/ir3_nir_lower_io_offsets.c \
>         ir3/ir3_nir_lower_tg4_to_tex.c \
>         ir3/ir3_print.c \
>         ir3/ir3_ra.c \
> diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
> index 74201d34160..7983b74af2c 100644
> --- a/src/freedreno/ir3/ir3_nir.h
> +++ b/src/freedreno/ir3/ir3_nir.h
> @@ -36,6 +36,7 @@ void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layo
>
>  bool ir3_nir_apply_trig_workarounds(nir_shader *shader);
>  bool ir3_nir_lower_tg4_to_tex(nir_shader *shader);
> +bool ir3_nir_lower_io_offsets(nir_shader *shader);
>
>  const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
>  bool ir3_key_lowers_nir(const struct ir3_shader_key *key);
> diff --git a/src/freedreno/ir3/ir3_nir_lower_io_offsets.c b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
> new file mode 100644
> index 00000000000..a43b3895fd8
> --- /dev/null
> +++ b/src/freedreno/ir3/ir3_nir_lower_io_offsets.c
> @@ -0,0 +1,334 @@
> +/*
> + * Copyright © 2018-2019 Igalia S.L.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "ir3_nir.h"
> +#include "compiler/nir/nir_builder.h"
> +
> +/**
> + * This pass moves certain offset computations for different I/O ops
> + * from the IR3 backend compiler into NIR, to give NIR a chance to
> + * optimize them:
> + *
> + * - Byte-offset for image store and atomics: Emit instructions to
> + *   compute (x * bpp) + (y * y_stride) + (z * z_stride), and place the
> + *   resulting SSA value in the 4th component of the vec4 instruction
> + *   that defines the offset.
> + */
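
To put numbers on that formula: for a GL_RGBA8 2D image (4 bytes per
texel, per the table below), a store to coordinate (5, 3) computes
5*4 + 3*y_stride, where y_stride comes from the image_dims driver
params.
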
> +
> +
> +static bool
> +intrinsic_is_image_atomic(unsigned intrinsic)
> +{
> +       switch (intrinsic) {
> +       case nir_intrinsic_image_deref_atomic_add:
> +       case nir_intrinsic_image_deref_atomic_min:
> +       case nir_intrinsic_image_deref_atomic_max:
> +       case nir_intrinsic_image_deref_atomic_and:
> +       case nir_intrinsic_image_deref_atomic_or:
> +       case nir_intrinsic_image_deref_atomic_xor:
> +       case nir_intrinsic_image_deref_atomic_exchange:
> +       case nir_intrinsic_image_deref_atomic_comp_swap:
> +               return true;
> +       default:
> +               break;
> +       }
> +
> +       return false;
> +}
> +
> +static bool
> +intrinsic_is_image_store_or_atomic(unsigned intrinsic)
> +{
> +       if (intrinsic == nir_intrinsic_image_deref_store)
> +               return true;
> +       else
> +               return intrinsic_is_image_atomic(intrinsic);
> +}
> +
> +/*
> + * FIXME: shamelessly copied from ir3_compiler_nir until it gets factored
> + * out at some point.

Sorry, I'd overlooked this patchset.. blame gitlab MRs

Anyways, the good news is these helpers were refactored out into
ir3_image.c.. the bad news is rebasing this series will be a bit
conflicty since I broke out the image/ssbo intrinsics into
ir3_a6xx/ir3_a4xx.  (Since they are different depending on the
generation.. but offhand I think we can use the same nir->nir lowering
and just ignore one of the nir_src's on a6xx.)

BR,
-R

> + */
> +static unsigned
> +get_image_coords(const nir_variable *var)
> +{
> +       const struct glsl_type *type = glsl_without_array(var->type);
> +       unsigned coords;
> +
> +       switch (glsl_get_sampler_dim(type)) {
> +       case GLSL_SAMPLER_DIM_1D:
> +       case GLSL_SAMPLER_DIM_BUF:
> +               coords = 1;
> +               break;
> +       case GLSL_SAMPLER_DIM_2D:
> +       case GLSL_SAMPLER_DIM_RECT:
> +       case GLSL_SAMPLER_DIM_EXTERNAL:
> +       case GLSL_SAMPLER_DIM_MS:
> +               coords = 2;
> +               break;
> +       case GLSL_SAMPLER_DIM_3D:
> +       case GLSL_SAMPLER_DIM_CUBE:
> +               coords = 3;
> +               break;
> +       default:
> +               unreachable("bad sampler dim");
> +               return 0;
> +       }
> +
> +       if (glsl_sampler_type_is_array(type)) {
> +               /* adjust # of coords to include array idx: */
> +               coords++;
> +       }
> +
> +       return coords;
> +}
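
(So e.g. a 2D-array image yields 2 + 1 = 3 coords: x, y, plus the
array index.)
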
> +
> +/* Returns the bytes-per-pixel for the different GL formats corresponding to
> + * all supported image formats.
> + */
> +static unsigned
> +bytes_per_pixel_for_gl_format(GLuint format)
> +{
> +       switch (format) {
> +       case GL_R8I:
> +       case GL_R8UI:
> +       case GL_R8:
> +       case GL_R8_SNORM:
> +               return 1;
> +
> +       case GL_R16F:
> +       case GL_R16I:
> +       case GL_R16UI:
> +       case GL_R16:
> +       case GL_R16_SNORM:
> +       case GL_RG8I:
> +       case GL_RG8UI:
> +       case GL_RG8:
> +       case GL_RG8_SNORM:
> +               return 2;
> +
> +       case GL_R32F:
> +       case GL_R32I:
> +       case GL_R32UI:
> +       case GL_RG16F:
> +       case GL_RG16I:
> +       case GL_RG16UI:
> +       case GL_RG16:
> +       case GL_RG16_SNORM:
> +       case GL_RGBA8I:
> +       case GL_RGBA8UI:
> +       case GL_RGBA8:
> +       case GL_RGBA8_SNORM:
> +       case GL_RGB10_A2UI:
> +       case GL_RGB10_A2:
> +       case GL_R11F_G11F_B10F:
> +               return 4;
> +
> +       case GL_RG32F:
> +       case GL_RG32I:
> +       case GL_RG32UI:
> +       case GL_RGBA16F:
> +       case GL_RGBA16I:
> +       case GL_RGBA16UI:
> +       case GL_RGBA16:
> +       case GL_RGBA16_SNORM:
> +               return 8;
> +
> +       case GL_RGBA32F:
> +       case GL_RGBA32I:
> +       case GL_RGBA32UI:
> +               return 16;
> +
> +       default:
> +               debug_assert(!"Unhandled GL format");
> +       }
> +
> +       return 0;
> +}
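
(The sizes follow directly from the formats: e.g. GL_RGBA16F is four
16-bit channels, hence 8 bytes per texel.)
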
> +
> +static nir_ssa_def *
> +insert_load_image_param(nir_builder *b, nir_ssa_def *image_deref,
> +                        unsigned dimension)
> +{
> +       nir_intrinsic_instr *load =
> +               nir_intrinsic_instr_create(b->shader,
> +                                          nir_intrinsic_image_deref_load_param_ir3);
> +       load->num_components = 1;
> +       load->const_index[0] = dimension;
> +       load->src[0] = nir_src_for_ssa(image_deref);
> +       nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
> +
> +       nir_builder_instr_insert(b, &load->instr);
> +
> +       return &load->dest.ssa;
> +}
> +
> +static bool
> +lower_offset_for_image_store_or_atomic(nir_intrinsic_instr *intrinsic,
> +                                       const nir_variable *var, nir_builder *b,
> +                                       void *mem_ctx)
> +{
> +       nir_ssa_def *image_deref;
> +
> +       /* Find the instruction that defines the coord source of the image
> +        * store/atomic intrinsic. It must be a "vec4" ALU instruction.
> +        */
> +       debug_assert(intrinsic->src[1].is_ssa);
> +       nir_ssa_def *offset_src_def = intrinsic->src[1].ssa;
> +
> +       nir_instr *offset_parent_instr = offset_src_def->parent_instr;
> +       debug_assert(offset_parent_instr->type == nir_instr_type_alu);
> +
> +       nir_alu_instr *vec4_instr = nir_instr_as_alu(offset_parent_instr);
> +       debug_assert(vec4_instr->op == nir_op_vec4);
> +
> +       unsigned coords = get_image_coords(var);
> +
> +       b->cursor = nir_before_instr(&vec4_instr->instr);
> +
> +       /* These are actually offsets into the image_dims register file
> +        * (for a given image).
> +        */
> +       enum {
> +               BYTES_PER_PIXEL = 0,
> +               Y_STRIDE        = 1,
> +               Z_STRIDE        = 2
> +       };
> +
> +       /* x_offset = coord.x * bytes_per_pixel */
> +       nir_ssa_def *x_coord = vec4_instr->src[0].src.ssa;
> +       unsigned bpp = bytes_per_pixel_for_gl_format(var->data.image.format);
> +       nir_ssa_def *offset = nir_imul_imm(b, x_coord, bpp);
> +       debug_assert(offset);
> +       nir_alu_instr *imul = nir_instr_as_alu(offset->parent_instr);
> +       debug_assert(imul);
> +       imul->src[0].swizzle[0] = vec4_instr->src[0].swizzle[0];
> +
> +       /* For the Y and Z dimensions, we emit a temporary
> +        * image_deref_load_param intrinsic, to be consumed by
> +        * ir3_compiler_nir::emit_intrinsic(), which will just emit a uniform
> +        * with the right value from image_dims[].
> +        */
> +
> +       if (coords > 1) {
> +               /* src0 of the intrinsic is the dereferenced image. */
> +               debug_assert(intrinsic->src[0].is_ssa);
> +               image_deref = intrinsic->src[0].ssa;
> +
> +               nir_ssa_def *y_coord = vec4_instr->src[1].src.ssa;
> +               nir_ssa_def *y_stride =
> +                       insert_load_image_param(b, image_deref, Y_STRIDE);
> +
> +               /* y_offset = coord.y * y_stride + x_offset */
> +               offset = nir_imad24_ir3(b, y_stride, y_coord, offset);
> +               debug_assert(offset);
> +               nir_alu_instr *imad = nir_instr_as_alu(offset->parent_instr);
> +               debug_assert(imad);
> +               imad->src[1].swizzle[0] = vec4_instr->src[1].swizzle[0];
> +
> +               if (coords > 2) {
> +                       nir_ssa_def *z_coord = vec4_instr->src[2].src.ssa;
> +                       nir_ssa_def *z_stride =
> +                               insert_load_image_param(b, image_deref, Z_STRIDE);
> +
> +                       /* z_offset = coord.z * z_stride + y_offset */
> +                       offset = nir_imad24_ir3(b, z_stride, z_coord, offset);
> +                       debug_assert(offset);
> +                       nir_alu_instr *imad = nir_instr_as_alu(offset->parent_instr);
> +                       debug_assert(imad);
> +                       imad->src[1].swizzle[0] = vec4_instr->src[2].swizzle[0];
> +               }
> +       }
> +
> +       if (intrinsic_is_image_atomic(intrinsic->intrinsic)) {
> +               /* Some cases, like atomics, seem to use a dword offset
> +                * instead of a byte offset (i.e. the byte offset shifted
> +                * right by 2); the blob just puts an extra shr.b in there
> +                * in those cases:
> +                */
> +               nir_ssa_def *two = nir_imm_int(b, 2);
> +               offset = nir_ushr(b, offset, two);
> +       }
> +
> +       /* Finally, store the computed offset in the 4th component of the
> +        * vec4 instruction, which is the only one guaranteed not to be used.
> +        * We don't want to overwrite the original coordinate because it is
> +        * still needed in the backend.
> +        */
> +       nir_instr_rewrite_src(&vec4_instr->instr, &vec4_instr->src[3].src,
> +                             nir_src_for_ssa(offset));
> +       return true;
> +}
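
To make the transform concrete, here is a hand-written sketch of what
this does to the coord vec4 of a store to a 2D image with 4-byte
texels (not actual nir_print output; the ssa names, the undef ssa_u
and the elided intrinsic operands are made up for illustration):

  before:
    vec4 32 ssa_4 = vec4 ssa_1, ssa_2, ssa_u, ssa_u  /* x, y, undef, undef */
    intrinsic image_deref_store (ssa_0, ssa_4, ...) ()

  after:
    vec1 32 ssa_5 = load_const (0x00000004 /* bpp */)
    vec1 32 ssa_6 = imul ssa_1, ssa_5                /* x * bpp */
    vec1 32 ssa_7 = intrinsic image_deref_load_param_ir3 (ssa_0) (1 /* Y_STRIDE */)
    vec1 32 ssa_8 = imad24_ir3 ssa_7, ssa_2, ssa_6   /* y*y_stride + x*bpp */
    vec4 32 ssa_4 = vec4 ssa_1, ssa_2, ssa_u, ssa_8
    intrinsic image_deref_store (ssa_0, ssa_4, ...) ()

Note the vec4 itself is rewritten in place (only its 4th source
changes), so everything that consumed ssa_4 still does.
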
> +
> +static bool
> +lower_io_offsets_block(nir_block *block, nir_builder *b, void *mem_ctx)
> +{
> +       bool progress = false;
> +
> +       nir_foreach_instr_safe(instr, block) {
> +               if (instr->type != nir_instr_type_intrinsic)
> +                       continue;
> +
> +               nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
> +               if (!intrinsic_is_image_store_or_atomic(intr->intrinsic))
> +                       continue;
> +
> +               const nir_variable *var = nir_intrinsic_get_var(intr, 0);
> +               progress |= lower_offset_for_image_store_or_atomic(intr, var, b,
> +                                                                  mem_ctx);
> +       }
> +
> +       return progress;
> +}
> +
> +static bool
> +lower_io_offsets_func(nir_function_impl *impl)
> +{
> +       void *mem_ctx = ralloc_parent(impl);
> +       nir_builder b;
> +       nir_builder_init(&b, impl);
> +
> +       bool progress = false;
> +       nir_foreach_block_safe(block, impl) {
> +               progress |= lower_io_offsets_block(block, &b, mem_ctx);
> +       }
> +
> +       if (progress) {
> +               nir_metadata_preserve(impl, nir_metadata_block_index |
> +                                           nir_metadata_dominance);
> +       }
> +
> +       return progress;
> +}
> +
> +bool
> +ir3_nir_lower_io_offsets(nir_shader *shader)
> +{
> +       bool progress = false;
> +
> +       nir_foreach_function(function, shader) {
> +               if (function->impl)
> +                       progress |= lower_io_offsets_func(function->impl);
> +       }
> +
> +       return progress;
> +}
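
Note that this patch only adds the pass; presumably a later patch in
the series actually calls it from ir3_optimize_nir(), something along
the lines of (the exact call site is assumed here, it is not part of
this patch):

    NIR_PASS_V(s, ir3_nir_lower_io_offsets);

And since the pass returns a progress flag, it could instead go
through the OPT() helper in ir3_nir.c so a following optimization
loop gets a chance to clean up the newly emitted ALU ops.
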
> --
> 2.20.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

