[Mesa-dev] [PATCH 7/8] nir: Add alignment parameters to SSBO, UBO, and shared access

Karol Herbst kherbst at redhat.com
Wed Nov 14 20:54:35 UTC 2018


I like the general idea; we just shouldn't rely too much on the type
size later on, especially in regard to CL, where we can have unaligned
loads/stores, particularly for packed structs.

Acked-by: Karol Herbst <kherbst at redhat.com>
On Wed, Nov 14, 2018 at 12:24 AM Jason Ekstrand <jason at jlekstrand.net> wrote:
>
> This also changes spirv_to_nir and glsl_to_nir to set them.  The one
> place that doesn't set them is shared memory access lowering in
> nir_lower_io.  That will have to be updated before any consumers of it
> can effectively use these new alignments.
> ---
>  src/compiler/glsl/glsl_to_nir.cpp            | 14 +++++++
>  src/compiler/nir/nir.h                       | 41 ++++++++++++++++++++
>  src/compiler/nir/nir_intrinsics.py           | 26 ++++++++-----
>  src/compiler/nir/nir_lower_atomics_to_ssbo.c |  4 ++
>  src/compiler/nir/nir_print.c                 |  2 +
>  src/compiler/spirv/spirv_to_nir.c            |  2 +
>  src/compiler/spirv/vtn_variables.c           |  6 +++
>  7 files changed, 85 insertions(+), 10 deletions(-)
>
> diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
> index 9bb0f5d4044..9f73b721e39 100644
> --- a/src/compiler/glsl/glsl_to_nir.cpp
> +++ b/src/compiler/glsl/glsl_to_nir.cpp
> @@ -33,6 +33,7 @@
>  #include "compiler/nir/nir_builder.h"
>  #include "main/imports.h"
>  #include "main/mtypes.h"
> +#include "util/u_math.h"
>
>  /*
>   * pass to lower GLSL IR to NIR
> @@ -603,6 +604,14 @@ nir_visitor::visit(ir_return *ir)
>     nir_builder_instr_insert(&b, &instr->instr);
>  }
>
> +static void
> +intrinsic_set_std430_align(nir_intrinsic_instr *intrin, const glsl_type *type)
> +{
> +   unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type);
> +   unsigned pow2_components = util_next_power_of_two(type->vector_elements);
> +   nir_intrinsic_set_align(intrin, (bit_size / 8) * pow2_components, 0);
> +}
> +
>  void
>  nir_visitor::visit(ir_call *ir)
>  {
> @@ -1006,6 +1015,7 @@ nir_visitor::visit(ir_call *ir)
>           instr->src[0] = nir_src_for_ssa(nir_val);
>           instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
>           instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
> +         intrinsic_set_std430_align(instr, val->type);
>           nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
>           instr->num_components = val->type->vector_elements;
>
> @@ -1024,6 +1034,7 @@ nir_visitor::visit(ir_call *ir)
>
>           const glsl_type *type = ir->return_deref->var->type;
>           instr->num_components = type->vector_elements;
> +         intrinsic_set_std430_align(instr, type);
>
>           /* Setup destination register */
>           unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type);
> @@ -1101,6 +1112,7 @@ nir_visitor::visit(ir_call *ir)
>
>           const glsl_type *type = ir->return_deref->var->type;
>           instr->num_components = type->vector_elements;
> +         intrinsic_set_std430_align(instr, type);
>
>           /* Setup destination register */
>           unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type);
> @@ -1131,6 +1143,7 @@ nir_visitor::visit(ir_call *ir)
>
>           instr->src[0] = nir_src_for_ssa(nir_val);
>           instr->num_components = val->type->vector_elements;
> +         intrinsic_set_std430_align(instr, val->type);
>
>           nir_builder_instr_insert(&b, &instr->instr);
>           break;
> @@ -1388,6 +1401,7 @@ nir_visitor::visit(ir_expression *ir)
>        load->num_components = ir->type->vector_elements;
>        load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
>        load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
> +      intrinsic_set_std430_align(load, ir->type);
>        add_instr(&load->instr, ir->type->vector_elements, bit_size);
>
>        /*
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index c469e111b2c..41d61dc8105 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -34,6 +34,7 @@
>  #include "util/list.h"
>  #include "util/ralloc.h"
>  #include "util/set.h"
> +#include "util/bitscan.h"
>  #include "util/bitset.h"
>  #include "util/macros.h"
>  #include "compiler/nir_types.h"
> @@ -1248,6 +1249,18 @@ typedef enum {
>      */
>     NIR_INTRINSIC_ACCESS = 16,
>
> +   /**
> +    * Alignment for offsets and addresses
> +    *
> +    * These two parameters specify an alignment in terms of a multiplier and
> +    * an offset.  The offset or address parameter X of the intrinsic is
> +    * guaranteed to satisfy the following:
> +    *
> +    *                (X - align_offset) % align_mul == 0
> +    */
> +   NIR_INTRINSIC_ALIGN_MUL = 17,
> +   NIR_INTRINSIC_ALIGN_OFFSET = 18,
> +
>     NIR_INTRINSIC_NUM_INDEX_FLAGS,
>
>  } nir_intrinsic_index_flag;
> @@ -1342,6 +1355,34 @@ INTRINSIC_IDX_ACCESSORS(image_dim, IMAGE_DIM, enum glsl_sampler_dim)
>  INTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool)
>  INTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier)
>  INTRINSIC_IDX_ACCESSORS(format, FORMAT, unsigned)
> +INTRINSIC_IDX_ACCESSORS(align_mul, ALIGN_MUL, unsigned)
> +INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned)
> +
> +static inline void
> +nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
> +                        unsigned align_mul, unsigned align_offset)
> +{
> +   assert(util_is_power_of_two_nonzero(align_mul));
> +   assert(align_offset < align_mul);
> +   nir_intrinsic_set_align_mul(intrin, align_mul);
> +   nir_intrinsic_set_align_offset(intrin, align_offset);
> +}
> +
> +/** Returns a simple alignment for a load/store intrinsic offset
> + *
> + * Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL
> + * and ALIGN_OFFSET parameters, this helper takes both into account and
> + * provides a single simple alignment parameter.  The offset X is guaranteed
> + * to satisfy X % align == 0.
> + */
> +static inline unsigned
> +nir_intrinsic_align(nir_intrinsic_instr *intrin)
> +{
> +   const unsigned align_mul = nir_intrinsic_align_mul(intrin);
> +   const unsigned align_offset = nir_intrinsic_align_offset(intrin);
> +   assert(align_offset < align_mul);
> +   return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
> +}
>
>  /**
>   * \group texture information
> diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
> index ec3049ca06d..735db43d16a 100644
> --- a/src/compiler/nir/nir_intrinsics.py
> +++ b/src/compiler/nir/nir_intrinsics.py
> @@ -109,6 +109,9 @@ IMAGE_ARRAY = "NIR_INTRINSIC_IMAGE_ARRAY"
>  ACCESS = "NIR_INTRINSIC_ACCESS"
>  # Image format for image intrinsics
>  FORMAT = "NIR_INTRINSIC_FORMAT"
> +# Offset or address alignment
> +ALIGN_MUL = "NIR_INTRINSIC_ALIGN_MUL"
> +ALIGN_OFFSET = "NIR_INTRINSIC_ALIGN_OFFSET"
>
>  #
>  # Possible flags:
> @@ -554,8 +557,8 @@ def load(name, num_srcs, indices=[], flags=[]):
>
>  # src[] = { offset }. const_index[] = { base, range }
>  load("uniform", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
> -# src[] = { buffer_index, offset }. No const_index
> -load("ubo", 2, flags=[CAN_ELIMINATE, CAN_REORDER])
> +# src[] = { buffer_index, offset }. const_index[] = { align_mul, align_offset }
> +load("ubo", 2, [ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE, CAN_REORDER])
>  # src[] = { offset }. const_index[] = { base, component }
>  load("input", 1, [BASE, COMPONENT], [CAN_ELIMINATE, CAN_REORDER])
>  # src[] = { vertex, offset }. const_index[] = { base, component }
> @@ -564,14 +567,15 @@ load("per_vertex_input", 2, [BASE, COMPONENT], [CAN_ELIMINATE, CAN_REORDER])
>  intrinsic("load_interpolated_input", src_comp=[2, 1], dest_comp=0,
>            indices=[BASE, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER])
>
> -# src[] = { buffer_index, offset }. No const_index
> -load("ssbo", 2, flags=[CAN_ELIMINATE], indices=[ACCESS])
> +# src[] = { buffer_index, offset }.
> +# const_index[] = { access, align_mul, align_offset }
> +load("ssbo", 2, [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
>  # src[] = { offset }. const_index[] = { base, component }
>  load("output", 1, [BASE, COMPONENT], flags=[CAN_ELIMINATE])
>  # src[] = { vertex, offset }. const_index[] = { base }
>  load("per_vertex_output", 2, [BASE, COMPONENT], [CAN_ELIMINATE])
> -# src[] = { offset }. const_index[] = { base }
> -load("shared", 1, [BASE], [CAN_ELIMINATE])
> +# src[] = { offset }. const_index[] = { base, align_mul, align_offset }
> +load("shared", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
>  # src[] = { offset }. const_index[] = { base, range }
>  load("push_constant", 1, [BASE, RANGE], [CAN_ELIMINATE, CAN_REORDER])
>  # src[] = { offset }. const_index[] = { base, range }
> @@ -590,7 +594,9 @@ store("output", 2, [BASE, WRMASK, COMPONENT])
>  # src[] = { value, vertex, offset }.
>  # const_index[] = { base, write_mask, component }
>  store("per_vertex_output", 3, [BASE, WRMASK, COMPONENT])
> -# src[] = { value, block_index, offset }. const_index[] = { write_mask }
> -store("ssbo", 3, [WRMASK, ACCESS])
> -# src[] = { value, offset }. const_index[] = { base, write_mask }
> -store("shared", 2, [BASE, WRMASK])
> +# src[] = { value, block_index, offset }
> +# const_index[] = { write_mask, access, align_mul, align_offset }
> +store("ssbo", 3, [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
> +# src[] = { value, offset }.
> +# const_index[] = { base, write_mask, align_mul, align_offset }
> +store("shared", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET])
> diff --git a/src/compiler/nir/nir_lower_atomics_to_ssbo.c b/src/compiler/nir/nir_lower_atomics_to_ssbo.c
> index c165a03b141..cdc660981fc 100644
> --- a/src/compiler/nir/nir_lower_atomics_to_ssbo.c
> +++ b/src/compiler/nir/nir_lower_atomics_to_ssbo.c
> @@ -149,6 +149,10 @@ lower_instr(nir_intrinsic_instr *instr, unsigned ssbo_offset, nir_builder *b)
>        break;
>     }
>
> +   if (new_instr->intrinsic == nir_intrinsic_load_ssbo ||
> +       new_instr->intrinsic == nir_intrinsic_store_ssbo)
> +      nir_intrinsic_set_align(new_instr, 4, 0);
> +
>     nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
>                       instr->dest.ssa.num_components,
>                       instr->dest.ssa.bit_size, NULL);
> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
> index ab3d5115688..e20c28fec87 100644
> --- a/src/compiler/nir/nir_print.c
> +++ b/src/compiler/nir/nir_print.c
> @@ -691,6 +691,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
>        [NIR_INTRINSIC_IMAGE_ARRAY] = "image_array",
>        [NIR_INTRINSIC_ACCESS] = "access",
>        [NIR_INTRINSIC_FORMAT] = "format",
> +      [NIR_INTRINSIC_ALIGN_MUL] = "align_mul",
> +      [NIR_INTRINSIC_ALIGN_OFFSET] = "align_offset",
>     };
>     for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
>        if (!info->index_map[idx])
> diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
> index 96ff09c3659..11a283ccf8f 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -2742,6 +2742,7 @@ vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode,
>        switch (opcode) {
>        case SpvOpAtomicLoad:
>           atomic->num_components = glsl_get_vector_elements(ptr->type->type);
> +         nir_intrinsic_set_align(atomic, 4, 0);
>           if (ptr->mode == vtn_variable_mode_ssbo)
>              atomic->src[src++] = nir_src_for_ssa(index);
>           atomic->src[src++] = nir_src_for_ssa(offset);
> @@ -2750,6 +2751,7 @@ vtn_handle_atomics(struct vtn_builder *b, SpvOp opcode,
>        case SpvOpAtomicStore:
>           atomic->num_components = glsl_get_vector_elements(ptr->type->type);
>           nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
> +         nir_intrinsic_set_align(atomic, 4, 0);
>           atomic->src[src++] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
>           if (ptr->mode == vtn_variable_mode_ssbo)
>              atomic->src[src++] = nir_src_for_ssa(index);
> diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c
> index e1e2c8c26ba..53f52255dc3 100644
> --- a/src/compiler/spirv/vtn_variables.c
> +++ b/src/compiler/spirv/vtn_variables.c
> @@ -646,6 +646,12 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
>        nir_intrinsic_set_access(instr, access);
>     }
>
> +   /* With extensions like relaxed_block_layout, we really can't guarantee
> +    * much more than scalar alignment.
> +    */
> +   if (op != nir_intrinsic_load_push_constant)
> +      nir_intrinsic_set_align(instr, data_bit_size / 8, 0);
> +
>     if (index)
>        instr->src[src++] = nir_src_for_ssa(index);
>
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list