[Mesa-dev] [PATCH 11/12] spirv: Add support for lowering workgroup access to offsets

Jason Ekstrand jason at jlekstrand.net
Thu Oct 19 18:04:13 UTC 2017


Before, we always left workgroup variables as shared nir_variables and
let the driver call nir_lower_io.  This adds an option to do the
lowering directly in spirv_to_nir.  To do this, we implicitly assign
the variables a std430 layout and then treat them like a UBO or SSBO,
immediately lowering every access all the way to a byte offset.

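To make the implicit layout concrete, here is a worked example (written
in GLSL for brevity; it is not part of the patch).  Given

   shared struct {
      float f;
      vec3 v;
   } s;

vtn_type_layout_std430() below places f at byte offset 0 and v at byte
offset 16 (a vec3 aligns like a vec4, so its alignment is 16 bytes),
giving the struct a size of 28 bytes and an alignment of 16.  A load of
s.v is then emitted directly as a load_shared intrinsic at byte offset
16 from the variable's base rather than as a load_var on a deref chain.
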
As a side effect, the spirv_to_nir pass now handles variable pointers
for workgroup variables.
---
 src/compiler/spirv/nir_spirv.h     |   8 +++
 src/compiler/spirv/spirv_to_nir.c  | 130 +++++++++++++++++++++++++++++++++----
 src/compiler/spirv/vtn_private.h   |  17 ++++-
 src/compiler/spirv/vtn_variables.c |  54 +++++++++++++--
 4 files changed, 190 insertions(+), 19 deletions(-)

diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h
index 234b0ce..58df3e1 100644
--- a/src/compiler/spirv/nir_spirv.h
+++ b/src/compiler/spirv/nir_spirv.h
@@ -43,6 +43,14 @@ struct nir_spirv_specialization {
 };
 
 struct spirv_to_nir_options {
+   /* Whether or not to lower all workgroup variable access to offsets
+    * up-front.  This means you will get _shared intrinsics instead of
+    * _var intrinsics for workgroup data access.
+    *
+    * This is currently required for full variable pointers support.
+    */
+   bool lower_workgroup_access_to_offsets;
+
    struct {
       bool float64;
       bool image_ms_array;
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index 96ecff6..1a612ae 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -729,6 +729,64 @@ translate_image_format(SpvImageFormat format)
    }
 }
 
+static struct vtn_type *
+vtn_type_layout_std430(struct vtn_builder *b, struct vtn_type *type,
+                       uint32_t *size_out, uint32_t *align_out)
+{
+   switch (type->base_type) {
+   case vtn_base_type_scalar: {
+      uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
+      *size_out = comp_size;
+      *align_out = comp_size;
+      return type;
+   }
+
+   case vtn_base_type_vector: {
+      uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
+      assert(type->length > 0 && type->length <= 4);
+      unsigned align_comps = type->length == 3 ? 4 : type->length; /* vec3 aligns like vec4 */
+      *size_out = comp_size * type->length;
+      *align_out = comp_size * align_comps;
+      return type;
+   }
+
+   case vtn_base_type_matrix:
+   case vtn_base_type_array: {
+      /* We're going to add an array stride */
+      type = vtn_type_copy(b, type);
+      uint32_t elem_size, elem_align;
+      type->array_element = vtn_type_layout_std430(b, type->array_element,
+                                                   &elem_size, &elem_align);
+      type->stride = vtn_align_u32(elem_size, elem_align);
+      *size_out = type->stride * type->length;
+      *align_out = elem_align;
+      return type;
+   }
+
+   case vtn_base_type_struct: {
+      /* We're going to add member offsets */
+      type = vtn_type_copy(b, type);
+      uint32_t offset = 0;
+      uint32_t align = 0;
+      for (unsigned i = 0; i < type->length; i++) {
+         uint32_t mem_size, mem_align;
+         type->members[i] = vtn_type_layout_std430(b, type->members[i],
+                                                   &mem_size, &mem_align);
+         offset = vtn_align_u32(offset, mem_align);
+         type->offsets[i] = offset;
+         offset += mem_size;
+         align = MAX2(align, mem_align);
+      }
+      *size_out = offset;
+      *align_out = align;
+      return type;
+   }
+
+   default:
+      unreachable("Invalid SPIR-V type for std430");
+   }
+}
+
 static void
 vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
                 const uint32_t *w, unsigned count)
@@ -878,6 +936,19 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
           */
          val->type->type = glsl_vector_type(GLSL_TYPE_UINT, 2);
       }
+
+      if (storage_class == SpvStorageClassWorkgroup &&
+          b->options->lower_workgroup_access_to_offsets) {
+         uint32_t size, align;
+         val->type->deref = vtn_type_layout_std430(b, val->type->deref,
+                                                   &size, &align);
+         val->type->length = size;
+         val->type->align = align;
+         /* These can actually be stored in nir_variables and used as SSA
+          * values, so they need a real glsl_type.
+          */
+         val->type->type = glsl_uint_type();
+      }
       break;
    }
 
@@ -2102,6 +2173,32 @@ get_ssbo_nir_atomic_op(SpvOp opcode)
 }
 
 static nir_intrinsic_op
+get_shared_nir_atomic_op(SpvOp opcode)
+{
+   switch (opcode) {
+   case SpvOpAtomicLoad:      return nir_intrinsic_load_shared;
+   case SpvOpAtomicStore:     return nir_intrinsic_store_shared;
+#define OP(S, N) case SpvOp##S: return nir_intrinsic_shared_##N;
+   OP(AtomicExchange,         atomic_exchange)
+   OP(AtomicCompareExchange,  atomic_comp_swap)
+   OP(AtomicIIncrement,       atomic_add)
+   OP(AtomicIDecrement,       atomic_add)
+   OP(AtomicIAdd,             atomic_add)
+   OP(AtomicISub,             atomic_add)
+   OP(AtomicSMin,             atomic_imin)
+   OP(AtomicUMin,             atomic_umin)
+   OP(AtomicSMax,             atomic_imax)
+   OP(AtomicUMax,             atomic_umax)
+   OP(AtomicAnd,              atomic_and)
+   OP(AtomicOr,               atomic_or)
+   OP(AtomicXor,              atomic_xor)
+#undef OP
+   default:
+      unreachable("Invalid shared atomic");
+   }
+}
+
+static nir_intrinsic_op
 get_var_nir_atomic_op(SpvOp opcode)
 {
    switch (opcode) {
@@ -2166,7 +2263,8 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
    SpvMemorySemanticsMask semantics = w[5];
    */
 
-   if (ptr->mode == vtn_variable_mode_workgroup) {
+   if (ptr->mode == vtn_variable_mode_workgroup &&
+       !b->options->lower_workgroup_access_to_offsets) {
       nir_deref_var *deref = vtn_pointer_to_deref(b, ptr);
       const struct glsl_type *deref_type = nir_deref_tail(&deref->deref)->type;
       nir_intrinsic_op op = get_var_nir_atomic_op(opcode);
@@ -2206,27 +2304,36 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
 
       }
    } else {
-      assert(ptr->mode == vtn_variable_mode_ssbo);
       nir_ssa_def *offset, *index;
       offset = vtn_pointer_to_offset(b, ptr, &index, NULL);
 
-      nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode);
+      nir_intrinsic_op op;
+      if (ptr->mode == vtn_variable_mode_ssbo) {
+         op = get_ssbo_nir_atomic_op(opcode);
+      } else {
+         assert(ptr->mode == vtn_variable_mode_workgroup &&
+                b->options->lower_workgroup_access_to_offsets);
+         op = get_shared_nir_atomic_op(opcode);
+      }
 
       atomic = nir_intrinsic_instr_create(b->nb.shader, op);
 
+      int src = 0;
       switch (opcode) {
       case SpvOpAtomicLoad:
          atomic->num_components = glsl_get_vector_elements(ptr->type->type);
-         atomic->src[0] = nir_src_for_ssa(index);
-         atomic->src[1] = nir_src_for_ssa(offset);
+         if (ptr->mode == vtn_variable_mode_ssbo)
+            atomic->src[src++] = nir_src_for_ssa(index);
+         atomic->src[src++] = nir_src_for_ssa(offset);
          break;
 
       case SpvOpAtomicStore:
          atomic->num_components = glsl_get_vector_elements(ptr->type->type);
          nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
-         atomic->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
-         atomic->src[1] = nir_src_for_ssa(index);
-         atomic->src[2] = nir_src_for_ssa(offset);
+         atomic->src[src++] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
+         if (ptr->mode == vtn_variable_mode_ssbo)
+            atomic->src[src++] = nir_src_for_ssa(index);
+         atomic->src[src++] = nir_src_for_ssa(offset);
          break;
 
       case SpvOpAtomicExchange:
@@ -2243,9 +2350,10 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
       case SpvOpAtomicAnd:
       case SpvOpAtomicOr:
       case SpvOpAtomicXor:
-         atomic->src[0] = nir_src_for_ssa(index);
-         atomic->src[1] = nir_src_for_ssa(offset);
-         fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
+         if (ptr->mode == vtn_variable_mode_ssbo)
+            atomic->src[src++] = nir_src_for_ssa(index);
+         atomic->src[src++] = nir_src_for_ssa(offset);
+         fill_common_atomic_sources(b, opcode, w, &atomic->src[src]);
          break;
 
       default:
diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
index 63bebe3..dcff0b5 100644
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -220,7 +220,10 @@ struct vtn_type {
    /* The value that declares this type.  Used for finding decorations */
    struct vtn_value *val;
 
-   /* Specifies the length of complex types. */
+   /* Specifies the length of complex types.
+    *
+    * For Workgroup pointers, this is the size of the referenced type.
+    */
    unsigned length;
 
    /* for arrays, matrices and pointers, the array stride */
@@ -271,6 +274,9 @@ struct vtn_type {
 
          /* Storage class for pointers */
          SpvStorageClass storage_class;
+
+         /* Required alignment for pointers */
+         uint32_t align;
       };
 
       /* Members for image types */
@@ -385,6 +391,8 @@ struct vtn_variable {
    nir_variable *var;
    nir_variable **members;
 
+   int shared_location;
+
    /**
     * In some early released versions of GLSLang, it implemented all function
     * calls by making copies of all parameters into temporary variables and
@@ -625,6 +633,13 @@ void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
 bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
                                     const uint32_t *words, unsigned count);
 
+static inline uint32_t
+vtn_align_u32(uint32_t v, uint32_t a)
+{
+   assert(a != 0 && a == (a & -a)); /* a must be a power of two */
+   return (v + a - 1) & ~(a - 1); /* round v up to the next multiple of a */
+}
+
 static inline uint64_t
 vtn_u64_literal(const uint32_t *w)
 {
diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c
index 74204f1..0b469cc 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -62,7 +62,9 @@ vtn_pointer_uses_ssa_offset(struct vtn_builder *b,
                             struct vtn_pointer *ptr)
 {
    return ptr->mode == vtn_variable_mode_ubo ||
-          ptr->mode == vtn_variable_mode_ssbo;
+          ptr->mode == vtn_variable_mode_ssbo ||
+          (ptr->mode == vtn_variable_mode_workgroup &&
+           b->options->lower_workgroup_access_to_offsets);
 }
 
 static bool
@@ -71,7 +73,9 @@ vtn_pointer_is_external_block(struct vtn_builder *b,
 {
    return ptr->mode == vtn_variable_mode_ssbo ||
           ptr->mode == vtn_variable_mode_ubo ||
-          ptr->mode == vtn_variable_mode_push_constant;
+          ptr->mode == vtn_variable_mode_push_constant ||
+          (ptr->mode == vtn_variable_mode_workgroup &&
+           b->options->lower_workgroup_access_to_offsets);
 }
 
 /* Dereference the given base pointer by the access chain */
@@ -167,7 +171,8 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
       /* We need ptr_type for the stride */
       assert(base->ptr_type);
       /* This must be a pointer to an actual element somewhere */
-      assert(block_index && offset);
+      assert(offset);
+      assert(block_index || base->mode == vtn_variable_mode_workgroup);
       /* We need at least one element in the chain */
       assert(deref_chain->length >= 1);
 
@@ -183,6 +188,7 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
       assert(!block_index);
 
       assert(base->var);
+      assert(base->ptr_type);
       switch (base->mode) {
       case vtn_variable_mode_ubo:
       case vtn_variable_mode_ssbo:
@@ -201,6 +207,22 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
          offset = nir_imm_int(&b->nb, 0);
          break;
 
+      case vtn_variable_mode_workgroup:
+         /* Assign the location on first use so that we don't end up bloating
+          * shared local memory (SLM) address space for variables which are
+          * never statically used.
+          */
+         if (base->var->shared_location < 0) {
+            assert(base->ptr_type->length > 0 && base->ptr_type->align > 0);
+            b->shader->num_shared = vtn_align_u32(b->shader->num_shared,
+                                                  base->ptr_type->align);
+            base->var->shared_location = b->shader->num_shared;
+            b->shader->num_shared += base->ptr_type->length;
+         }
+
+         block_index = NULL;
+         offset = nir_imm_int(&b->nb, base->var->shared_location);
+         break;
+
       default:
          unreachable("Invalid offset pointer mode");
       }
@@ -836,6 +858,9 @@ vtn_block_load(struct vtn_builder *b, struct vtn_pointer *src)
       vtn_access_chain_get_offset_size(src->chain, src->var->type,
                                        &access_offset, &access_size);
       break;
+   case vtn_variable_mode_workgroup:
+      op = nir_intrinsic_load_shared;
+      break;
    default:
       unreachable("Invalid block variable mode");
    }
@@ -860,6 +885,9 @@ vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src,
    case vtn_variable_mode_ssbo:
       op = nir_intrinsic_store_ssbo;
       break;
+   case vtn_variable_mode_workgroup:
+      op = nir_intrinsic_store_shared;
+      break;
    default:
       unreachable("Invalid block variable mode");
    }
@@ -945,7 +973,8 @@ vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
                    struct vtn_pointer *dest)
 {
    if (vtn_pointer_is_external_block(b, dest)) {
-      assert(dest->mode == vtn_variable_mode_ssbo);
+      assert(dest->mode == vtn_variable_mode_ssbo ||
+             dest->mode == vtn_variable_mode_workgroup);
       vtn_block_store(b, src, dest);
    } else {
       _vtn_variable_load_store(b, false, dest, &src);
@@ -1524,7 +1553,7 @@ vtn_pointer_to_ssa(struct vtn_builder *b, struct vtn_pointer *ptr)
              ptr->mode == vtn_variable_mode_ssbo);
       return nir_vec2(&b->nb, ptr->block_index, ptr->offset);
    } else {
-      unreachable("Invalid pointer");
+      assert(ptr->mode == vtn_variable_mode_workgroup);
       return ptr->offset;
    }
 }
@@ -1553,7 +1582,7 @@ vtn_pointer_from_ssa(struct vtn_builder *b, nir_ssa_def *ssa,
       ptr->offset = nir_channel(&b->nb, ssa, 1);
    } else {
       assert(ssa->num_components == 1);
-      unreachable("Invalid pointer");
+      assert(ptr->mode == vtn_variable_mode_workgroup);
       ptr->block_index = NULL;
       ptr->offset = ssa;
    }
@@ -1628,7 +1657,6 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
    case vtn_variable_mode_global:
    case vtn_variable_mode_image:
    case vtn_variable_mode_sampler:
-   case vtn_variable_mode_workgroup:
       /* For these, we create the variable normally */
       var->var = rzalloc(b->shader, nir_variable);
       var->var->name = ralloc_strdup(var->var, val->name);
@@ -1646,6 +1674,18 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
       }
       break;
 
+   case vtn_variable_mode_workgroup:
+      if (b->options->lower_workgroup_access_to_offsets) {
+         var->shared_location = -1;
+      } else {
+         /* Create the variable normally */
+         var->var = rzalloc(b->shader, nir_variable);
+         var->var->name = ralloc_strdup(var->var, val->name);
+         var->var->type = var->type->type;
+         var->var->data.mode = nir_var_shared;
+      }
+      break;
+
    case vtn_variable_mode_input:
    case vtn_variable_mode_output: {
       /* In order to know whether or not we're a per-vertex inout, we need
-- 
2.5.0.400.gff86faf


