[Mesa-dev] [PATCH 21/22] anv, i965: Lower away image derefs in the driver

Jason Ekstrand jason at jlekstrand.net
Fri Aug 17 20:06:27 UTC 2018


Previously, the back-end compiler turned image access into magic uniform
reads, and there was a complex contract between the back-end compiler and
the driver about setting up and filling out those params.  As of this
commit, both drivers now lower image_deref_load_param_intel intrinsics
to load_uniform intrinsics controlled by the driver and lower the other
image_deref_* intrinsics to image_* intrinsics which take an actual
binding table index.  There are still "magic" uniforms, but they are now
added and controlled entirely by the driver, and that contract no longer
spans components.
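
As a rough sketch of the new driver-side contract, lowering one
image_deref_* intrinsic down to an image_* intrinsic amounts to computing
a binding table index and handing it to the shared helper (the wrapper
function and its binding-table-start parameter below are illustrative,
not the exact anv/i965 code):

    /* Illustrative only; assumes a plain, non-array image variable. */
    #include "compiler/brw_nir.h"
    #include "compiler/nir/nir_builder.h"

    static void
    lower_image_deref_to_bti(nir_builder *b, nir_intrinsic_instr *intrin,
                             unsigned image_binding_table_start)
    {
       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
       nir_variable *var = nir_deref_instr_get_variable(deref);

       /* A real driver also folds in (and bounds-checks) the array index
        * for image arrays; only the plain-variable case is shown here.
        */
       b->cursor = nir_before_instr(&intrin->instr);
       nir_ssa_def *index =
          nir_imm_int(b, image_binding_table_start + var->data.binding);

       /* Switch image_deref_* to image_*, copy dim/array/format/access
        * from the variable, and replace the deref source with the index.
        */
       brw_nir_rewrite_image_intrinsic(intrin, index);
    }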

This also has the side-effect of giving most image accesses compile-time
binding table indices.  Previously, all image access pulled the binding
table index from a uniform.  Part of the reason for this was that the
magic uniforms made it difficult to decouple binding table indices from
the uniforms and, since they are indexed completely differently
(especially in Vulkan), it was hard to pull them apart.  Now that the
driver is handling both, it's trivial to decouple the two and provide
actual binding table indices.

Shader-db results on Kaby Lake:

    total instructions in shared programs: 15166872 -> 15164293 (-0.02%)
    instructions in affected programs: 115834 -> 113255 (-2.23%)
    helped: 191
    HURT: 0

    total cycles in shared programs: 571311495 -> 571196465 (-0.02%)
    cycles in affected programs: 4757115 -> 4642085 (-2.42%)
    helped: 73
    HURT: 67

    total spills in shared programs: 10951 -> 10926 (-0.23%)
    spills in affected programs: 742 -> 717 (-3.37%)
    helped: 7
    HURT: 0

    total fills in shared programs: 22226 -> 22201 (-0.11%)
    fills in affected programs: 1146 -> 1121 (-2.18%)
    helped: 7
    HURT: 0
---
 src/compiler/nir/nir_intrinsics.py            |   6 +-
 src/intel/compiler/brw_fs.cpp                 |  10 +-
 src/intel/compiler/brw_fs.h                   |   3 +-
 src/intel/compiler/brw_fs_nir.cpp             | 211 +++++++-----------
 src/intel/compiler/brw_nir.h                  |   5 +
 .../compiler/brw_nir_lower_image_load_store.c |  40 ++++
 .../vulkan/anv_nir_apply_pipeline_layout.c    | 159 +++++++++----
 src/intel/vulkan/anv_pipeline.c               |   4 +-
 .../drivers/dri/i965/brw_nir_uniforms.cpp     | 130 +++++++++++
 src/mesa/drivers/dri/i965/brw_program.c       |   1 +
 10 files changed, 375 insertions(+), 194 deletions(-)

diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 0dae37ae886..ffcd2c0b83f 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -329,9 +329,9 @@ image("samples", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
 # variable. The const index specifies which of the six parameters to load.
 intrinsic("image_deref_load_param_intel", src_comp=[1], dest_comp=0,
           indices=[BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
-intrinsic("image_deref_load_raw_intel", src_comp=[1, 1], dest_comp=0,
-          flags=[CAN_ELIMINATE, CAN_REORDER])
-intrinsic("image_deref_store_raw_intel", src_comp=[1, 1, 0])
+image("load_raw_intel", src_comp=[1], dest_comp=0,
+      flags=[CAN_ELIMINATE, CAN_REORDER])
+image("store_raw_intel", src_comp=[1, 0])
 
 # Vulkan descriptor set intrinsics
 #
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 1e1954270e8..2afc1c946ab 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -493,16 +493,14 @@ type_size_scalar(const struct glsl_type *type)
       }
       return size;
    case GLSL_TYPE_SAMPLER:
-      /* Samplers take up no register space, since they're baked in at
-       * link time.
-       */
-      return 0;
    case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_IMAGE:
+      /* Samplers, atomics, and images take up no register space, since
+       * they're baked in at link time.
+       */
       return 0;
    case GLSL_TYPE_SUBROUTINE:
       return 1;
-   case GLSL_TYPE_IMAGE:
-      return BRW_IMAGE_PARAM_SIZE;
    case GLSL_TYPE_VOID:
    case GLSL_TYPE_ERROR:
    case GLSL_TYPE_INTERFACE:
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index d56e33715ee..22e8ae5bff0 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -216,6 +216,8 @@ public:
                               nir_intrinsic_instr *instr);
    void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
                               nir_intrinsic_instr *instr);
+   fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
+                                        nir_intrinsic_instr *instr);
    void nir_emit_intrinsic(const brw::fs_builder &bld,
                            nir_intrinsic_instr *instr);
    void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
@@ -231,7 +233,6 @@ public:
    fs_reg get_nir_src(const nir_src &src);
    fs_reg get_nir_src_imm(const nir_src &src);
    fs_reg get_nir_dest(const nir_dest &dest);
-   fs_reg get_nir_image_deref(nir_deref_instr *deref);
    fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
    void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
                      unsigned wr_mask);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index ac45b559830..8922021cedb 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1694,70 +1694,6 @@ fs_visitor::get_nir_dest(const nir_dest &dest)
    }
 }
 
-fs_reg
-fs_visitor::get_nir_image_deref(nir_deref_instr *deref)
-{
-   fs_reg arr_offset = brw_imm_ud(0);
-   unsigned array_size = BRW_IMAGE_PARAM_SIZE * 4;
-   nir_deref_instr *head = deref;
-   while (head->deref_type != nir_deref_type_var) {
-      assert(head->deref_type == nir_deref_type_array);
-
-      /* This level's element size is the previous level's array size */
-      const unsigned elem_size = array_size;
-
-      fs_reg index = retype(get_nir_src_imm(head->arr.index),
-                            BRW_REGISTER_TYPE_UD);
-      if (arr_offset.file == BRW_IMMEDIATE_VALUE &&
-          index.file == BRW_IMMEDIATE_VALUE) {
-         arr_offset.ud += index.ud * elem_size;
-      } else if (index.file == BRW_IMMEDIATE_VALUE) {
-         bld.ADD(arr_offset, arr_offset, brw_imm_ud(index.ud * elem_size));
-      } else {
-         fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
-         bld.MUL(tmp, index, brw_imm_ud(elem_size));
-         bld.ADD(tmp, tmp, arr_offset);
-         arr_offset = tmp;
-      }
-
-      head = nir_deref_instr_parent(head);
-      assert(glsl_type_is_array(head->type));
-      array_size = elem_size * glsl_get_length(head->type);
-   }
-
-   assert(head->deref_type == nir_deref_type_var);
-   const unsigned max_arr_offset = array_size - (BRW_IMAGE_PARAM_SIZE * 4);
-   fs_reg image(UNIFORM, head->var->data.driver_location / 4,
-                BRW_REGISTER_TYPE_UD);
-
-   if (arr_offset.file == BRW_IMMEDIATE_VALUE) {
-      /* The offset is in bytes but we want it in dwords */
-      return offset(image, bld, MIN2(arr_offset.ud, max_arr_offset) / 4);
-   } else {
-      /* Accessing an invalid surface index with the dataport can result
-       * in a hang.  According to the spec "if the index used to
-       * select an individual element is negative or greater than or
-       * equal to the size of the array, the results of the operation
-       * are undefined but may not lead to termination" -- which is one
-       * of the possible outcomes of the hang.  Clamp the index to
-       * prevent access outside of the array bounds.
-       */
-      bld.emit_minmax(arr_offset, arr_offset, brw_imm_ud(max_arr_offset),
-                      BRW_CONDITIONAL_L);
-
-      /* Emit a pile of MOVs to load the uniform into a temporary.  The
-       * dead-code elimination pass will get rid of what we don't use.
-       */
-      fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, BRW_IMAGE_PARAM_SIZE);
-      for (unsigned j = 0; j < BRW_IMAGE_PARAM_SIZE; j++) {
-         bld.emit(SHADER_OPCODE_MOV_INDIRECT,
-                  offset(tmp, bld, j), offset(image, bld, j),
-                  arr_offset, brw_imm_ud(max_arr_offset + 4));
-      }
-      return tmp;
-   }
-}
-
 void
 fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
                          unsigned wr_mask)
@@ -1776,49 +1712,30 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
    }
 }
 
-/**
- * Get the matching channel register datatype for an image intrinsic of the
- * specified GLSL image type.
- */
-static brw_reg_type
-get_image_base_type(const glsl_type *type)
-{
-   switch ((glsl_base_type)type->sampled_type) {
-   case GLSL_TYPE_UINT:
-      return BRW_REGISTER_TYPE_UD;
-   case GLSL_TYPE_INT:
-      return BRW_REGISTER_TYPE_D;
-   case GLSL_TYPE_FLOAT:
-      return BRW_REGISTER_TYPE_F;
-   default:
-      unreachable("Not reached.");
-   }
-}
-
 /**
  * Get the appropriate atomic op for an image atomic intrinsic.
  */
 static unsigned
-get_image_atomic_op(nir_intrinsic_op op, const glsl_type *type)
+get_image_atomic_op(nir_intrinsic_op op, GLenum format)
 {
    switch (op) {
-   case nir_intrinsic_image_deref_atomic_add:
+   case nir_intrinsic_image_atomic_add:
       return BRW_AOP_ADD;
-   case nir_intrinsic_image_deref_atomic_min:
-      return (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
-              BRW_AOP_IMIN : BRW_AOP_UMIN);
-   case nir_intrinsic_image_deref_atomic_max:
-      return (get_image_base_type(type) == BRW_REGISTER_TYPE_D ?
-              BRW_AOP_IMAX : BRW_AOP_UMAX);
-   case nir_intrinsic_image_deref_atomic_and:
+   case nir_intrinsic_image_atomic_min:
+      assert(format == GL_R32UI || format == GL_R32I);
+      return format == GL_R32I ? BRW_AOP_IMIN : BRW_AOP_UMIN;
+   case nir_intrinsic_image_atomic_max:
+      assert(format == GL_R32UI || format == GL_R32I);
+      return format == GL_R32I ? BRW_AOP_IMAX : BRW_AOP_UMAX;
+   case nir_intrinsic_image_atomic_and:
       return BRW_AOP_AND;
-   case nir_intrinsic_image_deref_atomic_or:
+   case nir_intrinsic_image_atomic_or:
       return BRW_AOP_OR;
-   case nir_intrinsic_image_deref_atomic_xor:
+   case nir_intrinsic_image_atomic_xor:
       return BRW_AOP_XOR;
-   case nir_intrinsic_image_deref_atomic_exchange:
+   case nir_intrinsic_image_atomic_exchange:
       return BRW_AOP_MOV;
-   case nir_intrinsic_image_deref_atomic_comp_swap:
+   case nir_intrinsic_image_atomic_comp_swap:
       return BRW_AOP_CMPWR;
    default:
       unreachable("Not reachable.");
@@ -3853,6 +3770,43 @@ brw_cond_mod_for_nir_reduction_op(nir_op op)
    }
 }
 
+fs_reg
+fs_visitor::get_nir_image_intrinsic_image(const brw::fs_builder &bld,
+                                          nir_intrinsic_instr *instr)
+{
+   fs_reg image = retype(get_nir_src_imm(instr->src[0]), BRW_REGISTER_TYPE_UD);
+
+   if (stage_prog_data->binding_table.image_start > 0) {
+      if (image.file == BRW_IMMEDIATE_VALUE) {
+         image.d += stage_prog_data->binding_table.image_start;
+      } else {
+         bld.ADD(image, image,
+                 brw_imm_d(stage_prog_data->binding_table.image_start));
+      }
+   }
+
+   return image;
+}
+
+static unsigned
+image_intrinsic_coord_components(nir_intrinsic_instr *instr)
+{
+   switch (nir_intrinsic_image_dim(instr)) {
+   case GLSL_SAMPLER_DIM_1D:
+      return 1 + nir_intrinsic_image_array(instr);
+   case GLSL_SAMPLER_DIM_2D:
+   case GLSL_SAMPLER_DIM_RECT:
+      return 2 + nir_intrinsic_image_array(instr);
+   case GLSL_SAMPLER_DIM_3D:
+   case GLSL_SAMPLER_DIM_CUBE:
+      return 3;
+   case GLSL_SAMPLER_DIM_BUF:
+      return 1;
+   default:
+      unreachable("Invalid image dimension");
+   }
+}
+
 void
 fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
 {
@@ -3861,31 +3815,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       dest = get_nir_dest(instr->dest);
 
    switch (instr->intrinsic) {
-   case nir_intrinsic_image_deref_load:
-   case nir_intrinsic_image_deref_store:
-   case nir_intrinsic_image_deref_atomic_add:
-   case nir_intrinsic_image_deref_atomic_min:
-   case nir_intrinsic_image_deref_atomic_max:
-   case nir_intrinsic_image_deref_atomic_and:
-   case nir_intrinsic_image_deref_atomic_or:
-   case nir_intrinsic_image_deref_atomic_xor:
-   case nir_intrinsic_image_deref_atomic_exchange:
-   case nir_intrinsic_image_deref_atomic_comp_swap: {
+   case nir_intrinsic_image_load:
+   case nir_intrinsic_image_store:
+   case nir_intrinsic_image_atomic_add:
+   case nir_intrinsic_image_atomic_min:
+   case nir_intrinsic_image_atomic_max:
+   case nir_intrinsic_image_atomic_and:
+   case nir_intrinsic_image_atomic_or:
+   case nir_intrinsic_image_atomic_xor:
+   case nir_intrinsic_image_atomic_exchange:
+   case nir_intrinsic_image_atomic_comp_swap: {
       if (stage == MESA_SHADER_FRAGMENT &&
-          instr->intrinsic != nir_intrinsic_image_deref_load)
+          instr->intrinsic != nir_intrinsic_image_load)
          brw_wm_prog_data(prog_data)->has_side_effects = true;
 
-      /* Get the referenced image variable and type. */
-      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
-      const glsl_type *type = deref->type;
-
       /* Get some metadata from the image intrinsic. */
       const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
-      const unsigned dims = type->coordinate_components();
+      const unsigned dims = image_intrinsic_coord_components(instr);
+      const GLenum format = nir_intrinsic_format(instr);
       const unsigned dest_components = nir_intrinsic_dest_components(instr);
 
       /* Get the arguments of the image intrinsic. */
-      const fs_reg image = get_nir_image_deref(deref);
+      const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
       const fs_reg coords = retype(get_nir_src(instr->src[1]),
                                    BRW_REGISTER_TYPE_UD);
       const fs_reg src0 = (info->num_srcs >= 4 ?
@@ -3895,15 +3846,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       fs_reg tmp;
 
       /* Emit an image load, store or atomic op. */
-      if (instr->intrinsic == nir_intrinsic_image_deref_load) {
+      if (instr->intrinsic == nir_intrinsic_image_load) {
          tmp = emit_typed_read(bld, image, coords, dims,
                                instr->num_components);
-      } else if (instr->intrinsic == nir_intrinsic_image_deref_store) {
+      } else if (instr->intrinsic == nir_intrinsic_image_store) {
          emit_typed_write(bld, image, coords, src0, dims,
                           instr->num_components);
       } else {
          tmp = emit_typed_atomic(bld, image, coords, src0, src1, dims, 1,
-                                 get_image_atomic_op(instr->intrinsic, type));
+                                 get_image_atomic_op(instr->intrinsic, format));
       }
 
       /* Assign the result. */
@@ -3914,9 +3865,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_image_deref_size: {
-      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
-      const fs_reg image = get_nir_image_deref(deref);
+   case nir_intrinsic_image_size: {
+      const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
 
       /* The LOD also serves as the message payload */
       fs_reg lod = bld.vgrf(BRW_REGISTER_TYPE_UD);
@@ -3928,7 +3878,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       inst->size_written = type_sz(tmp.type) * bld.dispatch_width() * 4;
 
       for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) {
-         if (c == 2 && glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_CUBE) {
+         if (c == 2 && nir_intrinsic_image_dim(instr) == GLSL_SAMPLER_DIM_CUBE) {
             bld.emit(SHADER_OPCODE_INT_QUOTIENT,
                      offset(retype(dest, tmp.type), bld, c),
                      offset(tmp, bld, c), brw_imm_ud(6));
@@ -3940,19 +3890,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_image_deref_load_param_intel: {
-      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
-      const fs_reg image = get_nir_image_deref(deref);
-      const fs_reg param = offset(image, bld, nir_intrinsic_base(instr) * 4);
-      for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) {
-         bld.MOV(offset(retype(dest, param.type), bld, c),
-                 offset(param, bld, c));
-      }
-      break;
-   }
-
-   case nir_intrinsic_image_deref_load_raw_intel: {
-      const fs_reg image = get_nir_image_deref(nir_src_as_deref(instr->src[0]));
+   case nir_intrinsic_image_load_raw_intel: {
+      const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
       const fs_reg addr = retype(get_nir_src(instr->src[1]),
                                  BRW_REGISTER_TYPE_UD);
 
@@ -3966,8 +3905,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_image_deref_store_raw_intel: {
-      const fs_reg image = get_nir_image_deref(nir_src_as_deref(instr->src[0]));
+   case nir_intrinsic_image_store_raw_intel: {
+      const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
       const fs_reg addr = retype(get_nir_src(instr->src[1]),
                                  BRW_REGISTER_TYPE_UD);
       const fs_reg data = retype(get_nir_src(instr->src[2]),
@@ -4002,7 +3941,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_image_deref_samples:
+   case nir_intrinsic_image_samples:
       /* The driver does not support multi-sampled images. */
       bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1));
       break;
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 30c10117dfd..d3aca21bdd1 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -116,6 +116,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir);
 
 bool brw_nir_lower_image_load_store(nir_shader *nir,
                                     const struct gen_device_info *devinfo);
+void brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
+                                     nir_ssa_def *index);
 
 nir_shader *brw_postprocess_nir(nir_shader *nir,
                                 const struct brw_compiler *compiler,
@@ -147,6 +149,9 @@ void brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
                                 struct gl_program *prog,
                                 struct brw_stage_prog_data *stage_prog_data);
 
+void brw_nir_lower_glsl_images(nir_shader *shader,
+                               const struct gl_program *prog);
+
 void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
                                 nir_shader *nir,
                                 const struct brw_vs_prog_key *vs_key,
diff --git a/src/intel/compiler/brw_nir_lower_image_load_store.c b/src/intel/compiler/brw_nir_lower_image_load_store.c
index dfad39c898d..f888e6fb00f 100644
--- a/src/intel/compiler/brw_nir_lower_image_load_store.c
+++ b/src/intel/compiler/brw_nir_lower_image_load_store.c
@@ -828,3 +828,43 @@ brw_nir_lower_image_load_store(nir_shader *shader,
 
    return progress;
 }
+
+void
+brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin,
+                                nir_ssa_def *index)
+{
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   switch (intrin->intrinsic) {
+#define CASE(op) \
+   case nir_intrinsic_image_deref_##op: \
+      intrin->intrinsic = nir_intrinsic_image_##op; \
+      break;
+   CASE(load)
+   CASE(store)
+   CASE(atomic_add)
+   CASE(atomic_min)
+   CASE(atomic_max)
+   CASE(atomic_and)
+   CASE(atomic_or)
+   CASE(atomic_xor)
+   CASE(atomic_exchange)
+   CASE(atomic_comp_swap)
+   CASE(size)
+   CASE(samples)
+   CASE(load_raw_intel)
+   CASE(store_raw_intel)
+#undef CASE
+   default:
+      unreachable("Unhanded image intrinsic");
+   }
+
+   nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
+   nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
+   nir_intrinsic_set_access(intrin, var->data.image.access);
+   nir_intrinsic_set_format(intrin, var->data.image.format);
+
+   nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
+                         nir_src_for_ssa(index));
+}
diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index 84a664826e8..583b5a17cc6 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -24,6 +24,7 @@
 #include "anv_nir.h"
 #include "program/prog_parameter.h"
 #include "nir/nir_builder.h"
+#include "compiler/brw_nir.h"
 
 struct apply_pipeline_layout_state {
    nir_shader *shader;
@@ -32,6 +33,8 @@ struct apply_pipeline_layout_state {
    struct anv_pipeline_layout *layout;
    bool add_bounds_checks;
 
+   unsigned first_image_uniform;
+
    bool uses_constants;
    uint8_t constants_offset;
    struct {
@@ -99,6 +102,9 @@ get_used_bindings_block(nir_block *block,
          case nir_intrinsic_image_deref_atomic_comp_swap:
          case nir_intrinsic_image_deref_size:
          case nir_intrinsic_image_deref_samples:
+         case nir_intrinsic_image_deref_load_param_intel:
+         case nir_intrinsic_image_deref_load_raw_intel:
+         case nir_intrinsic_image_deref_store_raw_intel:
             add_deref_src_binding(state, intrin->src[0]);
             break;
 
@@ -178,6 +184,63 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
    nir_instr_remove(&intrin->instr);
 }
 
+static void
+lower_image_intrinsic(nir_intrinsic_instr *intrin,
+                      struct apply_pipeline_layout_state *state)
+{
+   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+
+   unsigned set = var->data.descriptor_set;
+   unsigned binding = var->data.binding;
+   unsigned array_size =
+      state->layout->set[set].layout->binding[binding].array_size;
+
+   nir_builder *b = &state->builder;
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   nir_ssa_def *index = NULL;
+   if (deref->deref_type != nir_deref_type_var) {
+      assert(deref->deref_type == nir_deref_type_array);
+      index = nir_ssa_for_src(b, deref->arr.index, 1);
+      if (state->add_bounds_checks)
+         index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
+   } else {
+      index = nir_imm_int(b, 0);
+   }
+
+   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
+      b->cursor = nir_instr_remove(&intrin->instr);
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
+
+      nir_intrinsic_set_base(load, state->first_image_uniform +
+                                   state->set[set].image_offsets[binding] *
+                                   BRW_IMAGE_PARAM_SIZE * 4);
+      nir_intrinsic_set_range(load, array_size * BRW_IMAGE_PARAM_SIZE * 4);
+
+      const unsigned param = nir_intrinsic_base(intrin);
+      nir_ssa_def *offset =
+         nir_imul(b, index, nir_imm_int(b, BRW_IMAGE_PARAM_SIZE * 4));
+      offset = nir_iadd(b, offset, nir_imm_int(b, param * 16));
+      load->src[0] = nir_src_for_ssa(offset);
+
+      load->num_components = intrin->dest.ssa.num_components;
+      nir_ssa_dest_init(&load->instr, &load->dest,
+                        intrin->dest.ssa.num_components,
+                        intrin->dest.ssa.bit_size, NULL);
+      nir_builder_instr_insert(b, &load->instr);
+
+      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                               nir_src_for_ssa(&load->dest.ssa));
+   } else {
+      unsigned binding_offset = state->set[set].surface_offsets[binding];
+      index = nir_iadd(b, index, nir_imm_int(b, binding_offset));
+      brw_nir_rewrite_image_intrinsic(intrin, index);
+   }
+}
+
 static void
 lower_load_constant(nir_intrinsic_instr *intrin,
                     struct apply_pipeline_layout_state *state)
@@ -318,6 +381,23 @@ apply_pipeline_layout_block(nir_block *block,
          case nir_intrinsic_vulkan_resource_reindex:
             lower_res_reindex_intrinsic(intrin, state);
             break;
+         case nir_intrinsic_image_deref_load:
+         case nir_intrinsic_image_deref_store:
+         case nir_intrinsic_image_deref_atomic_add:
+         case nir_intrinsic_image_deref_atomic_min:
+         case nir_intrinsic_image_deref_atomic_max:
+         case nir_intrinsic_image_deref_atomic_and:
+         case nir_intrinsic_image_deref_atomic_or:
+         case nir_intrinsic_image_deref_atomic_xor:
+         case nir_intrinsic_image_deref_atomic_exchange:
+         case nir_intrinsic_image_deref_atomic_comp_swap:
+         case nir_intrinsic_image_deref_size:
+         case nir_intrinsic_image_deref_samples:
+         case nir_intrinsic_image_deref_load_param_intel:
+         case nir_intrinsic_image_deref_load_raw_intel:
+         case nir_intrinsic_image_deref_store_raw_intel:
+            lower_image_intrinsic(intrin, state);
+            break;
          case nir_intrinsic_load_constant:
             lower_load_constant(intrin, state);
             break;
@@ -436,6 +516,39 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
       }
    }
 
+   unsigned image_uniform;
+   if (map->image_count > 0) {
+      assert(map->image_count <= MAX_IMAGES);
+      assert(shader->num_uniforms == prog_data->nr_params * 4);
+      state.first_image_uniform = shader->num_uniforms;
+      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
+                                                       map->image_count *
+                                                       BRW_IMAGE_PARAM_SIZE);
+      struct anv_push_constants *null_data = NULL;
+      const struct brw_image_param *image_param = null_data->images;
+      for (uint32_t i = 0; i < map->image_count; i++) {
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
+                                  (uintptr_t)&image_param->surface_idx, 1);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
+                                  (uintptr_t)image_param->offset, 2);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
+                                  (uintptr_t)image_param->size, 3);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
+                                  (uintptr_t)image_param->stride, 4);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
+                                  (uintptr_t)image_param->tiling, 3);
+         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
+                                  (uintptr_t)image_param->swizzling, 2);
+
+         param += BRW_IMAGE_PARAM_SIZE;
+         image_param ++;
+      }
+      assert(param == prog_data->param + prog_data->nr_params);
+
+      shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
+      assert(shader->num_uniforms == prog_data->nr_params * 4);
+   }
+
    nir_foreach_variable(var, &shader->uniforms) {
       const struct glsl_type *glsl_type = glsl_without_array(var->type);
 
@@ -479,51 +592,5 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
                                             nir_metadata_dominance);
    }
 
-   if (map->image_count > 0) {
-      assert(map->image_count <= MAX_IMAGES);
-      nir_foreach_variable(var, &shader->uniforms) {
-         if (glsl_type_is_image(var->type) ||
-             (glsl_type_is_array(var->type) &&
-              glsl_type_is_image(glsl_get_array_element(var->type)))) {
-            /* Images are represented as uniform push constants and the actual
-             * information required for reading/writing to/from the image is
-             * storred in the uniform.
-             */
-            unsigned set = var->data.descriptor_set;
-            unsigned binding = var->data.binding;
-            unsigned image_index = state.set[set].image_offsets[binding];
-
-            var->data.driver_location = shader->num_uniforms +
-                                        image_index * BRW_IMAGE_PARAM_SIZE * 4;
-         }
-      }
-
-      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
-                                                       map->image_count *
-                                                       BRW_IMAGE_PARAM_SIZE);
-      struct anv_push_constants *null_data = NULL;
-      const struct brw_image_param *image_param = null_data->images;
-      for (uint32_t i = 0; i < map->image_count; i++) {
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
-                                  (uintptr_t)&image_param->surface_idx, 1);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
-                                  (uintptr_t)image_param->offset, 2);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
-                                  (uintptr_t)image_param->size, 3);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
-                                  (uintptr_t)image_param->stride, 4);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
-                                  (uintptr_t)image_param->tiling, 3);
-         setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
-                                  (uintptr_t)image_param->swizzling, 2);
-
-         param += BRW_IMAGE_PARAM_SIZE;
-         image_param ++;
-      }
-      assert(param == prog_data->param + prog_data->nr_params);
-
-      shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
-   }
-
    ralloc_free(mem_ctx);
 }
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 19d59b7fbac..a3eb68769a2 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -523,6 +523,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
    if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
       pipeline->needs_data_cache = true;
 
+   NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
+
    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
    if (layout) {
       anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data,
@@ -532,8 +534,6 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
    if (nir->info.stage != MESA_SHADER_COMPUTE)
       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
 
-   NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo);
-
    assert(nir->num_uniforms == prog_data->nr_params * 4);
 
    stage->nir = nir;
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index 54f9f9b1a6b..8a560d9bac1 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -23,6 +23,7 @@
 
 #include "compiler/brw_nir.h"
 #include "compiler/glsl/ir_uniform.h"
+#include "compiler/nir/nir_builder.h"
 #include "brw_program.h"
 
 static void
@@ -267,3 +268,132 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
          stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO;
    }
 }
+
+static nir_ssa_def *
+get_aoa_deref_offset(nir_builder *b,
+                     nir_deref_instr *deref,
+                     unsigned elem_size)
+{
+   unsigned array_size = elem_size;
+   nir_ssa_def *offset = nir_imm_int(b, 0);
+
+   while (deref->deref_type != nir_deref_type_var) {
+      assert(deref->deref_type == nir_deref_type_array);
+
+      /* This level's element size is the previous level's array size */
+      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
+      assert(deref->arr.index.ssa);
+      offset = nir_iadd(b, offset,
+                           nir_imul(b, index, nir_imm_int(b, array_size)));
+
+      deref = nir_deref_instr_parent(deref);
+      assert(glsl_type_is_array(deref->type));
+      array_size *= glsl_get_length(deref->type);
+   }
+
+   /* Accessing an invalid surface index with the dataport can result in a
+    * hang.  According to the spec "if the index used to select an individual
+    * element is negative or greater than or equal to the size of the array,
+    * the results of the operation are undefined but may not lead to
+    * termination" -- which is one of the possible outcomes of the hang.
+    * Clamp the index to prevent access outside of the array bounds.
+    */
+   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
+}
+
+void
+brw_nir_lower_glsl_images(nir_shader *shader,
+                          const struct gl_program *prog)
+{
+   /* We put image uniforms at the end */
+   nir_foreach_variable(var, &shader->uniforms) {
+      if (!var->type->contains_image())
+         continue;
+
+      /* GL Only allows arrays of arrays of images */
+      assert(var->type->without_array()->is_image());
+      const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());
+
+      var->data.driver_location = shader->num_uniforms;
+      shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4;
+   }
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_image_deref_load:
+         case nir_intrinsic_image_deref_store:
+         case nir_intrinsic_image_deref_atomic_add:
+         case nir_intrinsic_image_deref_atomic_min:
+         case nir_intrinsic_image_deref_atomic_max:
+         case nir_intrinsic_image_deref_atomic_and:
+         case nir_intrinsic_image_deref_atomic_or:
+         case nir_intrinsic_image_deref_atomic_xor:
+         case nir_intrinsic_image_deref_atomic_exchange:
+         case nir_intrinsic_image_deref_atomic_comp_swap:
+         case nir_intrinsic_image_deref_size:
+         case nir_intrinsic_image_deref_samples:
+         case nir_intrinsic_image_deref_load_raw_intel:
+         case nir_intrinsic_image_deref_store_raw_intel: {
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+            nir_variable *var = nir_deref_instr_get_variable(deref);
+            const unsigned num_images =
+               MAX2(1, var->type->arrays_of_arrays_size());
+
+            struct gl_uniform_storage *storage =
+               &prog->sh.data->UniformStorage[var->data.location];
+            const unsigned image_var_idx =
+               storage->opaque[shader->info.stage].index;
+
+            b.cursor = nir_before_instr(&intrin->instr);
+            nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx),
+                                          get_aoa_deref_offset(&b, deref, 1));
+            brw_nir_rewrite_image_intrinsic(intrin, index);
+            break;
+         }
+
+         case nir_intrinsic_image_deref_load_param_intel: {
+            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+            nir_variable *var = nir_deref_instr_get_variable(deref);
+            const unsigned num_images =
+               MAX2(1, var->type->arrays_of_arrays_size());
+
+            b.cursor = nir_instr_remove(&intrin->instr);
+
+            const unsigned param = nir_intrinsic_base(intrin);
+            nir_ssa_def *offset =
+               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4);
+            offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16));
+
+            nir_intrinsic_instr *load =
+               nir_intrinsic_instr_create(b.shader,
+                                          nir_intrinsic_load_uniform);
+            nir_intrinsic_set_base(load, var->data.driver_location);
+            nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4);
+            load->src[0] = nir_src_for_ssa(offset);
+            load->num_components = intrin->dest.ssa.num_components;
+            nir_ssa_dest_init(&load->instr, &load->dest,
+                              intrin->dest.ssa.num_components,
+                              intrin->dest.ssa.bit_size, NULL);
+            nir_builder_instr_insert(&b, &load->instr);
+
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&load->dest.ssa));
+            break;
+         }
+
+         default:
+            break;
+         }
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index f5ebd3c3b05..041395ec4c0 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -140,6 +140,7 @@ brw_create_nir(struct brw_context *brw,
    }
 
    NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
+   NIR_PASS_V(nir, brw_nir_lower_glsl_images, prog);
 
    return nir;
 }
-- 
2.17.1


