Mesa (master): turnip: Add support for descriptor arrays.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Dec 17 00:15:56 UTC 2019


Module: Mesa
Branch: master
Commit: 2d3182b429552651f54650fcc9ea53d41fabe6de
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2d3182b429552651f54650fcc9ea53d41fabe6de

Author: Eric Anholt <eric at anholt.net>
Date:   Fri Dec 13 22:05:11 2019 -0800

turnip: Add support for descriptor arrays.

I had a bigger rework in progress, but this change is simple and gets the
tests passing.

Fixes 36 failures in
dEQP-VK.binding_model.shader_access.primary_cmd_buf.sampler_mutable.fragment.*
(now all passing)

Reviewed-by: Jonathan Marek <jonathan at marek.ca>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3124>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3124>

---

 src/freedreno/vulkan/tu_cmd_buffer.c     | 151 +++++++++++++++++++++----------
 src/freedreno/vulkan/tu_descriptor_set.c |  15 +++
 src/freedreno/vulkan/tu_pipeline.c       |  54 +++++++----
 src/freedreno/vulkan/tu_private.h        |   5 +-
 src/freedreno/vulkan/tu_shader.c         | 104 ++++++++++++---------
 5 files changed, 217 insertions(+), 112 deletions(-)
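
[Editor's note: "descriptor arrays" here means bindings declared with
descriptorCount > 1. A minimal sketch of such a binding through the standard
Vulkan API follows; the snippet is illustrative only and is not part of the
patch.]

#include <vulkan/vulkan.h>

/* Illustrative only, not part of the patch: an array of four combined
 * image/samplers in one binding.  Per the commit, paths like this
 * previously hit tu_finishme("texture/sampler array").
 */
static const VkDescriptorSetLayoutBinding binding = {
   .binding         = 0,
   .descriptorType  = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
   .descriptorCount = 4,                       /* the "array" part */
   .stageFlags      = VK_SHADER_STAGE_FRAGMENT_BIT,
};

static const VkDescriptorSetLayoutCreateInfo layout_info = {
   .sType        = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
   .bindingCount = 1,
   .pBindings    = &binding,
};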

diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index f267f6ffeb2..0c67ea983db 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -2490,7 +2490,8 @@ struct tu_draw_state_group
 
 static struct tu_sampler*
 sampler_ptr(struct tu_descriptor_state *descriptors_state,
-            const struct tu_descriptor_map *map, unsigned i)
+            const struct tu_descriptor_map *map, unsigned i,
+            unsigned array_index)
 {
    assert(descriptors_state->valid & (1 << map->set[i]));
 
@@ -2504,7 +2505,10 @@ sampler_ptr(struct tu_descriptor_state *descriptors_state,
    case VK_DESCRIPTOR_TYPE_SAMPLER:
       return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4];
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
-      return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS];
+      return (struct tu_sampler*) &set->mapped_ptr[layout->offset / 4 + A6XX_TEX_CONST_DWORDS +
+                                                   array_index *
+                                                   (A6XX_TEX_CONST_DWORDS +
+                                                    sizeof(struct tu_sampler) / 4)];
    default:
       unreachable("unimplemented descriptor type");
       break;
@@ -2516,7 +2520,7 @@ write_tex_const(struct tu_cmd_buffer *cmd,
                 uint32_t *dst,
                 struct tu_descriptor_state *descriptors_state,
                 const struct tu_descriptor_map *map,
-                unsigned i)
+                unsigned i, unsigned array_index)
 {
    assert(descriptors_state->valid & (1 << map->set[i]));
 
@@ -2528,11 +2532,19 @@ write_tex_const(struct tu_cmd_buffer *cmd,
 
    switch (layout->type) {
    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
-   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
-      memcpy(dst, &set->mapped_ptr[layout->offset / 4], A6XX_TEX_CONST_DWORDS*4);
+      memcpy(dst, &set->mapped_ptr[layout->offset / 4 +
+                                   array_index * A6XX_TEX_CONST_DWORDS],
+             A6XX_TEX_CONST_DWORDS * 4);
+      break;
+   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      memcpy(dst, &set->mapped_ptr[layout->offset / 4 +
+                                   array_index *
+                                   (A6XX_TEX_CONST_DWORDS +
+                                    sizeof(struct tu_sampler) / 4)],
+             A6XX_TEX_CONST_DWORDS * 4);
       break;
    default:
       unreachable("unimplemented descriptor type");
@@ -2541,7 +2553,8 @@ write_tex_const(struct tu_cmd_buffer *cmd,
 
    if (layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
       const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
-      uint32_t a = cmd->state.subpass->input_attachments[map->value[i]].attachment;
+      uint32_t a = cmd->state.subpass->input_attachments[map->value[i] +
+                                                         array_index].attachment;
 
       assert(cmd->state.pass->attachments[a].needs_gmem);
       dst[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
@@ -2561,7 +2574,7 @@ write_tex_const(struct tu_cmd_buffer *cmd,
 static uint64_t
 buffer_ptr(struct tu_descriptor_state *descriptors_state,
            const struct tu_descriptor_map *map,
-           unsigned i)
+           unsigned i, unsigned array_index)
 {
    assert(descriptors_state->valid & (1 << map->set[i]));
 
@@ -2574,11 +2587,12 @@ buffer_ptr(struct tu_descriptor_state *descriptors_state,
    switch (layout->type) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-      return descriptors_state->dynamic_buffers[layout->dynamic_offset_offset];
+      return descriptors_state->dynamic_buffers[layout->dynamic_offset_offset +
+                                                array_index];
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-      return (uint64_t) set->mapped_ptr[layout->offset / 4 + 1] << 32 |
-                        set->mapped_ptr[layout->offset / 4];
+      return (uint64_t) set->mapped_ptr[layout->offset / 4 + array_index * 2 + 1] << 32 |
+                        set->mapped_ptr[layout->offset / 4 + array_index * 2];
    default:
       unreachable("unimplemented descriptor type");
       break;
@@ -2663,7 +2677,22 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
             continue;
          }
 
-         uint64_t va = buffer_ptr(descriptors_state, &link->ubo_map, i - 1);
+         /* Look through the UBO map to find our UBO index, and get the VA for
+          * that UBO.
+          */
+         uint64_t va = 0;
+         uint32_t ubo_idx = i - 1;
+         uint32_t ubo_map_base = 0;
+         for (int j = 0; j < link->ubo_map.num; j++) {
+            if (ubo_idx >= ubo_map_base &&
+                ubo_idx < ubo_map_base + link->ubo_map.array_size[j]) {
+               va = buffer_ptr(descriptors_state, &link->ubo_map, j,
+                               ubo_idx - ubo_map_base);
+               break;
+            }
+            ubo_map_base += link->ubo_map.array_size[j];
+         }
+         assert(va);
 
          tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
          tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
@@ -2684,9 +2713,8 @@ tu6_emit_ubos(struct tu_cs *cs, const struct tu_pipeline *pipeline,
    const struct tu_program_descriptor_linkage *link =
       &pipeline->program.link[type];
 
-   uint32_t num = MIN2(link->ubo_map.num, link->const_state.num_ubos);
+   uint32_t num = MIN2(link->ubo_map.num_desc, link->const_state.num_ubos);
    uint32_t anum = align(num, 2);
-   uint32_t i;
 
    if (!num)
       return;
@@ -2700,10 +2728,15 @@ tu6_emit_ubos(struct tu_cs *cs, const struct tu_pipeline *pipeline,
    tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
    tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
 
-   for (i = 0; i < num; i++)
-      tu_cs_emit_qw(cs, buffer_ptr(descriptors_state, &link->ubo_map, i));
+   unsigned emitted = 0;
+   for (unsigned i = 0; emitted < num && i < link->ubo_map.num; i++) {
+      for (unsigned j = 0; emitted < num && j < link->ubo_map.array_size[i]; j++) {
+         tu_cs_emit_qw(cs, buffer_ptr(descriptors_state, &link->ubo_map, i, j));
+         emitted++;
+      }
+   }
 
-   for (; i < anum; i++) {
+   for (; emitted < anum; emitted++) {
       tu_cs_emit(cs, 0xffffffff);
       tu_cs_emit(cs, 0xffffffff);
    }
@@ -2738,33 +2771,45 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd,
       &pipeline->program.link[type];
    VkResult result;
 
-   if (link->texture_map.num == 0 && link->sampler_map.num == 0) {
+   if (link->texture_map.num_desc == 0 && link->sampler_map.num_desc == 0) {
       *entry = (struct tu_cs_entry) {};
       return VK_SUCCESS;
    }
 
    /* allocate and fill texture state */
    struct ts_cs_memory tex_const;
-   result = tu_cs_alloc(device, draw_state, link->texture_map.num, A6XX_TEX_CONST_DWORDS, &tex_const);
+   result = tu_cs_alloc(device, draw_state, link->texture_map.num_desc,
+                        A6XX_TEX_CONST_DWORDS, &tex_const);
    if (result != VK_SUCCESS)
       return result;
 
+   int tex_index = 0;
    for (unsigned i = 0; i < link->texture_map.num; i++) {
-      write_tex_const(cmd,
-                      &tex_const.map[A6XX_TEX_CONST_DWORDS*i],
-                      descriptors_state, &link->texture_map, i);
+      for (int j = 0; j < link->texture_map.array_size[i]; j++) {
+         write_tex_const(cmd,
+                         &tex_const.map[A6XX_TEX_CONST_DWORDS * tex_index++],
+                         descriptors_state, &link->texture_map, i, j);
+      }
    }
 
    /* allocate and fill sampler state */
-   struct ts_cs_memory tex_samp;
-   result = tu_cs_alloc(device, draw_state, link->sampler_map.num, A6XX_TEX_SAMP_DWORDS, &tex_samp);
-   if (result != VK_SUCCESS)
-      return result;
+   struct ts_cs_memory tex_samp = { 0 };
+   if (link->sampler_map.num_desc) {
+      result = tu_cs_alloc(device, draw_state, link->sampler_map.num_desc,
+                           A6XX_TEX_SAMP_DWORDS, &tex_samp);
+      if (result != VK_SUCCESS)
+         return result;
 
-   for (unsigned i = 0; i < link->sampler_map.num; i++) {
-      struct tu_sampler *sampler = sampler_ptr(descriptors_state, &link->sampler_map, i);
-      memcpy(&tex_samp.map[A6XX_TEX_SAMP_DWORDS*i], sampler->state, sizeof(sampler->state));
-      *needs_border |= sampler->needs_border;
+      int sampler_index = 0;
+      for (unsigned i = 0; i < link->sampler_map.num; i++) {
+         for (int j = 0; j < link->sampler_map.array_size[i]; j++) {
+            struct tu_sampler *sampler = sampler_ptr(descriptors_state,
+                                                     &link->sampler_map, i, j);
+            memcpy(&tex_samp.map[A6XX_TEX_SAMP_DWORDS * sampler_index++],
+                   sampler->state, sizeof(sampler->state));
+            *needs_border |= sampler->needs_border;
+         }
+      }
    }
 
    unsigned tex_samp_reg, tex_const_reg, tex_count_reg;
@@ -2798,17 +2843,19 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd,
    if (result != VK_SUCCESS)
       return result;
 
-   /* output sampler state: */
-   tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
-   tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
-      CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
-      CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
-      CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
-      CP_LOAD_STATE6_0_NUM_UNIT(link->sampler_map.num));
-   tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
+   if (link->sampler_map.num_desc) {
+      /* output sampler state: */
+      tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
+      tu_cs_emit(&cs, CP_LOAD_STATE6_0_DST_OFF(0) |
+                 CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+                 CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+                 CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
+                 CP_LOAD_STATE6_0_NUM_UNIT(link->sampler_map.num_desc));
+      tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
 
-   tu_cs_emit_pkt4(&cs, tex_samp_reg, 2);
-   tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
+      tu_cs_emit_pkt4(&cs, tex_samp_reg, 2);
+      tu_cs_emit_qw(&cs, tex_samp.iova); /* SRC_ADDR_LO/HI */
+   }
 
    /* emit texture state: */
    tu_cs_emit_pkt7(&cs, tu6_stage2opcode(type), 3);
@@ -2816,14 +2863,14 @@ tu6_emit_textures(struct tu_cmd_buffer *cmd,
       CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
       CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
       CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
-      CP_LOAD_STATE6_0_NUM_UNIT(link->texture_map.num));
+      CP_LOAD_STATE6_0_NUM_UNIT(link->texture_map.num_desc));
    tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */
 
    tu_cs_emit_pkt4(&cs, tex_const_reg, 2);
    tu_cs_emit_qw(&cs, tex_const.iova); /* SRC_ADDR_LO/HI */
 
    tu_cs_emit_pkt4(&cs, tex_count_reg, 1);
-   tu_cs_emit(&cs, link->texture_map.num);
+   tu_cs_emit(&cs, link->texture_map.num_desc);
 
    *entry = tu_cs_end_sub_stream(draw_state, &cs);
    return VK_SUCCESS;
@@ -2860,7 +2907,8 @@ tu6_emit_ibo(struct tu_cmd_buffer *cmd,
       if (idx & IBO_SSBO) {
          idx &= ~IBO_SSBO;
 
-         uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx);
+         uint64_t va = buffer_ptr(descriptors_state, &link->ssbo_map, idx,
+                                  0 /* XXX */);
          /* We don't expose robustBufferAccess, so leave the size unlimited. */
          uint32_t sz = MAX_STORAGE_BUFFER_RANGE / 4;
 
@@ -2992,21 +3040,26 @@ tu6_emit_border_color(struct tu_cmd_buffer *cmd,
    struct ts_cs_memory ptr;
 
    VkResult result = tu_cs_alloc(cmd->device, &cmd->sub_cs,
-                                 vs_sampler->num + fs_sampler->num, 128 / 4,
+                                 vs_sampler->num_desc + fs_sampler->num_desc,
+                                 128 / 4,
                                  &ptr);
    if (result != VK_SUCCESS)
       return result;
 
    for (unsigned i = 0; i < vs_sampler->num; i++) {
-      struct tu_sampler *sampler = sampler_ptr(descriptors_state, vs_sampler, i);
-      memcpy(ptr.map, &border_color[sampler->border], 128);
-      ptr.map += 128 / 4;
+      for (unsigned j = 0; j < vs_sampler->array_size[i]; j++) {
+         struct tu_sampler *sampler = sampler_ptr(descriptors_state, vs_sampler, i, j);
+         memcpy(ptr.map, &border_color[sampler->border], 128);
+         ptr.map += 128 / 4;
+      }
    }
 
    for (unsigned i = 0; i < fs_sampler->num; i++) {
-      struct tu_sampler *sampler = sampler_ptr(descriptors_state, fs_sampler, i);
-      memcpy(ptr.map, &border_color[sampler->border], 128);
-      ptr.map += 128 / 4;
+      for (unsigned j = 0; j < fs_sampler->array_size[i]; j++) {
+         struct tu_sampler *sampler = sampler_ptr(descriptors_state, fs_sampler, i, j);
+         memcpy(ptr.map, &border_color[sampler->border], 128);
+         ptr.map += 128 / 4;
+      }
    }
 
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_BORDER_COLOR_BASE_ADDR_LO, 2);
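
[Editor's note: the offset arithmetic above is easier to see outside diff
form. A minimal sketch of the two element strides the patch introduces; the
constants are illustrative stand-ins, not values taken from the tree.]

#include <stdint.h>

#define TEX_CONST_DWORDS 16  /* assumed stand-in for A6XX_TEX_CONST_DWORDS    */
#define SAMPLER_DWORDS    4  /* assumed stand-in for sizeof(tu_sampler) / 4   */

/* SAMPLED_IMAGE-style bindings pack one texture record per element, so
 * elements stride by TEX_CONST_DWORDS alone (see write_tex_const above).
 */
static uint32_t *
tex_elem(uint32_t *set_ptr, uint32_t binding_offset, unsigned array_index)
{
   return &set_ptr[binding_offset / 4 + array_index * TEX_CONST_DWORDS];
}

/* COMBINED_IMAGE_SAMPLER interleaves texture state then sampler state per
 * element, so elements stride by the sum and the sampler sits
 * TEX_CONST_DWORDS into each element (see sampler_ptr above).
 */
static uint32_t *
combined_samp_elem(uint32_t *set_ptr, uint32_t binding_offset,
                   unsigned array_index)
{
   return &set_ptr[binding_offset / 4 + TEX_CONST_DWORDS +
                   array_index * (TEX_CONST_DWORDS + SAMPLER_DWORDS)];
}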
diff --git a/src/freedreno/vulkan/tu_descriptor_set.c b/src/freedreno/vulkan/tu_descriptor_set.c
index 70fa3cfe3d2..26f49001d09 100644
--- a/src/freedreno/vulkan/tu_descriptor_set.c
+++ b/src/freedreno/vulkan/tu_descriptor_set.c
@@ -21,6 +21,21 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
+
+/**
+ * @file
+ *
+ * The texture and sampler descriptors are laid out in a single global space
+ * across all shader stages, for both simplicity of implementation and because
+ * that seems to be how things have to be structured for border color
+ * handling.
+ *
+ * Each shader stage will declare its texture/sampler count based on the last
+ * descriptor set it uses.  At draw emit time (though it really should be
+ * CmdBind time), we upload the descriptor sets used by each shader stage to
+ * their stage.
+ */
+
 #include "tu_private.h"
 
 #include <assert.h>
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index ff523c984a4..f4f050ab8d9 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -43,6 +43,7 @@ struct tu_pipeline_builder
 {
    struct tu_device *device;
    struct tu_pipeline_cache *cache;
+   struct tu_pipeline_layout *layout;
    const VkAllocationCallbacks *alloc;
    const VkGraphicsPipelineCreateInfo *create_info;
 
@@ -358,7 +359,8 @@ tu6_blend_op(VkBlendOp op)
 }
 
 static void
-tu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs)
+tu6_emit_vs_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *vs)
 {
    uint32_t sp_vs_ctrl =
       A6XX_SP_VS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
@@ -368,8 +370,8 @@ tu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs)
    if (vs->need_pixlod)
       sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_PIXLODENABLE;
 
-   uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(vs->num_samp) |
-                           A6XX_SP_VS_CONFIG_NSAMP(vs->num_samp);
+   uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(shader->texture_map.num_desc) |
+                           A6XX_SP_VS_CONFIG_NSAMP(shader->sampler_map.num_desc);
    if (vs->instrlen)
       sp_vs_config |= A6XX_SP_VS_CONFIG_ENABLED;
 
@@ -386,7 +388,8 @@ tu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs)
 }
 
 static void
-tu6_emit_hs_config(struct tu_cs *cs, const struct ir3_shader_variant *hs)
+tu6_emit_hs_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *hs)
 {
    uint32_t sp_hs_config = 0;
    if (hs->instrlen)
@@ -404,7 +407,8 @@ tu6_emit_hs_config(struct tu_cs *cs, const struct ir3_shader_variant *hs)
 }
 
 static void
-tu6_emit_ds_config(struct tu_cs *cs, const struct ir3_shader_variant *ds)
+tu6_emit_ds_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *ds)
 {
    uint32_t sp_ds_config = 0;
    if (ds->instrlen)
@@ -419,7 +423,8 @@ tu6_emit_ds_config(struct tu_cs *cs, const struct ir3_shader_variant *ds)
 }
 
 static void
-tu6_emit_gs_config(struct tu_cs *cs, const struct ir3_shader_variant *gs)
+tu6_emit_gs_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *gs)
 {
    uint32_t sp_gs_config = 0;
    if (gs->instrlen)
@@ -437,7 +442,8 @@ tu6_emit_gs_config(struct tu_cs *cs, const struct ir3_shader_variant *gs)
 }
 
 static void
-tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs)
+tu6_emit_fs_config(struct tu_cs *cs, struct tu_shader *shader,
+                   const struct ir3_shader_variant *fs)
 {
    uint32_t sp_fs_ctrl =
       A6XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | 0x1000000 |
@@ -449,8 +455,8 @@ tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs)
    if (fs->need_pixlod)
       sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_PIXLODENABLE;
 
-   uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(fs->num_samp) |
-                           A6XX_SP_FS_CONFIG_NSAMP(fs->num_samp) |
+   uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(shader->texture_map.num_desc) |
+                           A6XX_SP_FS_CONFIG_NSAMP(shader->sampler_map.num_desc) |
                            A6XX_SP_FS_CONFIG_NIBO(fs->image_mapping.num_ibo);
    if (fs->instrlen)
       sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED;
@@ -477,7 +483,8 @@ tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs)
 }
 
 static void
-tu6_emit_cs_config(struct tu_cs *cs, const struct ir3_shader_variant *v)
+tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
+                   const struct ir3_shader_variant *v)
 {
    tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
    tu_cs_emit(cs, 0xff);
@@ -490,8 +497,8 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct ir3_shader_variant *v)
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CONFIG, 2);
    tu_cs_emit(cs, A6XX_SP_CS_CONFIG_ENABLED |
               A6XX_SP_CS_CONFIG_NIBO(v->image_mapping.num_ibo) |
-              A6XX_SP_CS_CONFIG_NTEX(v->num_samp) |
-              A6XX_SP_CS_CONFIG_NSAMP(v->num_samp));
+              A6XX_SP_CS_CONFIG_NTEX(shader->texture_map.num_desc) |
+              A6XX_SP_CS_CONFIG_NSAMP(shader->sampler_map.num_desc));
    tu_cs_emit(cs, v->instrlen);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CTRL_REG0, 1);
@@ -1036,11 +1043,11 @@ tu6_emit_program(struct tu_cs *cs,
       fs = &dummy_variant;
    }
 
-   tu6_emit_vs_config(cs, vs);
-   tu6_emit_hs_config(cs, hs);
-   tu6_emit_ds_config(cs, ds);
-   tu6_emit_gs_config(cs, gs);
-   tu6_emit_fs_config(cs, fs);
+   tu6_emit_vs_config(cs, builder->shaders[MESA_SHADER_VERTEX], vs);
+   tu6_emit_hs_config(cs, builder->shaders[MESA_SHADER_TESS_CTRL], hs);
+   tu6_emit_ds_config(cs, builder->shaders[MESA_SHADER_TESS_EVAL], ds);
+   tu6_emit_gs_config(cs, builder->shaders[MESA_SHADER_GEOMETRY], gs);
+   tu6_emit_fs_config(cs, builder->shaders[MESA_SHADER_FRAGMENT], fs);
 
    tu6_emit_vs_system_values(cs, vs);
    tu6_emit_vpc(cs, vs, fs, binning_pass);
@@ -1535,7 +1542,8 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder)
          continue;
 
       struct tu_shader *shader =
-         tu_shader_create(builder->device, stage, stage_info, builder->alloc);
+         tu_shader_create(builder->device, stage, stage_info, builder->layout,
+                          builder->alloc);
       if (!shader)
          return VK_ERROR_OUT_OF_HOST_MEMORY;
 
@@ -1910,11 +1918,14 @@ tu_pipeline_builder_init_graphics(
    const VkGraphicsPipelineCreateInfo *create_info,
    const VkAllocationCallbacks *alloc)
 {
+   TU_FROM_HANDLE(tu_pipeline_layout, layout, create_info->layout);
+
    *builder = (struct tu_pipeline_builder) {
       .device = dev,
       .cache = cache,
       .create_info = create_info,
       .alloc = alloc,
+      .layout = layout,
    };
 
    builder->rasterizer_discard =
@@ -2003,7 +2014,7 @@ tu6_emit_compute_program(struct tu_cs *cs,
 {
    const struct ir3_shader_variant *v = &shader->variants[0];
 
-   tu6_emit_cs_config(cs, v);
+   tu6_emit_cs_config(cs, shader, v);
 
    /* The compute program is the only one in the pipeline, so 0 offset. */
    tu6_emit_shader_object(cs, MESA_SHADER_COMPUTE, v, binary_bo, 0);
@@ -2044,6 +2055,7 @@ tu_compute_pipeline_create(VkDevice device,
                            VkPipeline *pPipeline)
 {
    TU_FROM_HANDLE(tu_device, dev, device);
+   TU_FROM_HANDLE(tu_pipeline_layout, layout, pCreateInfo->layout);
    const VkPipelineShaderStageCreateInfo *stage_info = &pCreateInfo->stage;
    VkResult result;
 
@@ -2053,11 +2065,13 @@ tu_compute_pipeline_create(VkDevice device,
    if (result != VK_SUCCESS)
       return result;
 
+   pipeline->layout = layout;
+
    struct tu_shader_compile_options options;
    tu_shader_compile_options_init(&options, NULL);
 
    struct tu_shader *shader =
-      tu_shader_create(dev, MESA_SHADER_COMPUTE, stage_info, pAllocator);
+      tu_shader_create(dev, MESA_SHADER_COMPUTE, stage_info, layout, pAllocator);
    if (!shader) {
       result = VK_ERROR_OUT_OF_HOST_MEMORY;
       goto fail;
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 7541e729fee..4f78f7163e5 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -1050,10 +1050,12 @@ struct tu_shader_compile_options
 struct tu_descriptor_map
 {
    /* TODO: avoid fixed size array/justify the size */
-   unsigned num;
+   unsigned num; /* number of array entries */
+   unsigned num_desc; /* Number of descriptors (sum of array_size[]) */
    int set[64];
    int binding[64];
    int value[64];
+   int array_size[64];
 };
 
 struct tu_shader
@@ -1080,6 +1082,7 @@ struct tu_shader *
 tu_shader_create(struct tu_device *dev,
                  gl_shader_stage stage,
                  const VkPipelineShaderStageCreateInfo *stage_info,
+                 struct tu_pipeline_layout *layout,
                  const VkAllocationCallbacks *alloc);
 
 void
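
[Editor's note: with the new fields, entry i of a tu_descriptor_map covers
array_size[i] consecutive "flat" descriptor slots, and num_desc is their
total. A minimal standalone sketch (not from the tree) of the reverse lookup
that tu6_emit_user_consts now performs over the UBO map:]

#include <stdbool.h>

struct map_sketch {
   unsigned num;       /* number of (set, binding) entries     */
   unsigned num_desc;  /* sum of array_size[] over all entries */
   int array_size[64];
};

/* Map a flat descriptor index back to (entry, element-within-entry). */
static bool
map_lookup(const struct map_sketch *map, unsigned flat,
           unsigned *entry, unsigned *elem)
{
   unsigned base = 0;
   for (unsigned i = 0; i < map->num; i++) {
      if (flat < base + map->array_size[i]) {
         *entry = i;
         *elem = flat - base;
         return true;
      }
      base += map->array_size[i];
   }
   return false; /* flat >= map->num_desc */
}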
diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c
index 9a904e61d07..1fc9da9a779 100644
--- a/src/freedreno/vulkan/tu_shader.c
+++ b/src/freedreno/vulkan/tu_shader.c
@@ -109,26 +109,35 @@ tu_sort_variables_by_location(struct exec_list *variables)
 }
 
 static unsigned
-map_add(struct tu_descriptor_map *map, int set, int binding, int value)
+map_add(struct tu_descriptor_map *map, int set, int binding, int value,
+        int array_size)
 {
-   unsigned index;
-   for (index = 0; index < map->num; index++) {
-      if (set == map->set[index] && binding == map->binding[index])
-         break;
+   unsigned index = 0;
+   for (unsigned i = 0; i < map->num; i++) {
+      if (set == map->set[i] && binding == map->binding[i]) {
+         assert(value == map->value[i]);
+         assert(array_size == map->array_size[i]);
+         return index;
+      }
+      index += map->array_size[i];
    }
 
-   assert(index < ARRAY_SIZE(map->set));
+   assert(index == map->num_desc);
+
+   map->set[map->num] = set;
+   map->binding[map->num] = binding;
+   map->value[map->num] = value;
+   map->array_size[map->num] = array_size;
+   map->num++;
+   map->num_desc += array_size;
 
-   map->set[index] = set;
-   map->binding[index] = binding;
-   map->value[index] = value;
-   map->num = MAX2(map->num, index + 1);
    return index;
 }
 
 static void
 lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
-                        struct tu_shader *shader)
+                        struct tu_shader *shader,
+                        const struct tu_pipeline_layout *layout)
 {
    nir_ssa_def *index = NULL;
    unsigned base_index = 0;
@@ -184,39 +193,39 @@ lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,
       nir_tex_instr_remove_src(instr, src_idx);
    }
 
-   if (array_elements > 1)
-      tu_finishme("texture/sampler array");
-
-   if (is_sampler) {
-      instr->sampler_index = map_add(&shader->sampler_map,
-                                     deref->var->data.descriptor_set,
-                                     deref->var->data.binding,
-                                     0);
-      instr->sampler_index += base_index;
-   } else {
-      instr->texture_index = map_add(&shader->texture_map,
-                                     deref->var->data.descriptor_set,
-                                     deref->var->data.binding,
-                                     deref->var->data.index);
-      instr->texture_index += base_index;
-      instr->texture_array_size = array_elements;
-   }
+   uint32_t set = deref->var->data.descriptor_set;
+   uint32_t binding = deref->var->data.binding;
+   struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
+   struct tu_descriptor_set_binding_layout *binding_layout =
+      &set_layout->binding[binding];
+
+   int desc_index = map_add(is_sampler ?
+                            &shader->sampler_map : &shader->texture_map,
+                            deref->var->data.descriptor_set,
+                            deref->var->data.binding,
+                            deref->var->data.index,
+                            binding_layout->array_size) + base_index;
+   if (is_sampler)
+      instr->sampler_index = desc_index;
+   else
+      instr->texture_index = desc_index;
 }
 
 static bool
-lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader)
+lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader,
+                const struct tu_pipeline_layout *layout)
 {
    int texture_idx =
       nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);
 
    if (texture_idx >= 0)
-      lower_tex_src_to_offset(b, instr, texture_idx, shader);
+      lower_tex_src_to_offset(b, instr, texture_idx, shader, layout);
 
    int sampler_idx =
       nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);
 
    if (sampler_idx >= 0)
-      lower_tex_src_to_offset(b, instr, sampler_idx, shader);
+      lower_tex_src_to_offset(b, instr, sampler_idx, shader, layout);
 
    if (texture_idx < 0 && sampler_idx < 0)
       return false;
@@ -226,7 +235,8 @@ lower_sampler(nir_builder *b, nir_tex_instr *instr, struct tu_shader *shader)
 
 static bool
 lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
-                struct tu_shader *shader)
+                struct tu_shader *shader,
+                const struct tu_pipeline_layout *layout)
 {
    /* TODO: remove this when layered rendering is implemented */
    if (instr->intrinsic == nir_intrinsic_load_layer_id) {
@@ -260,23 +270,30 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
       return false;
 
    nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
-   if (!const_val || const_val->u32 != 0)
-      tu_finishme("non-zero vulkan_resource_index array index");
 
 
    unsigned set = nir_intrinsic_desc_set(instr);
    unsigned binding = nir_intrinsic_binding(instr);
+   struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
+   struct tu_descriptor_set_binding_layout *binding_layout =
+      &set_layout->binding[binding];
    unsigned index = 0;
 
    switch (nir_intrinsic_desc_type(instr)) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      if (!const_val || const_val->u32 != 0)
+         tu_finishme("non-zero vulkan_resource_index array index");
       /* skip index 0 which is used for push constants */
-      index = map_add(&shader->ubo_map, set, binding, 0) + 1;
+      index = map_add(&shader->ubo_map, set, binding, 0,
+                      binding_layout->array_size) + 1;
       break;
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-      index = map_add(&shader->ssbo_map, set, binding, 0);
+      if (!const_val)
+         tu_finishme("non-constant vulkan_resource_index array index");
+      index = map_add(&shader->ssbo_map, set, binding, 0,
+                      binding_layout->array_size);
       break;
    default:
       tu_finishme("unsupported desc_type for vulkan_resource_index");
@@ -291,7 +308,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
 }
 
 static bool
-lower_impl(nir_function_impl *impl, struct tu_shader *shader)
+lower_impl(nir_function_impl *impl, struct tu_shader *shader,
+            const struct tu_pipeline_layout *layout)
 {
    nir_builder b;
    nir_builder_init(&b, impl);
@@ -302,10 +320,10 @@ lower_impl(nir_function_impl *impl, struct tu_shader *shader)
          b.cursor = nir_before_instr(instr);
          switch (instr->type) {
          case nir_instr_type_tex:
-            progress |= lower_sampler(&b, nir_instr_as_tex(instr), shader);
+            progress |= lower_sampler(&b, nir_instr_as_tex(instr), shader, layout);
             break;
          case nir_instr_type_intrinsic:
-            progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader);
+            progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, layout);
             break;
          default:
             break;
@@ -317,13 +335,14 @@ lower_impl(nir_function_impl *impl, struct tu_shader *shader)
 }
 
 static bool
-tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader)
+tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
+            const struct tu_pipeline_layout *layout)
 {
    bool progress = false;
 
    nir_foreach_function(function, shader) {
       if (function->impl)
-         progress |= lower_impl(function->impl, tu_shader);
+         progress |= lower_impl(function->impl, tu_shader, layout);
    }
 
    return progress;
@@ -333,6 +352,7 @@ struct tu_shader *
 tu_shader_create(struct tu_device *dev,
                  gl_shader_stage stage,
                  const VkPipelineShaderStageCreateInfo *stage_info,
+                 struct tu_pipeline_layout *layout,
                  const VkAllocationCallbacks *alloc)
 {
    const struct tu_shader_module *module =
@@ -426,7 +446,7 @@ tu_shader_create(struct tu_device *dev,
    if (stage == MESA_SHADER_FRAGMENT)
       NIR_PASS_V(nir, nir_lower_input_attachments, true);
 
-   NIR_PASS_V(nir, tu_lower_io, shader);
+   NIR_PASS_V(nir, tu_lower_io, shader, layout);
 
    NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, 0);
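
[Editor's note: the map_add() contract after this change, reduced to a
standalone sketch that omits the value[] field; this is not part of the
patch. Entries deduplicate on (set, binding), and the return value is the
entry's first flat slot, so an arrayed binding of size N occupies
[ret, ret + N).]

#include <assert.h>

struct mini_map {
   unsigned num, num_desc;
   int set[64], binding[64], array_size[64];
};

static unsigned
mini_map_add(struct mini_map *m, int set, int binding, int array_size)
{
   unsigned base = 0;
   for (unsigned i = 0; i < m->num; i++) {
      if (m->set[i] == set && m->binding[i] == binding)
         return base;               /* existing entry keeps its base */
      base += m->array_size[i];
   }
   m->set[m->num] = set;
   m->binding[m->num] = binding;
   m->array_size[m->num] = array_size;
   m->num++;
   m->num_desc += array_size;
   return base;                      /* == the old num_desc */
}

int main(void)
{
   struct mini_map m = {0};
   assert(mini_map_add(&m, 0, 0, 4) == 0); /* binding 0 -> slots 0..3 */
   assert(mini_map_add(&m, 0, 1, 1) == 4); /* binding 1 -> slot 4     */
   assert(mini_map_add(&m, 0, 0, 4) == 0); /* dedup: same base index  */
   assert(m.num == 2 && m.num_desc == 5);
   return 0;
}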
 
