Mesa (main): radv: Add task shader arguments.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu May 12 00:59:26 UTC 2022


Module: Mesa
Branch: main
Commit: b3ea6c610363c26cfc461b92c7a002b94a2761fe
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3ea6c610363c26cfc461b92c7a002b94a2761fe

Author: Timur Kristóf <timur.kristof at gmail.com>
Date:   Sun Jan 23 18:35:12 2022 +0100

radv: Add task shader arguments.

Mostly the same as for compute shaders, but with a few extras:

task_ring_offsets:
Plays the same role for task shaders that ring_offsets plays
for graphics shaders: it holds the address of a buffer that
contains the ring buffer descriptors.

task_ring_entry:
Index that can be used to address the draw and payload rings.

draw_id:
Same meaning as in graphics shaders.

task_ib_addr/task_ib_stride:
Indirect buffer address and stride from the draw calls.
These are used to emulate the firstTask feature of NV_mesh_shader.

Signed-off-by: Timur Kristóf <timur.kristof at gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14929>

---

 src/amd/vulkan/radv_constants.h   |  4 +++-
 src/amd/vulkan/radv_device.c      |  6 +++++-
 src/amd/vulkan/radv_shader.h      |  6 +++++-
 src/amd/vulkan/radv_shader_args.c | 42 +++++++++++++++++++++++++++++++++++++--
 src/amd/vulkan/radv_shader_args.h |  7 +++++++
 5 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/src/amd/vulkan/radv_constants.h b/src/amd/vulkan/radv_constants.h
index 5787f042316..4b6d3b9667b 100644
--- a/src/amd/vulkan/radv_constants.h
+++ b/src/amd/vulkan/radv_constants.h
@@ -74,7 +74,9 @@
 #define RING_GSVS_GS             4
 #define RING_HS_TESS_FACTOR      5
 #define RING_HS_TESS_OFFCHIP     6
-#define RING_PS_SAMPLE_POSITIONS 7
+#define RING_TS_DRAW             7
+#define RING_TS_PAYLOAD          8
+#define RING_PS_SAMPLE_POSITIONS 9
 
 /* max number of descriptor sets */
 #define MAX_SETS 32
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 9822deb815d..63de77db804 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -3707,6 +3707,10 @@ radv_fill_shader_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_
 
    desc += 8;
 
+   /* Reserved for task shader rings. */
+
+   desc += 8;
+
    if (add_sample_positions) {
       /* add sample positions after all rings */
       memcpy(desc, queue->device->sample_locations_1x, 8);
@@ -4004,7 +4008,7 @@ radv_update_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave
        add_sample_positions) {
       uint32_t size = 0;
       if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || add_sample_positions) {
-         size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
+         size = 144; /* 2 dword + 2 padding + 4 dword * 8 */
          if (add_sample_positions)
             size += 128; /* 64+32+16+8 = 120 bytes */
       } else if (scratch_bo) {
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index f843776f102..373e3410e78 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -146,7 +146,8 @@ enum radv_ud_index {
    AC_UD_NGG_CULLING_SETTINGS = 7,
    AC_UD_NGG_VIEWPORT = 8,
    AC_UD_FORCE_VRS_RATES = 9,
-   AC_UD_SHADER_START = 10,
+   AC_UD_TASK_RING_ENTRY = 10,
+   AC_UD_SHADER_START = 11,
    AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
    AC_UD_VS_BASE_VERTEX_START_INSTANCE,
    AC_UD_VS_PROLOG_INPUTS,
@@ -155,6 +156,9 @@ enum radv_ud_index {
    AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
    AC_UD_CS_SBT_DESCRIPTORS,
    AC_UD_CS_RAY_LAUNCH_SIZE,
+   AC_UD_CS_TASK_RING_OFFSETS,
+   AC_UD_CS_TASK_DRAW_ID,
+   AC_UD_CS_TASK_IB,
    AC_UD_CS_MAX_UD,
    AC_UD_GS_MAX_UD,
    AC_UD_TCS_MAX_UD,
diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c
index c4c95bf4d74..084c779934b 100644
--- a/src/amd/vulkan/radv_shader_args.c
+++ b/src/amd/vulkan/radv_shader_args.c
@@ -50,7 +50,8 @@ set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_
 static void
 set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
 {
-   bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
+   bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS &&
+                             idx != AC_UD_CS_TASK_RING_OFFSETS;
 
    set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
 }
@@ -157,18 +158,26 @@ allocate_user_sgprs(enum chip_class chip_class, const struct radv_shader_info *i
    /* 2 user sgprs will always be allocated for scratch/rings */
    user_sgpr_count += 2;
 
+   if (stage == MESA_SHADER_TASK)
+      user_sgpr_count += 2; /* task descriptors */
+
    /* prolog inputs */
    if (info->vs.has_prolog)
       user_sgpr_count += 2;
 
    switch (stage) {
    case MESA_SHADER_COMPUTE:
+   case MESA_SHADER_TASK:
       if (info->cs.uses_sbt)
          user_sgpr_count += 1;
       if (info->cs.uses_grid_size)
          user_sgpr_count += args->load_grid_size_from_user_sgpr ? 3 : 2;
       if (info->cs.uses_ray_launch_size)
          user_sgpr_count += 3;
+      if (info->vs.needs_draw_id)
+         user_sgpr_count += 1;
+      if (info->cs.uses_task_rings)
+         user_sgpr_count += 4; /* ring_entry, 2x ib_addr, ib_stride */
       break;
    case MESA_SHADER_FRAGMENT:
       break;
@@ -212,7 +221,8 @@ allocate_user_sgprs(enum chip_class chip_class, const struct radv_shader_info *i
    if (info->so.num_outputs)
       user_sgpr_count++;
 
-   uint32_t available_sgprs = chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
+   uint32_t available_sgprs =
+      chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
    uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
    uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask);
 
@@ -527,6 +537,9 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
    if (args->explicit_scratch_args) {
       ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
    }
+   if (stage == MESA_SHADER_TASK) {
+      ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets);
+   }
 
    /* To ensure prologs match the main VS, VS specific input SGPRs have to be placed before other
     * sgprs.
@@ -534,6 +547,7 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
 
    switch (stage) {
    case MESA_SHADER_COMPUTE:
+   case MESA_SHADER_TASK:
       declare_global_input_sgprs(info, &user_sgpr_info, args);
 
       if (info->cs.uses_sbt) {
@@ -551,6 +565,16 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
          ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.ray_launch_size);
       }
 
+      if (info->vs.needs_draw_id) {
+         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
+      }
+
+      if (info->cs.uses_task_rings) {
+         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry);
+         ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->task_ib_addr);
+         ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->task_ib_stride);
+      }
+
       for (int i = 0; i < 3; i++) {
          if (info->cs.uses_block_id[i]) {
             ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
@@ -750,6 +774,9 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
    uint8_t user_sgpr_idx = 0;
 
    set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);
+   if (stage == MESA_SHADER_TASK) {
+      set_loc_shader_ptr(args, AC_UD_CS_TASK_RING_OFFSETS, &user_sgpr_idx);
+   }
 
    /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
     * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
@@ -765,6 +792,7 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
 
    switch (stage) {
    case MESA_SHADER_COMPUTE:
+   case MESA_SHADER_TASK:
       if (args->ac.sbt_descriptors.used) {
          set_loc_shader_ptr(args, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx);
       }
@@ -775,6 +803,16 @@ radv_declare_shader_args(enum chip_class chip_class, const struct radv_pipeline_
       if (args->ac.ray_launch_size.used) {
          set_loc_shader(args, AC_UD_CS_RAY_LAUNCH_SIZE, &user_sgpr_idx, 3);
       }
+      if (args->ac.draw_id.used) {
+         set_loc_shader(args, AC_UD_CS_TASK_DRAW_ID, &user_sgpr_idx, 1);
+      }
+      if (args->ac.task_ring_entry.used) {
+         set_loc_shader(args, AC_UD_TASK_RING_ENTRY, &user_sgpr_idx, 1);
+      }
+      if (args->task_ib_addr.used) {
+         assert(args->task_ib_stride.used);
+         set_loc_shader(args, AC_UD_CS_TASK_IB, &user_sgpr_idx, 3);
+      }
       break;
    case MESA_SHADER_VERTEX:
       if (args->ac.view_index.used)
diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h
index ed202a09fae..b510c31d0ef 100644
--- a/src/amd/vulkan/radv_shader_args.h
+++ b/src/amd/vulkan/radv_shader_args.h
@@ -36,7 +36,10 @@ struct radv_shader_args {
    struct ac_shader_args ac;
 
    struct ac_arg descriptor_sets[MAX_SETS];
+   /* User data 0/1. GFX: descriptor list, Compute: scratch BO */
    struct ac_arg ring_offsets;
+   /* User data 2/3. Same as the descriptor list above, but for task shaders. */
+   struct ac_arg task_ring_offsets;
 
    /* Streamout */
    struct ac_arg streamout_buffers;
@@ -47,6 +50,10 @@ struct radv_shader_args {
    struct ac_arg ngg_viewport_scale[2];
    struct ac_arg ngg_viewport_translate[2];
 
+   /* Task shaders */
+   struct ac_arg task_ib_addr;
+   struct ac_arg task_ib_stride;
+
    struct ac_arg prolog_inputs;
    struct ac_arg vs_inputs[MAX_VERTEX_ATTRIBS];
 



More information about the mesa-commit mailing list