Mesa (main): tu, freedreno/a6xx, ir3: Rewrite tess PrimID handling

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Aug 5 17:18:01 UTC 2021


Module: Mesa
Branch: main
Commit: 8115cde3ba61711302b969a359ca8d36e3f54780
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=8115cde3ba61711302b969a359ca8d36e3f54780

Author: Connor Abbott <cwabbott0 at gmail.com>
Date:   Mon Aug  2 11:55:15 2021 +0200

tu, freedreno/a6xx, ir3: Rewrite tess PrimID handling

The previous handling conflated RelPatchID and PrimID. This resulted in
an incorrect gl_PrimitiveID when doing draw splitting, and it didn't
work with PrimID passthrough, which fills the VPC slot with the
"correct" PrimID value from the tess factor BO, which we left as 0.
Replace PrimID in
the tess lowering pass with a new RelPatchID sysval, and replace PrimID
with RelPatchID in the VS input code in turnip/freedreno at the same
time so that there is no net change in the tess lowering code. However,
now we have to add new mechanisms for getting the user-level PrimID:

- In the TCS it comes from the VS, just like gl_PrimitiveIDIn in the GS.
  This means we have to add another register to our VS->TCS ABI. I
  decided to put PrimID in r0.z, after the TCS header and RelPatchID,
  because it might not be read in the TCS.
- If any stage after the TCS uses PrimID, the TCS stores it in the first
  dword of the tess factor BO, and it is read by the fixed-function
  tessellator and accessed in the TES via the newly-uncovered DSPRIMID
  field. If we have tess and GS, the TES passes this value through to
  the GS in the same way as the VS does. PrimID passthrough for reading
  it in the FS when there's tess but no GS also "just works" once we
  start storing it in the TCS. In particular this fixes
  dEQP-VK.pipeline.misc.primitive_id_from_tess which tests exactly that.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12166>

---

 src/compiler/nir/nir_intrinsics.py                 |  1 +
 src/compiler/shader_enums.c                        |  1 +
 src/compiler/shader_enums.h                        |  5 ++
 src/freedreno/ci/deqp-freedreno-a630-fails.txt     |  1 -
 src/freedreno/ir3/ir3_compiler_nir.c               | 45 +++++++++++++----
 src/freedreno/ir3/ir3_context.h                    |  1 +
 src/freedreno/ir3/ir3_nir_lower_tess.c             | 57 +++++++++++++++-------
 src/freedreno/ir3/ir3_shader.c                     |  3 ++
 src/freedreno/ir3/ir3_shader.h                     |  6 +++
 src/freedreno/vulkan/tu_pipeline.c                 | 28 +++++++++--
 src/gallium/drivers/freedreno/a6xx/fd6_draw.c      |  6 +++
 src/gallium/drivers/freedreno/a6xx/fd6_program.c   | 27 ++++++----
 .../freedreno/ci/piglit-freedreno-a630-fails.txt   |  1 -
 13 files changed, 139 insertions(+), 43 deletions(-)

diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 05e7ee7c912..f3f62ad452b 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -1021,6 +1021,7 @@ system_value("hs_patch_stride_ir3", 1)
 system_value("tess_factor_base_ir3", 2)
 system_value("tess_param_base_ir3", 2)
 system_value("tcs_header_ir3", 1)
+system_value("rel_patch_id_ir3", 1)
 
 # System values for freedreno compute shaders.
 system_value("subgroup_id_shift_ir3", 1)
diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
index 4ac7758a00b..8e6babed870 100644
--- a/src/compiler/shader_enums.c
+++ b/src/compiler/shader_enums.c
@@ -307,6 +307,7 @@ gl_system_value_name(gl_system_value sysval)
      ENUM(SYSTEM_VALUE_RAY_GEOMETRY_INDEX),
      ENUM(SYSTEM_VALUE_GS_HEADER_IR3),
      ENUM(SYSTEM_VALUE_TCS_HEADER_IR3),
+     ENUM(SYSTEM_VALUE_REL_PATCH_ID_IR3),
      ENUM(SYSTEM_VALUE_FRAG_SHADING_RATE),
    };
    STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index 8ae14bc0894..1833b8c1ed0 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -779,6 +779,11 @@ typedef enum
    SYSTEM_VALUE_GS_HEADER_IR3,
    SYSTEM_VALUE_TCS_HEADER_IR3,
 
+   /* IR3 specific system value that contains the patch id for the current
+    * subdraw.
+    */
+   SYSTEM_VALUE_REL_PATCH_ID_IR3,
+
    /**
     * Fragment shading rate used for KHR_fragment_shading_rate (Vulkan).
     */
diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt
index 2f5f4d3a68a..76efb739831 100644
--- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt
+++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt
@@ -50,7 +50,6 @@ dEQP-VK.api.info.format_properties.g8_b8_r8_3plane_420_unorm,Fail
 dEQP-VK.api.info.image_format_properties.2d.optimal.g8_b8_r8_3plane_420_unorm,Fail
 dEQP-VK.graphicsfuzz.spv-stable-maze-flatten-copy-composite,Fail
 dEQP-VK.graphicsfuzz.spv-stable-pillars-volatile-nontemporal-store,Fail
-dEQP-VK.pipeline.misc.primitive_id_from_tess,Fail
 dEQP-VK.spirv_assembly.instruction.spirv1p4.opcopylogical.nested_arrays_different_strides,Fail
 dEQP-VK.ssbo.layout.2_level_array.scalar.row_major_mat4_store_cols,Fail
 dEQP-VK.ssbo.layout.2_level_array.std140.row_major_mat4_comp_access_store_cols,Fail
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 62d22f50beb..a5bc577f726 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1703,7 +1703,15 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
       dst[0] = ctx->tcs_header;
       break;
 
+   case nir_intrinsic_load_rel_patch_id_ir3:
+      dst[0] = ctx->rel_patch_id;
+      break;
+
    case nir_intrinsic_load_primitive_id:
+      if (!ctx->primitive_id) {
+         ctx->primitive_id =
+            create_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, 0x1);
+      }
       dst[0] = ctx->primitive_id;
       break;
 
@@ -3734,6 +3742,8 @@ emit_instructions(struct ir3_context *ctx)
       if (has_tess) {
          ctx->tcs_header =
             create_sysval_input(ctx, SYSTEM_VALUE_TCS_HEADER_IR3, 0x1);
+         ctx->rel_patch_id =
+            create_sysval_input(ctx, SYSTEM_VALUE_REL_PATCH_ID_IR3, 0x1);
          ctx->primitive_id =
             create_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, 0x1);
       } else if (has_gs) {
@@ -3746,21 +3756,22 @@ emit_instructions(struct ir3_context *ctx)
    case MESA_SHADER_TESS_CTRL:
       ctx->tcs_header =
          create_sysval_input(ctx, SYSTEM_VALUE_TCS_HEADER_IR3, 0x1);
-      ctx->primitive_id =
-         create_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, 0x1);
+      ctx->rel_patch_id =
+         create_sysval_input(ctx, SYSTEM_VALUE_REL_PATCH_ID_IR3, 0x1);
       break;
    case MESA_SHADER_TESS_EVAL:
-      if (has_gs)
+      if (has_gs) {
          ctx->gs_header =
             create_sysval_input(ctx, SYSTEM_VALUE_GS_HEADER_IR3, 0x1);
-      ctx->primitive_id =
-         create_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, 0x1);
+         ctx->primitive_id =
+            create_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, 0x1);
+      }
+      ctx->rel_patch_id =
+         create_sysval_input(ctx, SYSTEM_VALUE_REL_PATCH_ID_IR3, 0x1);
       break;
    case MESA_SHADER_GEOMETRY:
       ctx->gs_header =
          create_sysval_input(ctx, SYSTEM_VALUE_GS_HEADER_IR3, 0x1);
-      ctx->primitive_id =
-         create_sysval_input(ctx, SYSTEM_VALUE_PRIMITIVE_ID, 0x1);
       break;
    default:
       break;
@@ -3986,6 +3997,19 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
          struct ir3_instruction *out = ir3_collect(ctx, ctx->primitive_id);
          outputs[outputs_count] = out;
          outidxs[outputs_count] = n;
+         if (so->type == MESA_SHADER_VERTEX && ctx->rel_patch_id)
+            regids[outputs_count] = regid(0, 2);
+         else
+            regids[outputs_count] = regid(0, 1);
+         outputs_count++;
+      }
+
+      if (so->type == MESA_SHADER_VERTEX && ctx->rel_patch_id) {
+         unsigned n = so->outputs_count++;
+         so->outputs[n].slot = VARYING_SLOT_REL_PATCH_ID_IR3;
+         struct ir3_instruction *out = ir3_collect(ctx, ctx->rel_patch_id);
+         outputs[outputs_count] = out;
+         outidxs[outputs_count] = n;
          regids[outputs_count] = regid(0, 1);
          outputs_count++;
       }
@@ -4188,7 +4212,9 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
        */
 
       ctx->tcs_header->dsts[0]->num = regid(0, 0);
-      ctx->primitive_id->dsts[0]->num = regid(0, 1);
+      ctx->rel_patch_id->dsts[0]->num = regid(0, 1);
+      if (ctx->primitive_id)
+         ctx->primitive_id->dsts[0]->num = regid(0, 2);
    } else if (ctx->gs_header) {
       /* We need to have these values in the same registers between producer
        * (VS or DS) and GS since the producer chains to GS and doesn't get
@@ -4196,7 +4222,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
        */
 
       ctx->gs_header->dsts[0]->num = regid(0, 0);
-      ctx->primitive_id->dsts[0]->num = regid(0, 1);
+      if (ctx->primitive_id)
+         ctx->primitive_id->dsts[0]->num = regid(0, 1);
    } else if (so->num_sampler_prefetch) {
       assert(so->type == MESA_SHADER_FRAGMENT);
       int idx = 0;
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index 17bfb48ac1c..18dfcabb2ce 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -98,6 +98,7 @@ struct ir3_context {
    struct ir3_instruction *patch_vertices_in;
    struct ir3_instruction *tcs_header;
    struct ir3_instruction *tess_coord;
+   struct ir3_instruction *rel_patch_id;
 
    /* Compute shader inputs: */
    struct ir3_instruction *local_invocation_id, *work_group_id;
diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c
index 296a6f58c7c..6426a7b913d 100644
--- a/src/freedreno/ir3/ir3_nir_lower_tess.c
+++ b/src/freedreno/ir3/ir3_nir_lower_tess.c
@@ -76,7 +76,8 @@ build_local_primitive_id(nir_builder *b, struct state *state)
 static bool
 is_tess_levels(gl_varying_slot slot)
 {
-   return (slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
+   return (slot == VARYING_SLOT_PRIMITIVE_ID ||
+           slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
            slot == VARYING_SLOT_TESS_LEVEL_INNER);
 }
 
@@ -391,9 +392,9 @@ build_per_vertex_offset(nir_builder *b, struct state *state,
                         nir_ssa_def *vertex, uint32_t location, uint32_t comp,
                         nir_ssa_def *offset)
 {
-   nir_ssa_def *primitive_id = nir_load_primitive_id(b);
+   nir_ssa_def *patch_id = nir_load_rel_patch_id_ir3(b);
    nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
-   nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
+   nir_ssa_def *patch_offset = nir_imul24(b, patch_id, patch_stride);
    nir_ssa_def *attr_offset;
 
    if (nir_src_is_const(nir_src_for_ssa(offset))) {
@@ -471,15 +472,17 @@ build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
 
    const uint32_t patch_stride = 1 + inner_levels + outer_levels;
 
-   nir_ssa_def *primitive_id = nir_load_primitive_id(b);
+   nir_ssa_def *patch_id = nir_load_rel_patch_id_ir3(b);
 
    nir_ssa_def *patch_offset =
-      nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));
+      nir_imul24(b, patch_id, nir_imm_int(b, patch_stride));
 
    uint32_t offset;
    switch (slot) {
+   case VARYING_SLOT_PRIMITIVE_ID:
+      offset = 0;
+      break;
    case VARYING_SLOT_TESS_LEVEL_OUTER:
-      /* There's some kind of header dword, tess levels start at index 1. */
       offset = 1;
       break;
    case VARYING_SLOT_TESS_LEVEL_INNER:
@@ -582,32 +585,37 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
 
          gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
          if (is_tess_levels(location)) {
-            /* with tess levels are defined as float[4] and float[2],
-             * but tess factor BO has smaller sizes for tris/isolines,
-             * so we have to discard any writes beyond the number of
-             * components for inner/outer levels */
             uint32_t inner_levels, outer_levels, levels;
             tess_level_components(state, &inner_levels, &outer_levels);
 
-            if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
-               levels = outer_levels;
-            else
-               levels = inner_levels;
-
             assert(intr->src[0].ssa->num_components == 1);
 
             nir_ssa_def *offset =
                nir_iadd_imm(b, intr->src[1].ssa, nir_intrinsic_component(intr));
 
-            nir_if *nif =
-               nir_push_if(b, nir_ult(b, offset, nir_imm_int(b, levels)));
+            nir_if *nif = NULL;
+            if (location != VARYING_SLOT_PRIMITIVE_ID) {
+               /* with tess levels are defined as float[4] and float[2],
+                * but tess factor BO has smaller sizes for tris/isolines,
+                * so we have to discard any writes beyond the number of
+                * components for inner/outer levels
+                */
+               if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
+                  levels = outer_levels;
+               else
+                  levels = inner_levels;
+
+               nif = nir_push_if(b, nir_ult(b, offset, nir_imm_int(b, levels)));
+            }
 
             replace_intrinsic(
                b, intr, nir_intrinsic_store_global_ir3, intr->src[0].ssa,
                nir_load_tess_factor_base_ir3(b),
                nir_iadd(b, offset, build_tessfactor_base(b, location, state)));
 
-            nir_pop_if(b, nif);
+            if (location != VARYING_SLOT_PRIMITIVE_ID) {
+               nir_pop_if(b, nif);
+            }
          } else {
             nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
             nir_ssa_def *offset = build_patch_offset(
@@ -664,6 +672,19 @@ ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
 
    state.header = nir_load_tcs_header_ir3(&b);
 
+   /* If required, store gl_PrimitiveID. */
+   if (v->key.tcs_store_primid) {
+      b.cursor = nir_after_cf_list(&impl->body);
+
+      nir_store_output(&b, nir_load_primitive_id(&b), nir_imm_int(&b, 0),
+                       .io_semantics = {
+                           .location = VARYING_SLOT_PRIMITIVE_ID,
+                           .num_slots = 1
+                        });
+
+      b.cursor = nir_before_cf_list(&impl->body);
+   }
+
    nir_foreach_block_safe (block, impl)
       lower_tess_ctrl_block(block, &b, &state);
 
diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c
index 5b7d77d820f..67ad1c721ca 100644
--- a/src/freedreno/ir3/ir3_shader.c
+++ b/src/freedreno/ir3/ir3_shader.c
@@ -507,6 +507,9 @@ ir3_setup_used_key(struct ir3_shader *shader)
          key->vastc_srgb = ~0;
          key->vsamples = ~0;
       }
+
+      if (info->stage == MESA_SHADER_TESS_CTRL)
+         key->tcs_store_primid = true;
    }
 }
 
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 864ef22626b..8f6f2cfdd74 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -314,6 +314,11 @@ struct ir3_shader_key {
 
          unsigned has_gs : 1;
 
+         /* Whether stages after TCS read gl_PrimitiveID, used to determine
+          * whether the TCS has to store it in the tess factor BO.
+          */
+         unsigned tcs_store_primid : 1;
+
          /* Whether this variant sticks to the "safe" maximum constlen,
           * which guarantees that the combined stages will never go over
           * the limit:
@@ -991,6 +996,7 @@ void ir3_link_stream_out(struct ir3_shader_linkage *l,
 #define VARYING_SLOT_GS_HEADER_IR3       (VARYING_SLOT_MAX + 0)
 #define VARYING_SLOT_GS_VERTEX_FLAGS_IR3 (VARYING_SLOT_MAX + 1)
 #define VARYING_SLOT_TCS_HEADER_IR3      (VARYING_SLOT_MAX + 2)
+#define VARYING_SLOT_REL_PATCH_ID_IR3    (VARYING_SLOT_MAX + 3)
 
 static inline uint32_t
 ir3_find_sysval_regid(const struct ir3_shader_variant *so, unsigned slot)
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index 420870db40f..ac3af16275e 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -646,17 +646,25 @@ tu6_emit_vs_system_values(struct tu_cs *cs,
          tess_coord_x_regid + 1 :
          regid(63, 0);
    const uint32_t hs_rel_patch_regid = hs ?
-         ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID) :
+         ir3_find_sysval_regid(hs, SYSTEM_VALUE_REL_PATCH_ID_IR3) :
          regid(63, 0);
    const uint32_t ds_rel_patch_regid = hs ?
-         ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID) :
+         ir3_find_sysval_regid(ds, SYSTEM_VALUE_REL_PATCH_ID_IR3) :
          regid(63, 0);
    const uint32_t hs_invocation_regid = hs ?
          ir3_find_sysval_regid(hs, SYSTEM_VALUE_TCS_HEADER_IR3) :
          regid(63, 0);
-   const uint32_t primitiveid_regid = gs ?
+   const uint32_t gs_primitiveid_regid = gs ?
          ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) :
          regid(63, 0);
+   const uint32_t hs_primitiveid_regid = hs ?
+         ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID) :
+         regid(63, 0);
+   const uint32_t vs_primitiveid_regid = gs ? gs_primitiveid_regid :
+      hs_primitiveid_regid;
+   const uint32_t ds_primitiveid_regid = ds ?
+         ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID) :
+         regid(63, 0);
    const uint32_t gsheader_regid = gs ?
          ir3_find_sysval_regid(gs, SYSTEM_VALUE_GS_HEADER_IR3) :
          regid(63, 0);
@@ -671,14 +679,14 @@ tu6_emit_vs_system_values(struct tu_cs *cs,
    tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_1, 6);
    tu_cs_emit(cs, A6XX_VFD_CONTROL_1_REGID4VTX(vertexid_regid) |
                   A6XX_VFD_CONTROL_1_REGID4INST(instanceid_regid) |
-                  A6XX_VFD_CONTROL_1_REGID4PRIMID(primitiveid_regid) |
+                  A6XX_VFD_CONTROL_1_REGID4PRIMID(vs_primitiveid_regid) |
                   A6XX_VFD_CONTROL_1_REGID4VIEWID(viewid_regid));
    tu_cs_emit(cs, A6XX_VFD_CONTROL_2_REGID_HSRELPATCHID(hs_rel_patch_regid) |
                   A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(hs_invocation_regid));
    tu_cs_emit(cs, A6XX_VFD_CONTROL_3_REGID_DSRELPATCHID(ds_rel_patch_regid) |
                   A6XX_VFD_CONTROL_3_REGID_TESSX(tess_coord_x_regid) |
                   A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) |
-                  0xfc);
+                  A6XX_VFD_CONTROL_3_REGID_DSPRIMID(ds_primitiveid_regid));
    tu_cs_emit(cs, 0x000000fc); /* VFD_CONTROL_4 */
    tu_cs_emit(cs, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gsheader_regid) |
                   0xfc00); /* VFD_CONTROL_5 */
@@ -2305,6 +2313,16 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
          key.tessellation = tu6_get_tessmode(shader);
       }
 
+      if (stage > MESA_SHADER_TESS_CTRL) {
+         if (stage == MESA_SHADER_FRAGMENT) {
+            key.tcs_store_primid = key.tcs_store_primid ||
+               (nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID));
+         } else {
+            key.tcs_store_primid = key.tcs_store_primid ||
+               BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
+         }
+      }
+
       /* Keep track of the status of each shader's active descriptor sets,
        * which is set in tu_lower_io. */
       desc_sets |= shader->active_desc_sets;
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
index b599891b03b..466bbdd8cd3 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
@@ -176,6 +176,12 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
       struct shader_info *ds_info = ir3_get_shader_info(emit.key.ds);
       emit.key.key.tessellation = ir3_tess_mode(ds_info->tess.primitive_mode);
       ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
+
+      struct shader_info *fs_info = ir3_get_shader_info(emit.key.fs);
+      emit.key.key.tcs_store_primid =
+         BITSET_TEST(ds_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
+         (gs_info && BITSET_TEST(gs_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID)) ||
+         (fs_info && (fs_info->inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID)));
    }
 
    if (emit.key.gs) {
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index 0fb279d61a8..492aa2e033e 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -318,10 +318,10 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
    uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid;
    uint32_t smask_in_regid, smask_regid;
    uint32_t stencilref_regid;
-   uint32_t vertex_regid, instance_regid, layer_regid, primitive_regid;
+   uint32_t vertex_regid, instance_regid, layer_regid, vs_primitive_regid;
    uint32_t hs_invocation_regid;
    uint32_t tess_coord_x_regid, tess_coord_y_regid, hs_rel_patch_regid,
-      ds_rel_patch_regid;
+      ds_rel_patch_regid, ds_primitive_regid;
    uint32_t ij_regid[IJ_COUNT];
    uint32_t gs_header_regid;
    enum a6xx_threadsize fssz;
@@ -353,12 +353,19 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
    layer_regid = ir3_find_output_regid(vs, VARYING_SLOT_LAYER);
    vertex_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID);
    instance_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID);
+   if (gs)
+      vs_primitive_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID);
+   else if (hs)
+      vs_primitive_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID);
+   else
+      vs_primitive_regid = regid(63, 0);
 
    if (hs) {
       tess_coord_x_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD);
       tess_coord_y_regid = next_regid(tess_coord_x_regid, 1);
-      hs_rel_patch_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID);
-      ds_rel_patch_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID);
+      hs_rel_patch_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_REL_PATCH_ID_IR3);
+      ds_rel_patch_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_REL_PATCH_ID_IR3);
+      ds_primitive_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID);
       hs_invocation_regid =
          ir3_find_sysval_regid(hs, SYSTEM_VALUE_TCS_HEADER_IR3);
 
@@ -371,12 +378,14 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
       tess_coord_y_regid = regid(63, 0);
       hs_rel_patch_regid = regid(63, 0);
       ds_rel_patch_regid = regid(63, 0);
+      ds_primitive_regid = regid(63, 0);
       hs_invocation_regid = regid(63, 0);
    }
 
+   bool gs_reads_primid = false;
    if (gs) {
       gs_header_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_GS_HEADER_IR3);
-      primitive_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID);
+      gs_reads_primid = VALIDREG(ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID));
       pos_regid = ir3_find_output_regid(gs, VARYING_SLOT_POS);
       psize_regid = ir3_find_output_regid(gs, VARYING_SLOT_PSIZ);
       clip0_regid = ir3_find_output_regid(gs, VARYING_SLOT_CLIP_DIST0);
@@ -384,7 +393,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
       layer_regid = ir3_find_output_regid(gs, VARYING_SLOT_LAYER);
    } else {
       gs_header_regid = regid(63, 0);
-      primitive_regid = regid(63, 0);
    }
 
    if (fs->color0_mrt) {
@@ -871,7 +879,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
                A6XX_PC_GS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) |
                   CONDREG(psize_regid, A6XX_PC_GS_OUT_CNTL_PSIZE) |
                   CONDREG(layer_regid, A6XX_PC_GS_OUT_CNTL_LAYER) |
-                  CONDREG(primitive_regid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID) |
+                  COND(gs_reads_primid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID) |
                   A6XX_PC_GS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
 
       uint32_t output;
@@ -973,14 +981,15 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
    OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_1, 6);
    OUT_RING(ring, A6XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
                      A6XX_VFD_CONTROL_1_REGID4INST(instance_regid) |
-                     A6XX_VFD_CONTROL_1_REGID4PRIMID(primitive_regid) |
+                     A6XX_VFD_CONTROL_1_REGID4PRIMID(vs_primitive_regid) |
                      0xfc000000);
    OUT_RING(ring,
             A6XX_VFD_CONTROL_2_REGID_HSRELPATCHID(hs_rel_patch_regid) |
                A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(hs_invocation_regid));
    OUT_RING(ring, A6XX_VFD_CONTROL_3_REGID_DSRELPATCHID(ds_rel_patch_regid) |
                      A6XX_VFD_CONTROL_3_REGID_TESSX(tess_coord_x_regid) |
-                     A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) | 0xfc);
+                     A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) |
+                     A6XX_VFD_CONTROL_3_REGID_DSPRIMID(ds_primitive_regid));
    OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */
    OUT_RING(ring, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gs_header_regid) |
                      0xfc00); /* VFD_CONTROL_5 */
diff --git a/src/gallium/drivers/freedreno/ci/piglit-freedreno-a630-fails.txt b/src/gallium/drivers/freedreno/ci/piglit-freedreno-a630-fails.txt
index c8eb1effaf9..b9953b09782 100644
--- a/src/gallium/drivers/freedreno/ci/piglit-freedreno-a630-fails.txt
+++ b/src/gallium/drivers/freedreno/ci/piglit-freedreno-a630-fails.txt
@@ -91,7 +91,6 @@ spec at arb_sample_shading@samplemask 4 at sample mask_in_one,Fail
 spec at arb_shader_image_load_store@indexing,Crash
 spec at arb_shader_storage_buffer_object@array-ssbo-auto-binding,Fail
 spec at arb_shader_storage_buffer_object@linker at instance-matching-shader-storage-blocks-member-array-size-mismatch,Fail
-spec at arb_tessellation_shader@execution at fs-primitiveid-instanced,Fail
 spec at arb_tessellation_shader@execution at gs-primitiveid-instanced,Fail
 spec at arb_tessellation_shader@execution at invocation-counting-even,Fail
 spec at arb_tessellation_shader@execution at invocation-counting-odd,Fail



More information about the mesa-commit mailing list