Mesa (main): tu, freedreno/a6xx: Fix setting PC_XS_OUT_CNTL::PRIMITIVE_ID

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Aug 5 17:18:01 UTC 2021


Module: Mesa
Branch: main
Commit: 2e2e6865b40c459a101653ae26f2d0b0da2b0614
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2e2e6865b40c459a101653ae26f2d0b0da2b0614

Author: Connor Abbott <cwabbott0 at gmail.com>
Date:   Mon Aug  2 11:56:15 2021 +0200

tu, freedreno/a6xx: Fix setting PC_XS_OUT_CNTL::PRIMITIVE_ID

The PRIMITIVE_ID bit is supposed to be set when that stage needs the
PrimID sysval preloaded, except for the VS, which doesn't have this bit
and instead infers it from the HS or GS bit (depending on whether tess/GS
is enabled). Therefore, for HS, GS, and DS we should set it whenever the
corresponding sysval is there. This includes adding a missing
PC_HS_OUT_CNTL, which I confirmed is set when the HS reads PrimID from
the VS. Note that the DS sysval is currently always enabled whenever
there's a GS; if we were to fix that, then we should also change the
logic here.

This doesn't fix anything that I know of, but aligns us more with what
the blob does.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12166>

---

 src/freedreno/.gitlab-ci/reference/crash.log     |  8 ++---
 src/freedreno/.gitlab-ci/reference/fd-clouds.log | 12 +++----
 src/freedreno/registers/adreno/a6xx.xml          |  3 +-
 src/freedreno/vulkan/tu_pipeline.c               | 45 +++++++++++++++++-------
 src/gallium/drivers/freedreno/a6xx/fd6_program.c |  9 +++--
 5 files changed, 51 insertions(+), 26 deletions(-)

diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log
index 1b4436aac02..ceba6c8b554 100644
--- a/src/freedreno/.gitlab-ci/reference/crash.log
+++ b/src/freedreno/.gitlab-ci/reference/crash.log
@@ -6335,7 +6335,7 @@ clusters:
 	00000002	PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
 	00000001	PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
 	00000001	PC_GS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
-	00000000	PC_PRIMITIVE_CNTL_3: 0
+	00000000	PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
 	00000001	PC_DS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
 	00000000	PC_PRIMITIVE_CNTL_5: { GS_VERTICES_OUT = 0 | GS_INVOCATIONS = 0 | GS_OUTPUT = TESS_POINTS }
 	00000000	PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
@@ -6595,7 +6595,7 @@ clusters:
 	00000002	PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
 	00000001	PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
 	00000001	PC_GS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
-	00000000	PC_PRIMITIVE_CNTL_3: 0
+	00000000	PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
 	00000001	PC_DS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
 	00000000	PC_PRIMITIVE_CNTL_5: { GS_VERTICES_OUT = 0 | GS_INVOCATIONS = 0 | GS_OUTPUT = TESS_POINTS }
 	00000000	PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
@@ -6860,7 +6860,7 @@ clusters:
 	00000002	PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
 	00000001	PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
 	00000001	PC_GS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
-	00000000	PC_PRIMITIVE_CNTL_3: 0
+	00000000	PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
 	00000001	PC_DS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
 	00000000	PC_PRIMITIVE_CNTL_5: { GS_VERTICES_OUT = 0 | GS_INVOCATIONS = 0 | GS_OUTPUT = TESS_POINTS }
 	00000000	PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
@@ -6887,7 +6887,7 @@ clusters:
 	00000002	PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
 	00000001	PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
 	00000001	PC_GS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
-	00000000	PC_PRIMITIVE_CNTL_3: 0
+	00000000	PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
 	00000001	PC_DS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
 	00000000	PC_PRIMITIVE_CNTL_5: { GS_VERTICES_OUT = 0 | GS_INVOCATIONS = 0 | GS_OUTPUT = TESS_POINTS }
 	00000000	PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log
index b049d0488e5..376af515f75 100644
--- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log
+++ b/src/freedreno/.gitlab-ci/reference/fd-clouds.log
@@ -679,8 +679,8 @@ t4					write VPC_CNTL_0 (9304)
 t4					write PC_VS_OUT_CNTL (9b01)
 						PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 4 | CLIP_MASK = 0 }
 0000000001121090:					0000: 489b0101 00000004
-t4					write PC_PRIMITIVE_CNTL_3 (9b03)
-						PC_PRIMITIVE_CNTL_3: 0
+t4					write PC_HS_OUT_CNTL (9b03)
+						PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
 0000000001121098:					0000: 409b0301 00000000
 t4					write HLSQ_CONTROL_1_REG (b982)
 						HLSQ_CONTROL_1_REG: 0x7
@@ -1052,7 +1052,7 @@ t7			opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
  +	00000000			0x9990: 00000000
 !+	00000002			PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
 !+	00000004			PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 4 | CLIP_MASK = 0 }
- +	00000000			PC_PRIMITIVE_CNTL_3: 0
+ +	00000000			PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
  +	00000000			PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
  +	00000000			PC_MULTIVIEW_CNTL: { VIEWS = 0 }
  +	00000000			PC_UNKNOWN_9E72: 0
@@ -1973,8 +1973,8 @@ t4					write VPC_CNTL_0 (9304)
 t4					write PC_VS_OUT_CNTL (9b01)
 						PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 4 | CLIP_MASK = 0 }
 0000000001120090:					0000: 489b0101 00000004
-t4					write PC_PRIMITIVE_CNTL_3 (9b03)
-						PC_PRIMITIVE_CNTL_3: 0
+t4					write PC_HS_OUT_CNTL (9b03)
+						PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
 0000000001120098:					0000: 409b0301 00000000
 t4					write HLSQ_CONTROL_1_REG (b982)
 						HLSQ_CONTROL_1_REG: 0x7
@@ -5257,7 +5257,7 @@ t7			opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
  +	ffffffff			PC_RESTART_INDEX: 4294967295
  +	00000002			PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
  +	00000004			PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 4 | CLIP_MASK = 0 }
- +	00000000			PC_PRIMITIVE_CNTL_3: 0
+ +	00000000			PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
  +	00000000			PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
  +	00000101			VFD_CONTROL_0: { FETCH_CNT = 1 | DECODE_CNT = 1 }
  +	fcfcfcfc			VFD_CONTROL_1: { REGID4VTX = r63.x | REGID4INST = r63.x | REGID4PRIMID = r63.x | REGID4VIEWID = r63.x }
diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml
index 0b6e65a691e..21bf9b64c8b 100644
--- a/src/freedreno/registers/adreno/a6xx.xml
+++ b/src/freedreno/registers/adreno/a6xx.xml
@@ -2539,7 +2539,8 @@ to upconvert to 32b float internally?
 
 	<reg32 offset="0x9b01" name="PC_VS_OUT_CNTL" type="a6xx_xs_out_cntl"/>
 	<reg32 offset="0x9b02" name="PC_GS_OUT_CNTL" type="a6xx_xs_out_cntl"/>
-	<reg32 offset="0x9b03" name="PC_PRIMITIVE_CNTL_3" pos="11"/>
+	<!-- since HS can't output anything, only PRIMITIVE_ID is valid -->
+	<reg32 offset="0x9b03" name="PC_HS_OUT_CNTL" type="a6xx_xs_out_cntl"/>
 	<reg32 offset="0x9b04" name="PC_DS_OUT_CNTL" type="a6xx_xs_out_cntl"/>
 
 	<reg32 offset="0x9b05" name="PC_PRIMITIVE_CNTL_5">
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index ac3af16275e..ac315b29437 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -881,6 +881,17 @@ tu6_emit_vpc(struct tu_cs *cs,
          REG_A6XX_VPC_VS_LAYER_CNTL,
          REG_A6XX_GRAS_VS_LAYER_CNTL
       },
+      [MESA_SHADER_TESS_CTRL] = {
+         0,
+         0,
+         0,
+         0,
+         0,
+         REG_A6XX_PC_HS_OUT_CNTL,
+         0,
+         0,
+         0
+      },
       [MESA_SHADER_TESS_EVAL] = {
          REG_A6XX_SP_DS_OUT_REG(0),
          REG_A6XX_SP_DS_VPC_DST_REG(0),
@@ -950,8 +961,6 @@ tu6_emit_vpc(struct tu_cs *cs,
       ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST0);
    const uint32_t clip1_regid =
       ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST1);
-   uint32_t primitive_regid = gs ?
-      ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) : regid(63, 0);
    uint32_t flags_regid = gs ?
       ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0;
 
@@ -1044,13 +1053,28 @@ tu6_emit_vpc(struct tu_cs *cs,
    tu_cs_emit(cs, A6XX_GRAS_VS_CL_CNTL_CLIP_MASK(last_shader->clip_mask) |
                   A6XX_GRAS_VS_CL_CNTL_CULL_MASK(last_shader->cull_mask));
 
-   tu_cs_emit_pkt4(cs, cfg->reg_pc_xs_out_cntl, 1);
-   tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
-                  CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
-                  CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
-                  CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
-                  CONDREG(primitive_regid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) |
-                  A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
+   const struct ir3_shader_variant *geom_shaders[] = { vs, hs, ds, gs };
+
+   for (unsigned i = 0; i < ARRAY_SIZE(geom_shaders); i++) {
+      const struct ir3_shader_variant *shader = geom_shaders[i];
+      if (!shader)
+         continue;
+
+      bool primid = shader->type != MESA_SHADER_VERTEX &&
+         VALIDREG(ir3_find_sysval_regid(shader, SYSTEM_VALUE_PRIMITIVE_ID));
+
+      tu_cs_emit_pkt4(cs, reg_config[shader->type].reg_pc_xs_out_cntl, 1);
+      if (shader == last_shader) {
+         tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
+                        CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
+                        CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
+                        CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
+                        COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) |
+                        A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
+      } else {
+         tu_cs_emit(cs, COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID));
+      }
+   }
 
    tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1);
    tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt) |
@@ -1170,9 +1194,6 @@ tu6_emit_vpc(struct tu_cs *cs,
             A6XX_PC_PRIMITIVE_CNTL_5_GS_OUTPUT(output) |
             A6XX_PC_PRIMITIVE_CNTL_5_GS_INVOCATIONS(invocations));
 
-      tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1);
-      tu_cs_emit(cs, 0);
-
       tu_cs_emit_pkt4(cs, REG_A6XX_VPC_UNKNOWN_9100, 1);
       tu_cs_emit(cs, 0xff);
 
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index 492aa2e033e..b54b6f43d3b 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -360,9 +360,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
    else
       vs_primitive_regid = regid(63, 0);
 
+   bool hs_reads_primid = false, ds_reads_primid = false;
    if (hs) {
       tess_coord_x_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD);
       tess_coord_y_regid = next_regid(tess_coord_x_regid, 1);
+      hs_reads_primid = VALIDREG(ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID));
+      ds_reads_primid = VALIDREG(ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID));
       hs_rel_patch_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_REL_PATCH_ID_IR3);
       ds_rel_patch_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_REL_PATCH_ID_IR3);
       ds_primitive_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID);
@@ -696,8 +699,11 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
       OUT_PKT4(ring, REG_A6XX_PC_DS_OUT_CNTL, 1);
       OUT_RING(ring, A6XX_PC_DS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) |
                         CONDREG(psize_regid, A6XX_PC_DS_OUT_CNTL_PSIZE) |
+                        COND(ds_reads_primid, A6XX_PC_DS_OUT_CNTL_PRIMITIVE_ID) |
                         A6XX_PC_DS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
 
+      OUT_PKT4(ring, REG_A6XX_PC_HS_OUT_CNTL, 1);
+      OUT_RING(ring, COND(hs_reads_primid, A6XX_PC_HS_OUT_CNTL_PRIMITIVE_ID));
    } else {
       OUT_PKT4(ring, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1);
       OUT_RING(ring, 0);
@@ -720,9 +726,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
                      CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
                      A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
 
-   OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1);
-   OUT_RING(ring, 0);
-
    OUT_PKT4(ring, REG_A6XX_HLSQ_CONTROL_1_REG, 5);
    OUT_RING(ring, 0x7); /* XXX */
    OUT_RING(ring, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |



More information about the mesa-commit mailing list