Mesa (main): freedreno, tu: Set SP_XS_PVT_MEM_HW_STACK_OFFSET

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Jun 25 16:15:44 UTC 2021


Module: Mesa
Branch: main
Commit: d01e7b50b88ae6a12df5a5772ad817d8e9f58bb3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d01e7b50b88ae6a12df5a5772ad817d8e9f58bb3

Author: Connor Abbott <cwabbott0 at gmail.com>
Date:   Thu Jun 24 18:30:24 2021 +0200

freedreno, tu: Set SP_XS_PVT_MEM_HW_STACK_OFFSET

Theoretically this register should only matter when the shader makes
function calls, which we don't support. However, with the default value
of 0, pvtmem doesn't seem to work on a650. Just set it to the total
per-SP size, which effectively leaves no space for the return-address
stack, like the blob does.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4949
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11581>

---

 src/freedreno/vulkan/tu_pipeline.c               | 10 ++++++++++
 src/gallium/drivers/freedreno/a6xx/fd6_program.c | 10 ++++++++++
 2 files changed, 20 insertions(+)

diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index 33758e7d3e5..0d5b035bb0c 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -342,42 +342,49 @@ tu6_emit_xs_config(struct tu_cs *cs,
       uint16_t reg_sp_xs_config;
       uint16_t reg_hlsq_xs_ctrl;
       uint16_t reg_sp_xs_first_exec_offset;
+      uint16_t reg_sp_xs_pvt_mem_hw_stack_offset;
    } xs_config[] = {
       [MESA_SHADER_VERTEX] = {
          REG_A6XX_SP_VS_CTRL_REG0,
          REG_A6XX_SP_VS_CONFIG,
          REG_A6XX_HLSQ_VS_CNTL,
          REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET,
+         REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET,
       },
       [MESA_SHADER_TESS_CTRL] = {
          REG_A6XX_SP_HS_CTRL_REG0,
          REG_A6XX_SP_HS_CONFIG,
          REG_A6XX_HLSQ_HS_CNTL,
          REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET,
+         REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET,
       },
       [MESA_SHADER_TESS_EVAL] = {
          REG_A6XX_SP_DS_CTRL_REG0,
          REG_A6XX_SP_DS_CONFIG,
          REG_A6XX_HLSQ_DS_CNTL,
          REG_A6XX_SP_DS_OBJ_FIRST_EXEC_OFFSET,
+         REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET,
       },
       [MESA_SHADER_GEOMETRY] = {
          REG_A6XX_SP_GS_CTRL_REG0,
          REG_A6XX_SP_GS_CONFIG,
          REG_A6XX_HLSQ_GS_CNTL,
          REG_A6XX_SP_GS_OBJ_FIRST_EXEC_OFFSET,
+         REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET,
       },
       [MESA_SHADER_FRAGMENT] = {
          REG_A6XX_SP_FS_CTRL_REG0,
          REG_A6XX_SP_FS_CONFIG,
          REG_A6XX_HLSQ_FS_CNTL,
          REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET,
+         REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET,
       },
       [MESA_SHADER_COMPUTE] = {
          REG_A6XX_SP_CS_CTRL_REG0,
          REG_A6XX_SP_CS_CONFIG,
          REG_A6XX_HLSQ_CS_CNTL,
          REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET,
+         REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET,
       },
    };
    const struct xs_config *cfg = &xs_config[stage];
@@ -482,6 +489,9 @@ tu6_emit_xs_config(struct tu_cs *cs,
    tu_cs_emit(cs, A6XX_SP_VS_PVT_MEM_SIZE_TOTALPVTMEMSIZE(pvtmem->per_sp_size) |
                   COND(pvtmem->per_wave, A6XX_SP_VS_PVT_MEM_SIZE_PERWAVEMEMLAYOUT));
 
+   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_pvt_mem_hw_stack_offset, 1);
+   tu_cs_emit(cs, A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET_OFFSET(pvtmem->per_sp_size));
+
    tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3);
    tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                   CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index 308bfadf0a4..937ff0819c5 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -49,32 +49,39 @@ fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
    uint32_t first_exec_offset = 0;
    uint32_t instrlen = 0;
+   uint32_t hw_stack_offset = 0;
 
    switch (so->type) {
    case MESA_SHADER_VERTEX:
       first_exec_offset = REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET;
       instrlen = REG_A6XX_SP_VS_INSTRLEN;
+      hw_stack_offset = REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET;
       break;
    case MESA_SHADER_TESS_CTRL:
       first_exec_offset = REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET;
       instrlen = REG_A6XX_SP_HS_INSTRLEN;
+      hw_stack_offset = REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET;
       break;
    case MESA_SHADER_TESS_EVAL:
       first_exec_offset = REG_A6XX_SP_DS_OBJ_FIRST_EXEC_OFFSET;
       instrlen = REG_A6XX_SP_DS_INSTRLEN;
+      hw_stack_offset = REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET;
       break;
    case MESA_SHADER_GEOMETRY:
       first_exec_offset = REG_A6XX_SP_GS_OBJ_FIRST_EXEC_OFFSET;
       instrlen = REG_A6XX_SP_GS_INSTRLEN;
+      hw_stack_offset = REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET;
       break;
    case MESA_SHADER_FRAGMENT:
       first_exec_offset = REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET;
       instrlen = REG_A6XX_SP_FS_INSTRLEN;
+      hw_stack_offset = REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET;
       break;
    case MESA_SHADER_COMPUTE:
    case MESA_SHADER_KERNEL:
       first_exec_offset = REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET;
       instrlen = REG_A6XX_SP_CS_INSTRLEN;
+      hw_stack_offset = REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET;
       break;
    case MESA_SHADER_TASK:
    case MESA_SHADER_MESH:
@@ -133,6 +140,9 @@ fd6_emit_shader(struct fd_context *ctx, struct fd_ringbuffer *ring,
                      COND(so->pvtmem_per_wave,
                           A6XX_SP_VS_PVT_MEM_SIZE_PERWAVEMEMLAYOUT));
 
+   OUT_PKT4(ring, hw_stack_offset, 1);
+   OUT_RING(ring, A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET_OFFSET(per_sp_size));
+
    OUT_PKT7(ring, fd6_stage2opcode(so->type), 3);
    OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
                      CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |



More information about the mesa-commit mailing list