Mesa (master): freedreno/a6xx: Rename and document HLSQ_UPDATE_CNTL

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jul 14 08:50:28 UTC 2020


Module: Mesa
Branch: master
Commit: e1fa740c4c1412458b56732a222c86bef3bd6b31
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1fa740c4c1412458b56732a222c86bef3bd6b31

Author: Connor Abbott <cwabbott0 at gmail.com>
Date:   Mon Jul 13 12:22:20 2020 +0200

freedreno/a6xx: Rename and document HLSQ_UPDATE_CNTL

It turns out that this register clears the state set up by
CP_LOAD_STATE6 packets, including disabling any pending loads for
SS6_INDIRECT/SS6_BINDLESS (these loads don't actually happen until the
draw itself, and I'm not sure whether they happen if the state is unused
by the shader) and marking constants and UBO descriptors loaded with
SS6_DIRECT as invalid. It's used very differently from HLSQ_UPDATE_CNTL
on a4xx, from which the name came, and unlike on a4xx it's not readable,
so it probably doesn't correspond to HLSQ_UPDATE_CNTL on a4xx.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5877>

---

 src/freedreno/computerator/a6xx.c                | 11 ++++++++--
 src/freedreno/registers/a6xx.xml                 | 27 ++++++++++++++++++++++--
 src/freedreno/vulkan/tu_clear_blit.c             | 13 +++++++++++-
 src/freedreno/vulkan/tu_cmd_buffer.c             | 22 ++++++++++++++-----
 src/freedreno/vulkan/tu_pipeline.c               | 22 +++++++++++++++----
 src/gallium/drivers/freedreno/a6xx/fd6_compute.c | 13 ++++++++++--
 src/gallium/drivers/freedreno/a6xx/fd6_draw.c    | 15 +++++++++++--
 src/gallium/drivers/freedreno/a6xx/fd6_emit.c    | 16 ++++++++++++--
 src/gallium/drivers/freedreno/a6xx/fd6_program.c | 13 ++++++++++--
 9 files changed, 130 insertions(+), 22 deletions(-)

diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c
index 72b0f067efe..21d7d1c4894 100644
--- a/src/freedreno/computerator/a6xx.c
+++ b/src/freedreno/computerator/a6xx.c
@@ -117,8 +117,15 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
 	const struct ir3_info *i = &v->info;
 	enum a3xx_threadsize thrsz = FOUR_QUADS;
 
-	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-	OUT_RING(ring, 0xff);
+	OUT_PKT4(ring, REG_A6XX_HLSQ_INVALIDATE_CMD, 1);
+	OUT_RING(ring, A6XX_HLSQ_INVALIDATE_CMD_VS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_HS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_DS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_GS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_FS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_CS_STATE |
+                   A6XX_HLSQ_INVALIDATE_CMD_CS_IBO |
+                   A6XX_HLSQ_INVALIDATE_CMD_GFX_IBO);
 
 	unsigned constlen = align(v->constlen, 4);
 	OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);
diff --git a/src/freedreno/registers/a6xx.xml b/src/freedreno/registers/a6xx.xml
index 56e1746303b..f821832ba06 100644
--- a/src/freedreno/registers/a6xx.xml
+++ b/src/freedreno/registers/a6xx.xml
@@ -3408,8 +3408,31 @@ to upconvert to 32b float internally?
 		<bitfield name="EVENT" low="0" high="6" type="vgt_event_type"/>
 	</reg32>
 
-	<!-- probably: -->
-	<reg32 offset="0xbb08" name="HLSQ_UPDATE_CNTL"/>
+	<reg32 offset="0xbb08" name="HLSQ_INVALIDATE_CMD">
+		<doc>
+			This register clears pending loads queued up by
+			CP_LOAD_STATE6. Each bit resets a particular kind(s) of
+			CP_LOAD_STATE6.
+		</doc>
+
+		<!-- per-stage state: shader, non-bindless UBO, textures, and samplers -->
+		<bitfield name="VS_STATE" pos="0" type="boolean"/>
+		<bitfield name="HS_STATE" pos="1" type="boolean"/>
+		<bitfield name="DS_STATE" pos="2" type="boolean"/>
+		<bitfield name="GS_STATE" pos="3" type="boolean"/>
+		<bitfield name="FS_STATE" pos="4" type="boolean"/>
+		<bitfield name="CS_STATE" pos="5" type="boolean"/>
+
+		<bitfield name="CS_IBO" pos="6" type="boolean"/>
+		<bitfield name="GFX_IBO" pos="7" type="boolean"/>
+
+		<bitfield name="CS_SHARED_CONST" pos="19" type="boolean"/>
+		<bitfield name="GFX_SHARED_CONST" pos="8" type="boolean"/>
+
+		<!-- SS6_BINDLESS: one bit per bindless base -->
+		<bitfield name="CS_BINDLESS" low="9" high="13" type="hex"/>
+		<bitfield name="GFX_BINDLESS" low="14" high="18" type="hex"/>
+	</reg32>
 
 	<reg32 offset="0xbb10" name="HLSQ_FS_CNTL" type="a6xx_hlsq_xs_cntl"/>
 
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index 29b952548bf..2be3e38dccc 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -428,7 +428,18 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
       .const_state = &dummy_const_state,
    };
 
-   tu_cs_emit_regs(cs, A6XX_HLSQ_UPDATE_CNTL(0x7ffff));
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .cs_state = true,
+         .gfx_ibo = true,
+         .cs_ibo = true,
+         .gfx_shared_const = true,
+         .gfx_bindless = 0x1f,
+         .cs_bindless = 0x1f));
 
    tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, global_iova(cmd, shaders[GLOBAL_SH_VS]));
    tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, 0);
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 765732ad9a9..d1145bfb13e 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -719,7 +719,19 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
 
    tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
 
-   tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff);
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .cs_state = true,
+         .gfx_ibo = true,
+         .cs_ibo = true,
+         .gfx_shared_const = true,
+         .cs_shared_const = true,
+         .gfx_bindless = 0x1f,
+         .cs_bindless = 0x1f));
 
    tu_cs_emit_regs(cs,
                    A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_bypass));
@@ -1684,7 +1696,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
    }
    assert(dyn_idx == dynamicOffsetCount);
 
-   uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_update_value;
+   uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_invalidate_value;
    uint64_t addr[MAX_SETS + 1] = {};
    struct tu_cs cs;
 
@@ -1709,7 +1721,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
    if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
       sp_bindless_base_reg = REG_A6XX_SP_BINDLESS_BASE(0);
       hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0);
-      hlsq_update_value = 0x7c000;
+      hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_GFX_BINDLESS(0x1f);
 
       cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS | TU_CMD_DIRTY_SHADER_CONSTS;
    } else {
@@ -1717,7 +1729,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
 
       sp_bindless_base_reg = REG_A6XX_SP_CS_BINDLESS_BASE(0);
       hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0);
-      hlsq_update_value = 0x3e00;
+      hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_CS_BINDLESS(0x1f);
 
       cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS;
    }
@@ -1728,7 +1740,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
    tu_cs_emit_array(&cs, (const uint32_t*) addr, 10);
    tu_cs_emit_pkt4(&cs, hlsq_bindless_base_reg, 10);
    tu_cs_emit_array(&cs, (const uint32_t*) addr, 10);
-   tu_cs_emit_regs(&cs, A6XX_HLSQ_UPDATE_CNTL(.dword = hlsq_update_value));
+   tu_cs_emit_regs(&cs, A6XX_HLSQ_INVALIDATE_CMD(.dword = hlsq_invalidate_value));
 
    struct tu_cs_entry ib = tu_cs_end_sub_stream(&cmd->sub_cs, &cs);
    if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index 88cdca7e19c..84cb9c465d9 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -468,8 +468,15 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader,
                    const struct ir3_shader_variant *v,
                    uint32_t binary_iova)
 {
-   tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-   tu_cs_emit(cs, 0xff);
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .cs_state = true,
+         .cs_ibo = true,
+         .gfx_ibo = true));
 
    tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v, binary_iova);
 
@@ -1355,8 +1362,15 @@ tu6_emit_program(struct tu_cs *cs,
 
    STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
 
-   tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-   tu_cs_emit(cs, 0xff); /* XXX */
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+         .vs_state = true,
+         .hs_state = true,
+         .ds_state = true,
+         .gs_state = true,
+         .fs_state = true,
+         .cs_state = true,
+         .cs_ibo = true,
+         .gfx_ibo = true));
 
   /* Don't use the binning pass variant when GS is present because we don't
    * support compiling correct binning pass variants with GS.
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
index 438557600f2..75d4b965f6f 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c
@@ -34,6 +34,7 @@
 #include "fd6_const.h"
 #include "fd6_context.h"
 #include "fd6_emit.h"
+#include "fd6_pack.h"
 
 struct fd6_compute_stateobj {
 	struct ir3_shader *shader;
@@ -78,8 +79,16 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
 	const struct ir3_info *i = &v->info;
 	enum a3xx_threadsize thrsz = FOUR_QUADS;
 
-	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-	OUT_RING(ring, 0xff);
+	OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
+			.vs_state = true,
+			.hs_state = true,
+			.ds_state = true,
+			.gs_state = true,
+			.fs_state = true,
+			.cs_state = true,
+			.gfx_ibo = true,
+			.cs_ibo = true,
+		));
 
 	OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);
 	OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(v->constlen) |
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
index 4fa32b5d2ac..ab8fdea19de 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
@@ -356,8 +356,19 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
 	OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
 	OUT_RING(ring, fd6_ctx->magic.RB_CCU_CNTL_bypass);
 
-	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-	OUT_RING(ring, 0x7ffff);
+	OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
+			.vs_state = true,
+			.hs_state = true,
+			.ds_state = true,
+			.gs_state = true,
+			.fs_state = true,
+			.cs_state = true,
+			.gfx_ibo = true,
+			.cs_ibo = true,
+			.gfx_shared_const = true,
+			.gfx_bindless = 0x1f,
+			.cs_bindless = 0x1f
+		));
 
 	emit_marker6(ring, 7);
 	OUT_PKT7(ring, CP_SET_MARKER, 1);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index f20666c145b..4740f60ab45 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -1130,8 +1130,20 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
 
 	fd6_cache_inv(batch, ring);
 
-	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-	OUT_RING(ring, 0xfffff);
+	OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
+			.vs_state = true,
+			.hs_state = true,
+			.ds_state = true,
+			.gs_state = true,
+			.fs_state = true,
+			.cs_state = true,
+			.gfx_ibo = true,
+			.cs_ibo = true,
+			.gfx_shared_const = true,
+			.cs_shared_const = true,
+			.gfx_bindless = 0x1f,
+			.cs_bindless = 0x1f
+		));
 
 	OUT_WFI5(ring);
 
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
index 72a47c1f571..4ee227b027e 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c
@@ -39,6 +39,7 @@
 #include "fd6_emit.h"
 #include "fd6_texture.h"
 #include "fd6_format.h"
+#include "fd6_pack.h"
 
 void
 fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
@@ -225,8 +226,16 @@ setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_varian
 static void
 setup_config_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state)
 {
-	OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
-	OUT_RING(ring, 0xff);        /* XXX */
+	OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(
+			.vs_state = true,
+			.hs_state = true,
+			.ds_state = true,
+			.gs_state = true,
+			.fs_state = true,
+			.cs_state = true,
+			.gfx_ibo = true,
+			.cs_ibo = true,
+		));
 
 	debug_assert(state->vs->constlen >= state->bs->constlen);
 



More information about the mesa-commit mailing list