Mesa (master): radeonsi: make cik_emit_prefetch_L2 templated and move it to si_state_draw.cpp
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Jan 18 01:33:14 UTC 2021
Module: Mesa
Branch: master
Commit: 0eca4660a5588696047c18546a9525e456478af9
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0eca4660a5588696047c18546a9525e456478af9
Author: Marek Olšák <marek.olsak at amd.com>
Date: Sat Dec 26 20:34:09 2020 -0500
radeonsi: make cik_emit_prefetch_L2 templated and move it to si_state_draw.cpp
This is a great candidate for a template. There are a lot of conditions
that are already templated in si_draw_vbo.
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8548>
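
For readers unfamiliar with the pattern, here is a minimal, self-contained C++ sketch (not Mesa code; the enum values and function names are simplified stand-ins) of what the commit message describes: turning runtime conditions such as the chip generation and the vertex_stage_only flag into template parameters lets the compiler fold those branches at compile time, so each instantiation of the prefetch path contains only the code it actually needs.

#include <cstdio>

/* Simplified stand-in for Mesa's chip_class enum. */
enum chip_class { GFX6, GFX7, GFX8, GFX9, GFX10 };

template <chip_class GFX_VERSION, bool HAS_TESS, bool VS_ONLY>
static void emit_prefetch_L2(unsigned mask)
{
   /* GFX6 has no L2 prefetch: on that instantiation the body folds away. */
   if (GFX_VERSION < GFX7 || !mask)
      return;

   /* Resolved at compile time: only one of the two branches is emitted. */
   if (HAS_TESS)
      std::printf("prefetch HS + VBO descriptors\n");
   else
      std::printf("prefetch VS + VBO descriptors\n");

   if (VS_ONLY)
      return; /* remaining stages are prefetched after the draw has started */

   std::printf("prefetch remaining shaders\n");
}

int main()
{
   /* Two instantiations of the same source, with no runtime flag checks. */
   emit_prefetch_L2<GFX9, true, true>(0xffu);
   emit_prefetch_L2<GFX9, false, false>(0xffu);
}

In the driver the appropriate instantiation is picked outside the per-draw hot path (when the relevant shader state changes), which is the point of templating these conditions.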
---
src/gallium/drivers/radeonsi/si_cp_dma.c | 128 ----------------------
src/gallium/drivers/radeonsi/si_pipe.h | 3 -
src/gallium/drivers/radeonsi/si_state.h | 2 +
src/gallium/drivers/radeonsi/si_state_draw.cpp | 146 ++++++++++++++++++++++++-
4 files changed, 142 insertions(+), 137 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index b911d3e839c..f8e483d9fcc 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -399,134 +399,6 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
}
}
-void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, uint64_t offset,
- unsigned size)
-{
- assert(sctx->chip_class >= GFX7);
-
- si_cp_dma_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL,
- SI_COHERENCY_SHADER, L2_LRU);
-}
-
-static void cik_prefetch_shader_async(struct si_context *sctx, struct si_pm4_state *state)
-{
- struct pipe_resource *bo = &state->shader->bo->b.b;
-
- cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
-}
-
-static void cik_prefetch_VBO_descriptors(struct si_context *sctx)
-{
- if (!sctx->vertex_elements || !sctx->vertex_elements->vb_desc_list_alloc_size)
- return;
-
- cik_prefetch_TC_L2_async(sctx, &sctx->vb_descriptors_buffer->b.b, sctx->vb_descriptors_offset,
- sctx->vertex_elements->vb_desc_list_alloc_size);
-}
-
-/**
- * Prefetch shaders and VBO descriptors.
- *
- * \param vertex_stage_only Whether only the the API VS and VBO descriptors
- * should be prefetched.
- */
-void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only)
-{
- unsigned mask = sctx->prefetch_L2_mask;
- assert(mask);
-
- /* Prefetch shaders and VBO descriptors to TC L2. */
- if (sctx->chip_class >= GFX9) {
- /* Choose the right spot for the VBO prefetch. */
- if (sctx->queued.named.hs) {
- if (mask & SI_PREFETCH_HS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
- if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
- cik_prefetch_VBO_descriptors(sctx);
- if (vertex_stage_only) {
- sctx->prefetch_L2_mask &= ~(SI_PREFETCH_HS | SI_PREFETCH_VBO_DESCRIPTORS);
- return;
- }
-
- if (mask & SI_PREFETCH_GS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
- if (mask & SI_PREFETCH_VS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
- } else if (sctx->queued.named.gs) {
- if (mask & SI_PREFETCH_GS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
- if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
- cik_prefetch_VBO_descriptors(sctx);
- if (vertex_stage_only) {
- sctx->prefetch_L2_mask &= ~(SI_PREFETCH_GS | SI_PREFETCH_VBO_DESCRIPTORS);
- return;
- }
-
- if (mask & SI_PREFETCH_VS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
- } else {
- if (mask & SI_PREFETCH_VS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
- if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
- cik_prefetch_VBO_descriptors(sctx);
- if (vertex_stage_only) {
- sctx->prefetch_L2_mask &= ~(SI_PREFETCH_VS | SI_PREFETCH_VBO_DESCRIPTORS);
- return;
- }
- }
- } else {
- /* GFX6-GFX8 */
- /* Choose the right spot for the VBO prefetch. */
- if (sctx->tes_shader.cso) {
- if (mask & SI_PREFETCH_LS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
- if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
- cik_prefetch_VBO_descriptors(sctx);
- if (vertex_stage_only) {
- sctx->prefetch_L2_mask &= ~(SI_PREFETCH_LS | SI_PREFETCH_VBO_DESCRIPTORS);
- return;
- }
-
- if (mask & SI_PREFETCH_HS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
- if (mask & SI_PREFETCH_ES)
- cik_prefetch_shader_async(sctx, sctx->queued.named.es);
- if (mask & SI_PREFETCH_GS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
- if (mask & SI_PREFETCH_VS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
- } else if (sctx->gs_shader.cso) {
- if (mask & SI_PREFETCH_ES)
- cik_prefetch_shader_async(sctx, sctx->queued.named.es);
- if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
- cik_prefetch_VBO_descriptors(sctx);
- if (vertex_stage_only) {
- sctx->prefetch_L2_mask &= ~(SI_PREFETCH_ES | SI_PREFETCH_VBO_DESCRIPTORS);
- return;
- }
-
- if (mask & SI_PREFETCH_GS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
- if (mask & SI_PREFETCH_VS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
- } else {
- if (mask & SI_PREFETCH_VS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
- if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
- cik_prefetch_VBO_descriptors(sctx);
- if (vertex_stage_only) {
- sctx->prefetch_L2_mask &= ~(SI_PREFETCH_VS | SI_PREFETCH_VBO_DESCRIPTORS);
- return;
- }
- }
- }
-
- if (mask & SI_PREFETCH_PS)
- cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
-
- sctx->prefetch_L2_mask = 0;
-}
-
void si_test_gds(struct si_context *sctx)
{
struct pipe_context *ctx = &sctx->b;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index f269b6ec1c0..1fc7d0d25b5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1401,9 +1401,6 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,
struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset,
unsigned size, unsigned user_flags, enum si_coherency coher,
enum si_cache_policy cache_policy);
-void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, uint64_t offset,
- unsigned size);
-void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only);
void si_test_gds(struct si_context *sctx);
void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, unsigned offset,
unsigned size, unsigned dst_sel, unsigned engine, const void *data);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index a293787487a..63ede1d1e15 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -587,6 +587,8 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs);
bool si_update_ngg(struct si_context *sctx);
/* si_state_draw.c */
+void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, uint64_t offset,
+ unsigned size);
void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx);
void si_trace_emit(struct si_context *sctx);
void si_init_draw_functions(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index 7f9bdc63ac7..c180e1d3153 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -59,6 +59,143 @@ static unsigned si_conv_pipe_prim(unsigned mode)
return prim_conv[mode];
}
+void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, uint64_t offset,
+ unsigned size)
+{
+ assert(sctx->chip_class >= GFX7);
+
+ si_cp_dma_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL,
+ SI_COHERENCY_SHADER, L2_LRU);
+}
+
+static void si_prefetch_shader_async(struct si_context *sctx, struct si_pm4_state *state)
+{
+ struct pipe_resource *bo = &state->shader->bo->b.b;
+
+ cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
+}
+
+static void si_prefetch_VBO_descriptors(struct si_context *sctx)
+{
+ if (!sctx->vertex_elements || !sctx->vertex_elements->vb_desc_list_alloc_size)
+ return;
+
+ cik_prefetch_TC_L2_async(sctx, &sctx->vb_descriptors_buffer->b.b, sctx->vb_descriptors_offset,
+ sctx->vertex_elements->vb_desc_list_alloc_size);
+}
+
+/**
+ * Prefetch shaders and VBO descriptors.
+ *
+ * \param VS_ONLY Whether only the API VS and VBO descriptors should be prefetched.
+ */
+template<chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG, bool VS_ONLY>
+static void si_emit_prefetch_L2(struct si_context *sctx)
+{
+ unsigned mask = sctx->prefetch_L2_mask;
+
+ /* GFX6 doesn't support the L2 prefetch. */
+ if (GFX_VERSION < GFX7 || !mask)
+ return;
+
+ /* Prefetch shaders and VBO descriptors to TC L2. */
+ if (GFX_VERSION >= GFX9) {
+ /* Choose the right spot for the VBO prefetch. */
+ if (HAS_TESS) {
+ if (mask & SI_PREFETCH_HS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.hs);
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ si_prefetch_VBO_descriptors(sctx);
+
+ if (VS_ONLY) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_HS | SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+
+ if ((HAS_GS || NGG) && mask & SI_PREFETCH_GS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.gs);
+ if (!NGG && mask & SI_PREFETCH_VS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ } else if (HAS_GS || NGG) {
+ if (mask & SI_PREFETCH_GS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.gs);
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ si_prefetch_VBO_descriptors(sctx);
+
+ if (VS_ONLY) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_GS | SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+
+ if (!NGG && mask & SI_PREFETCH_VS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ } else {
+ if (mask & SI_PREFETCH_VS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ si_prefetch_VBO_descriptors(sctx);
+
+ if (VS_ONLY) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_VS | SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+ }
+ } else {
+ /* GFX6-GFX8 */
+ /* Choose the right spot for the VBO prefetch. */
+ if (HAS_TESS) {
+ if (mask & SI_PREFETCH_LS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.ls);
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ si_prefetch_VBO_descriptors(sctx);
+
+ if (VS_ONLY) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_LS | SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+
+ if (mask & SI_PREFETCH_HS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.hs);
+ if (mask & SI_PREFETCH_ES)
+ si_prefetch_shader_async(sctx, sctx->queued.named.es);
+ if (mask & SI_PREFETCH_GS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.gs);
+ if (mask & SI_PREFETCH_VS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ } else if (HAS_GS) {
+ if (mask & SI_PREFETCH_ES)
+ si_prefetch_shader_async(sctx, sctx->queued.named.es);
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ si_prefetch_VBO_descriptors(sctx);
+
+ if (VS_ONLY) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_ES | SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+
+ if (mask & SI_PREFETCH_GS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.gs);
+ if (mask & SI_PREFETCH_VS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ } else {
+ if (mask & SI_PREFETCH_VS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ si_prefetch_VBO_descriptors(sctx);
+
+ if (VS_ONLY) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_VS | SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+ }
+ }
+
+ if (mask & SI_PREFETCH_PS)
+ si_prefetch_shader_async(sctx, sctx->queued.named.ps);
+
+ sctx->prefetch_L2_mask = 0;
+}
+
/**
* This calculates the LDS size for tessellation shaders (VS, TCS, TES).
* LS.LDS_SIZE is shared by all 3 shader stages.
@@ -1942,8 +2079,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
/* Start prefetches after the draw has been started. Both will run
* in parallel, but starting the draw first is more important.
*/
- if (GFX_VERSION >= GFX7 && sctx->prefetch_L2_mask)
- cik_emit_prefetch_L2(sctx, false);
+ si_emit_prefetch_L2<GFX_VERSION, HAS_TESS, HAS_GS, NGG, false>(sctx);
} else {
/* If we don't wait for idle, start prefetches first, then set
* states, and draw at the end.
@@ -1952,8 +2088,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
sctx->emit_cache_flush(sctx, &sctx->gfx_cs);
/* Only prefetch the API VS and VBO descriptors. */
- if (GFX_VERSION >= GFX7 && sctx->prefetch_L2_mask)
- cik_emit_prefetch_L2(sctx, true);
+ si_emit_prefetch_L2<GFX_VERSION, HAS_TESS, HAS_GS, NGG, true>(sctx);
si_emit_all_states<GFX_VERSION, HAS_TESS, HAS_GS, NGG>
(sctx, info, indirect, prim, instance_count, min_direct_count,
@@ -1973,8 +2108,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
/* Prefetch the remaining shaders after the draw has been
* started. */
- if (GFX_VERSION >= GFX7 && sctx->prefetch_L2_mask)
- cik_emit_prefetch_L2(sctx, false);
+ si_emit_prefetch_L2<GFX_VERSION, HAS_TESS, HAS_GS, NGG, false>(sctx);
}
/* Clear the context roll flag after the draw call.
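
A short note on the two call sites replaced above: in the path where no cache flush is needed, the VS_ONLY=true instantiation runs before states and the draw, so the API VS and the VBO descriptors are already in L2 when the draw starts, and the VS_ONLY=false instantiation runs after the draw, so the remaining shaders are prefetched in parallel with it. Below is a minimal, self-contained sketch of that two-phase pattern, again with illustrative names and a toy mask rather than the real driver entry points:

#include <cstdio>

/* Illustrative stand-in for sctx->prefetch_L2_mask: bits 0-1 cover the VS
 * and the VBO descriptors, higher bits cover the remaining shader stages. */
static unsigned prefetch_mask = 0xffu;

template <bool VS_ONLY>
static void emit_prefetch_L2(void)
{
   if (!prefetch_mask)
      return;

   if (prefetch_mask & 0x3u)
      std::printf("prefetch VS + VBO descriptors\n");

   if (VS_ONLY) {
      prefetch_mask &= ~0x3u; /* leave the other stages pending */
      return;
   }

   if (prefetch_mask & ~0x3u)
      std::printf("prefetch remaining shaders\n");
   prefetch_mask = 0;
}

int main()
{
   /* Prefetch the vertex stage first, emit the draw, then prefetch the
    * rest so it runs in parallel with the draw. */
   emit_prefetch_L2<true>();
   std::printf("emit draw packet\n");
   emit_prefetch_L2<false>();
}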