Mesa (main): radv: remove DFSM

Wed May 26 17:41:25 UTC 2021

Module: Mesa
Branch: main
Commit: 69ae02151d78d6283c5fb98b2236601f6d5af184
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=69ae02151d78d6283c5fb98b2236601f6d5af184

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Tue May 25 08:26:23 2021 +0200

radv: remove DFSM

DFSM has never been enabled by default because it was slower.
RadeonSI is also dropping support for it because they discovered
that it's actually not efficient in practice.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10968>

---

 docs/envvars.rst                 |  2 --
 src/amd/vulkan/radv_cmd_buffer.c | 22 +---------------------
 src/amd/vulkan/radv_debug.h      |  5 ++---
 src/amd/vulkan/radv_device.c     |  6 +-----
 src/amd/vulkan/radv_pipeline.c   | 28 +---------------------------
 src/amd/vulkan/radv_private.h    |  2 --
 6 files changed, 5 insertions(+), 60 deletions(-)

diff --git a/docs/envvars.rst b/docs/envvars.rst
index d95e79110a8..9bd85ac26d2 100644
--- a/docs/envvars.rst
+++ b/docs/envvars.rst
@@ -633,8 +633,6 @@ RADV driver environment variables
       enable wave32 for compute shaders (GFX10+)
    ``dccmsaa``
       enable DCC for MSAA images
-   ``dfsm``
-      enable DFSM
    ``gewave32``
       enable wave32 for vertex/tess/geometry shaders (GFX10+)
    ``localbos``
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 2099d8c733d..f245b2935e6 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -950,12 +950,6 @@ radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
    radeon_emit(cs, centroid_priority);
    radeon_emit(cs, centroid_priority >> 32);
 
-   /* GFX9: Flush DFSM when the AA mode changes. */
-   if (cmd_buffer->device->dfsm_allowed) {
-      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-      radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
-   }
-
    cmd_buffer->state.context_roll_without_scissor_emitted = true;
 }
 
@@ -1001,8 +995,7 @@ radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeli
 
    if (old_pipeline &&
        old_pipeline->graphics.binning.pa_sc_binner_cntl_0 ==
-          pipeline->graphics.binning.pa_sc_binner_cntl_0 &&
-       old_pipeline->graphics.binning.db_dfsm_control == pipeline->graphics.binning.db_dfsm_control)
+          pipeline->graphics.binning.pa_sc_binner_cntl_0)
       return;
 
    bool binning_flush = false;
@@ -1019,14 +1012,6 @@ radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeli
                           pipeline->graphics.binning.pa_sc_binner_cntl_0 |
                              S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush));
 
-   if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
-      radeon_set_context_reg(cmd_buffer->cs, R_028038_DB_DFSM_CONTROL,
-                             pipeline->graphics.binning.db_dfsm_control);
-   } else {
-      radeon_set_context_reg(cmd_buffer->cs, R_028060_DB_DFSM_CONTROL,
-                             pipeline->graphics.binning.db_dfsm_control);
-   }
-
    cmd_buffer->state.context_roll_without_scissor_emitted = true;
 }
 
@@ -2481,11 +2466,6 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
                                 S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
    }
 
-   if (cmd_buffer->device->dfsm_allowed) {
-      radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-      radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
-   }
-
    cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
 }
 
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index 7d01d5b0def..9b5755a4785 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -71,9 +71,8 @@ enum {
    RADV_PERFTEST_CS_WAVE_32 = 1u << 3,
    RADV_PERFTEST_PS_WAVE_32 = 1u << 4,
    RADV_PERFTEST_GE_WAVE_32 = 1u << 5,
-   RADV_PERFTEST_DFSM = 1u << 6,
-   RADV_PERFTEST_NO_SAM = 1u << 7,
-   RADV_PERFTEST_SAM = 1u << 8,
+   RADV_PERFTEST_NO_SAM = 1u << 6,
+   RADV_PERFTEST_SAM = 1u << 7,
 };
 
 bool radv_init_trace(struct radv_device *device);
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index ce9ef934645..0f36f066434 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -814,7 +814,7 @@ static const struct debug_control radv_perftest_options[] = {
    {"localbos", RADV_PERFTEST_LOCAL_BOS},   {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
    {"bolist", RADV_PERFTEST_BO_LIST},
    {"cswave32", RADV_PERFTEST_CS_WAVE_32},  {"pswave32", RADV_PERFTEST_PS_WAVE_32},
-   {"gewave32", RADV_PERFTEST_GE_WAVE_32},  {"dfsm", RADV_PERFTEST_DFSM},
+   {"gewave32", RADV_PERFTEST_GE_WAVE_32},
    {"nosam", RADV_PERFTEST_NO_SAM},         {"sam", RADV_PERFTEST_SAM},
    {NULL, 0}};
 
@@ -2975,10 +2975,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
    device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
                          !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
 
-   /* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
-   device->dfsm_allowed =
-      device->pbb_allowed && (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
-
    /* The maximum number of scratch waves. Scratch space isn't divided
     * evenly between CUs. The number is only a function of the number of CUs.
     * We can decrease the constant to decrease the scratch buffer size.
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 8a4f485381c..b67430d281b 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -4080,7 +4080,6 @@ radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
 {
    uint32_t pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
                                   S_028C44_DISABLE_START_OF_PRIM(1);
-   uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
 
    if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
       RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
@@ -4112,7 +4111,6 @@ radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
    }
 
    pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
-   pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
 }
 
 struct radv_binning_settings
@@ -4162,17 +4160,6 @@ radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
       struct radv_binning_settings settings =
          radv_get_binning_settings(pipeline->device->physical_device);
 
-      bool disable_start_of_prim = true;
-      uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
-
-      const struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-
-      if (pipeline->device->dfsm_allowed && ps && !ps->info.ps.can_discard &&
-          !ps->info.ps.writes_memory && blend->cb_target_enabled_4bit) {
-         db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_AUTO);
-         disable_start_of_prim = (blend->blend_enable_4bit & blend->cb_target_enabled_4bit) != 0;
-      }
-
       const uint32_t pa_sc_binner_cntl_0 =
          S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
          S_028C44_BIN_SIZE_X(bin_size.width == 16) | S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
@@ -4180,11 +4167,10 @@ radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
          S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
          S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) |
          S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) |
-         S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
+         S_028C44_DISABLE_START_OF_PRIM(1) |
          S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1);
 
       pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
-      pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
    } else
       radv_pipeline_init_disabled_binning_state(pipeline, pCreateInfo);
 }
@@ -4331,12 +4317,6 @@ radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs,
    radeon_set_context_reg(
       ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
       S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
-
-   /* GFX9: Flush DFSM when the AA mode changes. */
-   if (pipeline->device->dfsm_allowed) {
-      radeon_emit(ctx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-      radeon_emit(ctx_cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
-   }
 }
 
 static void
@@ -5038,12 +5018,6 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct rade
       ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
       ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil,
                                  ps->info.ps.writes_sample_mask));
-
-   if (pipeline->device->dfsm_allowed) {
-      /* optimise this? */
-      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
-      radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
-   }
 }
 
 static void
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 25f0ba28785..040e008b755 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -756,7 +756,6 @@ struct radv_device {
    struct radeon_cmdbuf *empty_cs[RADV_MAX_QUEUE_FAMILIES];
 
    bool pbb_allowed;
-   bool dfsm_allowed;
    uint32_t tess_offchip_block_dw_size;
    uint32_t scratch_waves;
    uint32_t dispatch_initiator;
@@ -1686,7 +1685,6 @@ struct radv_ia_multi_vgt_param_helpers {
 
 struct radv_binning_state {
    uint32_t pa_sc_binner_cntl_0;
-   uint32_t db_dfsm_control;
 };
 
 #define SI_GS_PER_ES 128