Mesa (main): radv: remove DFSM
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed May 26 17:41:25 UTC 2021
Module: Mesa
Branch: main
Commit: 69ae02151d78d6283c5fb98b2236601f6d5af184
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=69ae02151d78d6283c5fb98b2236601f6d5af184
Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date: Tue May 25 08:26:23 2021 +0200
radv: remove DFSM
DFSM has never been enabled by default because it was slower.
RadeonSI is also dropping support for it because they discovered
that it's actually not efficient in practice.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10968>
---
docs/envvars.rst | 2 --
src/amd/vulkan/radv_cmd_buffer.c | 22 +---------------------
src/amd/vulkan/radv_debug.h | 5 ++---
src/amd/vulkan/radv_device.c | 6 +-----
src/amd/vulkan/radv_pipeline.c | 28 +---------------------------
src/amd/vulkan/radv_private.h | 2 --
6 files changed, 5 insertions(+), 60 deletions(-)
diff --git a/docs/envvars.rst b/docs/envvars.rst
index d95e79110a8..9bd85ac26d2 100644
--- a/docs/envvars.rst
+++ b/docs/envvars.rst
@@ -633,8 +633,6 @@ RADV driver environment variables
enable wave32 for compute shaders (GFX10+)
``dccmsaa``
enable DCC for MSAA images
- ``dfsm``
- enable DFSM
``gewave32``
enable wave32 for vertex/tess/geometry shaders (GFX10+)
``localbos``
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 2099d8c733d..f245b2935e6 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -950,12 +950,6 @@ radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
radeon_emit(cs, centroid_priority);
radeon_emit(cs, centroid_priority >> 32);
- /* GFX9: Flush DFSM when the AA mode changes. */
- if (cmd_buffer->device->dfsm_allowed) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
- }
-
cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
@@ -1001,8 +995,7 @@ radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeli
if (old_pipeline &&
old_pipeline->graphics.binning.pa_sc_binner_cntl_0 ==
- pipeline->graphics.binning.pa_sc_binner_cntl_0 &&
- old_pipeline->graphics.binning.db_dfsm_control == pipeline->graphics.binning.db_dfsm_control)
+ pipeline->graphics.binning.pa_sc_binner_cntl_0)
return;
bool binning_flush = false;
@@ -1019,14 +1012,6 @@ radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeli
pipeline->graphics.binning.pa_sc_binner_cntl_0 |
S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush));
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_context_reg(cmd_buffer->cs, R_028038_DB_DFSM_CONTROL,
- pipeline->graphics.binning.db_dfsm_control);
- } else {
- radeon_set_context_reg(cmd_buffer->cs, R_028060_DB_DFSM_CONTROL,
- pipeline->graphics.binning.db_dfsm_control);
- }
-
cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
@@ -2481,11 +2466,6 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
}
- if (cmd_buffer->device->dfsm_allowed) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
- }
-
cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
}
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index 7d01d5b0def..9b5755a4785 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -71,9 +71,8 @@ enum {
RADV_PERFTEST_CS_WAVE_32 = 1u << 3,
RADV_PERFTEST_PS_WAVE_32 = 1u << 4,
RADV_PERFTEST_GE_WAVE_32 = 1u << 5,
- RADV_PERFTEST_DFSM = 1u << 6,
- RADV_PERFTEST_NO_SAM = 1u << 7,
- RADV_PERFTEST_SAM = 1u << 8,
+ RADV_PERFTEST_NO_SAM = 1u << 6,
+ RADV_PERFTEST_SAM = 1u << 7,
};
bool radv_init_trace(struct radv_device *device);
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index ce9ef934645..0f36f066434 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -814,7 +814,7 @@ static const struct debug_control radv_perftest_options[] = {
{"localbos", RADV_PERFTEST_LOCAL_BOS}, {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
{"bolist", RADV_PERFTEST_BO_LIST},
{"cswave32", RADV_PERFTEST_CS_WAVE_32}, {"pswave32", RADV_PERFTEST_PS_WAVE_32},
- {"gewave32", RADV_PERFTEST_GE_WAVE_32}, {"dfsm", RADV_PERFTEST_DFSM},
+ {"gewave32", RADV_PERFTEST_GE_WAVE_32},
{"nosam", RADV_PERFTEST_NO_SAM}, {"sam", RADV_PERFTEST_SAM},
{NULL, 0}};
@@ -2975,10 +2975,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
!(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
- /* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
- device->dfsm_allowed =
- device->pbb_allowed && (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
-
/* The maximum number of scratch waves. Scratch space isn't divided
* evenly between CUs. The number is only a function of the number of CUs.
* We can decrease the constant to decrease the scratch buffer size.
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 8a4f485381c..b67430d281b 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -4080,7 +4080,6 @@ radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
{
uint32_t pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
S_028C44_DISABLE_START_OF_PRIM(1);
- uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
@@ -4112,7 +4111,6 @@ radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
}
pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
- pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
}
struct radv_binning_settings
@@ -4162,17 +4160,6 @@ radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
struct radv_binning_settings settings =
radv_get_binning_settings(pipeline->device->physical_device);
- bool disable_start_of_prim = true;
- uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
-
- const struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-
- if (pipeline->device->dfsm_allowed && ps && !ps->info.ps.can_discard &&
- !ps->info.ps.writes_memory && blend->cb_target_enabled_4bit) {
- db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_AUTO);
- disable_start_of_prim = (blend->blend_enable_4bit & blend->cb_target_enabled_4bit) != 0;
- }
-
const uint32_t pa_sc_binner_cntl_0 =
S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
S_028C44_BIN_SIZE_X(bin_size.width == 16) | S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
@@ -4180,11 +4167,10 @@ radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) |
S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) |
- S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
+ S_028C44_DISABLE_START_OF_PRIM(1) |
S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1);
pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
- pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
} else
radv_pipeline_init_disabled_binning_state(pipeline, pCreateInfo);
}
@@ -4331,12 +4317,6 @@ radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs,
radeon_set_context_reg(
ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
-
- /* GFX9: Flush DFSM when the AA mode changes. */
- if (pipeline->device->dfsm_allowed) {
- radeon_emit(ctx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(ctx_cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
- }
}
static void
@@ -5038,12 +5018,6 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct rade
ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil,
ps->info.ps.writes_sample_mask));
-
- if (pipeline->device->dfsm_allowed) {
- /* optimise this? */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
- }
}
static void
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 25f0ba28785..040e008b755 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -756,7 +756,6 @@ struct radv_device {
struct radeon_cmdbuf *empty_cs[RADV_MAX_QUEUE_FAMILIES];
bool pbb_allowed;
- bool dfsm_allowed;
uint32_t tess_offchip_block_dw_size;
uint32_t scratch_waves;
uint32_t dispatch_initiator;
@@ -1686,7 +1685,6 @@ struct radv_ia_multi_vgt_param_helpers {
struct radv_binning_state {
uint32_t pa_sc_binner_cntl_0;
- uint32_t db_dfsm_control;
};
#define SI_GS_PER_ES 128
More information about the mesa-commit
mailing list