[PATCH 05/17] drm/amd/display: Add Clock Table Entry With Max DC Values
Hamza Mahfooz
hamza.mahfooz at amd.com
Wed Jun 14 17:57:38 UTC 2023
From: Austin Zheng <austin.zheng at amd.com>
Why:
Certain display configs resulted in underflow
How:
Add an entry containing all max DC clock timings
Reviewed-by: Alvin Lee <alvin.lee2 at amd.com>
Acked-by: Hamza Mahfooz <hamza.mahfooz at amd.com>
Signed-off-by: Austin Zheng <austin.zheng at amd.com>
---
.../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 90 ++++++++++++++++--
.../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 4 -
.../amd/display/dc/dml/dcn321/dcn321_fpu.c | 92 +++++++++++++++++--
.../amd/display/dc/dml/dcn321/dcn321_fpu.h | 4 -
.../amd/display/dc/dml/display_mode_structs.h | 1 +
5 files changed, 171 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index e2bb2b9971f3..a95034801712 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -485,24 +485,20 @@ static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
}
}
-void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
unsigned int *num_entries,
struct _vcs_dpi_voltage_scaling_st *entry)
{
int i = 0;
int index = 0;
- float net_bw_of_new_state = 0;
dc_assert_fp_enabled();
- get_optimal_ntuple(entry);
-
if (*num_entries == 0) {
table[0] = *entry;
(*num_entries)++;
} else {
- net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry);
- while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) {
+ while (entry->net_bw_in_kbytes_sec > table[index].net_bw_in_kbytes_sec) {
index++;
if (index >= *num_entries)
break;
@@ -2349,6 +2345,63 @@ void dcn32_patch_dpm_table(struct clk_bw_params *bw_params)
bw_params->clk_table.entries[0].memclk_mhz = dcn3_2_soc.clock_limits[0].dram_speed_mts / 16;
}
+static void swap_table_entries(struct _vcs_dpi_voltage_scaling_st *first_entry,
+ struct _vcs_dpi_voltage_scaling_st *second_entry)
+{
+ struct _vcs_dpi_voltage_scaling_st temp_entry = *first_entry;
+ *first_entry = *second_entry;
+ *second_entry = temp_entry;
+}
+
+/*
+ * sort_entries_with_same_bw - Sort entries sharing the same bandwidth by DCFCLK
+ */
+static void sort_entries_with_same_bw(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+ unsigned int start_index = 0;
+ unsigned int end_index = 0;
+ unsigned int current_bw = 0;
+
+ for (int i = 0; i < (*num_entries - 1); i++) {
+ if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) {
+ current_bw = table[i].net_bw_in_kbytes_sec;
+ start_index = i;
+ end_index = ++i;
+
+ while ((i < (*num_entries - 1)) && (table[i+1].net_bw_in_kbytes_sec == current_bw))
+ end_index = ++i;
+ }
+
+ if (start_index != end_index) {
+ for (int j = start_index; j < end_index; j++) {
+ for (int k = start_index; k < end_index; k++) {
+ if (table[k].dcfclk_mhz > table[k+1].dcfclk_mhz)
+ swap_table_entries(&table[k], &table[k+1]);
+ }
+ }
+ }
+
+ start_index = 0;
+ end_index = 0;
+
+ }
+}
+
+/*
+ * remove_inconsistent_entries - Ensure entries with the same bandwidth have MEMCLK and FCLK monotonically increasing
+ * and remove entries that do not
+ */
+static void remove_inconsistent_entries(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+ for (int i = 0; i < (*num_entries - 1); i++) {
+ if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) {
+ if ((table[i].dram_speed_mts > table[i+1].dram_speed_mts) ||
+ (table[i].fabricclk_mhz > table[i+1].fabricclk_mhz))
+ remove_entry_from_table_at_index(table, num_entries, i);
+ }
+ }
+}
+
/*
* override_max_clk_values - Overwrite the max clock frequencies with the max DC mode timings
* Input:
@@ -2480,6 +2533,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
entry.fabricclk_mhz = 0;
entry.dram_speed_mts = 0;
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
insert_entry_into_table_sorted(table, num_entries, &entry);
}
@@ -2488,6 +2543,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
entry.fabricclk_mhz = 0;
entry.dram_speed_mts = 0;
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
insert_entry_into_table_sorted(table, num_entries, &entry);
// Insert the UCLK DPMS
@@ -2496,6 +2553,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
entry.fabricclk_mhz = 0;
entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16;
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
insert_entry_into_table_sorted(table, num_entries, &entry);
}
@@ -2506,6 +2565,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
entry.dram_speed_mts = 0;
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
insert_entry_into_table_sorted(table, num_entries, &entry);
}
}
@@ -2515,6 +2576,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
entry.fabricclk_mhz = max_clk_data.fclk_mhz;
entry.dram_speed_mts = 0;
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
insert_entry_into_table_sorted(table, num_entries, &entry);
}
@@ -2530,6 +2593,21 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
remove_entry_from_table_at_index(table, num_entries, i);
}
+ // Insert entry with all max dc limits without bandwidth matching
+ if (!disable_dc_mode_overwrite) {
+ struct _vcs_dpi_voltage_scaling_st max_dc_limits_entry = entry;
+
+ max_dc_limits_entry.dcfclk_mhz = max_clk_data.dcfclk_mhz;
+ max_dc_limits_entry.fabricclk_mhz = max_clk_data.fclk_mhz;
+ max_dc_limits_entry.dram_speed_mts = max_clk_data.memclk_mhz * 16;
+
+ max_dc_limits_entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&max_dc_limits_entry);
+ insert_entry_into_table_sorted(table, num_entries, &max_dc_limits_entry);
+
+ sort_entries_with_same_bw(table, num_entries);
+ remove_inconsistent_entries(table, num_entries);
+ }
+
// At this point, the table only contains supported points of interest
// it could be used as is, but some states may be redundant due to
// coarse grained nature of some clocks, so we want to round up to
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index a4206b71d650..defbee866be6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -39,10 +39,6 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc,
uint8_t dcn32_predict_pipe_split(struct dc_state *context,
display_e2e_pipe_params_st *pipe_e2e);
-void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
- unsigned int *num_entries,
- struct _vcs_dpi_voltage_scaling_st *entry);
-
void dcn32_set_phantom_stream_timing(struct dc *dc,
struct dc_state *context,
struct pipe_ctx *ref_pipe,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
index f0683fd9d3f0..190776063f46 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
@@ -207,24 +207,20 @@ static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *
return limiting_bw_kbytes_sec;
}
-void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
+static void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
unsigned int *num_entries,
struct _vcs_dpi_voltage_scaling_st *entry)
{
int i = 0;
int index = 0;
- float net_bw_of_new_state = 0;
dc_assert_fp_enabled();
- get_optimal_ntuple(entry);
-
if (*num_entries == 0) {
table[0] = *entry;
(*num_entries)++;
} else {
- net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry);
- while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) {
+ while (entry->net_bw_in_kbytes_sec > table[index].net_bw_in_kbytes_sec) {
index++;
if (index >= *num_entries)
break;
@@ -252,6 +248,63 @@ static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st
memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st));
}
+static void swap_table_entries(struct _vcs_dpi_voltage_scaling_st *first_entry,
+ struct _vcs_dpi_voltage_scaling_st *second_entry)
+{
+ struct _vcs_dpi_voltage_scaling_st temp_entry = *first_entry;
+ *first_entry = *second_entry;
+ *second_entry = temp_entry;
+}
+
+/*
+ * sort_entries_with_same_bw - Sort entries sharing the same bandwidth by DCFCLK
+ */
+static void sort_entries_with_same_bw(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+ unsigned int start_index = 0;
+ unsigned int end_index = 0;
+ unsigned int current_bw = 0;
+
+ for (int i = 0; i < (*num_entries - 1); i++) {
+ if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) {
+ current_bw = table[i].net_bw_in_kbytes_sec;
+ start_index = i;
+ end_index = ++i;
+
+ while ((i < (*num_entries - 1)) && (table[i+1].net_bw_in_kbytes_sec == current_bw))
+ end_index = ++i;
+ }
+
+ if (start_index != end_index) {
+ for (int j = start_index; j < end_index; j++) {
+ for (int k = start_index; k < end_index; k++) {
+ if (table[k].dcfclk_mhz > table[k+1].dcfclk_mhz)
+ swap_table_entries(&table[k], &table[k+1]);
+ }
+ }
+ }
+
+ start_index = 0;
+ end_index = 0;
+
+ }
+}
+
+/*
+ * remove_inconsistent_entries - Ensure entries with the same bandwidth have MEMCLK and FCLK monotonically increasing
+ * and remove entries that do not follow this order
+ */
+static void remove_inconsistent_entries(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries)
+{
+ for (int i = 0; i < (*num_entries - 1); i++) {
+ if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) {
+ if ((table[i].dram_speed_mts > table[i+1].dram_speed_mts) ||
+ (table[i].fabricclk_mhz > table[i+1].fabricclk_mhz))
+ remove_entry_from_table_at_index(table, num_entries, i);
+ }
+ }
+}
+
/*
* override_max_clk_values - Overwrite the max clock frequencies with the max DC mode timings
* Input:
@@ -383,6 +436,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
entry.fabricclk_mhz = 0;
entry.dram_speed_mts = 0;
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
}
@@ -391,6 +446,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
entry.fabricclk_mhz = 0;
entry.dram_speed_mts = 0;
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
// Insert the UCLK DPMS
@@ -399,6 +456,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
entry.fabricclk_mhz = 0;
entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16;
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
}
@@ -409,6 +468,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
entry.dram_speed_mts = 0;
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
}
}
@@ -418,6 +479,8 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
entry.fabricclk_mhz = max_clk_data.fclk_mhz;
entry.dram_speed_mts = 0;
+ get_optimal_ntuple(&entry);
+ entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&entry);
dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
}
@@ -433,6 +496,23 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk
remove_entry_from_table_at_index(table, num_entries, i);
}
+ // Insert entry with all max dc limits without bandwitch matching
+ if (!disable_dc_mode_overwrite) {
+ struct _vcs_dpi_voltage_scaling_st max_dc_limits_entry = entry;
+
+ max_dc_limits_entry.dcfclk_mhz = max_clk_data.dcfclk_mhz;
+ max_dc_limits_entry.fabricclk_mhz = max_clk_data.fclk_mhz;
+ max_dc_limits_entry.dram_speed_mts = max_clk_data.memclk_mhz * 16;
+
+ max_dc_limits_entry.net_bw_in_kbytes_sec = calculate_net_bw_in_kbytes_sec(&max_dc_limits_entry);
+ dcn321_insert_entry_into_table_sorted(table, num_entries, &max_dc_limits_entry);
+
+ sort_entries_with_same_bw(table, num_entries);
+ remove_inconsistent_entries(table, num_entries);
+ }
+
+
+
// At this point, the table only contains supported points of interest
// it could be used as is, but some states may be redundant due to
// coarse grained nature of some clocks, so we want to round up to
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h
index e8fad9b4be69..c6623b3705ca 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h
@@ -29,10 +29,6 @@
#include "dml/display_mode_vba.h"
-void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
- unsigned int *num_entries,
- struct _vcs_dpi_voltage_scaling_st *entry);
-
void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
#endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index ff0246a9458f..fb17f8868cb4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -167,6 +167,7 @@ struct _vcs_dpi_voltage_scaling_st {
double phyclk_mhz;
double dppclk_mhz;
double dtbclk_mhz;
+ float net_bw_in_kbytes_sec;
};
/**
--
2.40.1
More information about the amd-gfx
mailing list