[PATCH v5] drm/amd/display: Revert W/A for hard hangs on DCN20/DCN21

Kazlauskas, Nicholas Nicholas.Kazlauskas at amd.com
Fri Jan 7 16:55:48 UTC 2022


[AMD Official Use Only]

> -----Original Message-----
> From: Limonciello, Mario <Mario.Limonciello at amd.com>
> Sent: January 7, 2022 11:50 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Limonciello, Mario <Mario.Limonciello at amd.com>; Kazlauskas, Nicholas
> <Nicholas.Kazlauskas at amd.com>; Zhuo, Qingqing (Lillian)
> <Qingqing.Zhuo at amd.com>; Scott Bruce <smbruce at gmail.com>; Chris
> Hixon <linux-kernel-bugs at hixontech.com>; spasswolf at web.de
> Subject: [PATCH v5] drm/amd/display: Revert W/A for hard hangs on
> DCN20/DCN21
> Importance: High
>
> The WA from commit 2a50edbf10c8 ("drm/amd/display: Apply w/a for hard
> hang
> on HPD") and commit 1bd3bc745e7f ("drm/amd/display: Extend w/a for hard
> hang on HPD to dcn20") causes a regression in s0ix where the system will
> fail to resume properly on many laptops.  Pull the workarounds out to
> avoid that s0ix regression in the common case.  This HPD hang happens with
> an external device and a new W/A will need to be developed for this in the
> future.
>
> Cc: Kazlauskas Nicholas <Nicholas.Kazlauskas at amd.com>
> Cc: Qingqing Zhuo <qingqing.zhuo at amd.com>
> Reported-by: Scott Bruce <smbruce at gmail.com>
> Reported-by: Chris Hixon <linux-kernel-bugs at hixontech.com>
> Reported-by: spasswolf at web.de
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=215436
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1821
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1852
> Fixes: 2a50edbf10c8 ("drm/amd/display: Apply w/a for hard hang on HPD")
> Fixes: 1bd3bc745e7f ("drm/amd/display: Extend w/a for hard hang on HPD to
> dcn20")
> Signed-off-by: Mario Limonciello <mario.limonciello at amd.com>

I think the revert is fine once we figure out where we're missing calls to:

        .optimize_pwr_state = dcn21_optimize_pwr_state,
        .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state,

These are already part of dc_link_detect, so I suspect there's another interface in DC that should be using these.

I think the best way to debug this is to revert the patch locally and add a stack dump when DMCUB hangs our times out.

That way you can know where the PHY was trying to be accessed without the refclk being on.

We had a similar issue in DCN31 which didn't require a W/A like DCN21.

I'd like to hold off on merging this until that hang is verified as gone.

Regards,
Nicholas Kazlauskas

> ---
>  .../display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c  | 11 +-------
>  .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 11 +-------
>  .../display/dc/irq/dcn20/irq_service_dcn20.c  | 25 -------------------
>  .../display/dc/irq/dcn20/irq_service_dcn20.h  |  2 --
>  .../display/dc/irq/dcn21/irq_service_dcn21.c  | 25 -------------------
>  .../display/dc/irq/dcn21/irq_service_dcn21.h  |  2 --
>  .../gpu/drm/amd/display/dc/irq/irq_service.c  |  2 +-
>  .../gpu/drm/amd/display/dc/irq/irq_service.h  |  4 ---
>  8 files changed, 3 insertions(+), 79 deletions(-)
>
> diff --git
> a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
> b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
> index 9f35f2e8f971..cac80ba69072 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
> @@ -38,7 +38,6 @@
>  #include "clk/clk_11_0_0_offset.h"
>  #include "clk/clk_11_0_0_sh_mask.h"
>
> -#include "irq/dcn20/irq_service_dcn20.h"
>
>  #undef FN
>  #define FN(reg_name, field_name) \
> @@ -223,8 +222,6 @@ void dcn2_update_clocks(struct clk_mgr
> *clk_mgr_base,
>       bool force_reset = false;
>       bool p_state_change_support;
>       int total_plane_count;
> -     int irq_src;
> -     uint32_t hpd_state;
>
>       if (dc->work_arounds.skip_clock_update)
>               return;
> @@ -242,13 +239,7 @@ void dcn2_update_clocks(struct clk_mgr
> *clk_mgr_base,
>       if (dc->res_pool->pp_smu)
>               pp_smu = &dc->res_pool->pp_smu->nv_funcs;
>
> -     for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <=
> DC_IRQ_SOURCE_HPD6; irq_src++) {
> -             hpd_state = dc_get_hpd_state_dcn20(dc->res_pool->irqs,
> irq_src);
> -             if (hpd_state)
> -                     break;
> -     }
> -
> -     if (display_count == 0 && !hpd_state)
> +     if (display_count == 0)
>               enter_display_off = true;
>
>       if (enter_display_off == safe_to_lower) {
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
> b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
> index fbda42313bfe..f4dee0e48a67 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
> @@ -42,7 +42,6 @@
>  #include "clk/clk_10_0_2_sh_mask.h"
>  #include "renoir_ip_offset.h"
>
> -#include "irq/dcn21/irq_service_dcn21.h"
>
>  /* Constants */
>
> @@ -129,11 +128,9 @@ static void rn_update_clocks(struct clk_mgr
> *clk_mgr_base,
>       struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
>       struct dc *dc = clk_mgr_base->ctx->dc;
>       int display_count;
> -     int irq_src;
>       bool update_dppclk = false;
>       bool update_dispclk = false;
>       bool dpp_clock_lowered = false;
> -     uint32_t hpd_state;
>
>       struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu;
>
> @@ -150,14 +147,8 @@ static void rn_update_clocks(struct clk_mgr
> *clk_mgr_base,
>
>                       display_count = rn_get_active_display_cnt_wa(dc,
> context);
>
> -                     for (irq_src = DC_IRQ_SOURCE_HPD1; irq_src <=
> DC_IRQ_SOURCE_HPD5; irq_src++) {
> -                             hpd_state = dc_get_hpd_state_dcn21(dc-
> >res_pool->irqs, irq_src);
> -                             if (hpd_state)
> -                                     break;
> -                     }
> -
>                       /* if we can go lower, go lower */
> -                     if (display_count == 0 && !hpd_state) {
> +                     if (display_count == 0) {
>
>       rn_vbios_smu_set_dcn_low_power_state(clk_mgr,
> DCN_PWR_STATE_LOW_POWER);
>                               /* update power state */
>                               clk_mgr_base->clks.pwr_state =
> DCN_PWR_STATE_LOW_POWER;
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
> b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
> index 9ccafe007b23..c4b067d01895 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
> +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.c
> @@ -132,31 +132,6 @@ enum dc_irq_source to_dal_irq_source_dcn20(
>       }
>  }
>
> -uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum
> dc_irq_source source)
> -{
> -     const struct irq_source_info *info;
> -     uint32_t addr;
> -     uint32_t value;
> -     uint32_t current_status;
> -
> -     info = find_irq_source_info(irq_service, source);
> -     if (!info)
> -             return 0;
> -
> -     addr = info->status_reg;
> -     if (!addr)
> -             return 0;
> -
> -     value = dm_read_reg(irq_service->ctx, addr);
> -     current_status =
> -             get_reg_field_value(
> -                     value,
> -                     HPD0_DC_HPD_INT_STATUS,
> -                     DC_HPD_SENSE);
> -
> -     return current_status;
> -}
> -
>  static bool hpd_ack(
>       struct irq_service *irq_service,
>       const struct irq_source_info *info)
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
> b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
> index 4d69ab24ca25..aee4b37999f1 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
> +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn20/irq_service_dcn20.h
> @@ -31,6 +31,4 @@
>  struct irq_service *dal_irq_service_dcn20_create(
>       struct irq_service_init_data *init_data);
>
> -uint32_t dc_get_hpd_state_dcn20(struct irq_service *irq_service, enum
> dc_irq_source source);
> -
>  #endif
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
> b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
> index 235294534c43..0f15bcada4e9 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
> +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
> @@ -134,31 +134,6 @@ static enum dc_irq_source
> to_dal_irq_source_dcn21(struct irq_service *irq_servic
>       return DC_IRQ_SOURCE_INVALID;
>  }
>
> -uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum
> dc_irq_source source)
> -{
> -     const struct irq_source_info *info;
> -     uint32_t addr;
> -     uint32_t value;
> -     uint32_t current_status;
> -
> -     info = find_irq_source_info(irq_service, source);
> -     if (!info)
> -             return 0;
> -
> -     addr = info->status_reg;
> -     if (!addr)
> -             return 0;
> -
> -     value = dm_read_reg(irq_service->ctx, addr);
> -     current_status =
> -             get_reg_field_value(
> -                     value,
> -                     HPD0_DC_HPD_INT_STATUS,
> -                     DC_HPD_SENSE);
> -
> -     return current_status;
> -}
> -
>  static bool hpd_ack(
>       struct irq_service *irq_service,
>       const struct irq_source_info *info)
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
> b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
> index 616470e32380..da2bd0e93d7a 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
> +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.h
> @@ -31,6 +31,4 @@
>  struct irq_service *dal_irq_service_dcn21_create(
>       struct irq_service_init_data *init_data);
>
> -uint32_t dc_get_hpd_state_dcn21(struct irq_service *irq_service, enum
> dc_irq_source source);
> -
>  #endif
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> index 4db1133e4466..a2a4fbeb83f8 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
> @@ -79,7 +79,7 @@ void dal_irq_service_destroy(struct irq_service
> **irq_service)
>       *irq_service = NULL;
>  }
>
> -const struct irq_source_info *find_irq_source_info(
> +static const struct irq_source_info *find_irq_source_info(
>       struct irq_service *irq_service,
>       enum dc_irq_source source)
>  {
> diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
> b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
> index e60b82480093..dbfcb096eedd 100644
> --- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
> +++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.h
> @@ -69,10 +69,6 @@ struct irq_service {
>       const struct irq_service_funcs *funcs;
>  };
>
> -const struct irq_source_info *find_irq_source_info(
> -     struct irq_service *irq_service,
> -     enum dc_irq_source source);
> -
>  void dal_irq_service_construct(
>       struct irq_service *irq_service,
>       struct irq_service_init_data *init_data);
> --
> 2.25.1



More information about the amd-gfx mailing list