[Intel-gfx] [PATCH] drm/i915: Fix simulated GPU reset wrt. encoder HW readout

Imre Deak imre.deak at intel.com
Fri Oct 7 13:33:07 UTC 2022


The GPU reset involves a display suspend/resume sequence, but this is
done without suspending/resuming the encoders. The encoder HW readout
code during resume however assumes that the encoders were
suspended/resumed, at least on TypeC platforms where the TC PHYs must be
left in a disconnected state during encoder-suspend, and the PHY's TypeC
mode must be initialized already during encoder-resume.

Since the above issue occurs only in case the display recovery during
GPU reset is simulated in CI tests (on new platforms w/o the GPU reset
clobbering the display), this patch fixes the issue by simply restoring
the saved display state in this case w/o doing a display HW state
readout / sanitization first. This also fixes the WARN below introduced
by

commit a82796a2e332 ("drm/i915: Fix TypeC mode initialization during system resume")

<4> [319.983309] ------------[ cut here ]------------
<4> [319.983313] i915 0000:00:02.0: drm_WARN_ON(dig_port->tc_link_refcount != 1)
<4> [319.983341] WARNING: CPU: 10 PID: 268 at drivers/gpu/drm/i915/display/intel_tc.c:751
intel_tc_port_sanitize_mode+0x239/0x290 [i915]
<4> [319.983407] Modules linked in: fuse snd_hda_codec_hdmi i915 x86_pkg_temp_thermal mei_hdcp coretemp wmi_bmof
r8153_ecm cdc_ether kvm_intel usbnet r8152 mii kvm prime_numbers snd_hda_intel ttm snd_intel_dspcfg irqbypass
drm_buddy e1000e crct10dif_pclmul snd_hda_codec crc32_pclmul drm_display_helper ptp snd_hwdep ghash_clmulni_intel
snd_hda_core drm_kms_helper pps_core mei_me syscopyarea video i2c_i801 snd_pcm sysfillrect i2c_smbus sysimgblt mei
fb_sys_fops intel_lpss_pci wmi
<4> [319.983483] CPU: 10 PID: 268 Comm: kworker/10:1H Not tainted 6.0.0-rc7-CI_DRM_12200-g394e575b57e9+ #1
<4> [319.983486] Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-P LP5 RVP, BIOS
ADLPFWI1.R00.2313.A00.2107301001 07/30/2021
<4> [319.983488] Workqueue: events_highpri heartbeat [i915]
<4> [319.983536] RIP: 0010:intel_tc_port_sanitize_mode+0x239/0x290 [i915]
<4> [319.983600] Code: 85 d2 75 03 48 8b 17 48 89 14 24 e8 e1 dc 2d e1 48 8b 14 24 48 c7 c1 f8 db 5b a0 48 c7 c7 3e
3c 5e a0 48 89 c6 e8 45 d7 66 e1 <0f> 0b e9 20 fe ff ff 0f 0b 49 c7 c0 8b 3c 5e a0 e9 9e fe ff ff 48
<4> [319.983601] RSP: 0018:ffffc90001617a30 EFLAGS: 00010286
<4> [319.983604] RAX: 0000000000000000 RBX: ffff88811f9d2000 RCX: 0000000000000001
<4> [319.983606] RDX: 0000000080000001 RSI: ffffffff8231e8cd RDI: 00000000ffffffff
<4> [319.983607] RBP: ffff888121e98000 R08: 0000000000000000 R09: c0000000ffffc134
<4> [319.983608] R10: 00000000000d6078 R11: ffffc900016178c8 R12: ffff88811f9d3838
<4> [319.983609] R13: ffff88811f9d397d R14: ffff888121e98000 R15: 0000000000000000
<4> [319.983611] FS:  0000000000000000(0000) GS:ffff8882a7300000(0000) knlGS:0000000000000000
<4> [319.983612] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4> [319.983613] CR2: 00007fe7397f1e18 CR3: 0000000006612003 CR4: 0000000000770ee0
<4> [319.983615] PKRU: 55555554
<4> [319.983616] Call Trace:
<4> [319.983617]  <TASK>
<4> [319.983621]  intel_ddi_sync_state+0x3f/0x90 [i915]
<4> [319.983698]  intel_modeset_setup_hw_state+0x3a3/0x1440 [i915]
<4> [319.983777]  ? intel_gt_reset_global+0xeb/0x160 [i915]
<4> [319.983839]  ? __intel_display_resume+0x15/0xe0 [i915]
<4> [319.983909]  __intel_display_resume+0x15/0xe0 [i915]
<4> [319.983979]  intel_display_finish_reset+0x58/0x130 [i915]
<4> [319.984048]  intel_gt_reset_global+0xf3/0x160 [i915]
<4> [319.984107]  ? intel_reset_guc.cold.62+0x5d/0x5d [i915]
<4> [319.984189]  ? 0xffffffff81000000
<4> [319.984192]  ? queue_work_node+0x90/0x90
<4> [319.984202]  intel_gt_handle_error+0x2c2/0x410 [i915]
<4> [319.984267]  ? _raw_spin_unlock_irqrestore+0x54/0x70
<4> [319.984271]  ? lockdep_hardirqs_on+0xbf/0x140
<4> [319.984276]  ? intel_guc_find_hung_context+0x19e/0x1d0 [i915]
<4> [319.984352]  reset_engine+0x99/0xd0 [i915]
<4> [319.984399]  ? __drm_printfn_seq_file+0x20/0x20
<4> [319.984406]  heartbeat+0x4cd/0x4f0 [i915]
<4> [319.984454]  process_one_work+0x272/0x5b0
<4> [319.984461]  worker_thread+0x37/0x370
<4> [319.984465]  ? process_one_work+0x5b0/0x5b0
<4> [319.984467]  kthread+0xed/0x120
<4> [319.984470]  ? kthread_complete_and_exit+0x20/0x20
<4> [319.984474]  ret_from_fork+0x1f/0x30
<4> [319.984484]  </TASK>
<4> [319.984485] irq event stamp: 36107
<4> [319.984487] hardirqs last  enabled at (36113): [<ffffffff811391d6>] __up_console_sem+0x66/0x70
<4> [319.984492] hardirqs last disabled at (36118): [<ffffffff811391bb>] __up_console_sem+0x4b/0x70
<4> [319.984494] softirqs last  enabled at (34316): [<ffffffff81e00323>] __do_softirq+0x323/0x48e
<4> [319.984497] softirqs last disabled at (34309): [<ffffffff810c16b8>] irq_exit_rcu+0xb8/0xe0
<4> [319.984499] ---[ end trace 0000000000000000 ]---

v2:
- Instead of trying to fix the suspend/resume sequence, restore simply
  the state w/o the HW readout/sanitization step. (Ville)

References: https://lore.kernel.org/intel-gfx/20221005175251.3586272-1-imre.deak@intel.com/T/#mcfac180a67f6048096d09fa04347aa088291fafb
Closes: https://gitlab.freedesktop.org/drm/intel/issues/7021
Cc: Mika Kahola <mika.kahola at intel.com>
Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
Signed-off-by: Imre Deak <imre.deak at intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 24 ++++++++++++++------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 8c3bd9ba0d748..b147ea79c566a 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -830,6 +830,20 @@ intel_plane_fence_y_offset(const struct intel_plane_state *plane_state)
 	return y;
 }
 
+static int
+intel_display_commit_duplicated_state(struct intel_atomic_state *state,
+				      struct drm_modeset_acquire_ctx *ctx)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+	int ret;
+
+	ret = drm_atomic_helper_commit_duplicated_state(&state->base, ctx);
+
+	drm_WARN_ON(&i915->drm, ret == -EDEADLK);
+
+	return ret;
+}
+
 static int
 __intel_display_resume(struct drm_i915_private *i915,
 		       struct drm_atomic_state *state,
@@ -837,7 +851,7 @@ __intel_display_resume(struct drm_i915_private *i915,
 {
 	struct drm_crtc_state *crtc_state;
 	struct drm_crtc *crtc;
-	int i, ret;
+	int i;
 
 	intel_modeset_setup_hw_state(i915, ctx);
 	intel_vga_redisable(i915);
@@ -863,11 +877,7 @@ __intel_display_resume(struct drm_i915_private *i915,
 	if (!HAS_GMCH(i915))
 		to_intel_atomic_state(state)->skip_intermediate_wm = true;
 
-	ret = drm_atomic_helper_commit_duplicated_state(state, ctx);
-
-	drm_WARN_ON(&i915->drm, ret == -EDEADLK);
-
-	return ret;
+	return intel_display_commit_duplicated_state(to_intel_atomic_state(state), ctx);
 }
 
 static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
@@ -959,7 +969,7 @@ void intel_display_finish_reset(struct drm_i915_private *i915)
 	/* reset doesn't touch the display */
 	if (!gpu_reset_clobbers_display(i915)) {
 		/* for testing only restore the display */
-		ret = __intel_display_resume(i915, state, ctx);
+		ret = intel_display_commit_duplicated_state(to_intel_atomic_state(state), ctx);
 		if (ret)
 			drm_err(&i915->drm,
 				"Restoring old state failed with %i\n", ret);
-- 
2.37.1



More information about the Intel-gfx mailing list