[Intel-gfx] [PATCH] drm/i915: Decouple engine->sanitize callback on removing the status page
Matthew Auld
matthew.william.auld at gmail.com
Wed Mar 23 11:08:02 UTC 2022
On Wed, 23 Mar 2022 at 09:44, Gwan-gyeong Mun <gwan-gyeong.mun at intel.com> wrote:
>
> From: Chris Wilson <chris at chris-wilson.co.uk>
>
> We have to be careful not to call into the submission backend's sanitize
> callback if we abort the module load and free the status page. Since we
> are only using the sanitize callback to cleanup the status page when we
> suspect its contents may have been lost (first load, upon resume etc)
> let us move the callback to engine->status_page and remove it as we free
> the status page.
>
> <3> [306.359604] BUG: KASAN: slab-out-of-bounds in xcs_sanitize+0x4a/0x110 [i915]
> <3> [306.360346] Write of size 4096 at addr ffff88806d5e8000 by task i915_module_loa/1052
> <3> [306.360561]
> <4> [306.360627] CPU: 1 PID: 1052 Comm: i915_module_loa Tainted: G U 5.12.0-rc2-g249f72def27bf-kasan_218+ #1
> <4> [306.360650] Hardware name: GIGABYTE GB-BXBT-1900/MZBAYAB-00, BIOS F6 02/17/2015
> <4> [306.360667] Call Trace:
> <4> [306.360688] dump_stack+0xa4/0xe5
> <4> [306.360727] ? xcs_sanitize+0x4a/0x110 [i915]
> <4> [306.361274] print_address_description.constprop.9+0x3a/0x60
> <4> [306.361311] ? xcs_sanitize+0x4a/0x110 [i915]
> <4> [306.361855] ? xcs_sanitize+0x4a/0x110 [i915]
> <4> [306.362408] kasan_report.cold.14+0x7c/0xd8
> <4> [306.362456] ? xcs_sanitize+0x4a/0x110 [i915]
> <4> [306.363015] kasan_check_range+0x1c1/0x1e0
> <4> [306.363056] memset+0x1f/0x40
> <4> [306.363093] xcs_sanitize+0x4a/0x110 [i915]
> <4> [306.363661] gt_sanitize+0x2f7/0x6d0 [i915]
> <4> [306.364221] ? __pm_runtime_suspend+0x186/0x2e0
> <4> [306.364270] intel_gt_suspend_late+0x126/0x2c0 [i915]
> <4> [306.364833] i915_gem_suspend_late+0x9d/0x470 [i915]
> <4> [306.365402] ? intel_wakeref_auto+0x3ba/0x520 [i915]
> <4> [306.365939] ? i915_gem_suspend+0x180/0x180 [i915]
> <4> [306.366521] i915_gem_driver_remove+0x25/0x1f0 [i915]
> <4> [306.367071] ? lockdep_hardirqs_on+0xbf/0x130
> <4> [306.367124] i915_driver_remove+0xba/0xf0 [i915]
> <4> [306.367670] i915_pci_remove+0x34/0x70 [i915]
> <4> [306.368224] pci_device_remove+0xa3/0x1e0
> <4> [306.368275] device_release_driver_internal+0x1e0/0x4a0
> <4> [306.368320] driver_detach+0xbc/0x180
> <4> [306.368364] bus_remove_driver+0x15e/0x2d0
> <4> [306.368404] pci_unregister_driver+0x28/0x220
> <4> [306.368456] i915_exit+0x1b/0x26 [i915]
> <4> [306.369055] __x64_sys_delete_module+0x257/0x370
> <4> [306.369093] ? __ia32_sys_delete_module+0x370/0x370
> <4> [306.369146] ? lockdep_hardirqs_on+0xbf/0x130
> <4> [306.369185] do_syscall_64+0x33/0x80
> <4> [306.369212] entry_SYSCALL_64_after_hwframe+0x44/0xae
> <4> [306.369237] RIP: 0033:0x7f7a7020fbcb
> <4> [306.369259] Code: 73 01 c3 48 8b 0d c5 82 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 95 82 0c 00 f7 d8 64 89 01 48
> <4> [306.369281] RSP: 002b:00007ffc22d2ef78 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
> <4> [306.369315] RAX: ffffffffffffffda RBX: 000055724a324db0 RCX: 00007f7a7020fbcb
> <4> [306.369334] RDX: 00007f7a702d8be0 RSI: 0000000000000800 RDI: 000055724a324e18
> <4> [306.369352] RBP: 00007f7a70392702 R08: 0000000000000000 R09: 00007f7a702d8da0
> <4> [306.369370] R10: 000055724a2ee010 R11: 0000000000000206 R12: 0000000000000000
> <4> [306.369388] R13: 00007ffc22d2f670 R14: 0000000000000000 R15: 0000000000000000
>
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4105
> Fixes: b436a5f8b6c8 ("drm/i915/gt: Track all timelines created using the HWSP")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Signed-off-by: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++++--
> drivers/gpu/drm/i915/gt/intel_engine_types.h | 4 +++-
> drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 4 +---
> drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 ++--
> drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2 +-
> drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ++--
> 6 files changed, 14 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 98b61ff13c95..7e3a65b0661c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -844,6 +844,9 @@ static void cleanup_status_page(struct intel_engine_cs *engine)
>
> i915_gem_object_unpin_map(vma->obj);
> i915_gem_object_put(vma->obj);
> +
> + /* no longer in control, nothing left to sanitize */
> + engine->status_page.sanitize = NULL;
> }
>
> static int pin_ggtt_status_page(struct intel_engine_cs *engine,
> @@ -1542,8 +1545,8 @@ void intel_engines_reset_default_submission(struct intel_gt *gt)
> enum intel_engine_id id;
>
> for_each_engine(engine, gt, id) {
> - if (engine->sanitize)
> - engine->sanitize(engine);
> + if (engine->status_page.sanitize)
> + engine->status_page.sanitize(engine);
>
> engine->set_default_submission(engine);
> }
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index eac20112709c..268249efd76c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -53,6 +53,7 @@ struct intel_gt;
> struct intel_ring;
> struct intel_uncore;
> struct intel_breadcrumbs;
> +struct intel_engine_cs;
>
> typedef u32 intel_engine_mask_t;
> #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
> @@ -61,6 +62,8 @@ struct intel_hw_status_page {
> struct list_head timelines;
> struct i915_vma *vma;
> u32 *addr;
> +
> + void (*sanitize)(struct intel_engine_cs *engine);
> };
>
> struct intel_instdone {
> @@ -445,7 +448,6 @@ struct intel_engine_cs {
> void (*irq_disable)(struct intel_engine_cs *engine);
> void (*irq_handler)(struct intel_engine_cs *engine, u16 iir);
>
> - void (*sanitize)(struct intel_engine_cs *engine);
> int (*resume)(struct intel_engine_cs *engine);
>
> struct {
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index e23c1d0e980b..c3b850e23c4b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -3328,8 +3328,6 @@ static void execlists_shutdown(struct intel_engine_cs *engine)
>
> static void execlists_release(struct intel_engine_cs *engine)
> {
> - engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
> -
> execlists_shutdown(engine);
>
> intel_engine_cleanup_common(engine);
> @@ -3520,7 +3518,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
> }
>
> /* Finally, take ownership and responsibility for cleanup! */
> - engine->sanitize = execlists_sanitize;
> + engine->status_page.sanitize = execlists_sanitize;
> engine->release = execlists_release;
>
> return 0;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index eeead40485fb..a38124748a7d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -189,8 +189,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
> intel_uc_reset_prepare(>->uc);
>
> for_each_engine(engine, gt, id)
> - if (engine->sanitize)
> - engine->sanitize(engine);
> + if (engine->status_page.sanitize)
> + engine->status_page.sanitize(engine);
>
> if (reset_engines(gt) || force) {
> for_each_engine(engine, gt, id)
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> index 5423bfd301ad..a8d4398ea024 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> @@ -1121,7 +1121,7 @@ static void setup_common(struct intel_engine_cs *engine)
> setup_irq(engine);
>
> engine->resume = xcs_resume;
> - engine->sanitize = xcs_sanitize;
> + engine->status_page.sanitize = xcs_sanitize;
>
> engine->reset.prepare = reset_prepare;
> engine->reset.rewind = reset_rewind;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index e1612c393781..156a5a9e6a55 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -3676,7 +3676,7 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc)
>
> static void guc_release(struct intel_engine_cs *engine)
> {
> - engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
> + tasklet_kill(&engine->sched_engine->tasklet);
>
> intel_engine_cleanup_common(engine);
> lrc_fini_wa_ctx(engine);
> @@ -3809,7 +3809,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
> lrc_init_wa_ctx(engine);
>
> /* Finally, take ownership and responsibility for cleanup! */
> - engine->sanitize = guc_sanitize;
> + engine->status_page.sanitize = guc_sanitize;
> engine->release = guc_release;
>
> return 0;
> --
> 2.34.1
>
More information about the Intel-gfx
mailing list