[Intel-gfx] [PATCH] drm/i915: Decouple engine->sanitize callback on removing the status page

Matthew Auld matthew.william.auld at gmail.com
Wed Mar 23 11:08:02 UTC 2022


On Wed, 23 Mar 2022 at 09:44, Gwan-gyeong Mun <gwan-gyeong.mun at intel.com> wrote:
>
> From: Chris Wilson <chris at chris-wilson.co.uk>
>
> We have to be careful not to call into the submission backend's sanitize
> callback if we abort the module load and free the status page. Since we
> are only using the sanitize callback to cleanup the status page when we
> suspect its contents may have been lost (first load, upon resume etc)
> let us move the callback to engine->status_page and remove it as we free
> the status page.
>
> <3> [306.359604] BUG: KASAN: slab-out-of-bounds in xcs_sanitize+0x4a/0x110 [i915]
> <3> [306.360346] Write of size 4096 at addr ffff88806d5e8000 by task i915_module_loa/1052
> <3> [306.360561]
> <4> [306.360627] CPU: 1 PID: 1052 Comm: i915_module_loa Tainted: G     U            5.12.0-rc2-g249f72def27bf-kasan_218+ #1
> <4> [306.360650] Hardware name: GIGABYTE GB-BXBT-1900/MZBAYAB-00, BIOS F6 02/17/2015
> <4> [306.360667] Call Trace:
> <4> [306.360688]  dump_stack+0xa4/0xe5
> <4> [306.360727]  ? xcs_sanitize+0x4a/0x110 [i915]
> <4> [306.361274]  print_address_description.constprop.9+0x3a/0x60
> <4> [306.361311]  ? xcs_sanitize+0x4a/0x110 [i915]
> <4> [306.361855]  ? xcs_sanitize+0x4a/0x110 [i915]
> <4> [306.362408]  kasan_report.cold.14+0x7c/0xd8
> <4> [306.362456]  ? xcs_sanitize+0x4a/0x110 [i915]
> <4> [306.363015]  kasan_check_range+0x1c1/0x1e0
> <4> [306.363056]  memset+0x1f/0x40
> <4> [306.363093]  xcs_sanitize+0x4a/0x110 [i915]
> <4> [306.363661]  gt_sanitize+0x2f7/0x6d0 [i915]
> <4> [306.364221]  ? __pm_runtime_suspend+0x186/0x2e0
> <4> [306.364270]  intel_gt_suspend_late+0x126/0x2c0 [i915]
> <4> [306.364833]  i915_gem_suspend_late+0x9d/0x470 [i915]
> <4> [306.365402]  ? intel_wakeref_auto+0x3ba/0x520 [i915]
> <4> [306.365939]  ? i915_gem_suspend+0x180/0x180 [i915]
> <4> [306.366521]  i915_gem_driver_remove+0x25/0x1f0 [i915]
> <4> [306.367071]  ? lockdep_hardirqs_on+0xbf/0x130
> <4> [306.367124]  i915_driver_remove+0xba/0xf0 [i915]
> <4> [306.367670]  i915_pci_remove+0x34/0x70 [i915]
> <4> [306.368224]  pci_device_remove+0xa3/0x1e0
> <4> [306.368275]  device_release_driver_internal+0x1e0/0x4a0
> <4> [306.368320]  driver_detach+0xbc/0x180
> <4> [306.368364]  bus_remove_driver+0x15e/0x2d0
> <4> [306.368404]  pci_unregister_driver+0x28/0x220
> <4> [306.368456]  i915_exit+0x1b/0x26 [i915]
> <4> [306.369055]  __x64_sys_delete_module+0x257/0x370
> <4> [306.369093]  ? __ia32_sys_delete_module+0x370/0x370
> <4> [306.369146]  ? lockdep_hardirqs_on+0xbf/0x130
> <4> [306.369185]  do_syscall_64+0x33/0x80
> <4> [306.369212]  entry_SYSCALL_64_after_hwframe+0x44/0xae
> <4> [306.369237] RIP: 0033:0x7f7a7020fbcb
> <4> [306.369259] Code: 73 01 c3 48 8b 0d c5 82 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 95 82 0c 00 f7 d8 64 89 01 48
> <4> [306.369281] RSP: 002b:00007ffc22d2ef78 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
> <4> [306.369315] RAX: ffffffffffffffda RBX: 000055724a324db0 RCX: 00007f7a7020fbcb
> <4> [306.369334] RDX: 00007f7a702d8be0 RSI: 0000000000000800 RDI: 000055724a324e18
> <4> [306.369352] RBP: 00007f7a70392702 R08: 0000000000000000 R09: 00007f7a702d8da0
> <4> [306.369370] R10: 000055724a2ee010 R11: 0000000000000206 R12: 0000000000000000
> <4> [306.369388] R13: 00007ffc22d2f670 R14: 0000000000000000 R15: 0000000000000000
>

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4105

> Fixes: b436a5f8b6c8 ("drm/i915/gt: Track all timelines created using the HWSP")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Signed-off-by: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c            | 7 +++++--
>  drivers/gpu/drm/i915/gt/intel_engine_types.h         | 4 +++-
>  drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 4 +---
>  drivers/gpu/drm/i915/gt/intel_gt_pm.c                | 4 ++--
>  drivers/gpu/drm/i915/gt/intel_ring_submission.c      | 2 +-
>  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c    | 4 ++--
>  6 files changed, 14 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 98b61ff13c95..7e3a65b0661c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -844,6 +844,9 @@ static void cleanup_status_page(struct intel_engine_cs *engine)
>
>         i915_gem_object_unpin_map(vma->obj);
>         i915_gem_object_put(vma->obj);
> +
> +       /* no longer in control, nothing left to sanitize */
> +       engine->status_page.sanitize = NULL;
>  }
>
>  static int pin_ggtt_status_page(struct intel_engine_cs *engine,
> @@ -1542,8 +1545,8 @@ void intel_engines_reset_default_submission(struct intel_gt *gt)
>         enum intel_engine_id id;
>
>         for_each_engine(engine, gt, id) {
> -               if (engine->sanitize)
> -                       engine->sanitize(engine);
> +               if (engine->status_page.sanitize)
> +                       engine->status_page.sanitize(engine);
>
>                 engine->set_default_submission(engine);
>         }
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index eac20112709c..268249efd76c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -53,6 +53,7 @@ struct intel_gt;
>  struct intel_ring;
>  struct intel_uncore;
>  struct intel_breadcrumbs;
> +struct intel_engine_cs;
>
>  typedef u32 intel_engine_mask_t;
>  #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
> @@ -61,6 +62,8 @@ struct intel_hw_status_page {
>         struct list_head timelines;
>         struct i915_vma *vma;
>         u32 *addr;
> +
> +       void (*sanitize)(struct intel_engine_cs *engine);
>  };
>
>  struct intel_instdone {
> @@ -445,7 +448,6 @@ struct intel_engine_cs {
>         void            (*irq_disable)(struct intel_engine_cs *engine);
>         void            (*irq_handler)(struct intel_engine_cs *engine, u16 iir);
>
> -       void            (*sanitize)(struct intel_engine_cs *engine);
>         int             (*resume)(struct intel_engine_cs *engine);
>
>         struct {
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index e23c1d0e980b..c3b850e23c4b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -3328,8 +3328,6 @@ static void execlists_shutdown(struct intel_engine_cs *engine)
>
>  static void execlists_release(struct intel_engine_cs *engine)
>  {
> -       engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
> -
>         execlists_shutdown(engine);
>
>         intel_engine_cleanup_common(engine);
> @@ -3520,7 +3518,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
>         }
>
>         /* Finally, take ownership and responsibility for cleanup! */
> -       engine->sanitize = execlists_sanitize;
> +       engine->status_page.sanitize = execlists_sanitize;
>         engine->release = execlists_release;
>
>         return 0;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index eeead40485fb..a38124748a7d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -189,8 +189,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
>         intel_uc_reset_prepare(&gt->uc);
>
>         for_each_engine(engine, gt, id)
> -               if (engine->sanitize)
> -                       engine->sanitize(engine);
> +               if (engine->status_page.sanitize)
> +                       engine->status_page.sanitize(engine);
>
>         if (reset_engines(gt) || force) {
>                 for_each_engine(engine, gt, id)
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> index 5423bfd301ad..a8d4398ea024 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> @@ -1121,7 +1121,7 @@ static void setup_common(struct intel_engine_cs *engine)
>         setup_irq(engine);
>
>         engine->resume = xcs_resume;
> -       engine->sanitize = xcs_sanitize;
> +       engine->status_page.sanitize = xcs_sanitize;
>
>         engine->reset.prepare = reset_prepare;
>         engine->reset.rewind = reset_rewind;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index e1612c393781..156a5a9e6a55 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -3676,7 +3676,7 @@ static inline void guc_init_lrc_mapping(struct intel_guc *guc)
>
>  static void guc_release(struct intel_engine_cs *engine)
>  {
> -       engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
> +       tasklet_kill(&engine->sched_engine->tasklet);
>
>         intel_engine_cleanup_common(engine);
>         lrc_fini_wa_ctx(engine);
> @@ -3809,7 +3809,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
>         lrc_init_wa_ctx(engine);
>
>         /* Finally, take ownership and responsibility for cleanup! */
> -       engine->sanitize = guc_sanitize;
> +       engine->status_page.sanitize = guc_sanitize;
>         engine->release = guc_release;
>
>         return 0;
> --
> 2.34.1
>


More information about the Intel-gfx mailing list