[Intel-xe] [PATCH] drm/xe: Fix memory use after free
Niranjana Vishwanathapura
niranjana.vishwanathapura at intel.com
Sat Apr 8 05:19:06 UTC 2023
On Fri, Apr 07, 2023 at 04:45:58PM -0700, Matt Roper wrote:
>On Fri, Apr 07, 2023 at 02:12:31PM -0700, Niranjana Vishwanathapura wrote:
>> The wait_event_timeout() on g2h_fence.wq which is declared on
>> stack can return before the wake_up() gets called, resulting in a
>> stack out-of-bounds access when wake_up() accesses the g2h_fence.wq.
>>
>> Do not declare the g2h_fence-related wait_queue_head_t on the stack.
>>
>> Fixes the below KASAN BUG and associated kernel crashes.
>>
>> BUG: KASAN: stack-out-of-bounds in do_raw_spin_lock+0x6f/0x1e0
>> Read of size 4 at addr ffff88826252f4ac by task kworker/u128:5/467
>>
>> CPU: 25 PID: 467 Comm: kworker/u128:5 Tainted: G U 6.3.0-rc4-xe #1
>> Workqueue: events_unbound g2h_worker_func [xe]
>> Call Trace:
>> <TASK>
>> dump_stack_lvl+0x64/0xb0
>> print_report+0xc2/0x600
>> kasan_report+0x96/0xc0
>> do_raw_spin_lock+0x6f/0x1e0
>> _raw_spin_lock_irqsave+0x47/0x60
>> __wake_up_common_lock+0xc0/0x150
>> dequeue_one_g2h+0x20f/0x6a0 [xe]
>> g2h_worker_func+0xa9/0x180 [xe]
>> process_one_work+0x527/0x990
>> worker_thread+0x2d1/0x640
>> kthread+0x174/0x1b0
>> ret_from_fork+0x29/0x50
>> </TASK>
>>
>> Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura at intel.com>
>
>Tested-by: Matt Roper <matthew.d.roper at intel.com>
>
>Seems to solve the occasional NULL derefs I was seeing on ~2.5% of my
>driver load attempts.
>
Thanks, applied.
Niranjana
>
>Matt
>
>> ---
>> drivers/gpu/drm/xe/xe_guc_ct.c | 7 +++----
>> drivers/gpu/drm/xe/xe_guc_ct_types.h | 2 ++
>> 2 files changed, 5 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
>> index 5e00b75d3ca2..9055ff133a7c 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
>> @@ -23,7 +23,6 @@
>>
>> /* Used when a CT send wants to block and / or receive data */
>> struct g2h_fence {
>> - wait_queue_head_t wq;
>> u32 *response_buffer;
>> u32 seqno;
>> u16 response_len;
>> @@ -142,6 +141,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
>> ct->fence_context = dma_fence_context_alloc(1);
>> INIT_WORK(&ct->g2h_worker, g2h_worker_func);
>> init_waitqueue_head(&ct->wq);
>> + init_waitqueue_head(&ct->g2h_fence_wq);
>>
>> primelockdep(ct);
>>
>> @@ -484,7 +484,6 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
>> void *ptr;
>>
>> g2h_fence->seqno = (ct->fence_seqno++ & 0xffff);
>> - init_waitqueue_head(&g2h_fence->wq);
>> ptr = xa_store(&ct->fence_lookup,
>> g2h_fence->seqno,
>> g2h_fence, GFP_ATOMIC);
>> @@ -709,7 +708,7 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
>> return ret;
>> }
>>
>> - ret = wait_event_timeout(g2h_fence.wq, g2h_fence.done, HZ);
>> + ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
>> if (!ret) {
>> drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x",
>> g2h_fence.seqno, action[0]);
>> @@ -801,7 +800,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
>> g2h_fence->done = true;
>> smp_mb();
>>
>> - wake_up(&g2h_fence->wq);
>> + wake_up_all(&ct->g2h_fence_wq);
>>
>> return 0;
>> }
>> diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
>> index e0f9063e9b65..fd27dacf00c5 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
>> @@ -74,6 +74,8 @@ struct xe_guc_ct {
>> struct xarray fence_lookup;
>> /** @wq: wait queue used for reliable CT sends and freeing G2H credits */
>> wait_queue_head_t wq;
>> + /** @g2h_fence_wq: wait queue used for G2H fencing */
>> + wait_queue_head_t g2h_fence_wq;
>> #ifdef XE_GUC_CT_SELFTEST
>> /** @suppress_irq_handler: force flow control to sender */
>> bool suppress_irq_handler;
>> --
>> 2.21.0.rc0.32.g243a4c7e27
>>
>
>--
>Matt Roper
>Graphics Software Engineer
>Linux GPU Platform Enablement
>Intel Corporation
More information about the Intel-xe
mailing list