[RFC PATCH] drm/xe: fix memory leaks on vm_bind_ioctl failure paths

Matthew Brost matthew.brost at intel.com
Fri Sep 27 23:14:46 UTC 2024


On Thu, Sep 26, 2024 at 05:36:33PM -0700, Paulo Zanoni wrote:
> Caught by kmemleak when running Vulkan CTS tests on LNL. The leak
> seems to happen only when there's some kind of failure happening, like
> the lack of memory. Example output:
> 

So I think this patch is correct in that it fixes a memory leak. I'm
going to fix it in a slightly different way though. It would be great
if you could test that out.

I doubt this would cause any major problems though, as it just leaks
a few pages here and there.
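
What I have in mind is roughly the below (an untested sketch;
xe_pt_free_entries() is a made-up name and the exact call sites are
TBD): free the pt_entries arrays in one common teardown path instead
of in each abort helper.

	/*
	 * Hypothetical helper, sketch only: free the pt_entries arrays
	 * that the stage bind/unbind paths allocated, once we know
	 * nothing will consume them anymore.
	 */
	static void xe_pt_free_entries(struct xe_vm_pgtable_update *entries,
				       u32 num_entries)
	{
		u32 i;

		for (i = 0; i < num_entries; i++)
			kfree(entries[i].pt_entries);
	}

Note that kfree(NULL) is a no-op, so the NULL checks before the
kfree() calls in the patch below aren't needed either way.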

> unreferenced object 0xffff9120bdf62000 (size 8192):
>   comm "deqp-vk", pid 115008, jiffies 4310295728
>   hex dump (first 32 bytes):
>     00 00 00 00 00 00 00 00 1b 05 f9 28 01 00 00 40  ...........(...@
>     00 00 00 00 00 00 00 00 1b 15 f9 28 01 00 00 40  ...........(...@
>   backtrace (crc 7a56be79):
>     [<ffffffff86dd81f0>] __kmalloc_cache_noprof+0x310/0x3d0
>     [<ffffffffc08e8211>] xe_pt_new_shared.constprop.0+0x81/0xb0 [xe]
>     [<ffffffffc08e8309>] xe_pt_insert_entry+0xb9/0x140 [xe]
>     [<ffffffffc08eab6d>] xe_pt_stage_bind_entry+0x12d/0x5b0 [xe]
>     [<ffffffffc08ecbca>] xe_pt_walk_range+0xea/0x280 [xe]
>     [<ffffffffc08eccea>] xe_pt_walk_range+0x20a/0x280 [xe]
>     [<ffffffffc08eccea>] xe_pt_walk_range+0x20a/0x280 [xe]
>     [<ffffffffc08eccea>] xe_pt_walk_range+0x20a/0x280 [xe]
>     [<ffffffffc08eccea>] xe_pt_walk_range+0x20a/0x280 [xe]
>     [<ffffffffc08e9eff>] xe_pt_stage_bind.constprop.0+0x25f/0x580 [xe]
>     [<ffffffffc08eb21a>] bind_op_prepare+0xea/0x6e0 [xe]
>     [<ffffffffc08ebab8>] xe_pt_update_ops_prepare+0x1c8/0x440 [xe]
>     [<ffffffffc08ffbf3>] ops_execute+0x143/0x850 [xe]
>     [<ffffffffc0900b64>] vm_bind_ioctl_ops_execute+0x244/0x800 [xe]
>     [<ffffffffc0906467>] xe_vm_bind_ioctl+0x1877/0x2370 [xe]
>     [<ffffffffc05e92b3>] drm_ioctl_kernel+0xb3/0x110 [drm]
> unreferenced object 0xffff9120bdf72000 (size 8192):
>   comm "deqp-vk", pid 115008, jiffies 4310295728
>   hex dump (first 32 bytes):
>     6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
>     6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
>   backtrace (crc 23b2f0b5):
>     [<ffffffff86dd81f0>] __kmalloc_cache_noprof+0x310/0x3d0
>     [<ffffffffc08e8211>] xe_pt_new_shared.constprop.0+0x81/0xb0 [xe]
>     [<ffffffffc08e8453>] xe_pt_stage_unbind_post_descend+0xb3/0x150 [xe]
>     [<ffffffffc08ecd26>] xe_pt_walk_range+0x246/0x280 [xe]
>     [<ffffffffc08eccea>] xe_pt_walk_range+0x20a/0x280 [xe]
>     [<ffffffffc08eccea>] xe_pt_walk_range+0x20a/0x280 [xe]
>     [<ffffffffc08eccea>] xe_pt_walk_range+0x20a/0x280 [xe]
>     [<ffffffffc08ece31>] xe_pt_walk_shared+0xc1/0x110 [xe]
>     [<ffffffffc08e7b2a>] xe_pt_stage_unbind+0x9a/0xd0 [xe]
>     [<ffffffffc08e913d>] unbind_op_prepare+0xdd/0x270 [xe]
>     [<ffffffffc08eb9f6>] xe_pt_update_ops_prepare+0x106/0x440 [xe]
>     [<ffffffffc08ffbf3>] ops_execute+0x143/0x850 [xe]
>     [<ffffffffc0900b64>] vm_bind_ioctl_ops_execute+0x244/0x800 [xe]
>     [<ffffffffc0906467>] xe_vm_bind_ioctl+0x1877/0x2370 [xe]
>     [<ffffffffc05e92b3>] drm_ioctl_kernel+0xb3/0x110 [drm]
>     [<ffffffffc05e95a0>] drm_ioctl+0x280/0x4e0 [drm]
>

How did you get this output? I tried a couple of kernel tools but I
wasn't able to get output like this.
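
I assume this is kmemleak via debugfs (CONFIG_DEBUG_KMEMLEAK=y), i.e.
something like:

	echo scan > /sys/kernel/debug/kmemleak
	cat /sys/kernel/debug/kmemleak

but that alone didn't turn these reports up for me. If there were any
extra steps, please share them.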

Matt

> Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2877
> Signed-off-by: Paulo Zanoni <paulo.r.zanoni at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_pt.c | 42 +++++++++++++++++++++++---------------
>  1 file changed, 25 insertions(+), 17 deletions(-)
> 
> PS: I'm not super confident that this is the right place to do this
> since I'm not too familiar with this code and pointer ownership
> changes a lot here. Still, this patch seems to work on my system, so
> it may serve as a good conversation starter.
> 
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index d6353e8969f0..212f7de645df 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -921,18 +921,23 @@ static void xe_pt_abort_bind(struct xe_vma *vma,
>  		if (!rebind)
>  			pt->num_live -= entries[i].qwords;
>  
> -		if (!pt->level)
> -			continue;
> -
> -		pt_dir = as_xe_pt_dir(pt);
> -		for (j = 0; j < entries[i].qwords; j++) {
> -			u32 j_ = j + entries[i].ofs;
> -			struct xe_pt *newpte = xe_pt_entry(pt_dir, j_);
> -			struct xe_pt *oldpte = entries[i].pt_entries[j].pt;
> -
> -			pt_dir->children[j_] = oldpte ? &oldpte->base : 0;
> -			xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL);
> +		if (pt->level) {
> +			pt_dir = as_xe_pt_dir(pt);
> +			for (j = 0; j < entries[i].qwords; j++) {
> +				u32 j_ = j + entries[i].ofs;
> +				struct xe_pt *newpte = xe_pt_entry(pt_dir, j_);
> +				struct xe_pt *oldpte =
> +					entries[i].pt_entries[j].pt;
> +
> +				pt_dir->children[j_] = oldpte ?
> +					&oldpte->base : 0;
> +				xe_pt_destroy(newpte, xe_vma_vm(vma)->flags,
> +					      NULL);
> +			}
>  		}
> +
> +		if (entries[i].pt_entries)
> +			kfree(entries[i].pt_entries);
>  	}
>  }
>  
> @@ -1564,13 +1569,16 @@ static void xe_pt_abort_unbind(struct xe_vma *vma,
>  
>  		pt->num_live += entry->qwords;
>  
> -		if (!pt->level)
> -			continue;
> +		if (pt->level) {
> +			for (j = entry->ofs; j < entry->ofs + entry->qwords; j++)
> +				pt_dir->children[j] =
> +					entries[i].pt_entries[j - entry->ofs].pt ?
> +						&entries[i].pt_entries[j - entry->ofs].pt->base :
> +						NULL;
> +		}
>  
> -		for (j = entry->ofs; j < entry->ofs + entry->qwords; j++)
> -			pt_dir->children[j] =
> -				entries[i].pt_entries[j - entry->ofs].pt ?
> -				&entries[i].pt_entries[j - entry->ofs].pt->base : NULL;
> +		if (entry->pt_entries)
> +			kfree(entry->pt_entries);
>  	}
>  }
>  
> -- 
> 2.45.2
> 

