[PATCH v2 03/11] mm: introduce pfnmap_track() and pfnmap_untrack() and use them for memremap

Liam R. Howlett Liam.Howlett at oracle.com
Tue May 13 17:40:42 UTC 2025


* David Hildenbrand <david at redhat.com> [250512 08:34]:
> Let's provide variants of track_pfn_remap() and untrack_pfn() that won't
> mess with VMAs, and replace the usage in mm/memremap.c.
> 
> Add some documentation.
> 
> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes at oracle.com>
> Acked-by: Ingo Molnar <mingo at kernel.org> # x86 bits
> Signed-off-by: David Hildenbrand <david at redhat.com>

Small nit with this one, but either way:

Reviewed-by: Liam R. Howlett <Liam.Howlett at oracle.com>

> ---
>  arch/x86/mm/pat/memtype.c | 14 ++++++++++++++
>  include/linux/pgtable.h   | 39 +++++++++++++++++++++++++++++++++++++++
>  mm/memremap.c             |  8 ++++----
>  3 files changed, 57 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
> index fa78facc6f633..1ec8af6cad6bf 100644
> --- a/arch/x86/mm/pat/memtype.c
> +++ b/arch/x86/mm/pat/memtype.c
> @@ -1068,6 +1068,20 @@ int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, pgprot_t *prot
>  	return 0;
>  }
>  
> +int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot)
> +{
> +	const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;

Here, the << PAGE_SHIFT isn't really needed, because... (continued
below, at the caller in mm/memremap.c)

> +
> +	return reserve_pfn_range(paddr, size, prot, 0);
> +}
> +
> +void pfnmap_untrack(unsigned long pfn, unsigned long size)
> +{
> +	const resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
> +
> +	free_pfn_range(paddr, size);
> +}
> +
>  /*
>   * untrack_pfn is called while unmapping a pfnmap for a region.
>   * untrack can be called for a specific region indicated by pfn and size or
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index be1745839871c..90f72cd358390 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -1502,6 +1502,16 @@ static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
>  	return 0;
>  }
>  
> +static inline int pfnmap_track(unsigned long pfn, unsigned long size,
> +		pgprot_t *prot)
> +{
> +	return 0;
> +}
> +
> +static inline void pfnmap_untrack(unsigned long pfn, unsigned long size)
> +{
> +}
> +
>  /*
>   * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page
>   * tables copied during copy_page_range(). Will store the pfn to be
> @@ -1575,6 +1585,35 @@ extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
>   */
>  int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
>  		pgprot_t *prot);
> +
> +/**
> + * pfnmap_track - track a pfn range
> + * @pfn: the start of the pfn range
> + * @size: the size of the pfn range in bytes
> + * @prot: the pgprot to track
> + *
> + * Requested the pfn range to be 'tracked' by a hardware implementation and
> + * setup the cachemode in @prot similar to pfnmap_setup_cachemode().
> + *
> + * This allows for fine-grained control of memory cache behaviour at page
> + * level granularity. Tracking memory this way is persisted across VMA splits
> + * (VMA merging does not apply for VM_PFNMAP).
> + *
> + * Currently, there is only one implementation for this - x86 Page Attribute
> + * Table (PAT). See Documentation/arch/x86/pat.rst for more details.
> + *
> + * Returns 0 on success and -EINVAL on error.
> + */
> +int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot);
> +
> +/**
> + * pfnmap_untrack - untrack a pfn range
> + * @pfn: the start of the pfn range
> + * @size: the size of the pfn range in bytes
> + *
> + * Untrack a pfn range previously tracked through pfnmap_track().
> + */
> +void pfnmap_untrack(unsigned long pfn, unsigned long size);
>  extern int track_pfn_copy(struct vm_area_struct *dst_vma,
>  		struct vm_area_struct *src_vma, unsigned long *pfn);
>  extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
> diff --git a/mm/memremap.c b/mm/memremap.c
> index 2aebc1b192da9..c417c843e9b1f 100644
> --- a/mm/memremap.c
> +++ b/mm/memremap.c
> @@ -130,7 +130,7 @@ static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
>  	}
>  	mem_hotplug_done();
>  
> -	untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range), true);
> +	pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
>  	pgmap_array_delete(range);
>  }
>  
> @@ -211,8 +211,8 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
>  	if (nid < 0)
>  		nid = numa_mem_id();
>  
> -	error = track_pfn_remap(NULL, &params->pgprot, PHYS_PFN(range->start), 0,
> -			range_len(range));
> +	error = pfnmap_track(PHYS_PFN(range->start), range_len(range),

This user (one of two) converts range->start to a pfn, only for
pfnmap_track() to shift it straight back into a physical address.

The other user is pfnmap_track_ctx_alloc() in mm/memory.c, which is
called from remap_pfn_range(), where addr is also available.

Couldn't we just use the address directly?

I think the same holds for untrack as well.

> +			     &params->pgprot);
>  	if (error)
>  		goto err_pfn_remap;
>  
> @@ -277,7 +277,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
>  	if (!is_private)
>  		kasan_remove_zero_shadow(__va(range->start), range_len(range));
>  err_kasan:
> -	untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range), true);
> +	pfnmap_untrack(PHYS_PFN(range->start), range_len(range));
>  err_pfn_remap:
>  	pgmap_array_delete(range);
>  	return error;
> -- 
> 2.49.0
> 


More information about the Intel-gfx mailing list