[PATCH v2 2/8] mm: Free device private pages have zero refcount
Felix Kuehling
felix.kuehling at amd.com
Thu Sep 29 19:21:10 UTC 2022
On 2022-09-28 08:01, Alistair Popple wrote:
> Since 27674ef6c73f ("mm: remove the extra ZONE_DEVICE struct page
> refcount") device private pages no longer have an extra reference
> count when the page is in use. However, before handing them back to the
> owning device driver we add an extra reference count such that free
> pages have a reference count of one.
>
> This makes it difficult to tell if a page is free or not because both
> free and in use pages will have a non-zero refcount. Instead we should
> return pages to the driver's page allocator with a zero reference count.
> Kernel code can then safely use kernel functions such as
> get_page_unless_zero().
>
> Signed-off-by: Alistair Popple <apopple at nvidia.com>
Acked-by: Felix Kuehling <Felix.Kuehling at amd.com>
> Cc: Jason Gunthorpe <jgg at nvidia.com>
> Cc: Michael Ellerman <mpe at ellerman.id.au>
> Cc: Felix Kuehling <Felix.Kuehling at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Ben Skeggs <bskeggs at redhat.com>
> Cc: Lyude Paul <lyude at redhat.com>
> Cc: Ralph Campbell <rcampbell at nvidia.com>
> Cc: Alex Sierra <alex.sierra at amd.com>
> Cc: John Hubbard <jhubbard at nvidia.com>
> Cc: Dan Williams <dan.j.williams at intel.com>
>
> ---
>
> This will conflict with Dan's series to fix reference counts for DAX[1].
> At the moment this only makes changes for device private and coherent
> pages; however, if DAX is fixed to remove the extra refcount then we
> should just be able to drop the checks for private/coherent pages and
> treat them the same.
>
> [1] - https://lore.kernel.org/linux-mm/166329930818.2786261.6086109734008025807.stgit@dwillia2-xfh.jf.intel.com/
> ---
> arch/powerpc/kvm/book3s_hv_uvmem.c | 2 +-
> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 +-
> drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 +-
> include/linux/memremap.h | 1 +
> lib/test_hmm.c | 2 +-
> mm/memremap.c | 9 +++++++++
> mm/page_alloc.c | 8 ++++++++
> 7 files changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> index d4eacf4..9d8de68 100644
> --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> @@ -718,7 +718,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
>
> dpage = pfn_to_page(uvmem_pfn);
> dpage->zone_device_data = pvt;
> - lock_page(dpage);
> + zone_device_page_init(dpage);
> return dpage;
> out_clear:
> spin_lock(&kvmppc_uvmem_bitmap_lock);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index 776448b..97a6845 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -223,7 +223,7 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
> page = pfn_to_page(pfn);
> svm_range_bo_ref(prange->svm_bo);
> page->zone_device_data = prange->svm_bo;
> - lock_page(page);
> + zone_device_page_init(page);
> }
>
> static void
> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> index 1635661..b092988 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> @@ -326,7 +326,7 @@ nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
> return NULL;
> }
>
> - lock_page(page);
> + zone_device_page_init(page);
> return page;
> }
>
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index 1901049..f68bf6d 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -182,6 +182,7 @@ static inline bool folio_is_device_coherent(const struct folio *folio)
> }
>
> #ifdef CONFIG_ZONE_DEVICE
> +void zone_device_page_init(struct page *page);
> void *memremap_pages(struct dev_pagemap *pgmap, int nid);
> void memunmap_pages(struct dev_pagemap *pgmap);
> void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
> diff --git a/lib/test_hmm.c b/lib/test_hmm.c
> index 89463ff..688c15d 100644
> --- a/lib/test_hmm.c
> +++ b/lib/test_hmm.c
> @@ -627,8 +627,8 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
> goto error;
> }
>
> + zone_device_page_init(dpage);
> dpage->zone_device_data = rpage;
> - lock_page(dpage);
> return dpage;
>
> error:
> diff --git a/mm/memremap.c b/mm/memremap.c
> index 25029a4..1c2c038 100644
> --- a/mm/memremap.c
> +++ b/mm/memremap.c
> @@ -505,8 +505,17 @@ void free_zone_device_page(struct page *page)
> /*
> * Reset the page count to 1 to prepare for handing out the page again.
> */
> + if (page->pgmap->type != MEMORY_DEVICE_PRIVATE &&
> + page->pgmap->type != MEMORY_DEVICE_COHERENT)
> + set_page_count(page, 1);
> +}
> +
> +void zone_device_page_init(struct page *page)
> +{
> set_page_count(page, 1);
> + lock_page(page);
> }
> +EXPORT_SYMBOL_GPL(zone_device_page_init);
>
> #ifdef CONFIG_FS_DAX
> bool __put_devmap_managed_page_refs(struct page *page, int refs)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 9d49803..4df1e43 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -6744,6 +6744,14 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
> set_pageblock_migratetype(page, MIGRATE_MOVABLE);
> cond_resched();
> }
> +
> + /*
> + * ZONE_DEVICE pages are released directly to the driver page allocator
> + * which will set the page count to 1 when allocating the page.
> + */
> + if (pgmap->type == MEMORY_DEVICE_PRIVATE ||
> + pgmap->type == MEMORY_DEVICE_COHERENT)
> + set_page_count(page, 0);
> }
>
> /*
More information about the amd-gfx
mailing list