[RFC v1 1/3] mm/mmu_notifier: Add a new notifier for mapping updates (new pages)
Vivek Kasireddy
vivek.kasireddy at intel.com
Tue Jul 18 08:28:56 UTC 2023
Currently, there does not appear to be any mechanism for letting
drivers or other kernel entities know about updates made in a
mapping, particularly when a new page is faulted in. Providing
notifications for such situations is especially useful when using
memfds backed by RAM-based filesystems such as shmem or hugetlbfs
that also allow FALLOC_FL_PUNCH_HOLE.
More specifically, when a hole is punched in a memfd (that is
backed by shmem or hugetlbfs), a driver can register for
notifications associated with range invalidations. However, it
would also be useful to have notifications when new pages are
faulted in as a result of writes made to regions of the mapping
that overlap with a previously punched hole.
Cc: David Hildenbrand <david at redhat.com>
Cc: Mike Kravetz <mike.kravetz at oracle.com>
Cc: Hugh Dickins <hughd at google.com>
Cc: Peter Xu <peterx at redhat.com>
Cc: Jason Gunthorpe <jgg at nvidia.com>
Cc: Gerd Hoffmann <kraxel at redhat.com>
Cc: Dongwon Kim <dongwon.kim at intel.com>
Cc: Junxiao Chang <junxiao.chang at intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy at intel.com>
---
include/linux/mmu_notifier.h | 27 +++++++++++++++++++++++++++
mm/hugetlb.c | 9 ++++++++-
mm/mmu_notifier.c | 17 +++++++++++++++++
mm/shmem.c | 7 ++++++-
4 files changed, 58 insertions(+), 2 deletions(-)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 64a3e051c3c4..218ddc3b4bc7 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -131,6 +131,16 @@ struct mmu_notifier_ops {
unsigned long address,
pte_t pte);
+ /*
+ * update_mapping is called when a page is replaced (at a given offset)
+ * in a mapping backed by shmem or hugetlbfs. The new page's pfn will
+ * be contained in the pfn field.
+ */
+ void (*update_mapping)(struct mmu_notifier *subscription,
+ struct mm_struct *mm,
+ unsigned long address,
+ unsigned long pfn);
+
/*
* invalidate_range_start() and invalidate_range_end() must be
* paired and are called only when the mmap_lock and/or the
@@ -394,6 +404,9 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm,
unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte);
+extern void __mmu_notifier_update_mapping(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long pfn);
extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r);
extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r,
bool only_end);
@@ -447,6 +460,14 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
__mmu_notifier_change_pte(mm, address, pte);
}
+static inline void mmu_notifier_update_mapping(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long pfn)
+{
+ if (mm_has_notifiers(mm))
+ __mmu_notifier_update_mapping(mm, address, pfn);
+}
+
static inline void
mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
@@ -695,6 +716,12 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
{
}
+static inline void mmu_notifier_update_mapping(struct mm_struct *mm,
+ unsigned long address,
+ unsigned long pfn)
+{
+}
+
static inline void
mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 64a3239b6407..1f2f0209101a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6096,8 +6096,12 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* hugetlb_no_page will drop vma lock and hugetlb fault
* mutex internally, which make us return immediately.
*/
- return hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
+ ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
entry, flags);
+ if (!ret)
+ mmu_notifier_update_mapping(vma->vm_mm, address,
+ pte_pfn(*ptep));
+ return ret;
ret = 0;
@@ -6223,6 +6227,9 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*/
if (need_wait_lock)
folio_wait_locked(folio);
+ if (!ret)
+ mmu_notifier_update_mapping(vma->vm_mm, address,
+ pte_pfn(*ptep));
return ret;
}
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 50c0dde1354f..6421405334b9 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -441,6 +441,23 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
srcu_read_unlock(&srcu, id);
}
+void __mmu_notifier_update_mapping(struct mm_struct *mm, unsigned long address,
+ unsigned long pfn)
+{
+ struct mmu_notifier *subscription;
+ int id;
+
+ id = srcu_read_lock(&srcu);
+ hlist_for_each_entry_rcu(subscription,
+ &mm->notifier_subscriptions->list, hlist,
+ srcu_read_lock_held(&srcu)) {
+ if (subscription->ops->update_mapping)
+ subscription->ops->update_mapping(subscription, mm,
+ address, pfn);
+ }
+ srcu_read_unlock(&srcu, id);
+}
+
static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions,
const struct mmu_notifier_range *range)
{
diff --git a/mm/shmem.c b/mm/shmem.c
index 2f2e0e618072..e59eb5fafadb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -77,6 +77,7 @@ static struct vfsmount *shm_mnt;
#include <linux/fcntl.h>
#include <uapi/linux/memfd.h>
#include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
#include <linux/uuid.h>
#include <linux/uaccess.h>
@@ -2164,8 +2165,12 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
gfp, vma, vmf, &ret);
if (err)
return vmf_error(err);
- if (folio)
+ if (folio) {
vmf->page = folio_file_page(folio, vmf->pgoff);
+ if (ret == VM_FAULT_LOCKED)
+ mmu_notifier_update_mapping(vma->vm_mm, vmf->address,
+ page_to_pfn(vmf->page));
+ }
return ret;
}
--
2.39.2
More information about the dri-devel
mailing list