[PATCH] mm/hmm: Simplify hmm_vma_walk_pud slightly

Jason Gunthorpe jgg at ziepe.ca
Thu Mar 12 14:27:49 UTC 2020


On Thu, Mar 12, 2020 at 10:28:13AM +0000, Steven Price wrote:
> By refactoring to deal with the !pud_huge(pud) || !pud_devmap(pud)
> condition early it's possible to remove the 'ret' variable and remove a
> level of indentation from half the function making the code easier to
> read.
> 
> No functional change.
> 
> Signed-off-by: Steven Price <steven.price at arm.com>
> ---
> Thanks to Jason's changes there were only two code paths left using
> the out_unlock label so it seemed like a good opportunity to
> refactor.

Yes, I made something very similar, what do you think of this:

https://github.com/jgunthorpe/linux/commit/93f0ed42ab3f9ceb27b58fb7c7c3ecaf60f16b36

From 93f0ed42ab3f9ceb27b58fb7c7c3ecaf60f16b36 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg at mellanox.com>
Date: Wed, 4 Mar 2020 17:11:10 -0400
Subject: [PATCH] mm/hmm: rework hmm_vma_walk_pud()

At least since commit 3afc423632a1 ("mm: pagewalk: add p4d_entry() and
pgd_entry()") this code has developed a number of strange control flows.

The purpose of the routine is to copy the pfns of a huge devmap PUD into
the pfns output array, without splitting the PUD. Everything that is not a
huge devmap PUD should go back to the walker for splitting.

Rework the logic to show this goal and remove redundant stuff:

- If pud_trans_huge_lock returns !NULL then this is already
  'pud_trans_huge() || pud_devmap()' and 'pud_huge() || pud_devmap()'
  so some of the logic is redundant.

- Hitting pud_none() is a race, treat it as such and return back to the
  walker using ACTION_AGAIN

- !pud_present() gives 0 cpu_flags, so the extra checks are redundant

- Once the *pudp is read there is no need to continue holding the pud
  lock, so drop it. The only thing the following code cares about is the
  pfn from the devmap, and if there is racing then the notifiers will
  resolve everything. Perhaps the unlocked READ_ONCE in an earlier version
  was correct

Signed-off-by: Jason Gunthorpe <jgg at mellanox.com>
---
 mm/hmm.c | 79 +++++++++++++++++++++++---------------------------------
 1 file changed, 33 insertions(+), 46 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 8fec801a33c9e2..87a376659b5ad4 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -455,66 +455,53 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
-	unsigned long addr = start;
+	unsigned long i, npages, pfn;
+	unsigned int required_fault;
+	uint64_t cpu_flags;
+	uint64_t *pfns;
+	spinlock_t *ptl;
 	pud_t pud;
-	int ret = 0;
-	spinlock_t *ptl = pud_trans_huge_lock(pudp, walk->vma);
 
+	/*
+	 * This only handles huge devmap pages, the default return is
+	 * ACTION_SUBTREE, so everything else is split by the walker and passed
+	 * to the other routines.
+	 */
+	ptl = pud_trans_huge_lock(pudp, walk->vma);
 	if (!ptl)
 		return 0;
+	pud = *pudp;
+	spin_unlock(ptl);
 
-	/* Normally we don't want to split the huge page */
-	walk->action = ACTION_CONTINUE;
-
-	pud = READ_ONCE(*pudp);
 	if (pud_none(pud)) {
-		spin_unlock(ptl);
-		return hmm_vma_walk_hole(start, end, -1, walk);
+		walk->action = ACTION_AGAIN;
+		return 0;
 	}
 
-	if (pud_huge(pud) && pud_devmap(pud)) {
-		unsigned long i, npages, pfn;
-		unsigned int required_flags;
-		uint64_t *pfns, cpu_flags;
-
-		if (!pud_present(pud)) {
-			spin_unlock(ptl);
-			return hmm_vma_walk_hole(start, end, -1, walk);
-		}
-
-		i = (addr - range->start) >> PAGE_SHIFT;
-		npages = (end - addr) >> PAGE_SHIFT;
-		pfns = &range->pfns[i];
+	if (!pud_devmap(pud))
+		return 0;
 
-		cpu_flags = pud_to_hmm_pfn_flags(range, pud);
+	pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT];
+	cpu_flags = pud_to_hmm_pfn_flags(range, pud);
+	required_fault =
 		hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags);
-		if (required_flags) {
-			spin_unlock(ptl);
-			return hmm_vma_walk_hole_(addr, end, required_flags,
-						  walk);
-		}
+	if (required_fault)
+		return hmm_vma_walk_hole_(start, end, required_fault, walk);
 
-		pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-		for (i = 0; i < npages; ++i, ++pfn) {
-			hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
-					      hmm_vma_walk->pgmap);
-			if (unlikely(!hmm_vma_walk->pgmap)) {
-				ret = -EBUSY;
-				goto out_unlock;
-			}
-			pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
-				  cpu_flags;
-		}
-		hmm_vma_walk->last = end;
-		goto out_unlock;
+	pfn = pud_pfn(pud) + ((start & ~PUD_MASK) >> PAGE_SHIFT);
+	npages = (end - start) >> PAGE_SHIFT;
+	for (i = 0; i < npages; ++i, ++pfn) {
+		hmm_vma_walk->pgmap = get_dev_pagemap(pfn, hmm_vma_walk->pgmap);
+		if (unlikely(!hmm_vma_walk->pgmap))
+			return -EBUSY;
+		pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
 	}
 
-	/* Ask for the PUD to be split */
-	walk->action = ACTION_SUBTREE;
+	hmm_vma_walk->last = end;
 
-out_unlock:
-	spin_unlock(ptl);
-	return ret;
+	/* Do not split the pud */
+	walk->action = ACTION_CONTINUE;
+	return 0;
 }
 #else
 #define hmm_vma_walk_pud	NULL
-- 
2.25.1



More information about the amd-gfx mailing list