[PATCH hmm 2/8] mm/hmm: don't free the cached pgmap while scanning

Thu Mar 12 01:29:37 UTC 2020

On 3/11/20 11:35 AM, Jason Gunthorpe wrote:
> From: Jason Gunthorpe <jgg at mellanox.com>
> 
> The pgmap is held in the hmm_vma_walk variable in hope of speeding up
> future get_dev_pagemap() calls by hitting the same pointer. The algorithm
> doesn't actually care about how long the pgmap is held for.
> 
> Move the put of the cached pgmap to after the walk is completed and delete
> all the other now redundant puts.
> 
> This solves a possible leak of the reference in hmm_vma_walk_pmd() if a
> hmm_vma_handle_pte() fails while looping.
> 
> Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed filesystem")
> Signed-off-by: Jason Gunthorpe <jgg at mellanox.com>

Reviewed-by: Ralph Campbell <rcampbell at nvidia.com>

> ---
>   mm/hmm.c | 31 +++++++++----------------------
>   1 file changed, 9 insertions(+), 22 deletions(-)
> 
> We talked about just deleting this stuff, but I think it makes alot sense for
> hmm_range_fault() to trigger fault on devmap pages that are not compatible
> with the caller - so lets just fix the leak on error path for now.
> 
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 35f85424176d14..9e8f68eb83287a 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -239,10 +239,6 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
>   		}
>   		pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
>   	}
> -	if (hmm_vma_walk->pgmap) {
> -		put_dev_pagemap(hmm_vma_walk->pgmap);
> -		hmm_vma_walk->pgmap = NULL;
> -	}
>   	hmm_vma_walk->last = end;
>   	return 0;
>   }
> @@ -360,10 +356,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
>   	return 0;
>   
>   fault:
> -	if (hmm_vma_walk->pgmap) {
> -		put_dev_pagemap(hmm_vma_walk->pgmap);
> -		hmm_vma_walk->pgmap = NULL;
> -	}
>   	pte_unmap(ptep);
>   	/* Fault any virtual address we were asked to fault */
>   	return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
> @@ -446,16 +438,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
>   			return r;
>   		}
>   	}
> -	if (hmm_vma_walk->pgmap) {
> -		/*
> -		 * We do put_dev_pagemap() here and not in hmm_vma_handle_pte()
> -		 * so that we can leverage get_dev_pagemap() optimization which
> -		 * will not re-take a reference on a pgmap if we already have
> -		 * one.
> -		 */
> -		put_dev_pagemap(hmm_vma_walk->pgmap);
> -		hmm_vma_walk->pgmap = NULL;
> -	}
>   	pte_unmap(ptep - 1);
>   
>   	hmm_vma_walk->last = addr;
> @@ -529,10 +511,6 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
>   			pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
>   				  cpu_flags;
>   		}
> -		if (hmm_vma_walk->pgmap) {
> -			put_dev_pagemap(hmm_vma_walk->pgmap);
> -			hmm_vma_walk->pgmap = NULL;
> -		}
>   		hmm_vma_walk->last = end;
>   		goto out_unlock;
>   	}
> @@ -694,6 +672,15 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
>   			return -EBUSY;
>   		ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
>   				      &hmm_walk_ops, &hmm_vma_walk);
> +		/*
> +		 * A pgmap is kept cached in the hmm_vma_walk to avoid expensive
> +		 * searching in the probably common case that the pgmap is the
> +		 * same for the entire requested range.
> +		 */
> +		if (hmm_vma_walk.pgmap) {
> +			put_dev_pagemap(hmm_vma_walk.pgmap);
> +			hmm_vma_walk.pgmap = NULL;
> +		}
>   	} while (ret == -EBUSY);
>   
>   	if (ret)
>