[PATCH v4 9/9] RDMA/umem_odp: optimize out the case when a range is updated to read only

Jason Gunthorpe jgg at mellanox.com
Wed Jan 23 22:32:00 UTC 2019


On Wed, Jan 23, 2019 at 05:23:15PM -0500, jglisse at redhat.com wrote:
> From: Jérôme Glisse <jglisse at redhat.com>
> 
> When range of virtual address is updated read only and corresponding
> user ptr object are already read only it is pointless to do anything.
> Optimize this case out.
> 
> Signed-off-by: Jérôme Glisse <jglisse at redhat.com>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Jan Kara <jack at suse.cz>
> Cc: Felix Kuehling <Felix.Kuehling at amd.com>
> Cc: Jason Gunthorpe <jgg at mellanox.com>
> Cc: Andrew Morton <akpm at linux-foundation.org>
> Cc: Matthew Wilcox <mawilcox at microsoft.com>
> Cc: Ross Zwisler <zwisler at kernel.org>
> Cc: Dan Williams <dan.j.williams at intel.com>
> Cc: Paolo Bonzini <pbonzini at redhat.com>
> Cc: Radim Krčmář <rkrcmar at redhat.com>
> Cc: Michal Hocko <mhocko at kernel.org>
> Cc: Ralph Campbell <rcampbell at nvidia.com>
> Cc: John Hubbard <jhubbard at nvidia.com>
> Cc: kvm at vger.kernel.org
> Cc: dri-devel at lists.freedesktop.org
> Cc: linux-rdma at vger.kernel.org
> Cc: linux-fsdevel at vger.kernel.org
> Cc: Arnd Bergmann <arnd at arndb.de>
>  drivers/infiniband/core/umem_odp.c | 22 +++++++++++++++++++---
>  include/rdma/ib_umem_odp.h         |  1 +
>  2 files changed, 20 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
> index a4ec43093cb3..fa4e7fdcabfc 100644
> +++ b/drivers/infiniband/core/umem_odp.c
> @@ -140,8 +140,15 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn,
>  static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
>  					     u64 start, u64 end, void *cookie)
>  {
> +	bool update_to_read_only = *((bool *)cookie);
> +
>  	ib_umem_notifier_start_account(item);
> -	item->umem.context->invalidate_range(item, start, end);
> +	/*
> +	 * If it is already read only and we are updating to read only then we
> +	 * do not need to change anything. So save time and skip this one.
> +	 */
> +	if (!update_to_read_only || !item->read_only)
> +		item->umem.context->invalidate_range(item, start, end);
>  	return 0;
>  }
>  
> @@ -150,6 +157,7 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
>  {
>  	struct ib_ucontext_per_mm *per_mm =
>  		container_of(mn, struct ib_ucontext_per_mm, mn);
> +	bool update_to_read_only;
>  
>  	if (range->blockable)
>  		down_read(&per_mm->umem_rwsem);
> @@ -166,10 +174,13 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
>  		return 0;
>  	}
>  
> +	update_to_read_only = mmu_notifier_range_update_to_read_only(range);
> +
>  	return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start,
>  					     range->end,
>  					     invalidate_range_start_trampoline,
> -					     range->blockable, NULL);
> +					     range->blockable,
> +					     &update_to_read_only);
>  }
>  
>  static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
> @@ -363,6 +374,9 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
>  		goto out_odp_data;
>  	}
>  
> +	/* Assume read only at first, each time GUP is call this is updated. */
> +	odp_data->read_only = true;
> +
>  	odp_data->dma_list =
>  		vzalloc(array_size(pages, sizeof(*odp_data->dma_list)));
>  	if (!odp_data->dma_list) {
> @@ -619,8 +633,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
>  		goto out_put_task;
>  	}
>  
> -	if (access_mask & ODP_WRITE_ALLOWED_BIT)
> +	if (access_mask & ODP_WRITE_ALLOWED_BIT) {
> +		umem_odp->read_only = false;

No locking?

>  		flags |= FOLL_WRITE;
> +	}
>  
>  	start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
>  	k = start_idx;
> diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
> index 0b1446fe2fab..8256668c6170 100644
> +++ b/include/rdma/ib_umem_odp.h
> @@ -76,6 +76,7 @@ struct ib_umem_odp {
>  	struct completion	notifier_completion;
>  	int			dying;
>  	struct work_struct	work;
> +	bool read_only;
>  };

The ib_umem already has a writeable flag. This reflects if the user
asked for write permission to be granted.. The tracking here is if any
remote fault thus far has requested write, is this an important
difference to justify the new flag?

Jason


More information about the dri-devel mailing list