[PATCH 3/4] mm: simplify device private page handling in hmm_range_fault
Jason Gunthorpe
jgg at ziepe.ca
Thu Mar 19 18:17:16 UTC 2020
On Tue, Mar 17, 2020 at 04:14:31PM -0700, Ralph Campbell wrote:
>
> On 3/17/20 5:59 AM, Christoph Hellwig wrote:
> > On Tue, Mar 17, 2020 at 09:47:55AM -0300, Jason Gunthorpe wrote:
> > > I've been using v7 of Ralph's tester and it is working well - it has
> > > DEVICE_PRIVATE support so I think it can test this flow too. Ralph are
> > > you able?
> > >
> > > This hunk seems trivial enough to me, can we include it now?
> >
> > I can send a separate patch for it once the tester covers it. I don't
> > want to add it to the original patch as it is a significant behavior
> > change compared to the existing code.
> >
>
> Attached is an updated version of my HMM tests based on linux-5.6.0-rc6.
> I ran this OK with Jason's 8+1 HMM patches, Christoph's 1-5 misc HMM clean ups,
> and Christoph's 1-4 device private page changes applied.
I'd like to get this to mergable, it looks pretty good now, but I have
no idea about selftests - and I'm struggling to even compile the tools
dir
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 69def4a9df00..4d22ce7879a7 100644
> +++ b/lib/Kconfig.debug
> @@ -2162,6 +2162,18 @@ config TEST_MEMINIT
>
> If unsure, say N.
>
> +config TEST_HMM
> + tristate "Test HMM (Heterogeneous Memory Management)"
> + depends on DEVICE_PRIVATE
> + select HMM_MIRROR
> + select MMU_NOTIFIER
extra spaces
In general I wonder if it even makes sense that DEVICE_PRIVATE is user
selectable?
> +static int dmirror_fops_open(struct inode *inode, struct file *filp)
> +{
> + struct cdev *cdev = inode->i_cdev;
> + struct dmirror *dmirror;
> + int ret;
> +
> + /* Mirror this process address space */
> + dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
> + if (dmirror == NULL)
> + return -ENOMEM;
> +
> + dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
> + mutex_init(&dmirror->mutex);
> + xa_init(&dmirror->pt);
> +
> + ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
> + 0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
> + if (ret) {
> + kfree(dmirror);
> + return ret;
> + }
> +
> + /* Pairs with the mmdrop() in dmirror_fops_release(). */
> + mmgrab(current->mm);
> + dmirror->mm = current->mm;
The notifier holds a mmgrab, no need for another one
> + /* Only the first open registers the address space. */
> + filp->private_data = dmirror;
Not sure what this comment means
> +static inline struct dmirror_device *dmirror_page_to_device(struct page *page)
> +
> +{
> + struct dmirror_chunk *devmem;
> +
> + devmem = container_of(page->pgmap, struct dmirror_chunk, pagemap);
> + return devmem->mdevice;
> +}
extra devmem var is not really needed
> +
> +static bool dmirror_device_is_mine(struct dmirror_device *mdevice,
> + struct page *page)
> +{
> + if (!is_zone_device_page(page))
> + return false;
> + return page->pgmap->ops == &dmirror_devmem_ops &&
> + dmirror_page_to_device(page) == mdevice;
> +}
Use new owner stuff, right? Actually this is redunant now, the check
should be just WARN_ON pageowner != self owner
> +static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range)
> +{
> + uint64_t *pfns = range->pfns;
> + unsigned long pfn;
> +
> + for (pfn = (range->start >> PAGE_SHIFT);
> + pfn < (range->end >> PAGE_SHIFT);
> + pfn++, pfns++) {
> + struct page *page;
> + void *entry;
> +
> + /*
> + * HMM_PFN_ERROR is returned if it is accessing invalid memory
> + * either because of memory error (hardware detected memory
> + * corruption) or more likely because of truncate on mmap
> + * file.
> + */
> + if (*pfns == range->values[HMM_PFN_ERROR])
> + return -EFAULT;
Unless that snapshot is use hmm_range_fault() never returns success
and sets PFN_ERROR, so this should be a WARN_ON
> + if (!(*pfns & range->flags[HMM_PFN_VALID]))
> + return -EFAULT;
Same with valid.
> + page = hmm_device_entry_to_page(range, *pfns);
> + /* We asked for pages to be populated but check anyway. */
> + if (!page)
> + return -EFAULT;
WARN_ON
> + if (is_zone_device_page(page)) {
> + /*
> + * TODO: need a way to ask HMM to fault foreign zone
> + * device private pages.
> + */
> + if (!dmirror_device_is_mine(dmirror->mdevice, page))
> + continue;
Actually re
> +static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
> + const struct mmu_notifier_range *range,
> + unsigned long cur_seq)
> +{
> + struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);
> + struct mm_struct *mm = dmirror->mm;
> +
> + /*
> + * If the process doesn't exist, we don't need to invalidate the
> + * device page table since the address space will be torn down.
> + */
> + if (!mmget_not_zero(mm))
> + return true;
Why? Don't the notifiers provide for this already.
mmget_not_zero() is required before calling hmm_range_fault() though
> +static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
> + unsigned long end, bool write)
> +{
> + struct mm_struct *mm = dmirror->mm;
> + unsigned long addr;
> + uint64_t pfns[64];
> + struct hmm_range range = {
> + .notifier = &dmirror->notifier,
> + .pfns = pfns,
> + .flags = dmirror_hmm_flags,
> + .values = dmirror_hmm_values,
> + .pfn_shift = DPT_SHIFT,
> + .pfn_flags_mask = ~(dmirror_hmm_flags[HMM_PFN_VALID] |
> + dmirror_hmm_flags[HMM_PFN_WRITE]),
> + .default_flags = dmirror_hmm_flags[HMM_PFN_VALID] |
> + (write ? dmirror_hmm_flags[HMM_PFN_WRITE] : 0),
> + .dev_private_owner = dmirror->mdevice,
> + };
> + int ret = 0;
> +
> + /* Since the mm is for the mirrored process, get a reference first. */
> + if (!mmget_not_zero(mm))
> + return 0;
Right
> + for (addr = start; addr < end; addr = range.end) {
> + range.start = addr;
> + range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
> +
> + ret = dmirror_range_fault(dmirror, &range);
> + if (ret)
> + break;
> + }
> +
> + mmput(mm);
> + return ret;
> +}
> +
> +static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
> + unsigned long end, struct dmirror_bounce *bounce)
> +{
> + unsigned long pfn;
> + void *ptr;
> +
> + ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);
> +
> + for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
> + void *entry;
> + struct page *page;
> + void *tmp;
> +
> + entry = xa_load(&dmirror->pt, pfn);
> + page = xa_untag_pointer(entry);
> + if (!page)
> + return -ENOENT;
> +
> + tmp = kmap(page);
> + memcpy(ptr, tmp, PAGE_SIZE);
> + kunmap(page);
> +
> + ptr += PAGE_SIZE;
> + bounce->cpages++;
> + }
> +
> + return 0;
> +}
> +
> +static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
> +{
> + struct dmirror_bounce bounce;
> + unsigned long start, end;
> + unsigned long size = cmd->npages << PAGE_SHIFT;
> + int ret;
> +
> + start = cmd->addr;
> + end = start + size;
> + if (end < start)
> + return -EINVAL;
> +
> + ret = dmirror_bounce_init(&bounce, start, size);
> + if (ret)
> + return ret;
> +
> +again:
> + mutex_lock(&dmirror->mutex);
> + ret = dmirror_do_read(dmirror, start, end, &bounce);
> + mutex_unlock(&dmirror->mutex);
> + if (ret == 0)
> + ret = copy_to_user((void __user *)cmd->ptr, bounce.ptr,
> + bounce.size);
Use u64_to_user_ptr() instead of the cast
> +static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd)
> +{
> + struct dmirror_bounce bounce;
> + unsigned long start, end;
> + unsigned long size = cmd->npages << PAGE_SHIFT;
> + int ret;
> +
> + start = cmd->addr;
> + end = start + size;
> + if (end < start)
> + return -EINVAL;
> +
> + ret = dmirror_bounce_init(&bounce, start, size);
> + if (ret)
> + return ret;
> + ret = copy_from_user(bounce.ptr, (void __user *)cmd->ptr,
> + bounce.size);
ditto
> + if (ret)
> + return ret;
> +
> +again:
> + mutex_lock(&dmirror->mutex);
> + ret = dmirror_do_write(dmirror, start, end, &bounce);
> + mutex_unlock(&dmirror->mutex);
> + if (ret == -ENOENT) {
> + start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
> + ret = dmirror_fault(dmirror, start, end, true);
> + if (ret == 0) {
> + cmd->faults++;
> + goto again;
Use a loop instead of goto?
Also I get this:
lib/test_hmm.c: In function ‘dmirror_devmem_fault_alloc_and_copy’:
lib/test_hmm.c:1041:25: warning: unused variable ‘vma’ [-Wunused-variable]
1041 | struct vm_area_struct *vma = args->vma;
But this is a kernel bug, due to alloc_page_vma being a #define not a
static inline and me having CONFIG_NUMA off in this .config
Jason
More information about the amd-gfx
mailing list