[Intel-gfx] [RFC v2 05/12] drm/i915/svm: Page table mirroring support
Jason Gunthorpe
jgg at mellanox.com
Tue Dec 17 20:31:07 UTC 2019
On Fri, Dec 13, 2019 at 01:56:07PM -0800, Niranjana Vishwanathapura wrote:
> +static struct i915_svm *vm_get_svm(struct i915_address_space *vm)
> +{
> + struct i915_svm *svm = vm->svm;
> +
> + mutex_lock(&vm->svm_mutex);
> + if (svm && !kref_get_unless_zero(&svm->ref))
> + svm = NULL;
Quite strange to see a kref_get_unless_zero() under a lock - normally
the lock is what prevents the refcount from reaching zero in the
first place, so a plain kref_get() should do.
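If every final kref_put() runs under svm_mutex and release_svm()
clears vm->svm before the mutex is dropped, then a non-NULL pointer
already implies a positive refcount (untested sketch):

	mutex_lock(&vm->svm_mutex);
	svm = vm->svm;
	if (svm)
		kref_get(&svm->ref);	/* lock guarantees ref > 0 here */
	mutex_unlock(&vm->svm_mutex);

Otherwise the lifetime should probably move to the mmu_notifier
get/put scheme, see below.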
> +static void release_svm(struct kref *ref)
> +{
> + struct i915_svm *svm = container_of(ref, typeof(*svm), ref);
> + struct i915_address_space *vm = svm->vm;
> +
> + mmu_notifier_unregister(&svm->notifier, svm->notifier.mm);
It would be a lot better to use mmu_notifier_put() to manage the
reference and the deallocation.
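i.e. let the notifier core own the lifetime instead of open coding a
kref. Roughly like this (untested sketch, other i915_svm setup
elided):

	static struct mmu_notifier *i915_svm_alloc_notifier(struct mm_struct *mm)
	{
		struct i915_svm *svm;

		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm)
			return ERR_PTR(-ENOMEM);
		return &svm->notifier;
	}

	static void i915_svm_free_notifier(struct mmu_notifier *mn)
	{
		/* Called by the core once the SRCU grace period has elapsed */
		kfree(container_of(mn, struct i915_svm, notifier));
	}

	static const struct mmu_notifier_ops i915_svm_notifier_ops = {
		.invalidate_range_start = i915_svm_invalidate_range_start,
		.alloc_notifier = i915_svm_alloc_notifier,
		.free_notifier = i915_svm_free_notifier,
	};

	/* get side: */
	mn = mmu_notifier_get(&i915_svm_notifier_ops, current->mm);

	/* put side, replacing kref_put()/mmu_notifier_unregister(): */
	mmu_notifier_put(&svm->notifier);

mmu_notifier_get() returns the existing registration if one already
exists for the mm, so the get/put scheme also gives you
one-notifier-per-mm for free.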
> +static u32 i915_svm_build_sg(struct i915_address_space *vm,
> + struct hmm_range *range,
> + struct sg_table *st)
> +{
> + struct scatterlist *sg;
> + u32 sg_page_sizes = 0;
> + u64 i, npages;
> +
> + sg = NULL;
> + st->nents = 0;
> + npages = (range->end - range->start) / PAGE_SIZE;
> +
> + /*
> + * No need to dma map the host pages and later unmap it, as
> + * GPU is not allowed to access it with SVM.
> + * XXX: Need to dma map host pages for integrated graphics while
> + * extending SVM support there.
> + */
> + for (i = 0; i < npages; i++) {
> + u64 addr = range->pfns[i] & ~((1UL << range->pfn_shift) - 1);
> +
> + if (sg && (addr == (sg_dma_address(sg) + sg->length))) {
> + sg->length += PAGE_SIZE;
> + sg_dma_len(sg) += PAGE_SIZE;
> + continue;
> + }
> +
> + if (sg)
> + sg_page_sizes |= sg->length;
> +
> + sg = sg ? __sg_next(sg) : st->sgl;
> + sg_dma_address(sg) = addr;
> + sg_dma_len(sg) = PAGE_SIZE;
This still can't be like this - assigning a pfn to 'dma_address' is
fundamentally wrong; a CPU physical address is not a DMA address.
Whatever explanation you had, this needs to be fixed up before we get
to this patch.
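The pages have to go through dma_map_page() (or the whole table
through dma_map_sg()) before the addresses can be stored in the sg
list. Something along these lines (sketch only, assuming a suitable
struct device *dev for the GPU):

	struct page *page = hmm_device_entry_to_page(range, range->pfns[i]);
	dma_addr_t addr;

	addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, addr))
		return -EFAULT;

	sg_dma_address(sg) = addr;
	sg_dma_len(sg) = PAGE_SIZE;

with a matching dma_unmap_page() when the range is unbound.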
> +static int i915_range_fault(struct svm_notifier *sn,
> + struct drm_i915_gem_vm_bind *args,
> + struct sg_table *st, u64 *pfns)
> +{
> + unsigned long timeout =
> + jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
> + /* Have HMM fault pages within the fault window to the GPU. */
> + struct hmm_range range = {
> + .notifier = &sn->notifier,
> + .start = sn->notifier.interval_tree.start,
> + .end = sn->notifier.interval_tree.last + 1,
> + .pfns = pfns,
> + .pfn_shift = PAGE_SHIFT,
> + .flags = i915_range_flags,
> + .values = i915_range_values,
> + .default_flags = (range.flags[HMM_PFN_VALID] |
> + ((args->flags & I915_GEM_VM_BIND_READONLY) ?
> + 0 : range.flags[HMM_PFN_WRITE])),
> + .pfn_flags_mask = 0,
> +
> + };
> + struct i915_svm *svm = sn->svm;
> + struct mm_struct *mm = sn->notifier.mm;
> + struct i915_address_space *vm = svm->vm;
> + u32 sg_page_sizes;
> + u64 flags;
> + long ret;
> +
> + while (true) {
> + if (time_after(jiffies, timeout))
> + return -EBUSY;
> +
> + range.notifier_seq = mmu_interval_read_begin(range.notifier);
> + down_read(&mm->mmap_sem);
> + ret = hmm_range_fault(&range, 0);
> + up_read(&mm->mmap_sem);
> + if (ret <= 0) {
> + if (ret == 0 || ret == -EBUSY)
> + continue;
> + return ret;
> + }
> +
> + sg_page_sizes = i915_svm_build_sg(vm, &range, st);
> + mutex_lock(&svm->mutex);
> + if (mmu_interval_read_retry(range.notifier,
> + range.notifier_seq)) {
> + mutex_unlock(&svm->mutex);
> + continue;
> + }
> + break;
> + }
> +
> + flags = (args->flags & I915_GEM_VM_BIND_READONLY) ?
> + I915_GTT_SVM_READONLY : 0;
> + ret = svm_bind_addr_commit(vm, args->start, args->length,
> + flags, st, sg_page_sizes);
> + mutex_unlock(&svm->mutex);
This looks better..
> +int i915_gem_vm_bind_svm_buffer(struct i915_address_space *vm,
> + struct drm_i915_gem_vm_bind *args)
> +{
> + struct svm_notifier sn;
> + struct i915_svm *svm;
> + struct mm_struct *mm;
> + struct sg_table st;
> + u64 npages, *pfns;
> + int ret = 0;
> +
> + svm = vm_get_svm(vm);
> + if (!svm)
> + return -EINVAL;
> +
> + mm = svm->notifier.mm;
> + if (mm != current->mm) {
> + ret = -EPERM;
> + goto bind_done;
> + }
A bit strange - we now have APIs that make it fairly easy to deal
with multiple processes (i.e. the get/put scheme), so why have this
restriction?
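e.g. hold a reference on the notifier's mm around the fault instead
of insisting on current->mm (sketch):

	mm = svm->notifier.mm;
	if (!mmget_not_zero(mm))
		return -EFAULT;

	/* ... hmm_range_fault() etc. run against mm ... */

	mmput(mm);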
> +
> + args->length += (args->start & ~PAGE_MASK);
> + args->start &= PAGE_MASK;
> + DRM_DEBUG_DRIVER("%sing start 0x%llx length 0x%llx vm_id 0x%x\n",
> + (args->flags & I915_GEM_VM_BIND_UNBIND) ?
> + "Unbind" : "Bind", args->start, args->length,
> + args->vm_id);
> + if (args->flags & I915_GEM_VM_BIND_UNBIND) {
> + i915_gem_vm_unbind_svm_buffer(vm, args->start, args->length);
> + goto bind_done;
> + }
> +
> + npages = args->length / PAGE_SIZE;
> + if (unlikely(sg_alloc_table(&st, npages, GFP_KERNEL))) {
> + ret = -ENOMEM;
> + goto bind_done;
> + }
> +
> + pfns = kvmalloc_array(npages, sizeof(uint64_t), GFP_KERNEL);
> + if (unlikely(!pfns)) {
> + ret = -ENOMEM;
> + goto range_done;
> + }
> +
> + ret = svm_bind_addr_prepare(vm, args->start, args->length);
> + if (unlikely(ret))
> + goto prepare_done;
> +
> + sn.svm = svm;
> + ret = mmu_interval_notifier_insert(&sn.notifier, mm,
> + args->start, args->length,
> + &i915_svm_mni_ops);
> + if (!ret) {
> + ret = i915_range_fault(&sn, args, &st, pfns);
> + mmu_interval_notifier_remove(&sn.notifier);
> + }
Success-oriented flow... kernel style is to handle the failure in the
branch and let the success path run straight down.
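i.e. (sketch of the same logic):

	ret = mmu_interval_notifier_insert(&sn.notifier, mm, args->start,
					   args->length, &i915_svm_mni_ops);
	if (ret)
		goto prepare_done;

	ret = i915_range_fault(&sn, args, &st, pfns);
	mmu_interval_notifier_remove(&sn.notifier);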
> +static int
> +i915_svm_invalidate_range_start(struct mmu_notifier *mn,
> + const struct mmu_notifier_range *update)
> +{
> + struct i915_svm *svm = container_of(mn, struct i915_svm, notifier);
> + unsigned long length = update->end - update->start;
> +
> + DRM_DEBUG_DRIVER("start 0x%lx length 0x%lx\n", update->start, length);
> + if (!mmu_notifier_range_blockable(update))
> + return -EAGAIN;
> +
> + i915_gem_vm_unbind_svm_buffer(svm->vm, update->start, length);
> + return 0;
> +}
I still think you should strive for a better design than putting a
notifier across the entire address space..
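e.g. give each binding its own interval notifier that lives for the
lifetime of the binding, so only ranges the GPU actually has mapped
ever get invalidated (rough sketch; i915_svm_binding is a
hypothetical structure, locking around the seq update elided):

	struct i915_svm_binding {
		struct mmu_interval_notifier notifier;
		struct i915_address_space *vm;
	};

	static bool i915_svm_binding_invalidate(struct mmu_interval_notifier *mni,
						const struct mmu_notifier_range *range,
						unsigned long cur_seq)
	{
		struct i915_svm_binding *binding =
			container_of(mni, struct i915_svm_binding, notifier);

		if (!mmu_notifier_range_blockable(range))
			return false;

		/* must be serialized against mmu_interval_read_retry() users */
		mmu_interval_set_seq(mni, cur_seq);
		i915_gem_vm_unbind_svm_buffer(binding->vm, range->start,
					      range->end - range->start);
		return true;
	}

Then the mm-wide invalidate_range_start notifier goes away entirely.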
> +
> +#if defined(CONFIG_DRM_I915_SVM)
> +struct i915_svm {
> + /* i915 address space */
> + struct i915_address_space *vm;
> +
> + struct mmu_notifier notifier;
> + struct mutex mutex; /* protects svm operations */
> + /*
> + * XXX: Probably just make use of mmu_notifier's reference
> + * counting (get/put) instead of our own.
> + */
Yes
Jason