[bug report] drm/amdkfd: process exit and retry fault race
Dan Carpenter
dan.carpenter at oracle.com
Fri Nov 26 08:11:51 UTC 2021
Hello Philip Yang,
The patch a0c55ecee100: "drm/amdkfd: process exit and retry fault
race" from Nov 16, 2021, leads to the following Smatch static checker
warning:
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:2615 svm_range_restore_pages()
warn: missing error code here? 'get_task_mm()' failed. 'r' = '0'
drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c
2570 int
2571 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
2572 uint64_t addr, bool write_fault)
2573 {
2574 struct mm_struct *mm = NULL;
2575 struct svm_range_list *svms;
2576 struct svm_range *prange;
2577 struct kfd_process *p;
2578 uint64_t timestamp;
2579 int32_t best_loc;
2580 int32_t gpuidx = MAX_GPU_INSTANCE;
2581 bool write_locked = false;
2582 struct vm_area_struct *vma;
2583 int r = 0;
2584
2585 if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
2586 pr_debug("device does not support SVM\n");
2587 return -EFAULT;
2588 }
2589
2590 p = kfd_lookup_process_by_pasid(pasid);
2591 if (!p) {
2592 pr_debug("kfd process not founded pasid 0x%x\n", pasid);
2593 return 0;
2594 }
2595 if (!p->xnack_enabled) {
2596 pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
2597 r = -EFAULT;
2598 goto out;
2599 }
2600 svms = &p->svms;
2601
2602 pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
2603
2604 if (atomic_read(&svms->drain_pagefaults)) {
2605 pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
Presumably this is a success path.
2606 goto out;
2607 }
2608
2609 /* p->lead_thread is available as kfd_process_wq_release flush the work
2610 * before releasing task ref.
2611 */
2612 mm = get_task_mm(p->lead_thread);
2613 if (!mm) {
2614 pr_debug("svms 0x%p failed to get mm\n", svms);
--> 2615 goto out;
This used to be an error path, but the patch removed the error code,
turning it into a success path. Unfortunately, that confuses static
checkers and human readers alike. The way to silence the static checker
warning is to set "r = 0;" explicitly within four lines of the goto:
r = 0;
pr_debug("svms 0x%p failed to get mm\n", svms);
goto out;
2616 }
2617
2618 mmap_read_lock(mm);
2619 retry_write_locked:
2620 mutex_lock(&svms->lock);
2621 prange = svm_range_from_addr(svms, addr, NULL);
2622 if (!prange) {
2623 pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
2624 svms, addr);
2625 if (!write_locked) {
2626 /* Need the write lock to create new range with MMU notifier.
2627 * Also flush pending deferred work to make sure the interval
2628 * tree is up to date before we add a new range
2629 */
2630 mutex_unlock(&svms->lock);
2631 mmap_read_unlock(mm);
2632 mmap_write_lock(mm);
2633 write_locked = true;
2634 goto retry_write_locked;
2635 }
2636 prange = svm_range_create_unregistered_range(adev, p, mm, addr);
2637 if (!prange) {
2638 pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
2639 svms, addr);
2640 mmap_write_downgrade(mm);
2641 r = -EFAULT;
2642 goto out_unlock_svms;
2643 }
2644 }
2645 if (write_locked)
2646 mmap_write_downgrade(mm);
2647
2648 mutex_lock(&prange->migrate_mutex);
2649
2650 if (svm_range_skip_recover(prange)) {
2651 amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
2652 goto out_unlock_range;
Success path.
2653 }
2654
2655 timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
2656 /* skip duplicate vm fault on different pages of same range */
2657 if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
2658 pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
2659 svms, prange->start, prange->last);
2660 goto out_unlock_range;
Same.
2661 }
2662
2663 /* __do_munmap removed VMA, return success as we are handling stale
2664 * retry fault.
2665 */
2666 vma = find_vma(mm, addr << PAGE_SHIFT);
2667 if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
2668 pr_debug("address 0x%llx VMA is removed\n", addr);
2669 r = 0;
2670 goto out_unlock_range;
This success path is very clear. Good!
2671 }
2672
2673 if (!svm_fault_allowed(vma, write_fault)) {
2674 pr_debug("fault addr 0x%llx no %s permission\n", addr,
2675 write_fault ? "write" : "read");
2676 r = -EPERM;
2677 goto out_unlock_range;
2678 }
2679
2680 best_loc = svm_range_best_restore_location(prange, adev, &gpuidx);
2681 if (best_loc == -1) {
2682 pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
2683 svms, prange->start, prange->last);
2684 r = -EACCES;
2685 goto out_unlock_range;
2686 }
2687
2688 pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n",
2689 svms, prange->start, prange->last, best_loc,
2690 prange->actual_loc);
2691
2692 if (prange->actual_loc != best_loc) {
2693 if (best_loc) {
2694 r = svm_migrate_to_vram(prange, best_loc, mm);
2695 if (r) {
2696 pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
2697 r, addr);
2698 /* Fallback to system memory if migration to
2699 * VRAM failed
2700 */
2701 if (prange->actual_loc)
2702 r = svm_migrate_vram_to_ram(prange, mm);
2703 else
2704 r = 0;
2705 }
2706 } else {
2707 r = svm_migrate_vram_to_ram(prange, mm);
2708 }
2709 if (r) {
2710 pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
2711 r, svms, prange->start, prange->last);
2712 goto out_unlock_range;
2713 }
2714 }
2715
2716 r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
2717 if (r)
2718 pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
2719 r, svms, prange->start, prange->last);
2720
2721 out_unlock_range:
2722 mutex_unlock(&prange->migrate_mutex);
2723 out_unlock_svms:
2724 mutex_unlock(&svms->lock);
2725 mmap_read_unlock(mm);
2726
2727 svm_range_count_fault(adev, p, gpuidx);
2728
2729 mmput(mm);
2730 out:
2731 kfd_unref_process(p);
2732
2733 if (r == -EAGAIN) {
2734 pr_debug("recover vm fault later\n");
2735 amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
2736 r = 0;
2737 }
2738 return r;
2739 }
regards,
dan carpenter
More information about the amd-gfx
mailing list