[PATCH 11/44] drm/amdkfd: svm range eviction and restore
Felix Kuehling
Felix.Kuehling at amd.com
Mon Mar 22 10:58:27 UTC 2021
HMM interval notifier callback notify CPU page table will be updated,
stop process queues if the updated address belongs to svm range
registered in process svms objects tree. Scheduled restore work to
update GPU page table using new pages address in the updated svm range.
The restore worker flushes any deferred work to make sure it restores
an up-to-date svm_range_list.
Signed-off-by: Philip Yang <Philip.Yang at amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 +
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 145 ++++++++++++++++++++++-
drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 +
4 files changed, 149 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 4101f5341ec9..7d70af26b5c7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -738,6 +738,8 @@ struct svm_range_list {
struct work_struct deferred_list_work;
struct list_head deferred_range_list;
spinlock_t deferred_list_lock;
+ atomic_t evicted_ranges;
+ struct delayed_work restore_work;
};
/* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 4d7a67141190..321895d7555a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1064,6 +1064,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
+ cancel_delayed_work_sync(&p->svms.restore_work);
mutex_lock(&p->mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 1244ba380292..55828d6fbea9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -21,6 +21,7 @@
*/
#include <linux/types.h>
+#include <linux/sched/task.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -28,6 +29,8 @@
#include "kfd_priv.h"
#include "kfd_svm.h"
+#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
+
static bool
svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
@@ -226,6 +229,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
INIT_LIST_HEAD(&prange->insert_list);
INIT_LIST_HEAD(&prange->deferred_list);
INIT_LIST_HEAD(&prange->child_list);
+ atomic_set(&prange->invalid, 0);
svm_range_set_default_attributes(&prange->preferred_loc,
&prange->prefetch_loc,
&prange->granularity, &prange->flags);
@@ -282,6 +286,9 @@ svm_range_validate(struct mm_struct *mm, struct svm_range *prange)
r = svm_range_validate_ram(mm, prange);
+ pr_debug("svms 0x%p [0x%lx 0x%lx] ret %d invalid %d\n", prange->svms,
+ prange->start, prange->last, r, atomic_read(&prange->invalid));
+
return r;
}
@@ -886,6 +893,134 @@ svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
goto retry_flush_work;
}
+static void svm_range_restore_work(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct amdkfd_process_info *process_info;
+ struct svm_range_list *svms;
+ struct svm_range *prange;
+ struct kfd_process *p;
+ struct mm_struct *mm;
+ int evicted_ranges;
+ int invalid;
+ int r;
+
+ svms = container_of(dwork, struct svm_range_list, restore_work);
+ evicted_ranges = atomic_read(&svms->evicted_ranges);
+ if (!evicted_ranges)
+ return;
+
+ pr_debug("restore svm ranges\n");
+
+ /* kfd_process_notifier_release destroys this worker thread. So during
+ * the lifetime of this thread, kfd_process and mm will be valid.
+ */
+ p = container_of(svms, struct kfd_process, svms);
+ process_info = p->kgd_process_info;
+ mm = p->mm;
+ if (!mm)
+ return;
+
+ mutex_lock(&process_info->lock);
+ svm_range_list_lock_and_flush_work(svms, mm);
+ mutex_lock(&svms->lock);
+
+ list_for_each_entry(prange, &svms->list, list) {
+ invalid = atomic_read(&prange->invalid);
+ if (!invalid)
+ continue;
+
+ pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
+ prange->svms, prange, prange->start, prange->last,
+ invalid);
+
+ r = svm_range_validate(mm, prange);
+ if (r) {
+ pr_debug("failed %d to validate [0x%lx 0x%lx]\n", r,
+ prange->start, prange->last);
+
+ goto unlock_out;
+ }
+
+ r = svm_range_map_to_gpus(prange, true);
+ if (r) {
+ pr_debug("failed %d to map 0x%lx to gpu\n", r,
+ prange->start);
+ goto unlock_out;
+ }
+
+ if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid)
+ goto unlock_out;
+ }
+
+ if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) !=
+ evicted_ranges)
+ goto unlock_out;
+
+ evicted_ranges = 0;
+
+ r = kgd2kfd_resume_mm(mm);
+ if (r) {
+ /* No recovery from this failure. Probably the CP is
+ * hanging. No point trying again.
+ */
+ pr_debug("failed %d to resume KFD\n", r);
+ }
+
+ pr_debug("restore svm ranges successfully\n");
+
+unlock_out:
+ mutex_unlock(&svms->lock);
+ mmap_write_unlock(mm);
+ mutex_unlock(&process_info->lock);
+
+ /* If validation failed, reschedule another attempt */
+ if (evicted_ranges) {
+ pr_debug("reschedule to restore svm range\n");
+ schedule_delayed_work(&svms->restore_work,
+ msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+ }
+}
+
+/**
+ * svm_range_evict - evict svm range
+ *
+ * Stop all queues of the process to ensure GPU doesn't access the memory, then
+ * return to let CPU evict the buffer and proceed CPU pagetable update.
+ *
+ * Don't need use lock to sync cpu pagetable invalidation with GPU execution.
+ * If invalidation happens while restore work is running, restore work will
+ * restart to ensure to get the latest CPU pages mapping to GPU, then start
+ * the queues.
+ */
+static int
+svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
+ unsigned long start, unsigned long last)
+{
+ struct svm_range_list *svms = prange->svms;
+ int invalid, evicted_ranges;
+ int r = 0;
+
+ invalid = atomic_inc_return(&prange->invalid);
+ evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
+ if (evicted_ranges != 1)
+ return r;
+
+ pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
+ prange->svms, prange->start, prange->last);
+
+ /* First eviction, stop the queues */
+ r = kgd2kfd_quiesce_mm(mm);
+ if (r)
+ pr_debug("failed to quiesce KFD\n");
+
+ pr_debug("schedule to restore svm %p ranges\n", svms);
+ schedule_delayed_work(&svms->restore_work,
+ msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+
+ return r;
+}
+
struct svm_range *svm_range_clone(struct svm_range *old)
{
struct svm_range *new;
@@ -1256,6 +1391,11 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
* svm_range_cpu_invalidate_pagetables - interval notifier callback
*
* MMU range unmap notifier to remove svm ranges
+ *
+ * If GPU vm fault retry is not enabled, evict the svm range, then restore
+ * work will update GPU mapping.
+ * If GPU vm fault retry is enabled, unmap the svm range from GPU, vm fault
+ * will update GPU mapping.
*/
static bool
svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
@@ -1286,9 +1426,10 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
svm_range_unmap_from_cpu(mni->mm, prange, start, last);
break;
default:
+ mmu_interval_set_seq(mni, cur_seq);
+ svm_range_evict(prange, mni->mm, start, last);
break;
}
-
return true;
}
@@ -1307,6 +1448,8 @@ int svm_range_list_init(struct kfd_process *p)
svms->objects = RB_ROOT_CACHED;
mutex_init(&svms->lock);
INIT_LIST_HEAD(&svms->list);
+ atomic_set(&svms->evicted_ranges, 0);
+ INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
INIT_LIST_HEAD(&svms->deferred_range_list);
spin_lock_init(&svms->deferred_list_lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 596c881f7674..2a76e89dec04 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -66,6 +66,7 @@ struct svm_work_list_item {
* @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
* @actual_loc: the actual location, 0 for CPU, or GPU id
* @granularity:migration granularity, log2 num pages
+ * @invalid: not 0 means cpu page table is invalidated
* @notifier: register mmu interval notifier
* @work_item: deferred work item information
* @deferred_list: list header used to add range to deferred list
@@ -96,6 +97,7 @@ struct svm_range {
uint32_t prefetch_loc;
uint32_t actual_loc;
uint8_t granularity;
+ atomic_t invalid;
struct mmu_interval_notifier notifier;
struct svm_work_list_item work_item;
struct list_head deferred_list;
--
2.31.0
More information about the dri-devel
mailing list