[PATCH v4 3/7] drm/amdkfd: Add granularity size based bitmap map flag

Philip Yang <Philip.Yang@amd.com>
Mon Jan 15 22:49:46 UTC 2024


Replace prange->mapped_to_gpu with prange->bitmap_map[], a per-GPU
bitmap whose bits track the mapping state at prange granularity. Align
the map-to-GPU and unmap-from-GPU range sizes to the granularity size
and update the corresponding bitmap_map bits. This optimizes map,
unmap and retry-fault recovery across multiple GPUs.

svm_range_partial_mapped() returns false only if no part of the range
is mapped on any GPU.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
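A minimal userspace sketch of the granularity-based bit indexing behind
bitmap_map[], for reference while reviewing. Only svm_range_map_nbits()
mirrors a helper this patch actually adds to kfd_svm.h; the sample
range, granularity value and main() scaffolding are illustrative
assumptions, not code from the series.

/*
 * Illustrative only: models prange->bitmap_map[] bit indexing with a
 * plain uint64_t instead of the kernel bitmap helpers, so it is
 * limited to ranges covering at most 64 granularity-sized blocks.
 */
#include <stdint.h>
#include <stdio.h>

/* Mirrors the svm_range_map_nbits() helper added to kfd_svm.h. */
static unsigned int map_nbits(uint64_t start, uint64_t last, uint8_t granularity)
{
	return (last >> granularity) - (start >> granularity) + 1;
}

/* Bit index of page address addr relative to the range start. */
static uint64_t map_index(uint64_t addr, uint64_t start, uint8_t granularity)
{
	return (addr >> granularity) - (start >> granularity);
}

int main(void)
{
	/* Example: pages [0x1200, 0x29ff], granularity 9 (512 pages per bit). */
	uint64_t start = 0x1200, last = 0x29ff;
	uint8_t granularity = 9;
	uint64_t map = 0;	/* stands in for prange->bitmap_map[gpuidx] */
	uint64_t i;
	int mapped = 0;

	printf("nbits %u\n", map_nbits(start, last, granularity));	/* 12 */

	/* Mark [0x1600, 0x19ff] mapped, like svm_range_update_mapped(). */
	for (i = map_index(0x1600, start, granularity);
	     i <= map_index(0x19ff, start, granularity); i++)
		map |= 1ull << i;

	/* Any-bit-set test over [0x1800, 0x23ff], like svm_range_partial_mapped_dev(). */
	for (i = map_index(0x1800, start, granularity);
	     i <= map_index(0x23ff, start, granularity); i++)
		if (map & (1ull << i))
			mapped = 1;
	printf("partial mapped %d\n", mapped);	/* 1 */

	return 0;
}
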
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 258 ++++++++++++++++++++++-----
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   7 +-
 2 files changed, 219 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index a2c96f5760ff..a003406db067 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -307,12 +307,12 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
 					KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
 	}
 
-	/* free dma_addr array for each gpu */
+	/* free dma_addr array, bitmap_map for each gpu */
 	for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
-		if (prange->dma_addr[gpuidx]) {
+		if (prange->dma_addr[gpuidx])
 			kvfree(prange->dma_addr[gpuidx]);
-			prange->dma_addr[gpuidx] = NULL;
-		}
+		if (prange->bitmap_map[gpuidx])
+			bitmap_free(prange->bitmap_map[gpuidx]);
 	}
 
 	mutex_destroy(&prange->lock);
@@ -338,19 +338,38 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	uint64_t size = last - start + 1;
 	struct svm_range *prange;
 	struct kfd_process *p;
-
-	prange = kzalloc(sizeof(*prange), GFP_KERNEL);
-	if (!prange)
-		return NULL;
+	unsigned int nbits;
+	u32 gpuidx;
 
 	p = container_of(svms, struct kfd_process, svms);
 	if (!p->xnack_enabled && update_mem_usage &&
 	    amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
 				    KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {
 		pr_info("SVM mapping failed, exceeds resident system memory limit\n");
-		kfree(prange);
 		return NULL;
 	}
+
+	prange = kzalloc(sizeof(*prange), GFP_KERNEL);
+	if (!prange)
+		return NULL;
+
+	svm_range_set_default_attributes(&prange->preferred_loc,
+					 &prange->prefetch_loc,
+					 &prange->granularity, &prange->flags);
+
+	nbits = svm_range_map_nbits(start, last, prange->granularity);
+	pr_debug("prange 0x%p [0x%llx 0x%llx] bitmap_map nbits %d\n", prange,
+		 start, last, nbits);
+	for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
+		prange->bitmap_map[gpuidx] = bitmap_zalloc(nbits, GFP_KERNEL);
+		if (!prange->bitmap_map[gpuidx]) {
+			while (gpuidx--)
+				bitmap_free(prange->bitmap_map[gpuidx]);
+			kfree(prange);
+			return NULL;
+		}
+	}
+
 	prange->npages = size;
 	prange->svms = svms;
 	prange->start = start;
@@ -369,10 +388,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 		bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
 			    MAX_GPU_INSTANCE);
 
-	svm_range_set_default_attributes(&prange->preferred_loc,
-					 &prange->prefetch_loc,
-					 &prange->granularity, &prange->flags);
-
 	pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last);
 
 	return prange;
@@ -1017,6 +1032,51 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
 	return 0;
 }
 
+static int
+svm_range_split_bitmap_map(struct svm_range *new, struct svm_range *old,
+			   u64 start, u64 last)
+{
+	struct kfd_process *p = container_of(new->svms, struct kfd_process, svms);
+	u32 new_nbits, old_nbits, old_nbits2;
+	unsigned long *bits;
+	u32 gpuidx;
+
+	new_nbits = svm_range_map_nbits(new->start, new->last, new->granularity);
+	old_nbits = svm_range_map_nbits(old->start, old->last, old->granularity);
+	old_nbits2 = svm_range_map_nbits(start, last, old->granularity);
+
+	pr_debug("old 0x%p [0x%lx 0x%lx] => [0x%llx 0x%llx] nbits %d => %d\n",
+		 old, old->start, old->last, start, last, old_nbits, old_nbits2);
+	pr_debug("new 0x%p [0x%lx 0x%lx] nbits %d\n", new, new->start, new->last,
+		 new_nbits);
+
+	for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
+		bits = bitmap_alloc(old_nbits2, GFP_KERNEL);
+		if (!bits)
+			return -ENOMEM;
+
+		if (start == old->start) {
+			bitmap_copy(bits, old->bitmap_map[gpuidx], old_nbits2);
+			bitmap_shift_right(old->bitmap_map[gpuidx],
+					   old->bitmap_map[gpuidx],
+					   old_nbits - new_nbits, old_nbits);
+			bitmap_copy(new->bitmap_map[gpuidx],
+				    old->bitmap_map[gpuidx], new_nbits);
+		} else {
+			bitmap_copy(new->bitmap_map[gpuidx],
+				    old->bitmap_map[gpuidx], new_nbits);
+			bitmap_shift_right(old->bitmap_map[gpuidx],
+					   old->bitmap_map[gpuidx],
+					   old_nbits - old_nbits2, old_nbits);
+			bitmap_copy(bits, old->bitmap_map[gpuidx], old_nbits2);
+		}
+		bitmap_free(old->bitmap_map[gpuidx]);
+		old->bitmap_map[gpuidx] = bits;
+	}
+
+	return 0;
+}
+
 /**
  * svm_range_split_adjust - split range and adjust
  *
@@ -1057,6 +1117,10 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
 			return r;
 	}
 
+	r = svm_range_split_bitmap_map(new, old, start, last);
+	if (r)
+		return r;
+
 	old->npages = last - start + 1;
 	old->start = start;
 	old->last = last;
@@ -1064,7 +1128,6 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
 	new->preferred_loc = old->preferred_loc;
 	new->prefetch_loc = old->prefetch_loc;
 	new->granularity = old->granularity;
-	new->mapped_to_gpu = old->mapped_to_gpu;
 	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
 	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
 
@@ -1169,6 +1232,65 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
 	list_add_tail(&pchild->child_list, &prange->child_list);
 }
 
+/**
+ * svm_range_partial_mapped_dev - check if any part of prange is mapped on a specific GPU
+ *
+ * @gpuidx: the GPU index to check
+ * @prange: prange to check
+ * @start: the start address in pages
+ * @last: the last address in pages
+ *
+ * Return:
+ * true: if any part of the range within [start, last] is mapped on the GPU
+ * false: if no part of the range [start, last] is mapped on the GPU
+ */
+static bool
+svm_range_partial_mapped_dev(u32 gpuidx, struct svm_range *prange,
+			     u64 start, u64 last)
+{
+	u64 start_index, last_index;
+
+	start = max_t(u64, start, prange->start);
+	last = min_t(u64, last, prange->last);
+	if (last < start)
+		return false;
+
+	start_index = (start >> prange->granularity) - (prange->start >> prange->granularity);
+	last_index = (last >> prange->granularity) - (prange->start >> prange->granularity);
+	return find_next_bit(prange->bitmap_map[gpuidx], last_index + 1,
+			     start_index) <= last_index;
+}
+
+/**
+ * svm_range_partial_mapped - check if any part of prange is mapped on any GPU
+ *
+ * @prange: prange to check
+ * @start: the start address in pages
+ * @last: the last address in pages
+ *
+ * Return:
+ * true: if any part of prange is currently mapped on any GPU
+ * false: if no part of the range is mapped on any GPU
+ */
+static bool
+svm_range_partial_mapped(struct svm_range *prange, u64 start, u64 last)
+{
+	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+	struct svm_range *pchild;
+	u32 gpuidx;
+
+	for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
+		list_for_each_entry(pchild, &prange->child_list, child_list) {
+			if (svm_range_partial_mapped_dev(gpuidx, pchild, start, last))
+				return true;
+		}
+
+		if (svm_range_partial_mapped_dev(gpuidx, prange, start, last))
+			return true;
+	}
+	return false;
+}
+
 static int
 svm_range_need_access_gpus(unsigned long *bitmap, struct svm_range *prange)
 {
@@ -1196,7 +1318,7 @@ svm_range_need_access_gpus(unsigned long *bitmap, struct svm_range *prange)
 		 * update mapping on GPUs with ACCESS attribute
 		 */
 		if (bitmap_empty(bitmap, MAX_GPU_INSTANCE)) {
-			if (prange->mapped_to_gpu ||
+			if (svm_range_partial_mapped(prange, prange->start, prange->last) ||
 			    prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)
 				bitmap_copy(bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
 		}
@@ -1336,6 +1458,48 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 				      fence);
 }
 
+/**
+ * svm_range_complete_mapped - check if prange is completely mapped on all GPUs
+ *
+ * @prange: prange to check
+ *
+ * Return:
+ * true: if the entire prange is mapped on all GPUs that need access
+ * false: otherwise
+ */
+static bool svm_range_complete_mapped(struct svm_range *prange)
+{
+	int nbits = svm_range_map_nbits(prange->start, prange->last, prange->granularity);
+	DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+	u32 gpuidx;
+	int r;
+
+	r = svm_range_need_access_gpus(bitmap, prange);
+	if (r)
+		return false;
+
+	for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE)
+		if (!bitmap_full(prange->bitmap_map[gpuidx], nbits))
+			return false;
+	return true;
+}
+
+static void
+svm_range_update_mapped(u32 gpuidx, struct svm_range *prange,
+			u64 start, u64 last, bool mapped)
+{
+	u64 index, nbits;
+
+	index = (start >>  prange->granularity) - (prange->start >> prange->granularity);
+	nbits = svm_range_map_nbits(start, last, prange->granularity);
+	if (mapped)
+		bitmap_set(prange->bitmap_map[gpuidx], index, nbits);
+	else
+		bitmap_clear(prange->bitmap_map[gpuidx], index, nbits);
+	pr_debug("prange 0x%p [0x%llx 0x%llx] update mapped %d nbits %lld gpu %d\n",
+		 prange, start, last, mapped, nbits, gpuidx);
+}
+
 static int
 svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
 			  unsigned long last, uint32_t trigger)
@@ -1347,29 +1511,28 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
 	uint32_t gpuidx;
 	int r = 0;
 
-	if (!prange->mapped_to_gpu) {
-		pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n",
-			 prange, prange->start, prange->last);
-		return 0;
-	}
-
-	if (prange->start == start && prange->last == last) {
-		pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange);
-		prange->mapped_to_gpu = false;
-	}
-
 	bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
 		  MAX_GPU_INSTANCE);
 	p = container_of(prange->svms, struct kfd_process, svms);
 
 	for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
-		pr_debug("unmap from gpu idx 0x%x\n", gpuidx);
 		pdd = kfd_process_device_from_gpuidx(p, gpuidx);
 		if (!pdd) {
 			pr_debug("failed to find device idx %d\n", gpuidx);
-			return -EINVAL;
+			continue;
 		}
 
+		if (!svm_range_partial_mapped_dev(gpuidx, prange, start, last)) {
+			pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not mapped on gpu %d\n",
+				 prange->svms, prange, start, last, gpuidx);
+			continue;
+		}
+
+		svm_range_update_mapped(gpuidx, prange, start, last, false);
+
+		pr_debug("unmap svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu %d\n",
+			 prange->svms, prange, start, last, gpuidx);
+
 		kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
 					     start, last, trigger);
 
@@ -1510,6 +1673,10 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
 		if (r)
 			break;
 
+		if (!r)
+			svm_range_update_mapped(gpuidx, prange, prange->start + offset,
+						prange->start + offset + npages - 1, true);
+
 		if (fence) {
 			r = dma_fence_wait(fence, false);
 			dma_fence_put(fence);
@@ -1745,9 +1912,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 			}
 		}
 
-		if (!r && next == end)
-			prange->mapped_to_gpu = true;
-
 		svm_range_unlock(prange);
 
 		addr = next;
@@ -1916,12 +2080,11 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 	if (!p->xnack_enabled ||
 	    (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
 		int evicted_ranges;
-		bool mapped = prange->mapped_to_gpu;
+
+		if (!svm_range_partial_mapped(prange, start, last))
+			return r;
 
 		list_for_each_entry(pchild, &prange->child_list, child_list) {
-			if (!pchild->mapped_to_gpu)
-				continue;
-			mapped = true;
 			mutex_lock_nested(&pchild->lock, 1);
 			if (pchild->start <= last && pchild->last >= start) {
 				pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
@@ -1930,10 +2093,6 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 			}
 			mutex_unlock(&pchild->lock);
 		}
-
-		if (!mapped)
-			return r;
-
 		if (prange->start <= last && prange->last >= start)
 			atomic_inc(&prange->invalid);
 
@@ -1982,7 +2141,10 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
 
 static struct svm_range *svm_range_clone(struct svm_range *old)
 {
+	struct kfd_process *p = container_of(old->svms, struct kfd_process, svms);
 	struct svm_range *new;
+	u32 gpuidx;
+	int nbits;
 
 	new = svm_range_new(old->svms, old->start, old->last, false);
 	if (!new)
@@ -2004,8 +2166,13 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
 	new->prefetch_loc = old->prefetch_loc;
 	new->actual_loc = old->actual_loc;
 	new->granularity = old->granularity;
-	new->mapped_to_gpu = old->mapped_to_gpu;
 	new->vram_pages = old->vram_pages;
+	nbits = svm_range_map_nbits(new->start, new->last, new->granularity);
+	for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
+		bitmap_copy(new->bitmap_map[gpuidx],
+			    old->bitmap_map[gpuidx],
+			    nbits);
+	}
 	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
 	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
 
@@ -2126,7 +2293,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
 		next_start = min(node->last, last) + 1;
 
 		if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
-		    prange->mapped_to_gpu) {
+		    svm_range_complete_mapped(prange)) {
 			/* nothing to do */
 		} else if (node->start < start || node->last > last) {
 			/* node intersects the update range and its attributes
@@ -3611,7 +3778,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 
 		if (migrated && (!p->xnack_enabled ||
 		    (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) &&
-		    prange->mapped_to_gpu) {
+		    svm_range_partial_mapped(prange, prange->start, prange->last)) {
 			pr_debug("restore_work will update mappings of GPUs\n");
 			mutex_unlock(&prange->migrate_mutex);
 			continue;
@@ -3622,8 +3789,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 			continue;
 		}
 
-		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
-
+		flush_tlb = !migrated && update_mapping &&
+			    svm_range_partial_mapped(prange, prange->start, prange->last);
 		r = svm_range_validate_and_map(mm, prange->start, prange->last, prange,
 					       MAX_GPU_INSTANCE, true, true, flush_tlb);
 		if (r)
@@ -3639,8 +3806,9 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 		pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n",
 			 prange, prange->start, prange->last);
 		mutex_lock(&prange->migrate_mutex);
+		flush_tlb = svm_range_partial_mapped(prange, prange->start, prange->last);
 		r = svm_range_validate_and_map(mm,  prange->start, prange->last, prange,
-					       MAX_GPU_INSTANCE, true, true, prange->mapped_to_gpu);
+					       MAX_GPU_INSTANCE, true, true, flush_tlb);
 		if (r)
 			pr_debug("failed %d on remap svm range\n", r);
 		mutex_unlock(&prange->migrate_mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 806bcac6d101..a10eeb77f83e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -136,7 +136,7 @@ struct svm_range {
 	struct list_head		child_list;
 	DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
 	DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
-	bool				mapped_to_gpu;
+	unsigned long			*bitmap_map[MAX_GPU_INSTANCE];
 };
 
 static inline void svm_range_lock(struct svm_range *prange)
@@ -169,6 +169,11 @@ static inline u64 svm_range_align_last(u64 addr, u64 range_last, u64 align_size)
 	return min(ALIGN(addr + 1, align_size) - 1, range_last);
 }
 
+static inline u32 svm_range_map_nbits(u64 start, u64 last, u8 granularity)
+{
+	return (last >> granularity) - (start >> granularity) + 1;
+}
+
 int svm_range_list_init(struct kfd_process *p);
 void svm_range_list_fini(struct kfd_process *p);
 int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
-- 
2.35.1


