<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body>
<div class="moz-cite-prefix">On 2023-10-02 13:02, Chen, Xiaogang
wrote:<br>
</div>
<blockquote type="cite" cite="mid:a223b7a5-53f2-efaf-c754-5263cb799b3f@amd.com">
<br>
On 9/29/2023 9:11 AM, Philip Yang wrote:
<br>
<blockquote type="cite">Caution: This message originated from an
External Source. Use proper caution when opening attachments,
clicking links, or responding.
<br>
<br>
<br>
Replace prange->mapped_to_gpu with prange->bitmap_mapped[], which is based
on prange granularity and is updated when mapping to or unmapping from
GPUs, to optimize multi-GPU map, unmap and retry-fault recovery.
<br>
<br>
svm_range_is_mapped is false only if no partial range is mapped on any
GPU.
<br>
<br>
Split bitmap_mapped when an unmap from the CPU splits the prange.
<br>
<br>
Signed-off-by: Philip Yang <a class="moz-txt-link-rfc2396E" href="mailto:Philip.Yang@amd.com"><Philip.Yang@amd.com></a>
<br>
---
<br>
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 218 ++++++++++++++++++++++-----
<br>
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   4 +-
<br>
 2 files changed, 184 insertions(+), 38 deletions(-)
<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
<br>
index 040dc32ad475..626e0dd4ec79 100644
<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
<br>
@@ -292,12 +292,12 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
<br>
 					KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
<br>
 	}
<br>
<br>
-	/* free dma_addr array for each gpu */
<br>
+	/* free dma_addr array, bitmap_mapped for each gpu */
<br>
 	for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
<br>
-		if (prange->dma_addr[gpuidx]) {
<br>
+		if (prange->dma_addr[gpuidx])
<br>
 			kvfree(prange->dma_addr[gpuidx]);
<br>
-			prange->dma_addr[gpuidx] = NULL;
<br>
-		}
<br>
+		if (prange->bitmap_mapped[gpuidx])
<br>
+			bitmap_free(prange->bitmap_mapped[gpuidx]);
<br>
 	}
<br>
<br>
 	mutex_destroy(&prange->lock);
<br>
@@ -323,19 +323,38 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
<br>
 	uint64_t size = last - start + 1;
<br>
 	struct svm_range *prange;
<br>
 	struct kfd_process *p;
<br>
-
<br>
-	prange = kzalloc(sizeof(*prange), GFP_KERNEL);
<br>
-	if (!prange)
<br>
-		return NULL;
<br>
+	unsigned int nbits;
<br>
+	uint32_t gpuidx;
<br>
<br>
 	p = container_of(svms, struct kfd_process, svms);
<br>
 	if (!p->xnack_enabled && update_mem_usage &&
<br>
 	    amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
<br>
 					    KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {
<br>
 		pr_info("SVM mapping failed, exceeds resident system memory limit\n");
<br>
-		kfree(prange);
<br>
 		return NULL;
<br>
 	}
<br>
+
<br>
+	prange = kzalloc(sizeof(*prange), GFP_KERNEL);
<br>
+	if (!prange)
<br>
+		return NULL;
<br>
+
<br>
+	svm_range_set_default_attributes(&prange->preferred_loc,
<br>
+					 &prange->prefetch_loc,
<br>
+					 &prange->granularity, &prange->flags);
<br>
+
<br>
+	nbits = svm_range_mapped_nbits(size, prange->granularity);
<br>
+	pr_debug("prange 0x%p [0x%llx 0x%llx] bitmap_mapped nbits %d\n", prange,
<br>
+		 start, last, nbits);
<br>
+	for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
<br>
+		prange->bitmap_mapped[gpuidx] = bitmap_zalloc(nbits, GFP_KERNEL);
<br>
+		if (!prange->bitmap_mapped[gpuidx]) {
<br>
+			while (gpuidx--)
<br>
+				bitmap_free(prange->bitmap_mapped[gpuidx]);
<br>
+			kfree(prange);
<br>
+			return NULL;
<br>
+		}
<br>
+	}
<br>
+
<br>
 	prange->npages = size;
<br>
 	prange->svms = svms;
<br>
 	prange->start = start;
<br>
@@ -354,10 +373,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
<br>
 	bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
<br>
 		    MAX_GPU_INSTANCE);
<br>
<br>
-	svm_range_set_default_attributes(&prange->preferred_loc,
<br>
-					 &prange->prefetch_loc,
<br>
-					 &prange->granularity, &prange->flags);
<br>
-
<br>
 	pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last);
<br>
<br>
 	return prange;
<br>
@@ -972,6 +987,48 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
<br>
 	return 0;
<br>
 }
<br>
<br>
+static int
<br>
+svm_range_split_bitmap_mapped(struct svm_range *new, struct svm_range *old,
<br>
+			      uint64_t start, uint64_t last)
<br>
+{
<br>
+	struct kfd_process *p = container_of(new->svms, struct kfd_process, svms);
<br>
+	unsigned int nbits, old_nbits, old_nbits2;
<br>
+	unsigned long *bits;
<br>
+	uint32_t gpuidx;
<br>
+
<br>
+	nbits = svm_range_mapped_nbits(new->npages, new->granularity);
<br>
+	old_nbits = svm_range_mapped_nbits(old->npages, old->granularity);
<br>
+	old_nbits2 = svm_range_mapped_nbits(last - start + 1, old->granularity);
<br>
+
<br>
+	pr_debug("old 0x%p [0x%lx 0x%lx] => [0x%llx 0x%llx] nbits %d => %d\n",
<br>
+		 old, old->start, old->last, start, last, old_nbits, old_nbits2);
<br>
+	pr_debug("new 0x%p [0x%lx 0x%lx] nbits %d\n", new, new->start, new->last,
<br>
+		 nbits);
<br>
+
<br>
+	for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
<br>
+		bits = bitmap_alloc(old_nbits2, GFP_KERNEL);
<br>
+		if (!bits)
<br>
+			return -ENOMEM;
<br>
+
<br>
+		if (start == old->start) {
<br>
+			bitmap_shift_right(new->bitmap_mapped[gpuidx],
<br>
+					   old->bitmap_mapped[gpuidx],
<br>
+					   old_nbits2, old_nbits);
<br>
+			bitmap_shift_right(bits, old->bitmap_mapped[gpuidx], 0,
<br>
+					   old_nbits2);
<br>
+		} else {
<br>
+			bitmap_copy(new->bitmap_mapped[gpuidx],
<br>
+				    old->bitmap_mapped[gpuidx], nbits);
<br>
+			bitmap_shift_right(bits, old->bitmap_mapped[gpuidx],
<br>
+					   nbits, old_nbits);
<br>
+		}
<br>
+		bitmap_free(old->bitmap_mapped[gpuidx]);
<br>
+		old->bitmap_mapped[gpuidx] = bits;
<br>
+	}
<br>
+
<br>
+	return 0;
<br>
+}
<br>
+
<br>
 /**
<br>
  * svm_range_split_adjust - split range and adjust
<br>
  *
<br>
@@ -1012,6 +1069,10 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
<br>
 		return r;
<br>
 	}
<br>
<br>
+	r = svm_range_split_bitmap_mapped(new, old, start, last);
<br>
+	if (r)
<br>
+		return r;
<br>
+
<br>
 	old->npages = last - start + 1;
<br>
 	old->start = start;
<br>
 	old->last = last;
<br>
@@ -1020,7 +1081,6 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
<br>
 	new->prefetch_loc = old->prefetch_loc;
<br>
 	new->actual_loc = old->actual_loc;
<br>
 	new->granularity = old->granularity;
<br>
-	new->mapped_to_gpu = old->mapped_to_gpu;
<br>
 	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
<br>
 	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
<br>
<br>
@@ -1310,6 +1370,84 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
<br>
 				   fence);
<br>
 }
<br>
<br>
+/**
<br>
+ * svm_range_partial_mapped_dev - check if prange is mapped on the specific gpu
<br>
+ *
<br>
+ * @gpuidx: the gpu to check
<br>
+ * @prange: prange to check
<br>
+ * @start: the start address in pages
<br>
+ * @last: the last address in pages
<br>
+ *
<br>
+ * Return:
<br>
+ * true: if any partial range is mapped on the gpu, based on granularity boundary
<br>
+ * false: if the entire range is not mapped
<br>
+ */
<br>
+static bool
<br>
+svm_range_partial_mapped_dev(uint32_t gpuidx, struct svm_range *prange,
<br>
+			     unsigned long start, unsigned long last)
<br>
+{
<br>
+	unsigned long index, off;
<br>
+	bool mapped = false;
<br>
+
<br>
+	start = max(start, prange->start);
<br>
+	last = min(last, prange->last);
<br>
+	if (last < start)
<br>
+		goto out;
<br>
+
<br>
+	for (off = start; off <= last; off += (1UL << prange->granularity)) {
<br>
+		index = (off - prange->start) >> prange->granularity;
<br>
+		if (test_bit(index, prange->bitmap_mapped[gpuidx])) {
<br>
+			mapped = true;
<br>
+			break;
<br>
+		}
<br>
+	}
<br>
+out:
<br>
+	pr_debug("prange 0x%p [0x%lx 0x%lx] mapped %d on gpu %d\n", prange,
<br>
+		 start, last, mapped, gpuidx);
<br>
+	return mapped;
<br>
+}
<br>
+
<br>
+static bool
<br>
+svm_range_partial_mapped(struct svm_range *prange, unsigned long start,
<br>
+			 unsigned long last)
<br>
+{
<br>
+	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
<br>
+	struct svm_range *pchild;
<br>
+	uint32_t gpuidx;
<br>
+
<br>
+	for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
<br>
+		list_for_each_entry(pchild, &prange->child_list, child_list) {
<br>
+			if (svm_range_partial_mapped_dev(gpuidx, pchild, start, last))
<br>
+				return true;
<br>
+		}
<br>
+
<br>
+		if (svm_range_partial_mapped_dev(gpuidx, prange, start, last))
<br>
+			return true;
<br>
+	}
<br>
+	return false;
<br>
+}
<br>
+
<br>
+static bool svm_range_is_mapped(struct svm_range *prange)
<br>
+{
<br>
+	return svm_range_partial_mapped(prange, prange->start, prange->last);
<br>
+}
<br>
+
<br>
</blockquote>
I think svm_range_is_mapped actually means that some
prange->granularity-sized block inside prange is mapped on some gpu,
not that the whole prange is mapped. The name is confusing.
<br>
</blockquote>
Yes, agreed. I will add a comment with a detailed description to clarify
this.<br>
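Something along these lines, for example (a draft only, exact wording to
be decided):<br>
<pre>
/**
 * svm_range_is_mapped - check if any granule of prange is mapped
 * @prange: prange to check
 *
 * Return true if at least one prange->granularity sized block inside
 * prange is mapped on any GPU, false only if no partial range is mapped
 * on any GPU. It does NOT mean the entire prange is mapped; use
 * svm_range_partial_mapped() with an explicit [start, last] interval to
 * check a specific sub-range.
 */
</pre>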
<blockquote type="cite" cite="mid:a223b7a5-53f2-efaf-c754-5263cb799b3f@amd.com">
<blockquote type="cite">+static void
<br>
+svm_range_update_mapped(uint32_t gpuidx, struct svm_range
*prange,
<br>
+ unsigned long start, unsigned long last,
bool mapped)
<br>
+{
<br>
+ unsigned long index, nbits;
<br>
+
<br>
+ index = (start - prange->start) >>
prange->granularity;
<br>
+ nbits = svm_range_mapped_nbits(last - start + 1,
prange->granularity);
<br>
+ if (mapped)
<br>
+ bitmap_set(prange->bitmap_mapped[gpuidx],
index, nbits);
<br>
+ else
<br>
+ bitmap_clear(prange->bitmap_mapped[gpuidx],
index, nbits);
<br>
+ pr_debug("prange 0x%p [0x%lx 0x%lx] update mapped %d
nbits %ld gpu %d\n",
<br>
+ prange, start, last, mapped, nbits, gpuidx);
<br>
+}
<br>
+
<br>
static int
<br>
svm_range_unmap_from_gpus(struct svm_range *prange, unsigned
long start,
<br>
unsigned long last, uint32_t trigger)
<br>
@@ -1321,29 +1459,28 @@ svm_range_unmap_from_gpus(struct
svm_range *prange, unsigned long start,
<br>
uint32_t gpuidx;
<br>
int r = 0;
<br>
<br>
- if (!prange->mapped_to_gpu) {
<br>
- pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped
to GPU\n",
<br>
- prange, prange->start,
prange->last);
<br>
- return 0;
<br>
- }
<br>
-
<br>
- if (prange->start == start && prange->last
== last) {
<br>
- pr_debug("unmap svms 0x%p prange 0x%p\n",
prange->svms, prange);
<br>
- prange->mapped_to_gpu = false;
<br>
- }
<br>
-
<br>
bitmap_or(bitmap, prange->bitmap_access,
prange->bitmap_aip,
<br>
MAX_GPU_INSTANCE);
<br>
p = container_of(prange->svms, struct kfd_process,
svms);
<br>
<br>
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
<br>
- pr_debug("unmap from gpu idx 0x%x\n", gpuidx);
<br>
pdd = kfd_process_device_from_gpuidx(p,
gpuidx);
<br>
if (!pdd) {
<br>
pr_debug("failed to find device idx
%d\n", gpuidx);
<br>
- return -EINVAL;
<br>
+ continue;
<br>
+ }
<br>
+
<br>
+ if (!svm_range_partial_mapped_dev(gpuidx,
prange, start, last)) {
<br>
+ pr_debug("svms 0x%p prange 0x%p [0x%lx
0x%lx] not mapped on gpu %d\n",
<br>
+ prange->svms, prange, start,
last, gpuidx);
<br>
+ continue;
<br>
}
<br>
<br>
+ svm_range_update_mapped(gpuidx, prange, start,
last, false);
<br>
+
<br>
+ pr_debug("unmap svms 0x%p prange 0x%p [0x%lx
0x%lx] from gpu %d\n",
<br>
+ prange->svms, prange, start, last,
gpuidx);
<br>
+
<br>
kfd_smi_event_unmap_from_gpu(pdd->dev,
p->lead_thread->pid,
<br>
start, last,
trigger);
<br>
<br>
@@ -1483,6 +1620,10 @@ svm_range_map_to_gpus(struct svm_range
*prange, unsigned long offset,
<br>
if (r)
<br>
break;
<br>
<br>
+ if (!r)
<br>
+ svm_range_update_mapped(gpuidx, prange,
prange->start + offset,
<br>
+ prange->start
+ offset + npages - 1, true);
<br>
</blockquote>
<br>
svm_range_update_mapped sets the mapping bitmap at prange->granularity,
but the gpu mapping is for [prange->start + offset, prange->start +
offset + npages - 1]. The range covered by the mapping bitmap may be
bigger than the range that actually got mapped.
<br>
<br>
In the following gpu page fault handler you use
svm_range_partial_mapped_dev(gpuidx, prange, addr, addr) to check
whether addr is mapped. Is there a case where addr is not mapped, but
the mapping bitmap covers it? </blockquote>
No, this case should not happen, because offset and npages are derived
from the vma. <br>
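To make the granule arithmetic concrete, here is a standalone user-space
sketch of the index/nbits computation in svm_range_update_mapped, with
made-up values (the prange start, granularity and the mapped interval are
hypothetical). It shows that an interval not aligned to the granularity
would mark the whole enclosing granule as mapped, which is the
over-coverage being discussed; the question then reduces to whether an
unaligned interval can ever reach this function.<br>
<pre>
#include <stdio.h>

/* Mirrors svm_range_mapped_nbits() from the patch. */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define mapped_nbits(size, g)	DIV_ROUND_UP((size), 1UL << (g))

int main(void)
{
	unsigned long prange_start = 0x1000;	/* hypothetical prange->start */
	unsigned int granularity = 9;		/* 512-page granule */
	/* hypothetical mapped interval, NOT granule aligned */
	unsigned long start = 0x1250, last = 0x134f;

	unsigned long index = (start - prange_start) >> granularity;
	unsigned long nbits = mapped_nbits(last - start + 1, granularity);

	/* Bits [index, index + nbits) get set; each bit stands for a whole
	 * granule, so here bit 1 claims [0x1200, 0x13ff] is mapped although
	 * only [0x1250, 0x134f] was. */
	printf("bits %lu..%lu cover [0x%lx 0x%lx]\n",
	       index, index + nbits - 1,
	       prange_start + (index << granularity),
	       prange_start + ((index + nbits) << granularity) - 1);
	return 0;
}
</pre>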
<blockquote type="cite" cite="mid:a223b7a5-53f2-efaf-c754-5263cb799b3f@amd.com">That would
cause the page fault at addr never got handled.
<br>
<br>
Regards
<br>
<br>
Xiaogang
<br>
<br>
<blockquote type="cite">+
<br>
if (fence) {
<br>
r = dma_fence_wait(fence, false);
<br>
dma_fence_put(fence);
<br>
@@ -1648,7 +1789,7 @@ static int
svm_range_validate_and_map(struct mm_struct *mm,
<br>
<br>
if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
<br>
bitmap_copy(ctx->bitmap,
prange->bitmap_access, MAX_GPU_INSTANCE);
<br>
- if (!prange->mapped_to_gpu ||
<br>
+ if (!svm_range_is_mapped(prange) ||
<br>
bitmap_empty(ctx->bitmap,
MAX_GPU_INSTANCE)) {
<br>
r = 0;
<br>
goto free_ctx;
<br>
@@ -1729,9 +1870,6 @@ static int
svm_range_validate_and_map(struct mm_struct *mm,
<br>
r = svm_range_map_to_gpus(prange,
offset, npages, readonly,
<br>
ctx->bitmap, flush_tlb);
<br>
<br>
- if (!r && next == end)
<br>
- prange->mapped_to_gpu = true;
<br>
-
<br>
svm_range_unlock(prange);
<br>
<br>
addr = next;
<br>
@@ -1900,10 +2038,10 @@ svm_range_evict(struct svm_range
*prange, struct mm_struct *mm,
<br>
if (!p->xnack_enabled ||
<br>
(prange->flags &
KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
<br>
int evicted_ranges;
<br>
- bool mapped = prange->mapped_to_gpu;
<br>
+ bool mapped = svm_range_is_mapped(prange);
<br>
<br>
list_for_each_entry(pchild,
&prange->child_list, child_list) {
<br>
- if (!pchild->mapped_to_gpu)
<br>
+ if (!svm_range_is_mapped(pchild))
<br>
continue;
<br>
mapped = true;
<br>
mutex_lock_nested(&pchild->lock,
1);
<br>
@@ -1966,7 +2104,9 @@ svm_range_evict(struct svm_range *prange,
struct mm_struct *mm,
<br>
<br>
static struct svm_range *svm_range_clone(struct svm_range
*old)
<br>
{
<br>
+ struct kfd_process *p = container_of(old->svms,
struct kfd_process, svms);
<br>
struct svm_range *new;
<br>
+ uint32_t gpuidx;
<br>
<br>
new = svm_range_new(old->svms, old->start,
old->last, false);
<br>
if (!new)
<br>
@@ -1988,7 +2128,11 @@ static struct svm_range
*svm_range_clone(struct svm_range *old)
<br>
new->prefetch_loc = old->prefetch_loc;
<br>
new->actual_loc = old->actual_loc;
<br>
new->granularity = old->granularity;
<br>
- new->mapped_to_gpu = old->mapped_to_gpu;
<br>
+ for_each_set_bit(gpuidx, p->svms.bitmap_supported,
p->n_pdds) {
<br>
+ bitmap_copy(new->bitmap_mapped[gpuidx],
old->bitmap_mapped[gpuidx],
<br>
+ svm_range_mapped_nbits(new->last
- new->start + 1,
<br>
+
new->granularity));
<br>
+ };
<br>
bitmap_copy(new->bitmap_access,
old->bitmap_access, MAX_GPU_INSTANCE);
<br>
bitmap_copy(new->bitmap_aip, old->bitmap_aip,
MAX_GPU_INSTANCE);
<br>
<br>
@@ -2107,7 +2251,7 @@ svm_range_add(struct kfd_process *p,
uint64_t start, uint64_t size,
<br>
next_start = min(node->last, last) + 1;
<br>
<br>
if (svm_range_is_same_attrs(p, prange, nattr,
attrs) &&
<br>
- prange->mapped_to_gpu) {
<br>
+ svm_range_is_mapped(prange)) {
<br>
/* nothing to do */
<br>
} else if (node->start < start ||
node->last > last) {
<br>
/* node intersects the update range and
its attributes
<br>
@@ -3587,7 +3731,7 @@ svm_range_set_attr(struct kfd_process *p,
struct mm_struct *mm,
<br>
<br>
if (migrated && (!p->xnack_enabled
||
<br>
(prange->flags &
KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) &&
<br>
- prange->mapped_to_gpu) {
<br>
+ svm_range_is_mapped(prange)) {
<br>
pr_debug("restore_work will update
mappings of GPUs\n");
<br>
mutex_unlock(&prange->migrate_mutex);
<br>
continue;
<br>
@@ -3598,7 +3742,7 @@ svm_range_set_attr(struct kfd_process *p,
struct mm_struct *mm,
<br>
continue;
<br>
}
<br>
<br>
- flush_tlb = !migrated && update_mapping
&& prange->mapped_to_gpu;
<br>
+ flush_tlb = !migrated && update_mapping
&& svm_range_is_mapped(prange);
<br>
<br>
r = svm_range_validate_and_map(mm, prange,
MAX_GPU_INSTANCE,
<br>
true,
flush_tlb);
<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
<br>
index d2e94d8fb4be..10c92c5e23a7 100644
<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
<br>
@@ -132,7 +132,7 @@ struct svm_range {
<br>
struct list_head child_list;
<br>
DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
<br>
DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
<br>
- bool mapped_to_gpu;
<br>
+ unsigned long
*bitmap_mapped[MAX_GPU_INSTANCE];
<br>
};
<br>
<br>
static inline void svm_range_lock(struct svm_range *prange)
<br>
@@ -163,6 +163,8 @@ static inline struct svm_range_bo
*svm_range_bo_ref(struct svm_range_bo *svm_bo)
<br>
return NULL;
<br>
}
<br>
<br>
+#define svm_range_mapped_nbits(size, granularity)
DIV_ROUND_UP((size), 1UL << (granularity))
<br>
+
<br>
int svm_range_list_init(struct kfd_process *p);
<br>
void svm_range_list_fini(struct kfd_process *p);
<br>
int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op,
uint64_t start,
<br>
--
<br>
2.35.1
<br>
<br>
</blockquote>
</blockquote>
</body>
</html>