[PATCH v2 3/7] amd/amdkfd: Add granularity bitmap mapped to gpu flag

Chen, Xiaogang xiaogang.chen at amd.com
Wed Oct 11 20:15:39 UTC 2023


On 10/10/2023 9:40 AM, Philip Yang wrote:
>
> Replace prange->mapped_to_gpu with prange->bitmap_mapped[], which is per
> GPU flag and based on prange granularity, updated when map to GPUS or
> unmap from GPUs, to optimize multiple GPU map, unmap and retry fault
> recover.
>
> svm_range_partial_mapped is false only if no part of the range mapping
> on any GPUs.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 256 +++++++++++++++++++++------
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   8 +-
>   2 files changed, 213 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index fb22b857adbc..4e1af4b181ea 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -296,12 +296,12 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
>                                          KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
>          }
>
> -       /* free dma_addr array for each gpu */
> +       /* free dma_addr array, bitmap_mapped for each gpu */
>          for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
> -               if (prange->dma_addr[gpuidx]) {
> +               if (prange->dma_addr[gpuidx])
>                          kvfree(prange->dma_addr[gpuidx]);
> -                               prange->dma_addr[gpuidx] = NULL;
I do not see why this line was removed.
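Just to illustrate (a sketch only, assuming bitmap_free(), like kvfree(),
is a no-op on a NULL pointer), the cleanup loop could keep the reset:

	/* free dma_addr array, bitmap_mapped for each gpu */
	for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
		kvfree(prange->dma_addr[gpuidx]);
		prange->dma_addr[gpuidx] = NULL;
		bitmap_free(prange->bitmap_mapped[gpuidx]);
		prange->bitmap_mapped[gpuidx] = NULL;
	}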
> -               }
> +               if (prange->bitmap_mapped[gpuidx])
> +                       bitmap_free(prange->bitmap_mapped[gpuidx]);
>          }
>
>          mutex_destroy(&prange->lock);
> @@ -327,19 +327,38 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
>          uint64_t size = last - start + 1;
>          struct svm_range *prange;
>          struct kfd_process *p;
> -
> -       prange = kzalloc(sizeof(*prange), GFP_KERNEL);
> -       if (!prange)
> -               return NULL;
> +       unsigned int nbits;
> +       uint32_t gpuidx;
>
>          p = container_of(svms, struct kfd_process, svms);
>          if (!p->xnack_enabled && update_mem_usage &&
>              amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
>                                      KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {
>                  pr_info("SVM mapping failed, exceeds resident system memory limit\n");
> -               kfree(prange);
>                  return NULL;
>          }
> +
> +       prange = kzalloc(sizeof(*prange), GFP_KERNEL);
> +       if (!prange)
> +               return NULL;
> +
> +       svm_range_set_default_attributes(&prange->preferred_loc,
> +                                        &prange->prefetch_loc,
> +                                        &prange->granularity, &prange->flags);
> +
> +       nbits = svm_range_mapped_nbits(start, last, prange->granularity);
> +       pr_debug("prange 0x%p [0x%llx 0x%llx] bitmap_mapped nbits %d\n", prange,
> +                start, last, nbits);
> +       for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
> +               prange->bitmap_mapped[gpuidx] = bitmap_zalloc(nbits, GFP_KERNEL);
> +               if (!prange->bitmap_mapped[gpuidx]) {
> +                       while (gpuidx--)
> +                               bitmap_free(prange->bitmap_mapped[gpuidx]);
> +                       kfree(prange);
> +                       return NULL;
> +               }
> +       }
> +
>          prange->npages = size;
>          prange->svms = svms;
>          prange->start = start;
> @@ -359,10 +378,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
>                  bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
>                              MAX_GPU_INSTANCE);
>
> -       svm_range_set_default_attributes(&prange->preferred_loc,
> -                                        &prange->prefetch_loc,
> -                                        &prange->granularity, &prange->flags);
> -
>          pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last);
>
>          return prange;
> @@ -984,6 +999,47 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
>          return 0;
>   }
>
> +static int
> +svm_range_split_bitmap_mapped(struct svm_range *new, struct svm_range *old,
> +                             uint64_t start, uint64_t last)
> +{
> +       struct kfd_process *p = container_of(new->svms, struct kfd_process, svms);
> +       unsigned int nbits, old_nbits, old_nbits2;
> +       unsigned long *bits;
> +       uint32_t gpuidx;
> +
> +       nbits = svm_range_mapped_nbits(new->start, new->last, new->granularity);
> +       old_nbits = svm_range_mapped_nbits(old->start, old->last, old->granularity);
> +       old_nbits2 = svm_range_mapped_nbits(start, last, old->granularity);
> +
> +       pr_debug("old 0x%p [0x%lx 0x%lx] => [0x%llx 0x%llx] nbits %d => %d\n",
> +                old, old->start, old->last, start, last, old_nbits, old_nbits2);
> +       pr_debug("new 0x%p [0x%lx 0x%lx] nbits %d\n", new, new->start, new->last,
> +                nbits);
> +
> +       for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
> +               bits = bitmap_alloc(old_nbits2, GFP_KERNEL);
> +               if (!bits)
> +                       return -ENOMEM;
> +
> +               if (start == old->start) {
> +                       bitmap_shift_right(new->bitmap_mapped[gpuidx],
> +                                          old->bitmap_mapped[gpuidx],
> +                                          old_nbits2 - 1, old_nbits);
> +                       bitmap_copy(bits, old->bitmap_mapped[gpuidx], old_nbits2);
> +               } else {
> +                       bitmap_copy(new->bitmap_mapped[gpuidx],
> +                                   old->bitmap_mapped[gpuidx], nbits);
> +                       bitmap_shift_right(bits, old->bitmap_mapped[gpuidx],
> +                                          nbits - 1, old_nbits);
> +               }
> +               bitmap_free(old->bitmap_mapped[gpuidx]);
> +               old->bitmap_mapped[gpuidx] = bits;
> +       }
> +
> +       return 0;
> +}
> +
>   /**
>    * svm_range_split_adjust - split range and adjust
>    *
> @@ -1024,6 +1080,10 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
>                          return r;
>          }
>
> +       r = svm_range_split_bitmap_mapped(new, old, start, last);
> +       if (r)
> +               return r;
> +
>          old->npages = last - start + 1;
>          old->start = start;
>          old->last = last;
> @@ -1032,7 +1092,6 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
>          new->prefetch_loc = old->prefetch_loc;
>          new->actual_loc = old->actual_loc;
>          new->granularity = old->granularity;
> -       new->mapped_to_gpu = old->mapped_to_gpu;
>          bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
>          bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
>
> @@ -1346,6 +1405,108 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>                                        fence);
>   }
>
> +/**
> + * svm_range_partial_mapped_dev - check if prange mapped on the specific GPU
> + *
> + * @gpuidx: the GPU index to check
> + * @prange: prange to check
> + * @start: the start address in pages
> + * @last: the last address in pages
> + *
> + * Return:
> + * true: if any part of the range within [start, last] mapped on the GPU
> + * false: if the entire range [start, last] not mapped on the GPU
> + */
> +static bool
> +svm_range_partial_mapped_dev(uint32_t gpuidx, struct svm_range *prange,
> +                            unsigned long start, unsigned long last)
> +{
> +       unsigned long start_index, last_index;
> +
> +       start = max(start, prange->start);
> +       last = min(last, prange->last);
> +       if (last < start)
> +               return false;
> +
> +       start_index = (start - prange->start) >> prange->granularity;
> +       last_index = (last - prange->start) >> prange->granularity;
> +       return find_next_bit(prange->bitmap_mapped[gpuidx], last_index + 1,
> +                            start_index) <= last_index;
The second parameter of find_next_bit is the bitmap size, i.e. the number
of bits find_next_bit scans from the beginning of the bitmap, not an end
bit index. See:
https://elixir.bootlin.com/linux/latest/source/include/linux/find.h#L54
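For reference, the prototype there is roughly:

	/*
	 * Searches for the next set bit starting at @offset; @size is the
	 * size of the bitmap in bits, and @size is returned when no set
	 * bit is found.
	 */
	unsigned long find_next_bit(const unsigned long *addr,
				    unsigned long size, unsigned long offset);

so the value passed as the second argument is treated as the total number
of bits to scan, not as the index of the last bit to check.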
> +}
> +
> +/**
> + * svm_range_partial_mapped - check if prange mapped on any GPU
> + *
> + * @prange: prange to check
> + * @start: the start address in pages
> + * @last: the last address in pages
> + *
> + * Return:
> + * true: if any part of prange mapped on any GPU currently
> + * false: if the entire range is not mapped on any GPU
> + */
> +static bool
> +svm_range_partial_mapped(struct svm_range *prange, unsigned long start,
> +                        unsigned long last)
> +{
> +       struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
> +       struct svm_range *pchild;
> +       uint32_t gpuidx;
> +
> +       for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
> +               list_for_each_entry(pchild, &prange->child_list, child_list) {
> +                       if (svm_range_partial_mapped_dev(gpuidx, pchild, start, last))
> +                               return true;
> +               }
> +
> +               if (svm_range_partial_mapped_dev(gpuidx, prange, start, last))
> +                       return true;
> +       }
> +       return false;
> +}
> +
> +/**
> + * svm_range_complete_mapped - check if prange mapped on all GPUs completely
> + *
> + * @prange: prange to check
> + *
> + * Return:
> + * true: if the entire prange mapped completely on all GPUs that need access
> + * otherwise return false
> + */
> +static bool svm_range_complete_mapped(struct svm_range *prange)
> +{
> +       int nbits = svm_range_mapped_nbits(prange->start, prange->last, prange->granularity);
> +       DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
> +       uint32_t gpuidx;
> +       int r;
> +
> +       r = svm_range_need_access_gpus(bitmap, prange);
> +       if (r)
> +               return false;
> +
> +       for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE)
> +               if (!bitmap_full(prange->bitmap_mapped[gpuidx], nbits))
> +                       return false;
> +       return true;
> +}
> +
> +static void
> +svm_range_update_mapped(uint32_t gpuidx, struct svm_range *prange,
> +                       unsigned long start, unsigned long last, bool mapped)
> +{
> +       unsigned long index, nbits;
> +
> +       index = (start - prange->start) >> prange->granularity;
> +       nbits = svm_range_mapped_nbits(start, last, prange->granularity);
> +       if (mapped)
> +               bitmap_set(prange->bitmap_mapped[gpuidx], index, nbits);
> +       else
> +               bitmap_clear(prange->bitmap_mapped[gpuidx], index, nbits);
> +       pr_debug("prange 0x%p [0x%lx 0x%lx] update mapped %d nbits %ld gpu %d\n",
> +                prange, start, last, mapped, nbits, gpuidx);
> +}
> +
prange->bitmap_mapped is tracked at prange->granularity granules, which do 
not necessarily line up with [start, last]. As a result the bits that get 
set or cleared may cover a bigger range than [start, last]. Is that ok?
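For example (hypothetical numbers: granularity = 9, i.e. 512-page granules,
prange->start = 0x1000, which is granule aligned), updating
[start, last] = [0x1100, 0x11ff] gives:

	index = (0x1100 - 0x1000) >> 9            = 0
	nbits = (0x11ff >> 9) - (0x1100 >> 9) + 1 = 1

so bitmap_set() marks granule 0, which represents pages 0x1000..0x11ff,
even though pages 0x1000..0x10ff were not part of [start, last].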
>   static int
>   svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
>                            unsigned long last, uint32_t trigger)
> @@ -1357,29 +1518,28 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
>          uint32_t gpuidx;
>          int r = 0;
>
> -       if (!prange->mapped_to_gpu) {
> -               pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n",
> -                        prange, prange->start, prange->last);
> -               return 0;
> -       }
> -
> -       if (prange->start == start && prange->last == last) {
> -               pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange);
> -               prange->mapped_to_gpu = false;
> -       }
> -
>          bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
>                    MAX_GPU_INSTANCE);
>          p = container_of(prange->svms, struct kfd_process, svms);
>
>          for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
> -               pr_debug("unmap from gpu idx 0x%x\n", gpuidx);
>                  pdd = kfd_process_device_from_gpuidx(p, gpuidx);
>                  if (!pdd) {
>                          pr_debug("failed to find device idx %d\n", gpuidx);
> -                       return -EINVAL;
> +                       continue;
>                  }
>
> +               if (!svm_range_partial_mapped_dev(gpuidx, prange, start, last)) {
> +                       pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not mapped on gpu %d\n",
> +                                prange->svms, prange, start, last, gpuidx);
> +                       continue;
> +               }
> +
> +               svm_range_update_mapped(gpuidx, prange, start, last, false);
svm_range_update_mapped should happen after svm_range_unmap_from_gpu, 
not before.
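Otherwise a failed or not yet completed unmap leaves the bitmap claiming
the range is already unmapped. Roughly (the svm_range_unmap_from_gpu call
paraphrased from the existing loop, sketch only):

	r = svm_range_unmap_from_gpu(pdd->dev->adev,
				     drm_priv_to_vm(pdd->drm_priv),
				     start, last, &fence);
	/* clear the mapped bits only once the GPU unmap succeeded */
	if (!r)
		svm_range_update_mapped(gpuidx, prange, start, last, false);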
> +
> +               pr_debug("unmap svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu %d\n",
> +                        prange->svms, prange, start, last, gpuidx);
> +
>                  kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
>                                               start, last, trigger);
>
> @@ -1519,6 +1679,10 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>                  if (r)
>                          break;
>
> +               if (!r)
> +                       svm_range_update_mapped(gpuidx, prange, prange->start + offset,
> +                                               prange->start + offset + npages - 1, true);
> +

Same concern as above: the range actually mapped here is not the same as 
the range that svm_range_update_mapped covers. That can leave pages that 
were never mapped marked as mapped in the bitmap.
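For instance (hypothetical numbers again: granularity = 9,
prange->start = 0x1000, offset = 0, npages = 100), only pages
0x1000..0x1063 are mapped, but

	index = (0x1000 - 0x1000) >> 9            = 0
	nbits = (0x1063 >> 9) - (0x1000 >> 9) + 1 = 1

sets bit 0, so svm_range_partial_mapped_dev() will later report e.g.
[0x1100, 0x11ff] as mapped on this GPU although those pages were never
mapped.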

Regards

Xiaogang

>                  if (fence) {
>                          r = dma_fence_wait(fence, false);
>                          dma_fence_put(fence);
> @@ -1670,7 +1834,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>
>          if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
>                  bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
> -               if (!prange->mapped_to_gpu ||
> +               if (!svm_range_partial_mapped(prange, prange->start, prange->last) ||
>                      bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
>                          r = 0;
>                          goto free_ctx;
> @@ -1755,9 +1919,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>                          r = svm_range_map_to_gpus(prange, offset, npages, readonly,
>                                                    ctx->bitmap, flush_tlb);
>
> -               if (!r && next == end)
> -                       prange->mapped_to_gpu = true;
> -
>                  svm_range_unlock(prange);
>
>                  addr = next;
> @@ -1939,22 +2100,8 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
>          if (!p->xnack_enabled ||
>              (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
>                  int evicted_ranges;
> -               bool mapped = prange->mapped_to_gpu;
> -
> -               list_for_each_entry(pchild, &prange->child_list, child_list) {
> -                       if (!pchild->mapped_to_gpu)
> -                               continue;
> -                       mapped = true;
> -                       mutex_lock_nested(&pchild->lock, 1);
> -                       if (pchild->start <= last && pchild->last >= start) {
> -                               pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
> -                                        pchild->start, pchild->last);
> -                               atomic_inc(&pchild->invalid);
> -                       }
> -                       mutex_unlock(&pchild->lock);
> -               }
>
> -               if (!mapped)
> +               if (!svm_range_partial_mapped(prange, start, last))
>                          return r;
>
>                  if (prange->start <= last && prange->last >= start)
> @@ -2005,7 +2152,10 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
>
>   static struct svm_range *svm_range_clone(struct svm_range *old)
>   {
> +       struct kfd_process *p = container_of(old->svms, struct kfd_process, svms);
>          struct svm_range *new;
> +       uint32_t gpuidx;
> +       int nbits;
>
>          new = svm_range_new(old->svms, old->start, old->last, false);
>          if (!new)
> @@ -2027,8 +2177,13 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
>          new->prefetch_loc = old->prefetch_loc;
>          new->actual_loc = old->actual_loc;
>          new->granularity = old->granularity;
> -       new->mapped_to_gpu = old->mapped_to_gpu;
>          new->vram_pages = old->vram_pages;
> +       nbits = svm_range_mapped_nbits(new->start, new->last, new->granularity);
> +       for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
> +               bitmap_copy(new->bitmap_mapped[gpuidx],
> +                           old->bitmap_mapped[gpuidx],
> +                           nbits);
> +       };
>          bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
>          bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
>
> @@ -2147,7 +2302,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
>                  next_start = min(node->last, last) + 1;
>
>                  if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
> -                   prange->mapped_to_gpu) {
> +                   svm_range_complete_mapped(prange)) {
>                          /* nothing to do */
>                  } else if (node->start < start || node->last > last) {
>                          /* node intersects the update range and its attributes
> @@ -3641,7 +3796,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>
>                  if (migrated && (!p->xnack_enabled ||
>                      (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) &&
> -                   prange->mapped_to_gpu) {
> +                   svm_range_partial_mapped(prange, prange->start, prange->last)) {
>                          pr_debug("restore_work will update mappings of GPUs\n");
>                          mutex_unlock(&prange->migrate_mutex);
>                          continue;
> @@ -3652,7 +3807,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>                          continue;
>                  }
>
> -               flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
> +               flush_tlb = !migrated && update_mapping &&
> +                           svm_range_partial_mapped(prange, prange->start, prange->last);
>
>                  r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
>                                                 true, flush_tlb);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index bf2fde008115..7e165854bc0e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -136,7 +136,7 @@ struct svm_range {
>          struct list_head                child_list;
>          DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
>          DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
> -       bool                            mapped_to_gpu;
> +       unsigned long                   *bitmap_mapped[MAX_GPU_INSTANCE];
>   };
>
>   static inline void svm_range_lock(struct svm_range *prange)
> @@ -167,6 +167,12 @@ static inline struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
>                  return NULL;
>   }
>
> +static inline int svm_range_mapped_nbits(uint64_t start, uint64_t last,
> +                                        uint8_t granularity)
> +{
> +       return (last >> granularity) - (start >> granularity) + 1;
> +}
> +
>   int svm_range_list_init(struct kfd_process *p);
>   void svm_range_list_fini(struct kfd_process *p);
>   int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
> --
> 2.35.1
>

