[PATCH 17/35] drm/amdkfd: register HMM device private zone

Daniel Vetter daniel at ffwll.ch
Mon Mar 1 08:32:32 UTC 2021


On Wed, Jan 06, 2021 at 10:01:09PM -0500, Felix Kuehling wrote:
> From: Philip Yang <Philip.Yang at amd.com>
>
> Register vram memory as MEMORY_DEVICE_PRIVATE type resource, to
> allocate vram backing pages for page migration.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>

So maybe I'm getting this all wrong, but I think that the current ttm
fault code relies on devmap pte entries (especially for hugepte entries)
to stop get_user_pages. But this only works if the pte happens to not
point at a range with devmap pages.

This patch here changes that, and so probably breaks this devmap pte hack
ttm is using?

If I'm not wrong here then I think we need to first fix up the ttm code to
not use the devmap hack anymore, before a ttm based driver can register a
dev_pagemap. Also adding Thomas since that just came up in another
discussion.
-Daniel


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |   3 +
>  drivers/gpu/drm/amd/amdkfd/Makefile        |   3 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c   | 101 +++++++++++++++++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_migrate.h   |  48 ++++++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h      |   3 +
>  5 files changed, 157 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
>  create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index db96d69eb45e..562bb5b69137 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -30,6 +30,7 @@
>  #include <linux/dma-buf.h>
>  #include "amdgpu_xgmi.h"
>  #include <uapi/linux/kfd_ioctl.h>
> +#include "kfd_migrate.h"
>
>  /* Total memory size in system memory and all GPU VRAM. Used to
>   * estimate worst case amount of memory to reserve for page tables
> @@ -170,12 +171,14 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>               }
>
>               kgd2kfd_device_init(adev->kfd.dev, adev_to_drm(adev), &gpu_resources);
> +             svm_migrate_init(adev);
>       }
>  }
>
>  void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
>  {
>       if (adev->kfd.dev) {
> +             svm_migrate_fini(adev);
>               kgd2kfd_device_exit(adev->kfd.dev);
>               adev->kfd.dev = NULL;
>       }
> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
> index 387ce0217d35..a93301dbc464 100644
> --- a/drivers/gpu/drm/amd/amdkfd/Makefile
> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
> @@ -55,7 +55,8 @@ AMDKFD_FILES        := $(AMDKFD_PATH)/kfd_module.o \
>               $(AMDKFD_PATH)/kfd_dbgmgr.o \
>               $(AMDKFD_PATH)/kfd_smi_events.o \
>               $(AMDKFD_PATH)/kfd_crat.o \
> -             $(AMDKFD_PATH)/kfd_svm.o
> +             $(AMDKFD_PATH)/kfd_svm.o \
> +             $(AMDKFD_PATH)/kfd_migrate.o
>
>  ifneq ($(CONFIG_AMD_IOMMU_V2),)
>  AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> new file mode 100644
> index 000000000000..1950b86f1562
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -0,0 +1,101 @@
> +/*
> + * Copyright 2020 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#include <linux/types.h>
> +#include <linux/hmm.h>
> +#include <linux/dma-direction.h>
> +#include <linux/dma-mapping.h>
> +#include "amdgpu_sync.h"
> +#include "amdgpu_object.h"
> +#include "amdgpu_vm.h"
> +#include "amdgpu_mn.h"
> +#include "kfd_priv.h"
> +#include "kfd_svm.h"
> +#include "kfd_migrate.h"
> +
> +static void svm_migrate_page_free(struct page *page)
> +{
> +}
> +
> +/**
> + * svm_migrate_to_ram - CPU page fault handler
> + * @vmf: CPU vm fault vma, address
> + *
> + * Context: vm fault handler, mm->mmap_sem is taken
> + *
> + * Return:
> + * 0 - OK
> + * VM_FAULT_SIGBUS - notice application to have SIGBUS page fault
> + */
> +static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
> +{
> +     return VM_FAULT_SIGBUS;
> +}
> +
> +static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
> +     .page_free              = svm_migrate_page_free,
> +     .migrate_to_ram         = svm_migrate_to_ram,
> +};
> +
> +int svm_migrate_init(struct amdgpu_device *adev)
> +{
> +     struct kfd_dev *kfddev = adev->kfd.dev;
> +     struct dev_pagemap *pgmap;
> +     struct resource *res;
> +     unsigned long size;
> +     void *r;
> +
> +     /* Page migration works on Vega10 or newer */
> +     if (kfddev->device_info->asic_family < CHIP_VEGA10)
> +             return -EINVAL;
> +
> +     pgmap = &kfddev->pgmap;
> +     memset(pgmap, 0, sizeof(*pgmap));
> +
> +     /* TODO: register all vram to HMM for now.
> +      * should remove reserved size
> +      */
> +     size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
> +     res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
> +     if (IS_ERR(res))
> +             return -ENOMEM;
> +
> +     pgmap->type = MEMORY_DEVICE_PRIVATE;
> +     pgmap->res = *res;
> +     pgmap->ops = &svm_migrate_pgmap_ops;
> +     pgmap->owner = adev;
> +     pgmap->flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
> +     r = devm_memremap_pages(adev->dev, pgmap);
> +     if (IS_ERR(r)) {
> +             pr_err("failed to register HMM device memory\n");
> +             return PTR_ERR(r);
> +     }
> +
> +     pr_info("HMM registered %ldMB device memory\n", size >> 20);
> +
> +     return 0;
> +}
> +
> +void svm_migrate_fini(struct amdgpu_device *adev)
> +{
> +     memunmap_pages(&adev->kfd.dev->pgmap);
> +}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
> new file mode 100644
> index 000000000000..98ab685d3e17
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
> @@ -0,0 +1,48 @@
> +/*
> + * Copyright 2020 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef KFD_MIGRATE_H_
> +#define KFD_MIGRATE_H_
> +
> +#include <linux/rwsem.h>
> +#include <linux/list.h>
> +#include <linux/mutex.h>
> +#include <linux/sched/mm.h>
> +#include <linux/hmm.h>
> +#include "kfd_priv.h"
> +#include "kfd_svm.h"
> +
> +#if defined(CONFIG_DEVICE_PRIVATE)
> +int svm_migrate_init(struct amdgpu_device *adev);
> +void svm_migrate_fini(struct amdgpu_device *adev);
> +
> +#else
> +static inline int svm_migrate_init(struct amdgpu_device *adev)
> +{
> +     DRM_WARN_ONCE("DEVICE_PRIVATE kernel config option is not enabled, "
> +                   "add CONFIG_DEVICE_PRIVATE=y in config file to fix\n");
> +     return -ENODEV;
> +}
> +static inline void svm_migrate_fini(struct amdgpu_device *adev) {}
> +#endif
> +#endif /* KFD_MIGRATE_H_ */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 7a4b4b6dcf32..d5367e770b39 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -317,6 +317,9 @@ struct kfd_dev {
>       unsigned int max_doorbell_slices;
>
>       int noretry;
> +
> +     /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
> +     struct dev_pagemap pgmap;
>  };
>
>  enum kfd_mempool {
> --
> 2.29.2
>
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the amd-gfx mailing list