[PATCH v2 1/3] drm/amdgpu: Add new placement for preemptible SG BOs
Christian König
ckoenig.leichtzumerken at gmail.com
Fri May 21 08:39:42 UTC 2021
Am 21.05.21 um 04:22 schrieb Felix Kuehling:
> SG BOs such as dmabuf imports and userptr BOs do not consume system
> resources directly. Instead they point to resources owned elsewhere.
> They typically get evicted by DMABuf move notifiers of MMU notifiers.
> If those notifiers don't need to wait for hardware fences (i.e. the SG
> BOs are used in a preemptible context), then we don't need to limit
> them to the GTT size and we don't need TTM to evict them.
>
> Create a new placement for such preemptible SG BOs that does not impose
> artificial size limits and TTM evictions.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
This will clash with the TTM changes I have in pipeline, so somebody
needs to handle the merge fallout at some point.
But I'm pretty sure that are just a few lines of code.
> ---
> drivers/gpu/drm/amd/amdgpu/Makefile | 7 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +-
> .../gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c | 190 ++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 37 +++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 11 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +-
> include/uapi/drm/amdgpu_drm.h | 4 +
> 7 files changed, 247 insertions(+), 9 deletions(-)
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 6331a11299d0..6cf0fe871d6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -51,9 +51,10 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
> atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
> amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
> amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
> - amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
> - amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
> - amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
> + amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
> + amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
> + amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o \
> + amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
> amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
> amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
> amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 745fcf3ea450..3f85ba8222ef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -157,7 +157,9 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
> if (domain & AMDGPU_GEM_DOMAIN_GTT) {
> places[c].fpfn = 0;
> places[c].lpfn = 0;
> - places[c].mem_type = TTM_PL_TT;
> + places[c].mem_type =
> + abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ?
> + AMDGPU_PL_PREEMPT : TTM_PL_TT;
> places[c].flags = 0;
> c++;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
> new file mode 100644
> index 000000000000..d607f314cc1b
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
> @@ -0,0 +1,190 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/*
> + * Copyright 2016-2021 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: Christian König, Felix Kuehling
> + */
> +
> +#include "amdgpu.h"
> +
> +static inline struct amdgpu_preempt_mgr *
> +to_preempt_mgr(struct ttm_resource_manager *man)
> +{
> + return container_of(man, struct amdgpu_preempt_mgr, manager);
> +}
> +
> +/**
> + * DOC: mem_info_preempt_used
> + *
> + * The amdgpu driver provides a sysfs API for reporting current total amount of
> + * used preemptible memory.
> + * The file mem_info_preempt_used is used for this, and returns the current
> + * used size of the preemptible block, in bytes
> + */
> +static ssize_t mem_info_preempt_used_show(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + struct drm_device *ddev = dev_get_drvdata(dev);
> + struct amdgpu_device *adev = drm_to_adev(ddev);
> + struct ttm_resource_manager *man;
> +
> + man = ttm_manager_type(&adev->mman.bdev, AMDGPU_PL_PREEMPT);
> + return sysfs_emit(buf, "%llu\n", amdgpu_preempt_mgr_usage(man));
> +}
> +
> +static DEVICE_ATTR_RO(mem_info_preempt_used);
> +
> +/**
> + * amdgpu_preempt_mgr_new - allocate a new node
> + *
> + * @man: TTM memory type manager
> + * @tbo: TTM BO we need this range for
> + * @place: placement flags and restrictions
> + * @mem: the resulting mem object
> + *
> + * Dummy, just count the space used without allocating resources or any limit.
> + */
> +static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man,
> + struct ttm_buffer_object *tbo,
> + const struct ttm_place *place,
> + struct ttm_resource *mem)
> +{
> + struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
> +
> + atomic64_add(mem->num_pages, &mgr->used);
> +
> + mem->mm_node = NULL;
> + mem->start = AMDGPU_BO_INVALID_OFFSET;
> + return 0;
> +}
> +
> +/**
> + * amdgpu_preempt_mgr_del - free ranges
> + *
> + * @man: TTM memory type manager
> + * @mem: TTM memory object
> + *
> + * Free the allocated GTT again.
> + */
> +static void amdgpu_preempt_mgr_del(struct ttm_resource_manager *man,
> + struct ttm_resource *mem)
> +{
> + struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
> +
> + atomic64_sub(mem->num_pages, &mgr->used);
> +}
> +
> +/**
> + * amdgpu_preempt_mgr_usage - return usage of PREEMPT domain
> + *
> + * @man: TTM memory type manager
> + *
> + * Return how many bytes are used in the GTT domain
> + */
> +uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man)
> +{
> + struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
> + s64 result = atomic64_read(&mgr->used);
> +
> + return (result > 0 ? result : 0) * PAGE_SIZE;
> +}
> +
> +/**
> + * amdgpu_preempt_mgr_debug - dump VRAM table
> + *
> + * @man: TTM memory type manager
> + * @printer: DRM printer to use
> + *
> + * Dump the table content using printk.
> + */
> +static void amdgpu_preempt_mgr_debug(struct ttm_resource_manager *man,
> + struct drm_printer *printer)
> +{
> + struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
> +
> + drm_printf(printer, "man size:%llu pages, preempt used:%lld pages\n",
> + man->size, (u64)atomic64_read(&mgr->used));
> +}
> +
> +static const struct ttm_resource_manager_func amdgpu_preempt_mgr_func = {
> + .alloc = amdgpu_preempt_mgr_new,
> + .free = amdgpu_preempt_mgr_del,
> + .debug = amdgpu_preempt_mgr_debug
> +};
> +
> +/**
> + * amdgpu_preempt_mgr_init - init PREEMPT manager and DRM MM
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Allocate and initialize the GTT manager.
> + */
> +int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
> +{
> + struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr;
> + struct ttm_resource_manager *man = &mgr->manager;
> + int ret;
> +
> + man->use_tt = true;
> + man->func = &amdgpu_preempt_mgr_func;
> +
> + ttm_resource_manager_init(man, (1 << 30));
> +
> + atomic64_set(&mgr->used, 0);
> +
> + ret = device_create_file(adev->dev, &dev_attr_mem_info_preempt_used);
> + if (ret) {
> + DRM_ERROR("Failed to create device file mem_info_preempt_used\n");
> + return ret;
> + }
> +
> + ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT,
> + &mgr->manager);
> + ttm_resource_manager_set_used(man, true);
> + return 0;
> +}
> +
> +/**
> + * amdgpu_preempt_mgr_fini - free and destroy GTT manager
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Destroy and free the GTT manager, returns -EBUSY if ranges are still
> + * allocated inside it.
> + */
> +void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
> +{
> + struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr;
> + struct ttm_resource_manager *man = &mgr->manager;
> + int ret;
> +
> + ttm_resource_manager_set_used(man, false);
> +
> + ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
> + if (ret)
> + return;
> +
> + device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
> +
> + ttm_resource_manager_cleanup(man);
> + ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 94d470108f83..c0aef327292a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -158,6 +158,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
> }
> break;
> case TTM_PL_TT:
> + case AMDGPU_PL_PREEMPT:
> default:
> amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
> break;
> @@ -217,6 +218,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>
> BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
> AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
> + BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT);
>
> /* Map only what can't be accessed directly */
> if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
> @@ -480,7 +482,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
> struct ttm_resource *old_mem = &bo->mem;
> int r;
>
> - if (new_mem->mem_type == TTM_PL_TT) {
> + if (new_mem->mem_type == TTM_PL_TT ||
> + new_mem->mem_type == AMDGPU_PL_PREEMPT) {
> r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
> if (r)
> return r;
> @@ -498,11 +501,13 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
> goto out;
> }
> if (old_mem->mem_type == TTM_PL_SYSTEM &&
> - new_mem->mem_type == TTM_PL_TT) {
> + (new_mem->mem_type == TTM_PL_TT ||
> + new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
> ttm_bo_move_null(bo, new_mem);
> goto out;
> }
> - if (old_mem->mem_type == TTM_PL_TT &&
> + if ((old_mem->mem_type == TTM_PL_TT ||
> + old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
> new_mem->mem_type == TTM_PL_SYSTEM) {
> r = ttm_bo_wait_ctx(bo, ctx);
> if (r)
> @@ -587,6 +592,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
> /* system memory */
> return 0;
> case TTM_PL_TT:
> + case AMDGPU_PL_PREEMPT:
> break;
> case TTM_PL_VRAM:
> mem->bus.offset = mem->start << PAGE_SHIFT;
> @@ -1294,7 +1300,8 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
> if (mem && mem->mem_type != TTM_PL_SYSTEM)
> flags |= AMDGPU_PTE_VALID;
>
> - if (mem && mem->mem_type == TTM_PL_TT) {
> + if (mem && (mem->mem_type == TTM_PL_TT ||
> + mem->mem_type == AMDGPU_PL_PREEMPT)) {
> flags |= AMDGPU_PTE_SYSTEM;
>
> if (ttm->caching == ttm_cached)
> @@ -1368,6 +1375,15 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
> }
>
> switch (bo->mem.mem_type) {
> + case AMDGPU_PL_PREEMPT:
> + /* Preemptible BOs don't own system resources managed by the
> + * driver (pages, VRAM, GART space). They point to resources
> + * owned by someone else (e.g. pageable memory in user mode
> + * or a DMABuf). They are used in a preemptible context so we
> + * can guarantee no deadlocks and good QoS in case of MMU
> + * notifiers or DMABuf move notifiers from the resource owner.
> + */
> + return false;
> case TTM_PL_TT:
> if (amdgpu_bo_is_amdgpu_bo(bo) &&
> amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
> @@ -1749,6 +1765,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
> DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
> (unsigned)(gtt_size / (1024 * 1024)));
>
> + /* Initialize preemptible memory pool */
> + r = amdgpu_preempt_mgr_init(adev);
> + if (r) {
> + DRM_ERROR("Failed initializing PREEMPT heap.\n");
> + return r;
> + }
> +
> /* Initialize various on-chip memory pools */
> r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
> if (r) {
> @@ -1793,6 +1816,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
>
> amdgpu_vram_mgr_fini(adev);
> amdgpu_gtt_mgr_fini(adev);
> + amdgpu_preempt_mgr_fini(adev);
> ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
> ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
> ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
> @@ -1987,6 +2011,11 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> return -EINVAL;
> }
>
> + if (bo->tbo.mem.mem_type == AMDGPU_PL_PREEMPT) {
> + DRM_ERROR("Trying to clear preemptible memory.\n");
> + return -EINVAL;
> + }
> +
> if (bo->tbo.mem.mem_type == TTM_PL_TT) {
> r = amdgpu_ttm_alloc_gart(&bo->tbo);
> if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index eb84a69c4b74..2877a924086f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -31,6 +31,7 @@
> #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
> #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
> #define AMDGPU_PL_OA (TTM_PL_PRIV + 2)
> +#define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3)
>
> #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
> #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
> @@ -54,6 +55,11 @@ struct amdgpu_gtt_mgr {
> atomic64_t available;
> };
>
> +struct amdgpu_preempt_mgr {
> + struct ttm_resource_manager manager;
> + atomic64_t used;
> +};
> +
> struct amdgpu_mman {
> struct ttm_device bdev;
> bool initialized;
> @@ -70,6 +76,7 @@ struct amdgpu_mman {
>
> struct amdgpu_vram_mgr vram_mgr;
> struct amdgpu_gtt_mgr gtt_mgr;
> + struct amdgpu_preempt_mgr preempt_mgr;
>
> uint64_t stolen_vga_size;
> struct amdgpu_bo *stolen_vga_memory;
> @@ -97,6 +104,8 @@ struct amdgpu_copy_mem {
>
> int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
> void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
> +int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
> +void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev);
> int amdgpu_vram_mgr_init(struct amdgpu_device *adev);
> void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
>
> @@ -104,6 +113,8 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
> uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man);
> int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man);
>
> +uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
> +
> u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
> int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
> struct ttm_resource *mem,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 55991f393481..da155c276c51 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1817,7 +1817,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
> bo = gem_to_amdgpu_bo(gobj);
> }
> mem = &bo->tbo.mem;
> - if (mem->mem_type == TTM_PL_TT)
> + if (mem->mem_type == TTM_PL_TT ||
> + mem->mem_type == AMDGPU_PL_PREEMPT)
> pages_addr = bo->tbo.ttm->dma_address;
> }
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 90bcd64c0147..f7a4f9fd2be8 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -139,6 +139,10 @@ extern "C" {
> * accessing it with various hw blocks
> */
> #define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10)
> +/* Flag that BO will be used only in preemptible context, which does
> + * not require GTT memory accounting
> + */
> +#define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11)
>
> struct drm_amdgpu_gem_create_in {
> /** the requested memory size */
More information about the amd-gfx
mailing list