[Intel-gfx] [PATCH v11] drm/amdgpu: add drm buddy support to amdgpu
Christian König
ckoenig.leichtzumerken at gmail.com
Wed Mar 23 07:37:11 UTC 2022
Am 23.03.22 um 07:25 schrieb Arunpravin Paneer Selvam:
> [SNIP]
> @@ -415,48 +409,86 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> goto error_fini;
> }
>
> - mode = DRM_MM_INSERT_BEST;
> + INIT_LIST_HEAD(&node->blocks);
> +
> if (place->flags & TTM_PL_FLAG_TOPDOWN)
> - mode = DRM_MM_INSERT_HIGH;
> + node->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
>
> - pages_left = node->base.num_pages;
> + if (place->fpfn || lpfn != man->size >> PAGE_SHIFT)
> + /* Allocate blocks in desired range */
> + node->flags |= DRM_BUDDY_RANGE_ALLOCATION;
>
> - /* Limit maximum size to 2GB due to SG table limitations */
> - pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
> + BUG_ON(!node->base.num_pages);
Please drop this BUG_ON(). This is not something which prevents further
data corruption, so the BUG_ON() is not justified.
> + pages_left = node->base.num_pages;
>
> i = 0;
> - spin_lock(&mgr->lock);
> while (pages_left) {
> - uint32_t alignment = tbo->page_alignment;
> + if (tbo->page_alignment)
> + min_page_size = tbo->page_alignment << PAGE_SHIFT;
> + else
> + min_page_size = mgr->default_page_size;
The handling here looks extremely awkward to me.
min_page_size should be determined outside of the loop, based on
default_page_size, alignment and contiguous flag.
Then why do you drop the lock and grab it again inside the loop? And
what is "i" actually good for?
> +
> + /* Limit maximum size to 2GB due to SG table limitations */
> + pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
>
> if (pages >= pages_per_node)
> - alignment = pages_per_node;
> -
> - r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
> - alignment, 0, place->fpfn,
> - lpfn, mode);
> - if (unlikely(r)) {
> - if (pages > pages_per_node) {
> - if (is_power_of_2(pages))
> - pages = pages / 2;
> - else
> - pages = rounddown_pow_of_two(pages);
> - continue;
> - }
> - goto error_free;
> + min_page_size = pages_per_node << PAGE_SHIFT;
> +
> + if (!is_contiguous && !IS_ALIGNED(pages, min_page_size >> PAGE_SHIFT))
> + is_contiguous = 1;
> +
> + if (is_contiguous) {
> + pages = roundup_pow_of_two(pages);
> + min_page_size = pages << PAGE_SHIFT;
> +
> + if (pages > lpfn)
> + lpfn = pages;
> }
>
> - vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
> - amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
> - pages_left -= pages;
> + BUG_ON(min_page_size < mm->chunk_size);
> +
> + mutex_lock(&mgr->lock);
> + r = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
> + (u64)lpfn << PAGE_SHIFT,
> + (u64)pages << PAGE_SHIFT,
> + min_page_size,
> + &node->blocks,
> + node->flags);
> + mutex_unlock(&mgr->lock);
> + if (unlikely(r))
> + goto error_free_blocks;
> +
> ++i;
>
> if (pages > pages_left)
> - pages = pages_left;
> + pages_left = 0;
> + else
> + pages_left -= pages;
> }
> - spin_unlock(&mgr->lock);
>
> - if (i == 1)
> + /* Free unused pages for contiguous allocation */
> + if (is_contiguous) {
Well that looks really odd, why is trimming not part of
drm_buddy_alloc_blocks() ?
> + u64 actual_size = (u64)node->base.num_pages << PAGE_SHIFT;
> +
> + mutex_lock(&mgr->lock);
> + drm_buddy_block_trim(mm,
> + actual_size,
> + &node->blocks);
Why is the drm_buddy_block_trim() function given all the blocks and not
just the last one?
Regards,
Christian.
> + mutex_unlock(&mgr->lock);
> + }
> +
> + list_for_each_entry(block, &node->blocks, link)
> + vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
> +
> + block = amdgpu_vram_mgr_first_block(&node->blocks);
> + if (!block) {
> + r = -EINVAL;
> + goto error_fini;
> + }
> +
> + node->base.start = amdgpu_node_start(block) >> PAGE_SHIFT;
> +
> + if (i == 1 && is_contiguous)
> node->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
>
> if (adev->gmc.xgmi.connected_to_cpu)
> @@ -468,13 +500,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> *res = &node->base;
> return 0;
>
> -error_free:
> - while (i--)
> - drm_mm_remove_node(&node->mm_nodes[i]);
> - spin_unlock(&mgr->lock);
> +error_free_blocks:
> + mutex_lock(&mgr->lock);
> + drm_buddy_free_list(mm, &node->blocks);
> + mutex_unlock(&mgr->lock);
> error_fini:
> ttm_resource_fini(man, &node->base);
> - kvfree(node);
> + kfree(node);
>
> return r;
> }
> @@ -490,27 +522,26 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
> static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
> struct ttm_resource *res)
> {
> - struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
> + struct amdgpu_vram_mgr_node *node = to_amdgpu_vram_mgr_node(res);
> struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
> struct amdgpu_device *adev = to_amdgpu_device(mgr);
> + struct drm_buddy *mm = &mgr->mm;
> + struct drm_buddy_block *block;
> uint64_t vis_usage = 0;
> - unsigned i, pages;
>
> - spin_lock(&mgr->lock);
> - for (i = 0, pages = res->num_pages; pages;
> - pages -= node->mm_nodes[i].size, ++i) {
> - struct drm_mm_node *mm = &node->mm_nodes[i];
> + mutex_lock(&mgr->lock);
> + list_for_each_entry(block, &node->blocks, link)
> + vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
>
> - drm_mm_remove_node(mm);
> - vis_usage += amdgpu_vram_mgr_vis_size(adev, mm);
> - }
> amdgpu_vram_mgr_do_reserve(man);
> - spin_unlock(&mgr->lock);
> +
> + drm_buddy_free_list(mm, &node->blocks);
> + mutex_unlock(&mgr->lock);
>
> atomic64_sub(vis_usage, &mgr->vis_usage);
>
> ttm_resource_fini(man, res);
> - kvfree(node);
> + kfree(node);
> }
>
> /**
> @@ -648,13 +679,22 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
> struct drm_printer *printer)
> {
> struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
> + struct drm_buddy *mm = &mgr->mm;
> + struct drm_buddy_block *block;
>
> drm_printf(printer, " vis usage:%llu\n",
> amdgpu_vram_mgr_vis_usage(mgr));
>
> - spin_lock(&mgr->lock);
> - drm_mm_print(&mgr->mm, printer);
> - spin_unlock(&mgr->lock);
> + mutex_lock(&mgr->lock);
> + drm_printf(printer, "default_page_size: %lluKiB\n",
> + mgr->default_page_size >> 10);
> +
> + drm_buddy_print(mm, printer);
> +
> + drm_printf(printer, "reserved:\n");
> + list_for_each_entry(block, &mgr->reserved_pages, link)
> + drm_buddy_block_print(mm, block, printer);
> + mutex_unlock(&mgr->lock);
> }
>
> static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
> @@ -674,16 +714,21 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
> {
> struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
> struct ttm_resource_manager *man = &mgr->manager;
> + int err;
>
> ttm_resource_manager_init(man, &adev->mman.bdev,
> adev->gmc.real_vram_size);
>
> man->func = &amdgpu_vram_mgr_func;
>
> - drm_mm_init(&mgr->mm, 0, man->size >> PAGE_SHIFT);
> - spin_lock_init(&mgr->lock);
> + err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
> + if (err)
> + return err;
> +
> + mutex_init(&mgr->lock);
> INIT_LIST_HEAD(&mgr->reservations_pending);
> INIT_LIST_HEAD(&mgr->reserved_pages);
> + mgr->default_page_size = PAGE_SIZE;
>
> ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
> ttm_resource_manager_set_used(man, true);
> @@ -711,16 +756,16 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
> if (ret)
> return;
>
> - spin_lock(&mgr->lock);
> + mutex_lock(&mgr->lock);
> list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
> kfree(rsv);
>
> list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
> - drm_mm_remove_node(&rsv->mm_node);
> + drm_buddy_free_list(&mgr->mm, &rsv->block);
> kfree(rsv);
> }
> - drm_mm_takedown(&mgr->mm);
> - spin_unlock(&mgr->lock);
> + drm_buddy_fini(&mgr->mm);
> + mutex_unlock(&mgr->lock);
>
> ttm_resource_manager_cleanup(man);
> ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
>
> base-commit: a678f97326454b60ffbbde6abf52d23997d71a27
More information about the Intel-gfx
mailing list