[RFC PATCH 08/18] drm/amdgpu: Don't try moving BOs to preferred domain before submit
Friedrich Vock
friedrich.vock at gmx.de
Wed Apr 24 16:56:58 UTC 2024
TTM now takes care of moving buffers to the best possible domain, so the
CS code no longer needs to pick a placement domain and throttle its own
buffer migrations before a submission.

Signed-off-by: Friedrich Vock <friedrich.vock at gmx.de>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 191 +--------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h     |   4 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   7 -
 4 files changed, 3 insertions(+), 201 deletions(-)
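
For context on what the hunks below delete: the CS throttling treats elapsed
time as a byte budget. A minimal standalone sketch of that conversion, using
plain variables in place of the adev->mm_stats fields (the 8192 MB/s figure
is an assumed example bandwidth, not a value taken from the driver):

/* Illustrative sketch only, not part of the patch. */
#include <stdint.h>
#include <stdio.h>

static uint64_t us_to_bytes(unsigned int log2_max_MBps, int64_t us)
{
	if (us <= 0 || !log2_max_MBps)
		return 0;
	/* accum_us grows by one million per second, so shifting by
	 * log2(max MB/s) converts microseconds into bytes. */
	return (uint64_t)us << log2_max_MBps;
}

int main(void)
{
	const unsigned int log2_max_MBps = 13;	/* 2^13 = 8192 MB/s */
	const int64_t accum_us = 200000;	/* the 200 ms per-IB cap below */

	/* 200000 << 13 = 1638400000: at full accumulation, ~1.6 GB of
	 * buffer moves are allowed for one submission. */
	printf("budget: %llu bytes\n",
	       (unsigned long long)us_to_bytes(log2_max_MBps, accum_us));
	return 0;
}
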
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cac0ca64367b3..3004adc6fa679 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1404,8 +1404,6 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev);
bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev);
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
- u64 num_vis_bytes);
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 *registers,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e9168677ef0a6..92a0cffc1adc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -638,196 +638,19 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
return 0;
}
-/* Convert microseconds to bytes. */
-static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
-{
- if (us <= 0 || !adev->mm_stats.log2_max_MBps)
- return 0;
-
- /* Since accum_us is incremented by a million per second, just
- * multiply it by the number of MB/s to get the number of bytes.
- */
- return us << adev->mm_stats.log2_max_MBps;
-}
-
-static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
-{
- if (!adev->mm_stats.log2_max_MBps)
- return 0;
-
- return bytes >> adev->mm_stats.log2_max_MBps;
-}
-
-/* Returns how many bytes TTM can move right now. If no bytes can be moved,
- * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
- * which means it can go over the threshold once. If that happens, the driver
- * will be in debt and no other buffer migrations can be done until that debt
- * is repaid.
- *
- * This approach allows moving a buffer of any size (it's important to allow
- * that).
- *
- * The currency is simply time in microseconds and it increases as the clock
- * ticks. The accumulated microseconds (us) are converted to bytes and
- * returned.
- */
-static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
- u64 *max_bytes,
- u64 *max_vis_bytes)
-{
- s64 time_us, increment_us;
- u64 free_vram, total_vram, used_vram;
- /* Allow a maximum of 200 accumulated ms. This is basically per-IB
- * throttling.
- *
- * It means that in order to get full max MBps, at least 5 IBs per
- * second must be submitted and not more than 200ms apart from each
- * other.
- */
- const s64 us_upper_bound = 200000;
-
- if (!adev->mm_stats.log2_max_MBps) {
- *max_bytes = 0;
- *max_vis_bytes = 0;
- return;
- }
-
- total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
- used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
- free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
-
- spin_lock(&adev->mm_stats.lock);
-
- /* Increase the amount of accumulated us. */
- time_us = ktime_to_us(ktime_get());
- increment_us = time_us - adev->mm_stats.last_update_us;
- adev->mm_stats.last_update_us = time_us;
- adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
- us_upper_bound);
-
- /* This prevents the short period of low performance when the VRAM
- * usage is low and the driver is in debt or doesn't have enough
- * accumulated us to fill VRAM quickly.
- *
- * The situation can occur in these cases:
- * - a lot of VRAM is freed by userspace
- * - the presence of a big buffer causes a lot of evictions
- * (solution: split buffers into smaller ones)
- *
- * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
- * accum_us to a positive number.
- */
- if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
- s64 min_us;
-
- /* Be more aggressive on dGPUs. Try to fill a portion of free
- * VRAM now.
- */
- if (!(adev->flags & AMD_IS_APU))
- min_us = bytes_to_us(adev, free_vram / 4);
- else
- min_us = 0; /* Reset accum_us on APUs. */
-
- adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
- }
-
- /* This is set to 0 if the driver is in debt to disallow (optional)
- * buffer moves.
- */
- *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
-
- /* Do the same for visible VRAM if half of it is free */
- if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
- u64 total_vis_vram = adev->gmc.visible_vram_size;
- u64 used_vis_vram =
- amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
-
- if (used_vis_vram < total_vis_vram) {
- u64 free_vis_vram = total_vis_vram - used_vis_vram;
-
- adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
- increment_us, us_upper_bound);
-
- if (free_vis_vram >= total_vis_vram / 2)
- adev->mm_stats.accum_us_vis =
- max(bytes_to_us(adev, free_vis_vram / 2),
- adev->mm_stats.accum_us_vis);
- }
-
- *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
- } else {
- *max_vis_bytes = 0;
- }
-
- spin_unlock(&adev->mm_stats.lock);
-}
-
-/* Report how many bytes have really been moved for the last command
- * submission. This can result in a debt that can stop buffer migrations
- * temporarily.
- */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
- u64 num_vis_bytes)
-{
- spin_lock(&adev->mm_stats.lock);
- adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
- adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
- spin_unlock(&adev->mm_stats.lock);
-}
-
static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_cs_parser *p = param;
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
.resv = bo->tbo.base.resv
};
- uint32_t domain;
- int r;
if (bo->tbo.pin_count)
return 0;
- /* Don't move this buffer if we have depleted our allowance
- * to move it. Don't move anything if the threshold is zero.
- */
- if (p->bytes_moved < p->bytes_moved_threshold &&
- (!bo->tbo.base.dma_buf ||
- list_empty(&bo->tbo.base.dma_buf->attachments))) {
- if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
- /* And don't move a CPU_ACCESS_REQUIRED BO to limited
- * visible VRAM if we've depleted our allowance to do
- * that.
- */
- if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
- domain = bo->preferred_domains;
- else
- domain = bo->allowed_domains;
- } else {
- domain = bo->preferred_domains;
- }
- } else {
- domain = bo->allowed_domains;
- }
-
-retry:
- amdgpu_bo_placement_from_domain(bo, domain);
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-
- p->bytes_moved += ctx.bytes_moved;
- if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- amdgpu_res_cpu_visible(adev, bo->tbo.resource))
- p->bytes_moved_vis += ctx.bytes_moved;
-
- if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
- domain = bo->allowed_domains;
- goto retry;
- }
-
- return r;
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
@@ -947,13 +770,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->user_pages = NULL;
}
- amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
- &p->bytes_moved_vis_threshold);
- p->bytes_moved = 0;
- p->bytes_moved_vis = 0;
-
r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
- amdgpu_cs_bo_validate, p);
+ amdgpu_cs_bo_validate, NULL);
if (r) {
DRM_ERROR("amdgpu_vm_validate() failed.\n");
goto out_free_user_pages;
@@ -973,9 +791,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
}
- amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
- p->bytes_moved_vis);
-
for (i = 0; i < p->gang_size; ++i)
amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
p->bo_list->gws_obj,
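
The removed amdgpu_cs_report_moved_bytes() above is what created the "debt"
mentioned in the deleted comments: a submission may move more bytes than its
budget, driving the accumulator negative, and us_to_bytes() then returns a
zero budget until enough wall-clock time has passed. A simplified sketch of
that accounting (a plain variable stands in for adev->mm_stats.accum_us and
the spinlock is omitted):

#include <stdint.h>
#include <stdio.h>

static int64_t accum_us;	/* stand-in for adev->mm_stats.accum_us */

static int64_t bytes_to_us(unsigned int log2_max_MBps, uint64_t bytes)
{
	if (!log2_max_MBps)
		return 0;
	return (int64_t)(bytes >> log2_max_MBps);
}

/* Moving more than the budget allowed makes accum_us negative ("debt"),
 * so no optional moves happen until the clock has repaid it. */
static void report_moved_bytes(unsigned int log2_max_MBps, uint64_t num_bytes)
{
	accum_us -= bytes_to_us(log2_max_MBps, num_bytes);
}

int main(void)
{
	accum_us = 100000;			/* 100 ms accumulated */
	report_moved_bytes(13, 1ull << 30);	/* one 1 GiB move */
	/* 2^30 >> 13 = 131072 us of cost, so accum_us is now -31072. */
	printf("accum_us after move: %lld\n", (long long)accum_us);
	return 0;
}

With placement decisions moving into TTM, this per-submit bookkeeping has no
user left, hence the removal of the declaration in amdgpu.h and of the
remaining caller in amdgpu_object.c below.
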
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
index 39c33ad100cb7..e3d04ac4764be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -67,10 +67,6 @@ struct amdgpu_cs_parser {
struct amdgpu_bo_list *bo_list;
struct amdgpu_mn *mn;
struct dma_fence *fence;
- uint64_t bytes_moved_threshold;
- uint64_t bytes_moved_vis_threshold;
- uint64_t bytes_moved;
- uint64_t bytes_moved_vis;
/* user fence */
struct amdgpu_bo *uf_bo;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 331b9ed8062c7..5834a95d680d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -621,13 +621,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (unlikely(r != 0))
return r;
- if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
- amdgpu_res_cpu_visible(adev, bo->tbo.resource))
- amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
- ctx.bytes_moved);
- else
- amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
-
if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
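
One more piece of removed behaviour worth a worked example: when at least
128 MB or 1/8 of VRAM was free, amdgpu_cs_get_threshold_for_moves() granted
a dGPU an immediate budget worth a quarter of the free space. The 8 GiB card
size, 2 GiB free, and 13-bit bandwidth below are assumptions chosen for the
arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned int log2_max_MBps = 13;	/* assumed 8192 MB/s */
	const uint64_t total_vram = 8ull << 30;	/* assumed 8 GiB card */
	const uint64_t free_vram = 2ull << 30;	/* assumed 2 GiB free */
	int64_t accum_us = 0;			/* no time accumulated yet */

	/* Either condition suffices; here both hold (2 GiB >= 128 MiB
	 * and 2 GiB >= 8 GiB / 8). */
	if (free_vram >= 128ull << 20 || free_vram >= total_vram / 8) {
		/* dGPU case: start with a budget of free_vram / 4, i.e.
		 * bytes_to_us(512 MiB) = (512 MiB) >> 13 = 65536 us. */
		int64_t min_us = (int64_t)((free_vram / 4) >> log2_max_MBps);

		if (min_us > accum_us)
			accum_us = min_us;
	}

	printf("boosted accum_us: %lld\n", (long long)accum_us); /* 65536 */
	return 0;
}
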
--
2.44.0