[PATCH] drm/amdgpu: Add a low priority scheduler for VRAM clearing
Felix Kuehling
felix.kuehling at amd.com
Wed May 17 22:10:19 UTC 2023
On 2023-05-17 17:40, Mukul Joshi wrote:
> Add a low priority DRM scheduler for VRAM clearing instead of using
> the existing high priority scheduler. Use the high priority scheduler
> for migrations and evictions.
>
> Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 37 ++++++++++++++++------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 5 ++-
> 3 files changed, 34 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 9924c8de57ab..46f249912b67 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -627,7 +627,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
> bo->tbo.resource->mem_type == TTM_PL_VRAM) {
> struct dma_fence *fence;
>
> - r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence);
> + r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
> if (unlikely(r))
> goto fail_unreserve;
>
> @@ -1354,7 +1354,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
> if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
> return;
>
> - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
> + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
> if (!WARN_ON(r)) {
> amdgpu_bo_fence(abo, fence, false);
> dma_fence_put(fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 34724b771ace..bbdad0dc1b07 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -384,7 +384,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
> struct dma_fence *wipe_fence = NULL;
>
> - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence);
> + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
> + false);
> if (r) {
> goto error;
> } else if (wipe_fence) {
> @@ -2040,8 +2041,18 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
> r);
> return;
> }
> +
> + r = drm_sched_entity_init(&adev->mman.delayed,
I'm not a big fan of the "delayed" name. But I see the parallel with
"immediate" and "delayed" entities in amdgpu_vm. We may want to rename
adev->mman.entity to "immediate" as well to make the distinction clearer.
> + DRM_SCHED_PRIORITY_NORMAL, &sched,
I wonder if this could even be DRM_SCHED_PRIORITY_MIN to minimize its
performance impact on page table updates and graphics user mode
submissions. Either way, the patch is
Acked-by: Felix Kuehling <Felix.Kuehling at amd.com>
Thanks,
Felix
> + 1, NULL);
> + if (r) {
> + DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
> + r);
> + goto error_free_entity;
> + }
> } else {
> drm_sched_entity_destroy(&adev->mman.entity);
> + drm_sched_entity_destroy(&adev->mman.delayed);
> dma_fence_put(man->move);
> man->move = NULL;
> }
> @@ -2053,6 +2064,11 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
> size = adev->gmc.visible_vram_size;
> man->size = size;
> adev->mman.buffer_funcs_enabled = enable;
> +
> + return;
> +
> +error_free_entity:
> + drm_sched_entity_destroy(&adev->mman.entity);
> }
>
> static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
> @@ -2060,14 +2076,16 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
> unsigned int num_dw,
> struct dma_resv *resv,
> bool vm_needs_flush,
> - struct amdgpu_job **job)
> + struct amdgpu_job **job,
> + bool delayed)
> {
> enum amdgpu_ib_pool_type pool = direct_submit ?
> AMDGPU_IB_POOL_DIRECT :
> AMDGPU_IB_POOL_DELAYED;
> int r;
> -
> - r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
> + struct drm_sched_entity *entity = delayed ? &adev->mman.delayed :
> + &adev->mman.entity;
> + r = amdgpu_job_alloc_with_ib(adev, entity,
> AMDGPU_FENCE_OWNER_UNDEFINED,
> num_dw * 4, pool, job);
> if (r)
> @@ -2108,7 +2126,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> num_loops = DIV_ROUND_UP(byte_count, max_bytes);
> num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
> r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw,
> - resv, vm_needs_flush, &job);
> + resv, vm_needs_flush, &job, false);
> if (r)
> return r;
>
> @@ -2144,7 +2162,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
> uint64_t dst_addr, uint32_t byte_count,
> struct dma_resv *resv,
> struct dma_fence **fence,
> - bool vm_needs_flush)
> + bool vm_needs_flush, bool delayed)
> {
> struct amdgpu_device *adev = ring->adev;
> unsigned int num_loops, num_dw;
> @@ -2157,7 +2175,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
> num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes);
> num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8);
> r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush,
> - &job);
> + &job, delayed);
> if (r)
> return r;
>
> @@ -2180,7 +2198,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
> int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> uint32_t src_data,
> struct dma_resv *resv,
> - struct dma_fence **f)
> + struct dma_fence **f,
> + bool delayed)
> {
> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> @@ -2209,7 +2228,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> goto error;
>
> r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv,
> - &next, true);
> + &next, true, delayed);
> if (r)
> goto error;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 8ef048a0a33e..e82b1edee7a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -61,6 +61,8 @@ struct amdgpu_mman {
> struct mutex gtt_window_lock;
> /* Scheduler entity for buffer moves */
> struct drm_sched_entity entity;
> + /* Scheduler entity for VRAM clearing */
> + struct drm_sched_entity delayed;
>
> struct amdgpu_vram_mgr vram_mgr;
> struct amdgpu_gtt_mgr gtt_mgr;
> @@ -152,7 +154,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> uint32_t src_data,
> struct dma_resv *resv,
> - struct dma_fence **fence);
> + struct dma_fence **fence,
> + bool delayed);
>
> int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
> void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
More information about the amd-gfx
mailing list