[PATCH] drm/amdgpu: Use the slab allocator to reduce job allocation fragmentation

Prike Liang Prike.Liang at amd.com
Fri May 10 08:11:45 UTC 2024


Using kzalloc() results in about 50% memory fragmentation, therefore
use the slab allocator to reproduce memory fragmentation.

Signed-off-by: Prike Liang <Prike.Liang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 26 ++++++++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h |  1 +
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ea14f1c8f430..3de1b42291b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -3040,6 +3040,7 @@ static void __exit amdgpu_exit(void)
 	amdgpu_fence_slab_fini();
 	mmu_notifier_synchronize();
 	amdgpu_xcp_drv_release();
+	amdgpue_job_slab_fini();
 }
 
 module_init(amdgpu_init);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index e4742b65032d..8327bf017a0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -31,6 +31,8 @@
 #include "amdgpu_trace.h"
 #include "amdgpu_reset.h"
 
+static struct kmem_cache *amdgpu_job_slab;
+
 static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
 {
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
@@ -101,10 +103,19 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (num_ibs == 0)
 		return -EINVAL;
 
-	*job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
-	if (!*job)
+	amdgpu_job_slab = kmem_cache_create("amdgpu_job",
+				struct_size(*job, ibs, num_ibs), 0,
+				SLAB_HWCACHE_ALIGN, NULL);
+	if (!amdgpu_job_slab) {
+		DRM_ERROR("create amdgpu_job cache failed\n");
 		return -ENOMEM;
+	}
 
+	*job = kmem_cache_zalloc(amdgpu_job_slab, GFP_KERNEL);
+	if (!*job) {
+		kmem_cache_destroy(amdgpu_job_slab);
+		return -ENOMEM;
+	}
 	/*
 	 * Initialize the scheduler to at least some ring so that we always
 	 * have a pointer to adev.
@@ -138,7 +149,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
 	if (r) {
 		if (entity)
 			drm_sched_job_cleanup(&(*job)->base);
-		kfree(*job);
+		kmem_cache_free(amdgpu_job_slab, job);
 	}
 
 	return r;
@@ -179,6 +190,11 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
 		amdgpu_ib_free(ring->adev, &job->ibs[i], f);
 }
 
+void amdgpue_job_slab_fini(void)
+{
+	kmem_cache_destroy(amdgpu_job_slab);
+}
+
 static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 {
 	struct amdgpu_job *job = to_amdgpu_job(s_job);
@@ -189,7 +205,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
 	/* only put the hw fence if has embedded fence */
 	if (!job->hw_fence.ops)
-		kfree(job);
+		kmem_cache_free(amdgpu_job_slab, job);
 	else
 		dma_fence_put(&job->hw_fence);
 }
@@ -221,7 +237,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
 		dma_fence_put(job->gang_submit);
 
 	if (!job->hw_fence.ops)
-		kfree(job);
+		kmem_cache_free(amdgpu_job_slab, job);
 	else
 		dma_fence_put(&job->hw_fence);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index a963a25ddd62..4491d5f9c74d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -103,5 +103,6 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
 			     struct dma_fence **fence);
 
 void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched);
+void amdgpue_job_slab_fini(void);
 
 #endif
-- 
2.34.1



More information about the amd-gfx mailing list