[PATCH] drm/amdkfd: option to disable system mem limit
Philip Yang
Philip.Yang at amd.com
Mon Jul 27 13:24:55 UTC 2020
If multiple process share system memory through /dev/shm, KFD allocate
memory should not fail if it reachs the system memory limit because
one copy of physical system memory are shared by multiple process.
Add module parameter to provide user option to disable system memory
limit check, to run multiple process share memory application. By
default the system memory limit is on.
Print out debug message to warn user if KFD allocate memory failed
because of system memory limit.
Signed-off-by: Philip Yang <Philip.Yang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 9 +++++++++
3 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088d03b3..3c0d5ecfe0d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -187,9 +187,11 @@ extern int amdgpu_force_asic_type;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
extern bool debug_evictions;
+extern bool no_system_mem_limit;
#else
static const int sched_policy = KFD_SCHED_POLICY_HWS;
static const bool debug_evictions; /* = false */
+static const bool no_system_mem_limit;
#endif
extern int amdgpu_tmz;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 8703aa1fe4a5..502e8204c012 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -99,7 +99,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
mem *= si.mem_unit;
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
- kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
+ if (no_system_mem_limit)
+ kfd_mem_limit.max_system_mem_limit = U64_MAX;
+ else
+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4);
kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
(kfd_mem_limit.max_system_mem_limit >> 20),
@@ -148,6 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
spin_lock(&kfd_mem_limit.mem_limit_lock);
+ if (kfd_mem_limit.system_mem_used + system_mem_needed >
+ kfd_mem_limit.max_system_mem_limit)
+ pr_debug("Set no_system_mem_limit if using shared memory\n");
+
if ((kfd_mem_limit.system_mem_used + system_mem_needed >
kfd_mem_limit.max_system_mem_limit) ||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f0d223..e9acd0a9f327 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -715,6 +715,15 @@ MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1
bool debug_evictions;
module_param(debug_evictions, bool, 0644);
MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)");
+
+/**
+ * DOC: no_system_mem_limit(bool)
+ * Disable system memory limit, to support multiple process shared memory
+ */
+bool no_system_mem_limit;
+module_param(no_system_mem_limit, bool, 0644);
+MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)");
+
#endif
/**
--
2.17.1
More information about the amd-gfx
mailing list