[PATCH 144/159] drm/amdkfd: Add kernel parameter to stop queue eviction on vm fault
Alex Deucher
alexander.deucher at amd.com
Wed Feb 24 22:18:44 UTC 2021
From: Oak Zeng <Oak.Zeng at amd.com>
This is to keep wavefront context for debug purpose
Signed-off-by: Oak Zeng <Oak.Zeng at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 +++++++
drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 5 +++--
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 5 +++--
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +++++
4 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 1d7839dbbc33..c4d822b46ea4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -748,6 +748,13 @@ bool no_system_mem_limit;
module_param(no_system_mem_limit, bool, 0644);
MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)");
+/**
+ * DOC: no_queue_eviction_on_vm_fault (int)
+ * If set, process queues will not be evicted on gpuvm fault. This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction).
+ */
+int amdgpu_no_queue_eviction_on_vm_fault = 0;
+MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)");
+module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
#endif
/**
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 8e64c01565ac..60f752d75833 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -80,8 +80,9 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE ||
- ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
- ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT;
+ ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
+ ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
+ !amdgpu_no_queue_eviction_on_vm_fault);
}
static void cik_event_interrupt_wq(struct kfd_dev *dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 74a460be077b..1c20458f3962 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -98,9 +98,10 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
source_id == SOC15_INTSRC_SDMA_TRAP ||
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
- client_id == SOC15_IH_CLIENTID_VMC ||
+ ((client_id == SOC15_IH_CLIENTID_VMC ||
client_id == SOC15_IH_CLIENTID_VMC1 ||
- client_id == SOC15_IH_CLIENTID_UTCL2;
+ client_id == SOC15_IH_CLIENTID_UTCL2) &&
+ !amdgpu_no_queue_eviction_on_vm_fault);
}
static void event_interrupt_wq_v9(struct kfd_dev *dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index e2ebd5a1d4de..b9839c650f21 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -169,6 +169,11 @@ extern bool hws_gws_support;
/* Queue preemption timeout in ms */
extern int queue_preemption_timeout_ms;
+/*
+ * Don't evict process queues on vm fault
+ */
+extern int amdgpu_no_queue_eviction_on_vm_fault;
+
/* Enable eviction debug messages */
extern bool debug_evictions;
--
2.29.2
More information about the amd-gfx
mailing list