[PATCH 12/14] drm/amdkfd: Update interrupt handling for GFX9.4.3
Alex Deucher
alexander.deucher at amd.com
Wed Mar 29 20:14:24 UTC 2023
From: Mukul Joshi <mukul.joshi at amd.com>
Update interrupt handling in CPX mode for GFX9.4.3 by using the
VMID space instead of SDMA client id to determine if an interrupt
should be processed by a KFD node. This is especially needed for
handling retry faults from MMHUB.
Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +-
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++--
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 16 ++++++----------
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 ++++----
drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 +-
6 files changed, 19 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a5064d95e7f5..e9b4b3c68b1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2431,6 +2431,9 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
* amdgpu_vm_handle_fault - graceful handling of VM faults.
* @adev: amdgpu device pointer
* @pasid: PASID of the VM
+ * @vmid: VMID, only used for GFX 9.4.3.
+ * @node_id: Node_id received in IH cookie. Only applicable for
+ * GFX 9.4.3.
* @addr: Address of the fault
* @write_fault: true is write fault, false is read fault
*
@@ -2438,7 +2441,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
* shouldn't be reported any more.
*/
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
- u32 client_id, u32 node_id, uint64_t addr,
+ u32 vmid, u32 node_id, uint64_t addr,
bool write_fault)
{
bool is_compute_context = false;
@@ -2463,7 +2466,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
addr /= AMDGPU_GPU_PAGE_SIZE;
- if (is_compute_context && !svm_range_restore_pages(adev, pasid, client_id,
+ if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
node_id, addr, write_fault)) {
amdgpu_bo_unref(&root);
return true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 721bc55bfafa..14864a8541ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -455,7 +455,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
struct amdgpu_task_info *task_info);
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
- u32 client_id, u32 node_id, uint64_t addr,
+ u32 vmid, u32 node_id, uint64_t addr,
bool write_fault);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 5c209f2d38c6..d819b544b043 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -593,8 +593,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
/* Try to handle the recoverable page faults by filling page
* tables
*/
- if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->client_id, node_id,
- addr, write_fault))
+ if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid,
+ node_id, addr, write_fault))
return 1;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index df372de6b056..fb3cf2c51da8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1073,18 +1073,14 @@ struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
struct kfd_node *kfd_device_by_id(uint32_t gpu_id);
struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev);
struct kfd_node *kfd_device_by_adev(const struct amdgpu_device *adev);
-static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t client_id,
- uint32_t node_id)
+static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t node_id,
+ uint32_t vmid)
{
- if ((node->interrupt_bitmap & (0x1U << node_id)) ||
- ((node_id % 4) == 0 &&
- (node->interrupt_bitmap >> 16) & (0x1U << client_id)))
- return true;
-
- return false;
+ return (node->interrupt_bitmap & (1 << node_id)) != 0 &&
+ (node->compute_vmid_bitmap & (1 << vmid)) != 0;
}
static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
- uint32_t client_id, uint32_t node_id) {
+ uint32_t node_id, uint32_t vmid) {
struct kfd_dev *dev = adev->kfd.dev;
uint32_t i;
@@ -1092,7 +1088,7 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
return dev->nodes[0];
for (i = 0; i < dev->num_nodes; i++)
- if (kfd_irq_is_from_node(dev->nodes[i], client_id, node_id))
+ if (kfd_irq_is_from_node(dev->nodes[i], node_id, vmid))
return dev->nodes[i];
return NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 0e2b21ec468c..8bd9d88655b8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2788,7 +2788,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
int
svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
- uint32_t client_id, uint32_t node_id,
+ uint32_t vmid, uint32_t node_id,
uint64_t addr, bool write_fault)
{
struct mm_struct *mm = NULL;
@@ -2840,10 +2840,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
goto out;
}
- node = kfd_node_by_irq_ids(adev, node_id, client_id);
+ node = kfd_node_by_irq_ids(adev, node_id, vmid);
if (!node) {
- pr_debug("kfd node does not exist node_id: %d, client_id: %d\n", node_id,
- client_id);
+ pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
+ vmid);
r = -EFAULT;
goto out;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index a165c73b40b2..5116786718b6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -173,7 +173,7 @@ int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
unsigned long addr, struct svm_range *parent,
struct svm_range *prange);
int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
- uint32_t client_id, uint32_t node_id, uint64_t addr,
+ uint32_t vmid, uint32_t node_id, uint64_t addr,
bool write_fault);
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
void svm_range_add_list_work(struct svm_range_list *svms,
--
2.39.2
More information about the amd-gfx
mailing list