[PATCH 08/25] drm/amdkfd: fix zero reading of VMID and PASID for Hawaii
Felix Kuehling
Felix.Kuehling at amd.com
Thu Jul 12 02:32:51 UTC 2018
From: Lan Xiao <Lan.Xiao at amd.com>
Upon VM Fault, the VMID and PASID written by HW are zeros in
Hawaii. Instead of reading from ih_ring_entry, read directly
from the registers. This workaround fix the soft hang issues
caused by mishandled VM Fault in Hawaii.
Signed-off-by: Lan Xiao <Lan.Xiao at amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 20 +++++++++++++++-
drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 29 ++++++++++++++++++++++-
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 14 +++++++++--
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 4 +++-
drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 6 +++--
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 ++++---
drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 5 ++++
7 files changed, 77 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 5364e22..87fcdfd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
@@ -216,7 +217,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
- .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
+ .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
+ .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -912,3 +914,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
RREG32(mmVM_INVALIDATE_RESPONSE);
return 0;
}
+
+ /**
+ * read_vmid_from_vmfault_reg - read vmid from register
+ *
+ * adev: amdgpu_device pointer
+ * @vmid: vmid pointer
+ * read vmid from register (CIK).
+ */
+static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
+
+ return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index cc33870..5d2475d 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -25,12 +25,39 @@
#include "cik_int.h"
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
- const uint32_t *ih_ring_entry)
+ const uint32_t *ih_ring_entry,
+ uint32_t *patched_ihre,
+ bool *patched_flag)
{
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
+ const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
unsigned int vmid, pasid;
+ /* This workaround is due to HW/FW limitation on Hawaii that
+ * VMID and PASID are not written into ih_ring_entry
+ */
+ if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
+ ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
+ dev->device_info->asic_family == CHIP_HAWAII) {
+ struct cik_ih_ring_entry *tmp_ihre =
+ (struct cik_ih_ring_entry *)patched_ihre;
+
+ *patched_flag = true;
+ *tmp_ihre = *ihre;
+
+ vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
+ pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid);
+
+ tmp_ihre->ring_id &= 0x000000ff;
+ tmp_ihre->ring_id |= vmid << 8;
+ tmp_ihre->ring_id |= pasid << 16;
+
+ return (pasid != 0) &&
+ vmid >= dev->vm_info.first_vmid_kfd &&
+ vmid <= dev->vm_info.last_vmid_kfd;
+ }
+
/* Only handle interrupts from KFD VMIDs */
vmid = (ihre->ring_id & 0x0000ff00) >> 8;
if (vmid < dev->vm_info.first_vmid_kfd ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 48c505e..6007511 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -577,14 +577,24 @@ static int kfd_resume(struct kfd_dev *kfd)
/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
+ uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
+ bool is_patched = false;
+
if (!kfd->init_complete)
return;
+ if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
+ dev_err_once(kfd_device, "Ring entry too small\n");
+ return;
+ }
+
spin_lock(&kfd->interrupt_lock);
if (kfd->interrupts_active
- && interrupt_is_wanted(kfd, ih_ring_entry)
- && enqueue_ih_ring_entry(kfd, ih_ring_entry))
+ && interrupt_is_wanted(kfd, ih_ring_entry,
+ patched_ihre, &is_patched)
+ && enqueue_ih_ring_entry(kfd,
+ is_patched ? patched_ihre : ih_ring_entry))
queue_work(kfd->ih_wq, &kfd->interrupt_work);
spin_unlock(&kfd->interrupt_lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index d6b64e6..f836897 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -26,7 +26,9 @@
static bool event_interrupt_isr_v9(struct kfd_dev *dev,
- const uint32_t *ih_ring_entry)
+ const uint32_t *ih_ring_entry,
+ uint32_t *patched_ihre,
+ bool *patched_flag)
{
uint16_t source_id, client_id, pasid, vmid;
const uint32_t *data = ih_ring_entry;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index db6d933..c56ac47 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -151,13 +151,15 @@ static void interrupt_wq(struct work_struct *work)
ih_ring_entry);
}
-bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry)
+bool interrupt_is_wanted(struct kfd_dev *dev,
+ const uint32_t *ih_ring_entry,
+ uint32_t *patched_ihre, bool *flag)
{
/* integer and bitwise OR so there is no boolean short-circuiting */
unsigned int wanted = 0;
wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
- ih_ring_entry);
+ ih_ring_entry, patched_ihre, flag);
return wanted != 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 91a3368..cd5121d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -180,9 +180,10 @@ enum cache_policy {
struct kfd_event_interrupt_class {
bool (*interrupt_isr)(struct kfd_dev *dev,
- const uint32_t *ih_ring_entry);
+ const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
+ bool *patched_flag);
void (*interrupt_wq)(struct kfd_dev *dev,
- const uint32_t *ih_ring_entry);
+ const uint32_t *ih_ring_entry);
};
struct kfd_device_info {
@@ -806,7 +807,9 @@ int kfd_interrupt_init(struct kfd_dev *dev);
void kfd_interrupt_exit(struct kfd_dev *dev);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
-bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry);
+bool interrupt_is_wanted(struct kfd_dev *dev,
+ const uint32_t *ih_ring_entry,
+ uint32_t *patched_ihre, bool *flag);
/* Power Management */
void kgd2kfd_suspend(struct kfd_dev *kfd);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 28b11d1..76a30cb 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -276,6 +276,10 @@ struct tile_config {
* faults. On GFXv9 VM fault information is fully contained in the IH
* packet and this function is not needed.
*
+ * @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the
+ * IH ring entry. This function allows the KFD ISR to get the VMID
+ * from the fault status register as early as possible.
+ *
* This structure contains function pointers to services that the kgd driver
* provides to amdkfd driver.
*
@@ -394,6 +398,7 @@ struct kfd2kgd_calls {
int (*get_vm_fault_info)(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);
+ uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
};
/**
--
2.7.4
More information about the amd-gfx
mailing list