[PATCH] drm/amdgpu: add sdma 4_x interrupts printing
Xu, Feifei
Feifei.Xu at amd.com
Thu Mar 4 07:23:55 UTC 2021
Thanks. Will modify like this:
if (instance < 0 || instance > adev->sdma.num_instances) {
Thanks,
Feifei
-----Original Message-----
From: Zhang, Hawking <Hawking.Zhang at amd.com>
Sent: Thursday, March 4, 2021 2:54 PM
To: Xu, Feifei <Feifei.Xu at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Xu, Feifei <Feifei.Xu at amd.com>
Subject: RE: [PATCH] drm/amdgpu: add sdma 4_x interrupts printing
[AMD Public Use]
+ if (instance < 0 || instance > 7){
Please check sdma.num_instances for the maximum instance, instead of hard coded to 7. The SDMA instance numbers vary from ASIC to ASIC.
With above fixed, the patch is
Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
Regards,
Hawking
-----Original Message-----
From: Feifei Xu <Feifei.Xu at amd.com>
Sent: Thursday, March 4, 2021 11:45
To: amd-gfx at lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang at amd.com>; Xu, Feifei <Feifei.Xu at amd.com>
Subject: [PATCH] drm/amdgpu: add sdma 4_x interrupts printing
Add VM_HOLE/DOORBELL_INVALID_BE/POLL_TIMEOUT/SRBMWRITE
interrupt info printing.
Signed-off-by: Feifei Xu <Feifei.Xu at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 5 +
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 119 +++++++++++++++++++++++
2 files changed, 124 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index e5b8fb8e75c5..f8fb755e3aa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -64,6 +64,11 @@ struct amdgpu_sdma {
struct amdgpu_irq_src trap_irq;
struct amdgpu_irq_src illegal_inst_irq;
struct amdgpu_irq_src ecc_irq;
+ struct amdgpu_irq_src vm_hole_irq;
+ struct amdgpu_irq_src doorbell_invalid_irq;
+ struct amdgpu_irq_src pool_timeout_irq;
+ struct amdgpu_irq_src srbm_write_irq;
+
int num_instances;
uint32_t srbm_soft_reset;
bool has_page_queue;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 3bede8a70d7e..3305b8ec5025 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1944,6 +1944,33 @@ static int sdma_v4_0_sw_init(void *handle)
return r;
}
+ /* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event*/
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_VM_HOLE,
+ &adev->sdma.vm_hole_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_DOORBELL_INVALID,
+ &adev->sdma.doorbell_invalid_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_POLL_TIMEOUT,
+ &adev->sdma.pool_timeout_irq);
+ if (r)
+ return r;
+
+ r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i),
+ SDMA0_4_0__SRCID__SDMA_SRBMWRITE,
+ &adev->sdma.srbm_write_irq);
+ if (r)
+ return r;
+ }
+
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
@@ -2198,6 +2225,72 @@ static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
return 0;
}
+static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry) {
+ int instance;
+ struct amdgpu_task_info task_info;
+ u64 addr;
+ instance = sdma_v4_0_irq_id_to_seq(entry->client_id);
+ if (instance < 0 || instance > 7){
+ dev_err(adev->dev, "sdma instance invalid %d\n", instance);
+ return -EINVAL;
+ }
+
+ addr = (u64)entry->src_data[0] << 12;
+ addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+
+ memset(&task_info, 0, sizeof(struct amdgpu_task_info));
+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
+ dev_info(adev->dev,
+ "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
+ "pasid:%u, for process %s pid %d thread %s pid %d\n",
+ instance, addr, entry->src_id, entry->ring_id, entry->vmid,
+ entry->pasid, task_info.process_name, task_info.tgid,
+ task_info.task_name, task_info.pid);
+ return 0;
+}
+
+static int sdma_v4_0_process_vm_hole_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry) {
+ dev_err(adev->dev,"MC or SEM address in VM hole. \n");
+ sdma_v4_0_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_0_process_doorbell_invalid_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry) {
+ dev_err(adev->dev,
+ "SDMA received a doorbell from BIF with byte_enable != 0xff. \n");
+ sdma_v4_0_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_0_process_pool_timeout_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry) {
+ dev_err(adev->dev,
+ "Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
+ sdma_v4_0_print_iv_entry(adev, entry);
+ return 0;
+}
+
+static int sdma_v4_0_process_srbm_write_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry) {
+ dev_err(adev->dev,
+ "SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer.\n");
+ sdma_v4_0_print_iv_entry(adev, entry);
+ return 0;
+}
+
static void sdma_v4_0_update_medium_grain_clock_gating(
struct amdgpu_device *adev,
bool enable)
@@ -2515,7 +2608,21 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
.process = amdgpu_sdma_process_ecc_irq, };
+static const struct amdgpu_irq_src_funcs sdma_v4_0_vm_hole_irq_funcs = {
+ .process = sdma_v4_0_process_vm_hole_irq, };
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_doorbell_invalid_irq_funcs = {
+ .process = sdma_v4_0_process_doorbell_invalid_irq,
+};
+static const struct amdgpu_irq_src_funcs sdma_v4_0_pool_timeout_irq_funcs = {
+ .process = sdma_v4_0_process_pool_timeout_irq,
+};
+
+static const struct amdgpu_irq_src_funcs sdma_v4_0_srbm_write_irq_funcs = {
+ .process = sdma_v4_0_process_srbm_write_irq, };
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) { @@ -2527,10 +2634,18 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
case 5:
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
+ adev->sdma.vm_hole_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
+ adev->sdma.doorbell_invalid_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
+ adev->sdma.pool_timeout_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
+ adev->sdma.srbm_write_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
break;
case 8:
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.vm_hole_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
+ adev->sdma.doorbell_invalid_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.pool_timeout_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.srbm_write_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
break;
case 2:
default:
@@ -2541,6 +2656,10 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;
+ adev->sdma.vm_hole_irq.funcs = &sdma_v4_0_vm_hole_irq_funcs;
+ adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_0_doorbell_invalid_irq_funcs;
+ adev->sdma.pool_timeout_irq.funcs = &sdma_v4_0_pool_timeout_irq_funcs;
+ adev->sdma.srbm_write_irq.funcs = &sdma_v4_0_srbm_write_irq_funcs;
}
/**
--
2.25.1
More information about the amd-gfx
mailing list