[PATCH 12/32] drm/amdkfd: Update context save handling for multi XCC setup (v2)
Alex Deucher
alexander.deucher at amd.com
Tue Mar 28 15:13:24 UTC 2023
From: Mukul Joshi <mukul.joshi at amd.com>
Context save handling needs to be updated for a multi XCC
setup:
- On a multi XCC setup, KFD needs to report context save base
address and size for each XCC in MQD.
- Thunk will allocate a large context save area covering all
XCCs which will be equal to: num_of_xccs in a partition * size
of context save area for 1 XCC. However, it will report only the
size of the context save area for 1 XCC in the ioctl call.
- Driver then sets up the MQD correctly using the size passed from
Thunk and information about the number of XCCs in a partition.
- Update get_wave_state function to return context save area
for all XCCs in the partition.
v2: update the get_wave_state function for mqd manager v11 (Morris)
Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
Tested-by: Amber Lin <Amber.Lin at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
Signed-off-by: Morris Zhang <Shiwu.Zhang at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 4 +-
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 1 +
.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 1 +
.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 1 +
.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 62 ++++++++++++++++++-
.../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 1 +
6 files changed, 67 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a49ac19ca12e..07a04c41e92a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2095,8 +2095,8 @@ static int get_wave_state(struct device_queue_manager *dqm,
* and the queue should be protected against destruction by the process
* lock.
*/
- return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
- ctl_stack_used_size, save_area_used_size);
+ return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties,
+ ctl_stack, ctl_stack_used_size, save_area_used_size);
}
static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index eb18be74f559..23158db7da03 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -97,6 +97,7 @@ struct mqd_manager {
uint32_t queue_id);
int (*get_wave_state)(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index d54c6fdebbb6..772c09b5821b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -227,6 +227,7 @@ static uint32_t read_doorbell_id(void *mqd)
}
static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index e7acde3a849b..31f7732166fa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -253,6 +253,7 @@ static uint32_t read_doorbell_id(void *mqd)
}
static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 89f8ba8a127c..09083e905fee 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -294,6 +294,7 @@ static uint32_t read_doorbell_id(void *mqd)
}
static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size)
@@ -560,6 +561,7 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
int xcc = 0;
struct kfd_mem_obj xcc_mqd_mem_obj;
uint64_t xcc_gart_addr = 0;
+ uint64_t xcc_ctx_save_restore_area_address;
uint64_t offset = mm->mqd_stride(mm, q);
memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
@@ -569,6 +571,23 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
m->cp_mqd_stride_size = offset;
+
+ /*
+ * Update the CWSR address for each XCC if CWSR is enabled
+ * and CWSR area is allocated in thunk
+ */
+ if (mm->dev->kfd->cwsr_enabled &&
+ q->ctx_save_restore_area_address) {
+ xcc_ctx_save_restore_area_address =
+ q->ctx_save_restore_area_address +
+ (xcc * q->ctx_save_restore_area_size);
+
+ m->cp_hqd_ctx_save_base_addr_lo =
+ lower_32_bits(xcc_ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_base_addr_hi =
+ upper_32_bits(xcc_ctx_save_restore_area_address);
+ }
+
if (q->format == KFD_QUEUE_FORMAT_AQL) {
m->compute_tg_chunk_size = 1;
@@ -688,6 +707,46 @@ static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
return err;
}
+static int get_wave_state_v9_4_3(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
+ void __user *ctl_stack,
+ u32 *ctl_stack_used_size,
+ u32 *save_area_used_size)
+{
+ int xcc, err = 0;
+ void *xcc_mqd;
+ void __user *xcc_ctl_stack;
+ uint64_t mqd_stride_size = mm->mqd_stride(mm, q);
+ u32 tmp_ctl_stack_used_size = 0, tmp_save_area_used_size = 0;
+
+ for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
+ xcc_mqd = mqd + mqd_stride_size * xcc;
+ xcc_ctl_stack = (void __user *)((uintptr_t)ctl_stack +
+ q->ctx_save_restore_area_size * xcc);
+
+ err = get_wave_state(mm, xcc_mqd, q, xcc_ctl_stack,
+ &tmp_ctl_stack_used_size,
+ &tmp_save_area_used_size);
+ if (err)
+ break;
+
+ /*
+ * Set the ctl_stack_used_size and save_area_used_size to
+ * ctl_stack_used_size and save_area_used_size of XCC 0 when
+ * passing the info the user-space.
+ * For multi XCC, user-space would have to look at the header
+ * info of each Control stack area to determine the control
+ * stack size and save area used.
+ */
+ if (xcc == 0) {
+ *ctl_stack_used_size = tmp_ctl_stack_used_size;
+ *save_area_used_size = tmp_save_area_used_size;
+ }
+ }
+
+ return err;
+}
+
#if defined(CONFIG_DEBUG_FS)
static int debugfs_show_mqd(struct seq_file *m, void *data)
@@ -725,7 +784,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->allocate_mqd = allocate_mqd;
mqd->free_mqd = kfd_free_mqd_cp;
mqd->is_occupied = kfd_is_occupied_cp;
- mqd->get_wave_state = get_wave_state;
mqd->get_checkpoint_info = get_checkpoint_info;
mqd->checkpoint_mqd = checkpoint_mqd;
mqd->restore_mqd = restore_mqd;
@@ -739,11 +797,13 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
mqd->load_mqd = load_mqd_v9_4_3;
mqd->update_mqd = update_mqd_v9_4_3;
mqd->destroy_mqd = destroy_mqd_v9_4_3;
+ mqd->get_wave_state = get_wave_state_v9_4_3;
} else {
mqd->init_mqd = init_mqd;
mqd->load_mqd = load_mqd;
mqd->update_mqd = update_mqd;
mqd->destroy_mqd = kfd_destroy_mqd_cp;
+ mqd->get_wave_state = get_wave_state;
}
break;
case KFD_MQD_TYPE_HIQ:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index ebf963f42b51..fe69492b1bb3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -261,6 +261,7 @@ static void update_mqd_tonga(struct mqd_manager *mm, void *mqd,
}
static int get_wave_state(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q,
void __user *ctl_stack,
u32 *ctl_stack_used_size,
u32 *save_area_used_size)
--
2.39.2
More information about the amd-gfx
mailing list