[PATCH v2 2/2] drm/amdkfd: Fix checkpoint-restore on multi-xcc
Yat Sin, David
David.YatSin at amd.com
Mon Jul 28 17:17:24 UTC 2025
[AMD Official Use Only - AMD Internal Distribution Only]
> -----Original Message-----
> From: Kuehling, Felix <Felix.Kuehling at amd.com>
> Sent: Monday, July 28, 2025 12:12 PM
> To: Yat Sin, David <David.YatSin at amd.com>; amd-gfx at lists.freedesktop.org
> Cc: Bhardwaj, Rajneesh <Rajneesh.Bhardwaj at amd.com>
> Subject: Re: [PATCH v2 2/2] drm/amdkfd: Fix checkpoint-restore on multi-xcc
>
>
> On 2025-07-22 13:47, David Yat Sin wrote:
> > GPUs with multi-xcc have multiple MQDs per queue. This patch saves and
> > restores all the MQDs within the partition.
> >
> > Signed-off-by: David Yat Sin <David.YatSin at amd.com>
> > ---
> > .../drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
> > .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 61 ++++++++++++++++---
> > .../amd/amdkfd/kfd_process_queue_manager.c | 20 ++++--
> > 3 files changed, 67 insertions(+), 16 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > index 2d91027e2a74..6c5c7c1bf5ed 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > @@ -2725,7 +2725,7 @@ static void get_queue_checkpoint_info(struct
> > device_queue_manager *dqm,
> >
> > dqm_lock(dqm);
> > mqd_mgr = dqm->mqd_mgrs[mqd_type];
> > - *mqd_size = mqd_mgr->mqd_size;
> > + *mqd_size = mqd_mgr->mqd_size * NUM_XCC(mqd_mgr->dev-
> >xcc_mask);
> > *ctl_stack_size = 0;
> >
> > if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE &&
> > mqd_mgr->get_checkpoint_info) diff --git
> > a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> > index 97933d2a3803..f2dee320fada 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> > @@ -373,7 +373,7 @@ static void get_checkpoint_info(struct mqd_manager
> *mm, void *mqd, u32 *ctl_stac
> > {
> > struct v9_mqd *m = get_mqd(mqd);
> >
> > - *ctl_stack_size = m->cp_hqd_cntl_stack_size;
> > + *ctl_stack_size = m->cp_hqd_cntl_stack_size *
> > +NUM_XCC(mm->dev->xcc_mask);
> > }
> >
> > static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void
> > *mqd_dst, void *ctl_stack_dst) @@ -388,6 +388,24 @@ static void
> checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, voi
> > memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size);
> > }
> >
> > +static void checkpoint_mqd_v9_4_3(struct mqd_manager *mm,
> > + void *mqd,
> > + void *mqd_dst,
> > + void *ctl_stack_dst)
> > +{
> > + struct v9_mqd *m;
> > + int xcc;
> > + uint64_t size = get_mqd(mqd)->cp_mqd_stride_size;
> > +
> > + for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
> > + m = get_mqd(mqd + size * xcc);
> > +
> > + checkpoint_mqd(mm, m,
> > + (uint8_t *)mqd_dst + sizeof(*m) * xcc,
> > + (uint8_t *)ctl_stack_dst + m->cp_hqd_cntl_stack_size
> * xcc);
> > + }
> > +}
> > +
> > static void restore_mqd(struct mqd_manager *mm, void **mqd,
> > struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
> > struct queue_properties *qp,
> > @@ -764,13 +782,35 @@ static void restore_mqd_v9_4_3(struct mqd_manager
> *mm, void **mqd,
> > const void *mqd_src,
> > const void *ctl_stack_src, u32 ctl_stack_size)
> > {
> > - restore_mqd(mm, mqd, mqd_mem_obj, gart_addr, qp, mqd_src,
> ctl_stack_src, ctl_stack_size);
> > - if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) {
> > - struct v9_mqd *m;
> > + struct kfd_mem_obj xcc_mqd_mem_obj;
> > + u32 mqd_ctl_stack_size;
> > + struct v9_mqd *m;
> > + u32 num_xcc;
> > + int xcc;
> >
> > - m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
> > - m->cp_hqd_pq_doorbell_control |= 1 <<
> > -
> CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
>
> Is this special handling for SRIOV no longer needed? Maybe it's enough that it's
> handled during queue initialization. Then checkpoint and restore just preserves the
> updated doorbell mode. Other than that, this patch looks good to me.
>
> Reviewed-by: Felix Kuehling <felix.kuehling at amd.com>
Thank you for the review.
Yes, It looks the doorbell mode would be preserved with the checkpoint and restore. We are still waiting for validation on SRIOV. In case we need to set m->cp_hqd_pq_doorbell_control in restore_mqd_v9_4_3, I will add it via another patch later.
~David
>
>
> > + uint64_t offset = mm->mqd_stride(mm, qp);
> > +
> > + mm->dev->dqm->current_logical_xcc_start++;
> > +
> > + num_xcc = NUM_XCC(mm->dev->xcc_mask);
> > + mqd_ctl_stack_size = ctl_stack_size / num_xcc;
> > +
> > + memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
> > +
> > + /* Set the MQD pointer and gart address to XCC0 MQD */
> > + *mqd = mqd_mem_obj->cpu_ptr;
> > + if (gart_addr)
> > + *gart_addr = mqd_mem_obj->gpu_addr;
> > +
> > + for (xcc = 0; xcc < num_xcc; xcc++) {
> > + get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset * xcc);
> > + restore_mqd(mm, (void **)&m,
> > + &xcc_mqd_mem_obj,
> > + NULL,
> > + qp,
> > + (uint8_t *)mqd_src + xcc * sizeof(*m),
> > + (uint8_t *)ctl_stack_src + xcc *
> mqd_ctl_stack_size,
> > + mqd_ctl_stack_size);
> > }
> > }
> > static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, @@
> > -906,7 +946,6 @@ struct mqd_manager *mqd_manager_init_v9(enum
> KFD_MQD_TYPE type,
> > mqd->free_mqd = kfd_free_mqd_cp;
> > mqd->is_occupied = kfd_is_occupied_cp;
> > mqd->get_checkpoint_info = get_checkpoint_info;
> > - mqd->checkpoint_mqd = checkpoint_mqd;
> > mqd->mqd_size = sizeof(struct v9_mqd);
> > mqd->mqd_stride = mqd_stride_v9;
> > #if defined(CONFIG_DEBUG_FS)
> > @@ -918,16 +957,18 @@ struct mqd_manager *mqd_manager_init_v9(enum
> KFD_MQD_TYPE type,
> > mqd->init_mqd = init_mqd_v9_4_3;
> > mqd->load_mqd = load_mqd_v9_4_3;
> > mqd->update_mqd = update_mqd_v9_4_3;
> > - mqd->restore_mqd = restore_mqd_v9_4_3;
> > mqd->destroy_mqd = destroy_mqd_v9_4_3;
> > mqd->get_wave_state = get_wave_state_v9_4_3;
> > + mqd->checkpoint_mqd = checkpoint_mqd_v9_4_3;
> > + mqd->restore_mqd = restore_mqd_v9_4_3;
> > } else {
> > mqd->init_mqd = init_mqd;
> > mqd->load_mqd = load_mqd;
> > mqd->update_mqd = update_mqd;
> > - mqd->restore_mqd = restore_mqd;
> > mqd->destroy_mqd = kfd_destroy_mqd_cp;
> > mqd->get_wave_state = get_wave_state;
> > + mqd->checkpoint_mqd = checkpoint_mqd;
> > + mqd->restore_mqd = restore_mqd;
> > }
> > break;
> > case KFD_MQD_TYPE_HIQ:
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> > b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> > index fe4c48930aad..bae200035ff4 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> > @@ -922,7 +922,10 @@ static int criu_checkpoint_queues_device(struct
> > kfd_process_device *pdd,
> >
> > q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
> >
> > - /* data stored in this order: priv_data, mqd, ctl_stack */
> > + /*
> > + * data stored in this order:
> > + * priv_data, mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0],
> ctl_stack[xcc1]...
> > + */
> > q_data->mqd_size = mqd_size;
> > q_data->ctl_stack_size = ctl_stack_size;
> >
> > @@ -971,7 +974,7 @@ int kfd_criu_checkpoint_queues(struct kfd_process *p,
> > }
> >
> > static void set_queue_properties_from_criu(struct queue_properties *qp,
> > - struct kfd_criu_queue_priv_data *q_data)
> > + struct kfd_criu_queue_priv_data *q_data,
> uint32_t num_xcc)
> > {
> > qp->is_interop = false;
> > qp->queue_percent = q_data->q_percent; @@ -985,7 +988,11 @@ static
> > void set_queue_properties_from_criu(struct queue_properties *qp,
> > qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
> > qp->ctx_save_restore_area_address = q_data-
> >ctx_save_restore_area_address;
> > qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
> > - qp->ctl_stack_size = q_data->ctl_stack_size;
> > + if (q_data->type == KFD_QUEUE_TYPE_COMPUTE)
> > + qp->ctl_stack_size = q_data->ctl_stack_size / num_xcc;
> > + else
> > + qp->ctl_stack_size = q_data->ctl_stack_size;
> > +
> > qp->type = q_data->type;
> > qp->format = q_data->format;
> > }
> > @@ -1045,12 +1052,15 @@ int kfd_criu_restore_queue(struct kfd_process *p,
> > goto exit;
> > }
> >
> > - /* data stored in this order: mqd, ctl_stack */
> > + /*
> > + * data stored in this order:
> > + * mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]...
> > + */
> > mqd = q_extra_data;
> > ctl_stack = mqd + q_data->mqd_size;
> >
> > memset(&qp, 0, sizeof(qp));
> > - set_queue_properties_from_criu(&qp, q_data);
> > + set_queue_properties_from_criu(&qp, q_data,
> > +NUM_XCC(pdd->dev->adev->gfx.xcc_mask));
> >
> > print_queue_properties(&qp);
> >
More information about the amd-gfx
mailing list