[PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

Kim, Jonathan Jonathan.Kim at amd.com
Fri Jul 7 15:56:26 UTC 2023


[Public]

> -----Original Message-----
> From: Huang, JinHuiEric <JinHuiEric.Huang at amd.com>
> Sent: Friday, July 7, 2023 11:46 AM
> To: Kim, Jonathan <Jonathan.Kim at amd.com>; amd-gfx at lists.freedesktop.org
> Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
>
>
> On 2023-07-07 10:59, Kim, Jonathan wrote:
> > [Public]
> >
> >> -----Original Message-----
> >> From: Huang, JinHuiEric <JinHuiEric.Huang at amd.com>
> >> Sent: Thursday, July 6, 2023 2:19 PM
> >> To: amd-gfx at lists.freedesktop.org
> >> Cc: Kim, Jonathan <Jonathan.Kim at amd.com>; Huang, JinHuiEric
> >> <JinHuiEric.Huang at amd.com>
> >> Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> >>
> >> each xcc instance needs to get iq wait time and set
> >> grace period accordingly.
> >>
> >> Signed-off-by: Eric Huang <jinhuieric.huang at amd.com>
> >> ---
> >>   .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 ++++--
> >>   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
> >>   .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++++++++++--------
> >>   .../drm/amd/amdkfd/kfd_packet_manager_v9.c    |  9 +++---
> >>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  2 +-
> >>   5 files changed, 32 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >> index a2bff3f01359..0f12c1989e14 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> >> @@ -1606,6 +1606,8 @@ static int set_sched_resources(struct device_queue_manager *dqm)
> >>
> >>   static int initialize_cpsch(struct device_queue_manager *dqm)
> >>   {
> >> +     uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
> >> +
> >>        pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
> >>
> >>        mutex_init(&dqm->lock_hidden);
> >> @@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
> >>        init_sdma_bitmaps(dqm);
> >>
> >>        if (dqm->dev->kfd2kgd->get_iq_wait_times)
> >> -             dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> >> -                                     &dqm->wait_times, 0);
> >> +             for_each_inst(xcc_id, xcc_mask)
> >> +                     dqm->dev->kfd2kgd->get_iq_wait_times(
> >> +                                     dqm->dev->adev,
> >> +                                     &dqm->wait_times[xcc_id],
> >> +                                     xcc_id);
> >>        return 0;
> >>   }
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >> index 7dd4b177219d..62a6dc8d3032 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> >> @@ -262,7 +262,7 @@ struct device_queue_manager {
> >>        /* used for GFX 9.4.3 only */
> >>        uint32_t                current_logical_xcc_start;
> >>
> >> -     uint32_t                wait_times;
> >> +     uint32_t                wait_times[32];
> > I think wait_times[16] should be sufficient.  We only get the hamming
> > weight of 16 bits for NUM_XCC and I believe the xcc_mask is declared as a
> > uint16_t in the KGD portion anyway.  We may as well align to that.
> >
> >>        wait_queue_head_t       destroy_wait;
> >>   };
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >> index 401096c103b2..f37ab4b6d88c 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> >> @@ -374,27 +374,31 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
> >>   {
> >>        int retval = 0;
> >>        uint32_t *buffer, size;
> >> +     uint32_t xcc_id, xcc_mask = pm->dqm->dev->xcc_mask;
> >>
> >>        size = pm->pmf->set_grace_period_size;
> >>
> >>        mutex_lock(&pm->lock);
> >>
> >>        if (size) {
> >> -             kq_acquire_packet_buffer(pm->priv_queue,
> >> -                     size / sizeof(uint32_t),
> >> -                     (unsigned int **)&buffer);
> >> -
> >> -             if (!buffer) {
> >> -                     pr_err("Failed to allocate buffer on kernel queue\n");
> >> -                     retval = -ENOMEM;
> >> -                     goto out;
> >> -             }
> >> +             for_each_inst(xcc_id, xcc_mask) {
> >> +                     kq_acquire_packet_buffer(pm->priv_queue,
> >> +                                     size / sizeof(uint32_t),
> >> +                                     (unsigned int **)&buffer);
> >>
> >> -             retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
> >> -             if (!retval)
> >> -                     kq_submit_packet(pm->priv_queue);
> >> -             else
> >> -                     kq_rollback_packet(pm->priv_queue);
> >> +                     if (!buffer) {
> >> +                             pr_err("Failed to allocate buffer on kernel queue\n");
> >> +                             retval = -ENOMEM;
> >> +                             goto out;
> >> +                     }
> >> +
> >> +                     retval = pm->pmf->set_grace_period(pm, buffer,
> >> +                                     grace_period, xcc_id);
> >> +                     if (!retval)
> >> +                             kq_submit_packet(pm->priv_queue);
> >> +                     else
> >> +                             kq_rollback_packet(pm->priv_queue);
> > In the event of partial success, do we need to roll back (i.e. resubmit the
> > default grace period) on failure?
> The function pm_set_grace_period_v9 always returns 0, and it is not a
> complicated operation; it should always succeed. Partial success is not
> a case we need to worry about at this point.

There's rollback logic already built in prior to this that becomes only partial with this patch.
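To spell out the concern with a hypothetical sketch only (updated_mask is a name I'm inventing here for a mask of instances that already took the new value; nothing in this patch tracks that):

	/* Hypothetical: if set_grace_period fails at instance N, the
	 * instances that already submitted the new grace period could be
	 * restored by resubmitting the default for each of them.
	 */
	for_each_inst(xcc_id, updated_mask) {
		kq_acquire_packet_buffer(pm->priv_queue,
				size / sizeof(uint32_t),
				(unsigned int **)&buffer);
		if (!buffer)
			break;
		if (!pm->pmf->set_grace_period(pm, buffer,
				USE_DEFAULT_GRACE_PERIOD, xcc_id))
			kq_submit_packet(pm->priv_queue);
		else
			kq_rollback_packet(pm->priv_queue);
	}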
Either way, after a side discussion with Mukul, it looks like the CP register that sets the default grace period is written by the master XCC per partition, so we can just stick with the current way of setting the grace period as long as we reference the start_xcc_id correctly on read and write.
Plus the suspend call will set the grace period per partition, so the wait_times array wasn't required in the first place.
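Roughly what I have in mind, as a sketch only — how start_xcc_id gets resolved for the partition isn't settled here, so ffs() on the xcc_mask below is just my stand-in:

	/* Keep the single wait_times field; read it once against the
	 * partition's first (master) XCC rather than per instance.
	 * Deriving start_xcc_id this way is an assumption, not something
	 * this patch defines.
	 */
	uint32_t start_xcc_id = ffs(dqm->dev->xcc_mask) - 1;

	if (dqm->dev->kfd2kgd->get_iq_wait_times)
		dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
				&dqm->wait_times,
				start_xcc_id);

pm_set_grace_period_v9 would then pass the same start_xcc_id (instead of the hard-coded 0) to build_grace_period_packet_info.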

Thanks,

Jon

>
> Regards,
> Eric
> > I believe the default grace period is put in place for better CWSR
> > performance in normal mode, so leaving fast preemption settings on failure
> > could impact performance.
> >
> > Thanks,
> >
> > Jon
> >
> >> +             }
> >>        }
> >>
> >>   out:
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> >> index 8fda16e6fee6..a9443d661957 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> >> @@ -287,7 +287,8 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
> >>
> >>   static int pm_set_grace_period_v9(struct packet_manager *pm,
> >>                uint32_t *buffer,
> >> -             uint32_t grace_period)
> >> +             uint32_t grace_period,
> >> +             uint32_t inst)
> >>   {
> >>        struct pm4_mec_write_data_mmio *packet;
> >>        uint32_t reg_offset = 0;
> >> @@ -295,14 +296,14 @@ static int pm_set_grace_period_v9(struct packet_manager *pm,
> >>
> >>        pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
> >>                        pm->dqm->dev->adev,
> >> -                     pm->dqm->wait_times,
> >> +                     pm->dqm->wait_times[inst],
> >>                        grace_period,
> >>                        &reg_offset,
> >>                        &reg_data,
> >> -                     0);
> >> +                     inst);
> >>
> >>        if (grace_period == USE_DEFAULT_GRACE_PERIOD)
> >> -             reg_data = pm->dqm->wait_times;
> >> +             reg_data = pm->dqm->wait_times[inst];
> >>
> >>        packet = (struct pm4_mec_write_data_mmio *)buffer;
> >>        memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >> index d4c9ee3f9953..22c4a403ddd7 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> >> @@ -1400,7 +1400,7 @@ struct packet_manager_funcs {
> >>                        enum kfd_unmap_queues_filter mode,
> >>                        uint32_t filter_param, bool reset);
> >>        int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
> >> -                     uint32_t grace_period);
> >> +                     uint32_t grace_period, uint32_t inst);
> >>        int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
> >>                        uint64_t fence_address, uint64_t fence_value);
> >>        int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
> >> --
> >> 2.34.1


