[PATCH 4/6] drm/amdkfd: enable grace period for xcc instance

Kim, Jonathan Jonathan.Kim at amd.com
Fri Jul 7 19:08:33 UTC 2023


[Public]

> -----Original Message-----
> From: Kim, Jonathan
> Sent: Friday, July 7, 2023 1:06 PM
> To: Huang, JinHuiEric <JinHuiEric.Huang at amd.com>; amd-
> gfx at lists.freedesktop.org
> Cc: Joshi, Mukul <Mukul.Joshi at amd.com>
> Subject: RE: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
>
>
>
> > -----Original Message-----
> > From: Huang, JinHuiEric <JinHuiEric.Huang at amd.com>
> > Sent: Friday, July 7, 2023 12:44 PM
> > To: Kim, Jonathan <Jonathan.Kim at amd.com>; amd-
> gfx at lists.freedesktop.org
> > Cc: Joshi, Mukul <Mukul.Joshi at amd.com>
> > Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> >
> >
> > On 2023-07-07 11:56, Kim, Jonathan wrote:
> > > [Public]
> > >
> > >> -----Original Message-----
> > >> From: Huang, JinHuiEric <JinHuiEric.Huang at amd.com>
> > >> Sent: Friday, July 7, 2023 11:46 AM
> > >> To: Kim, Jonathan <Jonathan.Kim at amd.com>; amd-
> > gfx at lists.freedesktop.org
> > >> Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc
> instance
> > >>
> > >>
> > >> On 2023-07-07 10:59, Kim, Jonathan wrote:
> > >>> [Public]
> > >>>
> > >>>> -----Original Message-----
> > >>>> From: Huang, JinHuiEric <JinHuiEric.Huang at amd.com>
> > >>>> Sent: Thursday, July 6, 2023 2:19 PM
> > >>>> To: amd-gfx at lists.freedesktop.org
> > >>>> Cc: Kim, Jonathan <Jonathan.Kim at amd.com>; Huang, JinHuiEric
> > >>>> <JinHuiEric.Huang at amd.com>
> > >>>> Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
> > >>>>
> > >>>> each xcc instance needs to get iq wait time and set
> > >>>> grace period accordingly.
> > >>>>
> > >>>> Signed-off-by: Eric Huang <jinhuieric.huang at amd.com>
> > >>>> ---
> > >>>>    .../drm/amd/amdkfd/kfd_device_queue_manager.c |  9 ++++--
> > >>>>    .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
> > >>>>    .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +++++++++++-
> --
> > ----
> > >> -
> > >>>>    .../drm/amd/amdkfd/kfd_packet_manager_v9.c    |  9 +++---
> > >>>>    drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  2 +-
> > >>>>    5 files changed, 32 insertions(+), 22 deletions(-)
> > >>>>
> > >>>> diff --git
> a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > >>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > >>>> index a2bff3f01359..0f12c1989e14 100644
> > >>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > >>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> > >>>> @@ -1606,6 +1606,8 @@ static int set_sched_resources(struct
> > >>>> device_queue_manager *dqm)
> > >>>>
> > >>>>    static int initialize_cpsch(struct device_queue_manager *dqm)
> > >>>>    {
> > >>>> +     uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
> > >>>> +
> > >>>>         pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
> > >>>>
> > >>>>         mutex_init(&dqm->lock_hidden);
> > >>>> @@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct
> > >>>> device_queue_manager *dqm)
> > >>>>         init_sdma_bitmaps(dqm);
> > >>>>
> > >>>>         if (dqm->dev->kfd2kgd->get_iq_wait_times)
> > >>>> -             dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
> > >>>> -                                     &dqm->wait_times, 0);
> > >>>> +             for_each_inst(xcc_id, xcc_mask)
> > >>>> +                     dqm->dev->kfd2kgd->get_iq_wait_times(
> > >>>> +                                     dqm->dev->adev,
> > >>>> +                                     &dqm->wait_times[xcc_id],
> > >>>> +                                     xcc_id);
> > >>>>         return 0;
> > >>>>    }
> > >>>>
> > >>>> diff --git
> a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > >>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > >>>> index 7dd4b177219d..62a6dc8d3032 100644
> > >>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > >>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> > >>>> @@ -262,7 +262,7 @@ struct device_queue_manager {
> > >>>>         /* used for GFX 9.4.3 only */
> > >>>>         uint32_t                current_logical_xcc_start;
> > >>>>
> > >>>> -     uint32_t                wait_times;
> > >>>> +     uint32_t                wait_times[32];
> > >>> I think wait_times[16] should be sufficient.  We only get the hamming
> > >> weight of 16 bits for NUM_XCC and I believe the xcc_mask is declared as
> a
> > >> uint16_t in the KGD portion anyway.  We may as well align to that.
> > >>>>         wait_queue_head_t       destroy_wait;
> > >>>>    };
> > >>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> > >>>> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> > >>>> index 401096c103b2..f37ab4b6d88c 100644
> > >>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> > >>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
> > >>>> @@ -374,27 +374,31 @@ int pm_update_grace_period(struct
> > >>>> packet_manager *pm, uint32_t grace_period)
> > >>>>    {
> > >>>>         int retval = 0;
> > >>>>         uint32_t *buffer, size;
> > >>>> +     uint32_t xcc_id, xcc_mask = pm->dqm->dev->xcc_mask;
> > >>>>
> > >>>>         size = pm->pmf->set_grace_period_size;
> > >>>>
> > >>>>         mutex_lock(&pm->lock);
> > >>>>
> > >>>>         if (size) {
> > >>>> -             kq_acquire_packet_buffer(pm->priv_queue,
> > >>>> -                     size / sizeof(uint32_t),
> > >>>> -                     (unsigned int **)&buffer);
> > >>>> -
> > >>>> -             if (!buffer) {
> > >>>> -                     pr_err("Failed to allocate buffer on kernel queue\n");
> > >>>> -                     retval = -ENOMEM;
> > >>>> -                     goto out;
> > >>>> -             }
> > >>>> +             for_each_inst(xcc_id, xcc_mask) {
> > >>>> +                     kq_acquire_packet_buffer(pm->priv_queue,
> > >>>> +                                     size / sizeof(uint32_t),
> > >>>> +                                     (unsigned int **)&buffer);
> > >>>>
> > >>>> -             retval = pm->pmf->set_grace_period(pm, buffer,
> > >>>> grace_period);
> > >>>> -             if (!retval)
> > >>>> -                     kq_submit_packet(pm->priv_queue);
> > >>>> -             else
> > >>>> -                     kq_rollback_packet(pm->priv_queue);
> > >>>> +                     if (!buffer) {
> > >>>> +                             pr_err("Failed to allocate buffer on kernel
> > >>>> queue\n");
> > >>>> +                             retval = -ENOMEM;
> > >>>> +                             goto out;
> > >>>> +                     }
> > >>>> +
> > >>>> +                     retval = pm->pmf->set_grace_period(pm, buffer,
> > >>>> +                                     grace_period, xcc_id);
> > >>>> +                     if (!retval)
> > >>>> +                             kq_submit_packet(pm->priv_queue);
> > >>>> +                     else
> > >>>> +                             kq_rollback_packet(pm->priv_queue);
> > >>> In the event of partial success do we need to roll back (i.e. resubmit
> > default
> > >> grace period) on failure?
> > >> The function pm_set_grace_period_v9 always return 0, and it is not
> > >> complicate operation, it should be always successful. Partial success
> > >> will not be the case we should care about at this moment.
> > > There's a roll back logic already built-in prior to this that's now only
> partial
> > with this patch.
> > Either way, after a side discussion with Mukul, it looks like the CP register
> > to set the default grace period is done by the master XCC per partition so
> > we can just stick to the current way of setting the grace period as long as
> > we reference the start_xcc_id correctly on read and write.
> > The start_xcc_id is not defined in amd_staging_drm_next. If we assume 0
> > is always the first instance number, the instance parameter in function
> > get_iq_wait_times and build_grace_period_packet_info will not be needed.
>
> I see. Each logical partition should have its own mask.
> As long as we rely on GET_INST then you are correct.

Sorry for the churn Eric.
So after more investigation and confirming with Mukul, we need to wait for a couple of his missing patches, as xcc_mask has a unique bit-wise start position per partition.  Grace period read/write will indeed require proper instancing.

Thanks,

Jon

>
> Thanks,
>
> Jon
>
> >
> > Regards,
> > Eric
> > > Plus the suspend call will set the grace period per partition, so the
> > wait_times array wasn't required in the first place.
> > >
> > > Thanks,
> > >
> > > Jon
> > >
> > >> Regards,
> > >> Eric
> > >>> I believe the default grace period is put in place for better CWSR
> > >> performance in normal mode, so leaving fast preemption settings on
> > failure
> > >> could impact performance.
> > >>> Thanks,
> > >>>
> > >>> Jon
> > >>>
> > >>>> +             }
> > >>>>         }
> > >>>>
> > >>>>    out:
> > >>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> > >>>> b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> > >>>> index 8fda16e6fee6..a9443d661957 100644
> > >>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> > >>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> > >>>> @@ -287,7 +287,8 @@ static int pm_map_queues_v9(struct
> > >> packet_manager
> > >>>> *pm, uint32_t *buffer,
> > >>>>
> > >>>>    static int pm_set_grace_period_v9(struct packet_manager *pm,
> > >>>>                 uint32_t *buffer,
> > >>>> -             uint32_t grace_period)
> > >>>> +             uint32_t grace_period,
> > >>>> +             uint32_t inst)
> > >>>>    {
> > >>>>         struct pm4_mec_write_data_mmio *packet;
> > >>>>         uint32_t reg_offset = 0;
> > >>>> @@ -295,14 +296,14 @@ static int pm_set_grace_period_v9(struct
> > >>>> packet_manager *pm,
> > >>>>
> > >>>>         pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
> > >>>>                         pm->dqm->dev->adev,
> > >>>> -                     pm->dqm->wait_times,
> > >>>> +                     pm->dqm->wait_times[inst],
> > >>>>                         grace_period,
> > >>>>                         &reg_offset,
> > >>>>                         &reg_data,
> > >>>> -                     0);
> > >>>> +                     inst);
> > >>>>
> > >>>>         if (grace_period == USE_DEFAULT_GRACE_PERIOD)
> > >>>> -             reg_data = pm->dqm->wait_times;
> > >>>> +             reg_data = pm->dqm->wait_times[inst];
> > >>>>
> > >>>>         packet = (struct pm4_mec_write_data_mmio *)buffer;
> > >>>>         memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
> > >>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > >>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > >>>> index d4c9ee3f9953..22c4a403ddd7 100644
> > >>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > >>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> > >>>> @@ -1400,7 +1400,7 @@ struct packet_manager_funcs {
> > >>>>                         enum kfd_unmap_queues_filter mode,
> > >>>>                         uint32_t filter_param, bool reset);
> > >>>>         int (*set_grace_period)(struct packet_manager *pm, uint32_t
> > *buffer,
> > >>>> -                     uint32_t grace_period);
> > >>>> +                     uint32_t grace_period, uint32_t inst);
> > >>>>         int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
> > >>>>                         uint64_t fence_address, uint64_t
> > >>>> fence_value);
> > >>>>         int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
> > >>>> --
> > >>>> 2.34.1



More information about the amd-gfx mailing list