<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Arial;font-size:10pt;color:#0078D7;margin:15pt;" align="Left">
[AMD Official Use Only - Internal Distribution Only]<br>
</p>
<br>
<div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Thanks! I will update the commit message before pushing. It should be the way how SDMA queue count were used to unmap SDMA engines according to the previous understanding was wrong.</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Regards,</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Yong</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> Kuehling, Felix <Felix.Kuehling@amd.com><br>
<b>Sent:</b> Tuesday, February 25, 2020 12:06 PM<br>
<b>To:</b> Zhao, Yong <Yong.Zhao@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org><br>
<b>Subject:</b> Re: [PATCH 6/6] drm/amdkfd: Delete unnecessary unmap queue package submissions</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">As I understand it, the SDMA queue counting wasn't incorrect. The main
<br>
change here is that you no longer send separate unmap packets for SDMA <br>
queues, and that makes SDMA queue counting unnecessary.<br>
<br>
That said, this patch series is a nice cleanup and improvement. The <br>
series is<br>
<br>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com><br>
<br>
On 2020-02-24 17:18, Yong Zhao wrote:<br>
> The previous SDMA queue counting was wrong. In addition, after confirming<br>
> with MEC firmware team, we understands that only one unmap queue package,<br>
> instead of one unmap queue package for CP and each SDMA engine, is needed,<br>
> which results in much simpler driver code.<br>
><br>
> Change-Id: I84fd2f7e63d6b7f664580b425a78d3e995ce9abc<br>
> Signed-off-by: Yong Zhao <Yong.Zhao@amd.com><br>
> ---<br>
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 79 ++++++-------------<br>
>   .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 -<br>
>   .../amd/amdkfd/kfd_process_queue_manager.c    | 16 ++--<br>
>   3 files changed, 29 insertions(+), 68 deletions(-)<br>
><br>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c<br>
> index 958275db3f55..692abfd2088a 100644<br>
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c<br>
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c<br>
> @@ -109,6 +109,11 @@ static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)<br>
>        return dqm->dev->device_info->num_xgmi_sdma_engines;<br>
>   }<br>
>   <br>
> +static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)<br>
> +{<br>
> +     return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);<br>
> +}<br>
> +<br>
>   unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)<br>
>   {<br>
>        return dqm->dev->device_info->num_sdma_engines<br>
> @@ -375,11 +380,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,<br>
>        if (q->properties.is_active)<br>
>                increment_queue_count(dqm, q->properties.type);<br>
>   <br>
> -     if (q->properties.type == KFD_QUEUE_TYPE_SDMA)<br>
> -             dqm->sdma_queue_count++;<br>
> -     else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)<br>
> -             dqm->xgmi_sdma_queue_count++;<br>
> -<br>
>        /*<br>
>         * Unconditionally increment this counter, regardless of the queue's<br>
>         * type or whether the queue is active.<br>
> @@ -460,15 +460,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,<br>
>        mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(<br>
>                        q->properties.type)];<br>
>   <br>
> -     if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {<br>
> +     if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)<br>
>                deallocate_hqd(dqm, q);<br>
> -     } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {<br>
> -             dqm->sdma_queue_count--;<br>
> +     else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)<br>
>                deallocate_sdma_queue(dqm, q);<br>
> -     } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {<br>
> -             dqm->xgmi_sdma_queue_count--;<br>
> +     else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)<br>
>                deallocate_sdma_queue(dqm, q);<br>
> -     } else {<br>
> +     else {<br>
>                pr_debug("q->properties.type %d is invalid\n",<br>
>                                q->properties.type);<br>
>                return -EINVAL;<br>
> @@ -915,8 +913,6 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)<br>
>        INIT_LIST_HEAD(&dqm->queues);<br>
>        dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;<br>
>        dqm->active_cp_queue_count = 0;<br>
> -     dqm->sdma_queue_count = 0;<br>
> -     dqm->xgmi_sdma_queue_count = 0;<br>
>   <br>
>        for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {<br>
>                int pipe_offset = pipe * get_queues_per_pipe(dqm);<br>
> @@ -981,8 +977,11 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,<br>
>        int bit;<br>
>   <br>
>        if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {<br>
> -             if (dqm->sdma_bitmap == 0)<br>
> +             if (dqm->sdma_bitmap == 0) {<br>
> +                     pr_err("No more SDMA queue to allocate\n");<br>
>                        return -ENOMEM;<br>
> +             }<br>
> +<br>
>                bit = __ffs64(dqm->sdma_bitmap);<br>
>                dqm->sdma_bitmap &= ~(1ULL << bit);<br>
>                q->sdma_id = bit;<br>
> @@ -991,8 +990,10 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,<br>
>                q->properties.sdma_queue_id = q->sdma_id /<br>
>                                get_num_sdma_engines(dqm);<br>
>        } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {<br>
> -             if (dqm->xgmi_sdma_bitmap == 0)<br>
> +             if (dqm->xgmi_sdma_bitmap == 0) {<br>
> +                     pr_err("No more XGMI SDMA queue to allocate\n");<br>
>                        return -ENOMEM;<br>
> +             }<br>
>                bit = __ffs64(dqm->xgmi_sdma_bitmap);<br>
>                dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);<br>
>                q->sdma_id = bit;<br>
> @@ -1081,8 +1082,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)<br>
>        INIT_LIST_HEAD(&dqm->queues);<br>
>        dqm->active_queue_count = dqm->processes_count = 0;<br>
>        dqm->active_cp_queue_count = 0;<br>
> -     dqm->sdma_queue_count = 0;<br>
> -     dqm->xgmi_sdma_queue_count = 0;<br>
> +<br>
>        dqm->active_runlist = false;<br>
>        dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));<br>
>        dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));<br>
> @@ -1254,11 +1254,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,<br>
>        list_add(&q->list, &qpd->queues_list);<br>
>        qpd->queue_count++;<br>
>   <br>
> -     if (q->properties.type == KFD_QUEUE_TYPE_SDMA)<br>
> -             dqm->sdma_queue_count++;<br>
> -     else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)<br>
> -             dqm->xgmi_sdma_queue_count++;<br>
> -<br>
>        if (q->properties.is_active) {<br>
>                increment_queue_count(dqm, q->properties.type);<br>
>   <br>
> @@ -1315,20 +1310,6 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,<br>
>        return 0;<br>
>   }<br>
>   <br>
> -static int unmap_sdma_queues(struct device_queue_manager *dqm)<br>
> -{<br>
> -     int i, retval = 0;<br>
> -<br>
> -     for (i = 0; i < dqm->dev->device_info->num_sdma_engines +<br>
> -             dqm->dev->device_info->num_xgmi_sdma_engines; i++) {<br>
> -             retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,<br>
> -                     KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);<br>
> -             if (retval)<br>
> -                     return retval;<br>
> -     }<br>
> -     return retval;<br>
> -}<br>
> -<br>
>   /* dqm->lock mutex has to be locked before calling this function */<br>
>   static int map_queues_cpsch(struct device_queue_manager *dqm)<br>
>   {<br>
> @@ -1366,12 +1347,6 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,<br>
>        if (!dqm->active_runlist)<br>
>                return retval;<br>
>   <br>
> -     pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",<br>
> -             dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);<br>
> -<br>
> -     if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)<br>
> -             unmap_sdma_queues(dqm);<br>
> -<br>
>        retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,<br>
>                        filter, filter_param, false, 0);<br>
>        if (retval)<br>
> @@ -1444,13 +1419,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,<br>
>   <br>
>        deallocate_doorbell(qpd, q);<br>
>   <br>
> -     if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {<br>
> -             dqm->sdma_queue_count--;<br>
> +     if (q->properties.type == KFD_QUEUE_TYPE_SDMA)<br>
>                deallocate_sdma_queue(dqm, q);<br>
> -     } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {<br>
> -             dqm->xgmi_sdma_queue_count--;<br>
> +     else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)<br>
>                deallocate_sdma_queue(dqm, q);<br>
> -     }<br>
>   <br>
>        list_del(&q->list);<br>
>        qpd->queue_count--;<br>
> @@ -1673,13 +1645,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,<br>
>   <br>
>        /* Clear all user mode queues */<br>
>        list_for_each_entry(q, &qpd->queues_list, list) {<br>
> -             if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {<br>
> -                     dqm->sdma_queue_count--;<br>
> +             if (q->properties.type == KFD_QUEUE_TYPE_SDMA)<br>
>                        deallocate_sdma_queue(dqm, q);<br>
> -             } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {<br>
> -                     dqm->xgmi_sdma_queue_count--;<br>
> +             else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)<br>
>                        deallocate_sdma_queue(dqm, q);<br>
> -             }<br>
>   <br>
>                if (q->properties.is_active)<br>
>                        decrement_queue_count(dqm, q->properties.type);<br>
> @@ -1759,8 +1728,7 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)<br>
>        struct kfd_dev *dev = dqm->dev;<br>
>        struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;<br>
>        uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *<br>
> -             (dev->device_info->num_sdma_engines +<br>
> -             dev->device_info->num_xgmi_sdma_engines) *<br>
> +             get_num_all_sdma_engines(dqm) *<br>
>                dev->device_info->num_sdma_queues_per_engine +<br>
>                dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;<br>
>   <br>
> @@ -2012,8 +1980,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)<br>
>                }<br>
>        }<br>
>   <br>
> -     for (pipe = 0; pipe < get_num_sdma_engines(dqm) +<br>
> -                     get_num_xgmi_sdma_engines(dqm); pipe++) {<br>
> +     for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {<br>
>                for (queue = 0;<br>
>                     queue < dqm->dev->device_info->num_sdma_queues_per_engine;<br>
>                     queue++) {<br>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h<br>
> index 05e0afc04cd9..50d919f814e9 100644<br>
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h<br>
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h<br>
> @@ -182,8 +182,6 @@ struct device_queue_manager {<br>
>        unsigned int            processes_count;<br>
>        unsigned int            active_queue_count;<br>
>        unsigned int            active_cp_queue_count;<br>
> -     unsigned int            sdma_queue_count;<br>
> -     unsigned int            xgmi_sdma_queue_count;<br>
>        unsigned int            total_queue_count;<br>
>        unsigned int            next_pipe_to_allocate;<br>
>        unsigned int            *allocated_queues;<br>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c<br>
> index 3bfa5c8d9654..eb1635ac8988 100644<br>
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c<br>
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c<br>
> @@ -241,16 +241,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,<br>
>        switch (type) {<br>
>        case KFD_QUEUE_TYPE_SDMA:<br>
>        case KFD_QUEUE_TYPE_SDMA_XGMI:<br>
> -             if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count<br>
> -                     >= get_num_sdma_queues(dev->dqm)) ||<br>
> -                     (type == KFD_QUEUE_TYPE_SDMA_XGMI &&<br>
> -                     dev->dqm->xgmi_sdma_queue_count<br>
> -                     >= get_num_xgmi_sdma_queues(dev->dqm))) {<br>
> -                     pr_debug("Over-subscription is not allowed for SDMA.\n");<br>
> -                     retval = -EPERM;<br>
> -                     goto err_create_queue;<br>
> -             }<br>
> -<br>
> +             /* SDMA queues are always allocated statically no matter<br>
> +              * which scheduler mode is used. We also do not need to<br>
> +              * check whether a SDMA queue can be allocated here, because<br>
> +              * allocate_sdma_queue() in create_queue() has the<br>
> +              * corresponding check logic.<br>
> +              */<br>
>                retval = init_user_queue(pqm, dev, &q, properties, f, *qid);<br>
>                if (retval != 0)<br>
>                        goto err_create_queue;<br>
</div>
</span></font></div>
</div>
</body>
</html>