[PATCH 2/9] drm/amdkfd: Simplify execute and unmap queue APIs

Oded Gabbay oded.gabbay at gmail.com
Sun Oct 8 12:02:34 UTC 2017


On Wed, Sep 27, 2017 at 7:09 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> From: Yong Zhao <yong.zhao at amd.com>
>
> Pass filter parameters directly to unmap_queues_cpsch.
>
> Also remove lock parameters and do locking explicitly in the caller.
>
> Signed-off-by: Yong Zhao <yong.zhao at amd.com>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 75 +++++++++-------------
>  1 file changed, 32 insertions(+), 43 deletions(-)
>
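For anyone following along: the locking half of this change is the
classic conditional-locking cleanup. Instead of threading a "bool lock"
flag through the helpers, the helpers now assume the caller holds
dqm->lock and the callers take it explicitly. A minimal stand-alone
sketch of the before/after pattern (user-space pthreads so it compiles
on its own; this is not the real kfd code):

    #include <pthread.h>

    struct dqm {
            pthread_mutex_t lock;
            int queue_count;
    };

    /* Before: the helper locks conditionally, so every call site must
     * pass the right flag; a wrong flag either deadlocks or races. */
    static int execute_queues_old(struct dqm *dqm, int do_lock)
    {
            int ret;

            if (do_lock)
                    pthread_mutex_lock(&dqm->lock);
            ret = dqm->queue_count;         /* stand-in for the real work */
            if (do_lock)
                    pthread_mutex_unlock(&dqm->lock);
            return ret;
    }

    /* After: dqm->lock has to be held before calling this function. */
    static int execute_queues_new(struct dqm *dqm)
    {
            return dqm->queue_count;        /* stand-in for the real work */
    }

    int main(void)
    {
            struct dqm dqm = { .lock = PTHREAD_MUTEX_INITIALIZER,
                               .queue_count = 2 };
            int ret;

            ret = execute_queues_old(&dqm, 1);      /* old style: helper locks */

            pthread_mutex_lock(&dqm.lock);          /* new style: caller locks */
            ret += execute_queues_new(&dqm);
            pthread_mutex_unlock(&dqm.lock);
            return ret;
    }

The win is that lock ownership becomes decidable by looking at one
function, instead of tracing flag values through the call graph.
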
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 1995e0a..be925a4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -44,9 +44,10 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
>                                         struct queue *q,
>                                         struct qcm_process_device *qpd);
>
> -static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
> +static int execute_queues_cpsch(struct device_queue_manager *dqm);
>  static int unmap_queues_cpsch(struct device_queue_manager *dqm,
> -                               bool static_queues_included, bool lock);
> +                               enum kfd_unmap_queues_filter filter,
> +                               uint32_t filter_param);
>
>  static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
>                                         struct queue *q,
> @@ -379,7 +380,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
>                 dqm->queue_count--;
>
>         if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
> -               retval = execute_queues_cpsch(dqm, false);
> +               retval = execute_queues_cpsch(dqm);
>
>  out_unlock:
>         mutex_unlock(&dqm->lock);
> @@ -695,7 +696,9 @@ static int start_cpsch(struct device_queue_manager *dqm)
>
>         init_interrupts(dqm);
>
> -       execute_queues_cpsch(dqm, true);
> +       mutex_lock(&dqm->lock);
> +       execute_queues_cpsch(dqm);
> +       mutex_unlock(&dqm->lock);
>
>         return 0;
>  fail_allocate_vidmem:
> @@ -707,7 +710,9 @@ static int start_cpsch(struct device_queue_manager *dqm)
>
>  static int stop_cpsch(struct device_queue_manager *dqm)
>  {
> -       unmap_queues_cpsch(dqm, true, true);
> +       mutex_lock(&dqm->lock);
> +       unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
> +       mutex_unlock(&dqm->lock);
>
>         kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
>         pm_uninit(&dqm->packets);
> @@ -738,7 +743,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
>         list_add(&kq->list, &qpd->priv_queue_list);
>         dqm->queue_count++;
>         qpd->is_debug = true;
> -       execute_queues_cpsch(dqm, false);
> +       execute_queues_cpsch(dqm);
>         mutex_unlock(&dqm->lock);
>
>         return 0;
> @@ -750,11 +755,11 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
>  {
>         mutex_lock(&dqm->lock);
>         /* here we actually preempt the DIQ */
> -       unmap_queues_cpsch(dqm, true, false);
> +       unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
>         list_del(&kq->list);
>         dqm->queue_count--;
>         qpd->is_debug = false;
> -       execute_queues_cpsch(dqm, false);
> +       execute_queues_cpsch(dqm);
>         /*
>          * Unconditionally decrement this counter, regardless of the queue's
>          * type.
> @@ -813,7 +818,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>         list_add(&q->list, &qpd->queues_list);
>         if (q->properties.is_active) {
>                 dqm->queue_count++;
> -               retval = execute_queues_cpsch(dqm, false);
> +               retval = execute_queues_cpsch(dqm);
>         }
>
>         if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
> @@ -857,19 +862,18 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm,
>                         sdma_engine);
>  }
>
> +/* dqm->lock mutex has to be locked before calling this function */
>  static int unmap_queues_cpsch(struct device_queue_manager *dqm,
> -                               bool static_queues_included, bool lock)
> +                               enum kfd_unmap_queues_filter filter,
> +                               uint32_t filter_param)
>  {
>         int retval;
> -       enum kfd_unmap_queues_filter filter;
>         struct kfd_process_device *pdd;
>
>         retval = 0;
>
> -       if (lock)
> -               mutex_lock(&dqm->lock);
>         if (!dqm->active_runlist)
> -               goto out;
> +               return retval;
>
>         pr_debug("Before destroying queues, sdma queue count is : %u\n",
>                 dqm->sdma_queue_count);
> @@ -879,14 +883,10 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
>                 unmap_sdma_queues(dqm, 1);
>         }
>
> -       filter = static_queues_included ?
> -                       KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
> -                       KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
> -
>         retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
> -                       filter, 0, false, 0);
> +                       filter, filter_param, false, 0);
>         if (retval)
> -               goto out;
> +               return retval;
>
>         *dqm->fence_addr = KFD_FENCE_INIT;
>         pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
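
The filter half of the change is worth a remark too: the old bool could
only ever express two filter modes, so the helper's internal mapping
(removed above) was a hard ceiling. Passing the enum and filter_param
straight through is what lets a future caller ask for, say, a by-PASID
unmap. Roughly, with enum values as I recall them from kfd_priv.h
(treat this as a sketch, not gospel):

    /* Old interface: the bool picked one of only two filters. */
    unmap_queues_cpsch(dqm, true, false);   /* ..._FILTER_ALL_QUEUES */
    unmap_queues_cpsch(dqm, false, false);  /* ..._FILTER_DYNAMIC_QUEUES */

    /* New interface: the caller names the filter and its parameter,
     * so modes like BY_PASID become expressible without another bool.
     * (pasid here is a placeholder for a real process address space ID.) */
    unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
    unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, pasid);
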
> @@ -898,50 +898,39 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
>                 pdd = kfd_get_process_device_data(dqm->dev,
>                                 kfd_get_process(current));
>                 pdd->reset_wavefronts = true;
> -               goto out;
> +               return retval;
>         }
>         pm_release_ib(&dqm->packets);
>         dqm->active_runlist = false;
>
> -out:
> -       if (lock)
> -               mutex_unlock(&dqm->lock);
>         return retval;
>  }
>
> -static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
> +/* dqm->lock mutex has to be locked before calling this function */
> +static int execute_queues_cpsch(struct device_queue_manager *dqm)
>  {
>         int retval;
>
> -       if (lock)
> -               mutex_lock(&dqm->lock);
> -
> -       retval = unmap_queues_cpsch(dqm, false, false);
> +       retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
> +                       0);
>         if (retval) {
>                 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption");
> -               goto out;
> +               return retval;
>         }
>
> -       if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
> -               retval = 0;
> -               goto out;
> -       }
> +       if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
> +               return 0;
>
> -       if (dqm->active_runlist) {
> -               retval = 0;
> -               goto out;
> -       }
> +       if (dqm->active_runlist)
> +               return 0;
>
>         retval = pm_send_runlist(&dqm->packets, &dqm->queues);
>         if (retval) {
>                 pr_err("failed to execute runlist");
> -               goto out;
> +               return retval;
>         }
>         dqm->active_runlist = true;
>
> -out:
> -       if (lock)
> -               mutex_unlock(&dqm->lock);
>         return retval;
>  }
>
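Since the locking rule now lives only in a comment, it may be worth
enforcing it as well. If I remember correctly, lockdep_assert_held()
works on mutexes and compiles to nothing without CONFIG_LOCKDEP, so
something like the following (my suggestion, not part of this patch)
would catch an unlocked caller in debug builds:

    /* dqm->lock mutex has to be locked before calling this function */
    static int execute_queues_cpsch(struct device_queue_manager *dqm)
    {
            lockdep_assert_held(&dqm->lock);        /* no-op without lockdep */

            /* ... body as in the patch above ... */
    }
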
> @@ -984,7 +973,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>         if (q->properties.is_active)
>                 dqm->queue_count--;
>
> -       execute_queues_cpsch(dqm, false);
> +       execute_queues_cpsch(dqm);
>
>         mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
>
> --
> 2.7.4
>

This patch is basically fine, but it actually contains two separate
changes that are not related to each other: the switch to explicit
filter parameters and the move of the locking into the callers. That
makes the patch harder to understand and follow, and it makes future
bisecting harder. Therefore, I took the patch but split it into two
patches. Please tell me if you have any objections.

Oded

