[PATCH 4/9] drm/amdkfd: Make sched_policy a per-device setting

Oded Gabbay oded.gabbay at gmail.com
Wed Jan 31 15:06:08 UTC 2018


On Fri, Jan 5, 2018 at 12:17 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> Some dGPUs don't support HWS. Allow them to use a per-device
> sched_policy that may be different from the global default.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c           |  3 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c            |  3 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c            |  2 +-
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 22 +++++++++++++++++++---
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  1 +
>  .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  3 ++-
>  6 files changed, 27 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 62c3d9c..6fe2496 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -901,7 +901,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
>
>         mutex_unlock(&p->mutex);
>
> -       if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0)
> +       if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
> +           pdd->qpd.vmid != 0)
>                 dev->kfd2kgd->set_scratch_backing_va(
>                         dev->kgd, args->va_addr, pdd->qpd.vmid);
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
> index 3da25f7..9d4af96 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
> @@ -33,6 +33,7 @@
>  #include "kfd_pm4_headers_diq.h"
>  #include "kfd_dbgmgr.h"
>  #include "kfd_dbgdev.h"
> +#include "kfd_device_queue_manager.h"
>
>  static DEFINE_MUTEX(kfd_dbgmgr_mutex);
>
> @@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
>         }
>
>         /* get actual type of DBGDevice cpsch or not */
> -       if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
> +       if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
>                 type = DBGDEV_TYPE_NODIQ;
>
>         kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 5205b34..6dd50cc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -352,7 +352,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>                  kfd->pdev->device);
>
>         pr_debug("Starting kfd with the following scheduling policy %d\n",
> -               sched_policy);
> +               kfd->dqm->sched_policy);
>
>         goto out;
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index d0693fd..3e2f53b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -385,7 +385,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
>         prev_active = q->properties.is_active;
>
>         /* Make sure the queue is unmapped before updating the MQD */
> -       if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
> +       if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
>                 retval = unmap_queues_cpsch(dqm,
>                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
>                 if (retval) {
> @@ -417,7 +417,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
>         else if (!q->properties.is_active && prev_active)
>                 dqm->queue_count--;
>
> -       if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
> +       if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
>                 retval = map_queues_cpsch(dqm);
>         else if (q->properties.is_active &&
>                  (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
> @@ -1097,7 +1097,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
>                         alternate_aperture_base,
>                         alternate_aperture_size);
>
> -       if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
> +       if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
>                 program_sh_mem_settings(dqm, qpd);
>
>         pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
> @@ -1242,6 +1242,22 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
>         if (!dqm)
>                 return NULL;
>
> +       switch (dev->device_info->asic_family) {
> +       /* HWS is not available on Hawaii. */
> +       case CHIP_HAWAII:
> +       /* HWS depends on CWSR for timely dequeue. CWSR is not
> +        * available on Tonga.
> +        *
> +        * FIXME: This argument also applies to Kaveri.
So why not add "case CHIP_KAVERI:" here?

> +        */
> +       case CHIP_TONGA:
> +               dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
> +               break;
> +       default:
> +               dqm->sched_policy = sched_policy;
> +               break;
> +       }
> +
>         dqm->dev = dev;
>         switch (sched_policy) {
This should switch on the per-device value set just above, not the global one:
switch (dqm->sched_policy) {


>         case KFD_SCHED_POLICY_HWS:
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index c61b693..9fdc9c2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -180,6 +180,7 @@ struct device_queue_manager {
>         unsigned int            *fence_addr;
>         struct kfd_mem_obj      *fence_mem;
>         bool                    active_runlist;
> +       int                     sched_policy;
>  };
>
>  void device_queue_manager_init_cik(
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index 8763806..7817e32 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -208,7 +208,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
>
>         case KFD_QUEUE_TYPE_COMPUTE:
>                 /* check if there is over subscription */
> -               if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
> +               if ((dev->dqm->sched_policy ==
> +                    KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
>                 ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
>                 (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
>                         pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
> --
> 2.7.4
>


More information about the amd-gfx mailing list