[PATCH 4/9] drm/amdkfd: Make sched_policy a per-device setting

Felix Kuehling felix.kuehling at amd.com
Wed Jan 31 16:18:14 UTC 2018


On 2018-01-31 10:06 AM, Oded Gabbay wrote:
> On Fri, Jan 5, 2018 at 12:17 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
>> Some dGPUs don't support HWS. Allow them to use a per-device
>> sched_policy that may be different from the global default.
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c           |  3 ++-
>>  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c            |  3 ++-
>>  drivers/gpu/drm/amd/amdkfd/kfd_device.c            |  2 +-
>>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 22 +++++++++++++++++++---
>>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  1 +
>>  .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  3 ++-
>>  6 files changed, 27 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 62c3d9c..6fe2496 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -901,7 +901,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
>>
>>         mutex_unlock(&p->mutex);
>>
>> -       if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0)
>> +       if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
>> +           pdd->qpd.vmid != 0)
>>                 dev->kfd2kgd->set_scratch_backing_va(
>>                         dev->kgd, args->va_addr, pdd->qpd.vmid);
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
>> index 3da25f7..9d4af96 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
>> @@ -33,6 +33,7 @@
>>  #include "kfd_pm4_headers_diq.h"
>>  #include "kfd_dbgmgr.h"
>>  #include "kfd_dbgdev.h"
>> +#include "kfd_device_queue_manager.h"
>>
>>  static DEFINE_MUTEX(kfd_dbgmgr_mutex);
>>
>> @@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
>>         }
>>
>>         /* get actual type of DBGDevice cpsch or not */
>> -       if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
>> +       if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
>>                 type = DBGDEV_TYPE_NODIQ;
>>
>>         kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index 5205b34..6dd50cc 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -352,7 +352,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>>                  kfd->pdev->device);
>>
>>         pr_debug("Starting kfd with the following scheduling policy %d\n",
>> -               sched_policy);
>> +               kfd->dqm->sched_policy);
>>
>>         goto out;
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index d0693fd..3e2f53b 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -385,7 +385,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
>>         prev_active = q->properties.is_active;
>>
>>         /* Make sure the queue is unmapped before updating the MQD */
>> -       if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
>> +       if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
>>                 retval = unmap_queues_cpsch(dqm,
>>                                 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
>>                 if (retval) {
>> @@ -417,7 +417,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
>>         else if (!q->properties.is_active && prev_active)
>>                 dqm->queue_count--;
>>
>> -       if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
>> +       if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
>>                 retval = map_queues_cpsch(dqm);
>>         else if (q->properties.is_active &&
>>                  (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
>> @@ -1097,7 +1097,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
>>                         alternate_aperture_base,
>>                         alternate_aperture_size);
>>
>> -       if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
>> +       if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
>>                 program_sh_mem_settings(dqm, qpd);
>>
>>         pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
>> @@ -1242,6 +1242,22 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
>>         if (!dqm)
>>                 return NULL;
>>
>> +       switch (dev->device_info->asic_family) {
>> +       /* HWS is not available on Hawaii. */
>> +       case CHIP_HAWAII:
>> +       /* HWS depends on CWSR for timely dequeue. CWSR is not
>> +        * available on Tonga.
>> +        *
>> +        * FIXME: This argument also applies to Kaveri.
> So why not add here "case CHIP_KAVERI:" ?

Right.

>
>> +        */
>> +       case CHIP_TONGA:
>> +               dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
>> +               break;
>> +       default:
>> +               dqm->sched_policy = sched_policy;
>> +               break;
>> +       }
>> +
>>         dqm->dev = dev;
>>         switch (sched_policy) {
> This should be changed to:
> switch (dqm->sched_policy) {

The fix is in my latest patch series ([PATCH 12/25] drm/amdkfd: Use
per-device sched_policy) and could be squashed into this patch.

Regards,
  Felix

>
>
>>         case KFD_SCHED_POLICY_HWS:
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> index c61b693..9fdc9c2 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> @@ -180,6 +180,7 @@ struct device_queue_manager {
>>         unsigned int            *fence_addr;
>>         struct kfd_mem_obj      *fence_mem;
>>         bool                    active_runlist;
>> +       int                     sched_policy;
>>  };
>>
>>  void device_queue_manager_init_cik(
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> index 8763806..7817e32 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> @@ -208,7 +208,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
>>
>>         case KFD_QUEUE_TYPE_COMPUTE:
>>                 /* check if there is over subscription */
>> -               if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
>> +               if ((dev->dqm->sched_policy ==
>> +                    KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
>>                 ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
>>                 (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
>>                         pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
>> --
>> 2.7.4
>>



More information about the amd-gfx mailing list