[PATCH 4/9] drm/amdkfd: Make sched_policy a per-device setting
Felix Kuehling
felix.kuehling at amd.com
Wed Jan 31 16:18:14 UTC 2018
On 2018-01-31 10:06 AM, Oded Gabbay wrote:
> On Fri, Jan 5, 2018 at 12:17 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
>> Some dGPUs don't support HWS. Allow them to use a per-device
>> sched_policy that may be different from the global default.
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 ++-
>> drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 3 ++-
>> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 +-
>> .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 22 +++++++++++++++++++---
>> .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 1 +
>> .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 ++-
>> 6 files changed, 27 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 62c3d9c..6fe2496 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -901,7 +901,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
>>
>> mutex_unlock(&p->mutex);
>>
>> - if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0)
>> + if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
>> + pdd->qpd.vmid != 0)
>> dev->kfd2kgd->set_scratch_backing_va(
>> dev->kgd, args->va_addr, pdd->qpd.vmid);
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
>> index 3da25f7..9d4af96 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
>> @@ -33,6 +33,7 @@
>> #include "kfd_pm4_headers_diq.h"
>> #include "kfd_dbgmgr.h"
>> #include "kfd_dbgdev.h"
>> +#include "kfd_device_queue_manager.h"
>>
>> static DEFINE_MUTEX(kfd_dbgmgr_mutex);
>>
>> @@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
>> }
>>
>> /* get actual type of DBGDevice cpsch or not */
>> - if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
>> + if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
>> type = DBGDEV_TYPE_NODIQ;
>>
>> kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index 5205b34..6dd50cc 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -352,7 +352,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>> kfd->pdev->device);
>>
>> pr_debug("Starting kfd with the following scheduling policy %d\n",
>> - sched_policy);
>> + kfd->dqm->sched_policy);
>>
>> goto out;
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index d0693fd..3e2f53b 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -385,7 +385,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
>> prev_active = q->properties.is_active;
>>
>> /* Make sure the queue is unmapped before updating the MQD */
>> - if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
>> + if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
>> retval = unmap_queues_cpsch(dqm,
>> KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
>> if (retval) {
>> @@ -417,7 +417,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
>> else if (!q->properties.is_active && prev_active)
>> dqm->queue_count--;
>>
>> - if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
>> + if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
>> retval = map_queues_cpsch(dqm);
>> else if (q->properties.is_active &&
>> (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
>> @@ -1097,7 +1097,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
>> alternate_aperture_base,
>> alternate_aperture_size);
>>
>> - if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
>> + if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
>> program_sh_mem_settings(dqm, qpd);
>>
>> pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
>> @@ -1242,6 +1242,22 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
>> if (!dqm)
>> return NULL;
>>
>> + switch (dev->device_info->asic_family) {
>> + /* HWS is not available on Hawaii. */
>> + case CHIP_HAWAII:
>> + /* HWS depends on CWSR for timely dequeue. CWSR is not
>> + * available on Tonga.
>> + *
>> + * FIXME: This argument also applies to Kaveri.
> So why not add here "case CHIP_KAVERI:" ?
Right.
>
>> + */
>> + case CHIP_TONGA:
>> + dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
>> + break;
>> + default:
>> + dqm->sched_policy = sched_policy;
>> + break;
>> + }
>> +
>> dqm->dev = dev;
>> switch (sched_policy) {
> This should be changed to:
> switch (dqm->sched_policy) {
The fix is in my latest patch series ([PATCH 12/25] drm/amdkfd: Use
per-device sched_policy) and could be squashed into this patch.
Regards,
Felix
>
>
>> case KFD_SCHED_POLICY_HWS:
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> index c61b693..9fdc9c2 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
>> @@ -180,6 +180,7 @@ struct device_queue_manager {
>> unsigned int *fence_addr;
>> struct kfd_mem_obj *fence_mem;
>> bool active_runlist;
>> + int sched_policy;
>> };
>>
>> void device_queue_manager_init_cik(
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> index 8763806..7817e32 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> @@ -208,7 +208,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
>>
>> case KFD_QUEUE_TYPE_COMPUTE:
>> /* check if there is over subscription */
>> - if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
>> + if ((dev->dqm->sched_policy ==
>> + KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
>> ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
>> (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
>> pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
>> --
>> 2.7.4
>>
More information about the amd-gfx mailing list