[PATCH v2] drm/amdkfd: Enable GWS based on FW Support
Felix Kuehling
felix.kuehling at amd.com
Fri Jan 17 21:46:48 UTC 2020
On 2020-01-17 4:39 p.m., Joseph Greathouse wrote:
> Rather than only enabling GWS support based on the hws_gws_support
> modparm, also check whether the GPU's HWS firmware supports GWS.
> Leave the old modparm in place in case users want to test GWS
> on GPUs not yet in the support list.
>
> v2: fix broken syntax from the first patch.
>
> Change-Id: Ife6833c2d571f5e7fe0726f9340649ce0ef10443
> Signed-off-by: Joseph Greathouse <Joseph.Greathouse at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 ++--
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 8 +++--
> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 40 ++++++++++++++++++-----
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
> drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +-
> 5 files changed, 41 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 137e76f0e3db..f28d040de3ce 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -688,13 +688,12 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau
>
> /**
> * DOC: hws_gws_support(bool)
> - * Whether HWS support gws barriers. Default value: false (not supported)
> - * This will be replaced with a MEC firmware version check once firmware
> - * is ready
> + * Assume that HWS supports GWS barriers regardless of what firmware version
> + * check says. Default value: false (rely on MEC2 firmware version check).
> */
> bool hws_gws_support;
> module_param(hws_gws_support, bool, 0444);
> -MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)");
> +MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)");
>
> /**
> * DOC: queue_preemption_timeout_ms (int)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 1aebda4bbbe7..275f79ab0900 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1590,9 +1590,6 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
> struct queue *q;
> struct kfd_dev *dev;
>
> - if (!hws_gws_support)
> - return -ENODEV;
> -
> mutex_lock(&p->mutex);
> q = pqm_get_user_queue(&p->pqm, args->queue_id);
>
> @@ -1603,6 +1600,11 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
> goto out_unlock;
> }
>
> + if (!dev->gws) {
> + retval = -ENODEV;
> + goto out_unlock;
> + }
> +
> if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> retval = -ENODEV;
> goto out_unlock;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 2a9e40131735..798ad1c8f799 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -569,6 +569,23 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
> }
> }
>
> +static int kfd_gws_init(struct kfd_dev *kfd)
> +{
> + int ret = 0;
> +
> + if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
> + return 0;
> +
> + if (hws_gws_support
> + || (kfd->device_info->asic_family >= CHIP_VEGA10
> + && kfd->device_info->asic_family <= CHIP_RAVEN
> + && kfd->mec2_fw_version >= 0x1b3))
> + ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
> + amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
> +
> + return ret;
> +}
> +
> bool kgd2kfd_device_init(struct kfd_dev *kfd,
> struct drm_device *ddev,
> const struct kgd2kfd_shared_resources *gpu_resources)
> @@ -578,6 +595,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
> kfd->ddev = ddev;
> kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
> KGD_ENGINE_MEC1);
> + kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
> + KGD_ENGINE_MEC2);
> kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
> KGD_ENGINE_SDMA1);
> kfd->shared_resources = *gpu_resources;
> @@ -598,13 +617,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
> } else
> kfd->max_proc_per_quantum = hws_max_conc_proc;
>
> - /* Allocate global GWS that is shared by all KFD processes */
> - if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
> - amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
> - dev_err(kfd_device, "Could not allocate %d gws\n",
> - amdgpu_amdkfd_get_num_gws(kfd->kgd));
> - goto out;
> - }
> /* calculate max size of mqds needed for queues */
> size = max_num_of_queues_per_device *
> kfd->device_info->mqd_size_aligned;
> @@ -659,6 +671,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
> goto device_queue_manager_error;
> }
>
> + /* If supported on this device, allocate global GWS that is shared
> + * by all KFD processes
> + */
> + if (kfd_gws_init(kfd)) {
> + dev_err(kfd_device, "Could not allocate %d gws\n",
> + amdgpu_amdkfd_get_num_gws(kfd->kgd));
> + goto gws_error;
> + }
> +
> if (kfd_iommu_device_init(kfd)) {
> dev_err(kfd_device, "Error initializing iommuv2\n");
> goto device_iommu_error;
> @@ -688,6 +709,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
> kfd_topology_add_device_error:
> kfd_resume_error:
> device_iommu_error:
> +gws_error:
> device_queue_manager_uninit(kfd->dqm);
> device_queue_manager_error:
> kfd_interrupt_exit(kfd);
> @@ -698,7 +720,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
> kfd_gtt_sa_init_error:
> amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
> alloc_gtt_mem_failure:
> - if (hws_gws_support)
> + if (kfd->gws)
> amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
> dev_err(kfd_device,
> "device %x:%x NOT added due to errors\n",
> @@ -717,7 +739,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
> kfd_doorbell_fini(kfd);
> kfd_gtt_sa_fini(kfd);
> amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
> - if (hws_gws_support)
> + if (kfd->gws)
> amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 8ac680dc90f1..c0b0defc8f7a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -281,6 +281,7 @@ struct kfd_dev {
>
> /* Firmware versions */
> uint16_t mec_fw_version;
> + uint16_t mec2_fw_version;
> uint16_t sdma_fw_version;
>
> /* Maximum process number mapped to HW scheduler */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 203c823d65f1..43a82cf76628 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -1315,7 +1315,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
> gpu->device_info->num_xgmi_sdma_engines;
> dev->node_props.num_sdma_queues_per_engine =
> gpu->device_info->num_sdma_queues_per_engine;
> - dev->node_props.num_gws = (hws_gws_support &&
> + dev->node_props.num_gws = (dev->gpu->gws &&
> dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
> amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
> dev->node_props.num_cp_queues = get_queues_num(dev->gpu->dqm);
More information about the amd-gfx mailing list