[PATCH 3/9] drm/amdkfd: Make IOMMUv2 code conditional
Felix Kuehling
felix.kuehling at amd.com
Wed Jan 31 16:14:58 UTC 2018
On 2018-01-31 10:00 AM, Oded Gabbay wrote:
> On Wed, Jan 31, 2018 at 4:56 PM, Oded Gabbay <oded.gabbay at gmail.com> wrote:
>> Hi Felix,
>> Please don't spread 19 #ifdefs throughout the code.
>> I suggest to put one #ifdef in linux/amd-iommu.h itself around all the
>> functions declarations and in the #else section put macros with empty
>> implementations. This is much more readable and maintainable.
I've seen that way of handling #ifdefs. But that would require changing
the IOMMU driver, which will add more time before this patch series can
be applied. Maybe drop just this patch for now, the rest should still apply.
BTW, there is another change coming later that puts a few #ifdef
CONFIG_ACPI around code that depends on ACPI (for getting the CRAT table
and some NUMA-related code). That's another change needed to allow KFD
to compile work on non-x86 architectures without ACPI.
>>
>> Oded
> To emphasize my point, there is a call to amd_iommu_bind_pasid in
> kfd_bind_processes_to_device() which isn't wrapped
There is a fix for that in my latest patch series ([PATCH 11/25]
drm/amdkfd: Add missing #ifdef CONFIG_AMD_IOMMU_V2 guard) that you could
squash with this commit.
> with the #ifdef so
> the compliation breaks. Putting the #ifdefs around the calls is simply
> not scalable.
There are #ifdefs around more than just these calls. For example I'm
removing support for the APU device IDs if there is no IOMMU driver. As
an alternative to changing amd-iommu.h I could try to restructure the
code to put all iommu-related code in one place so the #ifdefs aren't
scattered all over the place.
Regards,
Felix
>
> Oded
>
>>
>> On Fri, Jan 5, 2018 at 12:17 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
>>> dGPUs work without IOMMUv2. Make IOMMUv2 initialization dependent on
>>> ASIC information. Also allow building KFD without IOMMUv2 support.
>>> This is still useful for dGPUs and prepares for enabling KFD on
>>> architectures that don't support AMD IOMMUv2.
>>>
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdkfd/Kconfig | 2 +-
>>> drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 8 +++-
>>> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 62 +++++++++++++++++++++----------
>>> drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 +
>>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +++
>>> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 17 ++++++---
>>> drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +
>>> drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 2 +
>>> 8 files changed, 74 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
>>> index bc5a294..5bbeb95 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/Kconfig
>>> +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
>>> @@ -4,6 +4,6 @@
>>>
>>> config HSA_AMD
>>> tristate "HSA kernel driver for AMD GPU devices"
>>> - depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64
>>> + depends on DRM_AMDGPU && X86_64
>>> help
>>> Enable this if you want to use HSA features on AMD GPU devices.
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> index 2bc2816..3478270 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> @@ -22,7 +22,9 @@
>>>
>>> #include <linux/pci.h>
>>> #include <linux/acpi.h>
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> #include <linux/amd-iommu.h>
>>> +#endif
>>> #include "kfd_crat.h"
>>> #include "kfd_priv.h"
>>> #include "kfd_topology.h"
>>> @@ -1037,15 +1039,17 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
>>> struct crat_subtype_generic *sub_type_hdr;
>>> struct crat_subtype_computeunit *cu;
>>> struct kfd_cu_info cu_info;
>>> - struct amd_iommu_device_info iommu_info;
>>> int avail_size = *size;
>>> uint32_t total_num_of_cu;
>>> int num_of_cache_entries = 0;
>>> int cache_mem_filled = 0;
>>> int ret = 0;
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> + struct amd_iommu_device_info iommu_info;
>>> const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
>>> AMD_IOMMU_DEVICE_FLAG_PRI_SUP |
>>> AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
>>> +#endif
>>> struct kfd_local_mem_info local_mem_info;
>>>
>>> if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
>>> @@ -1106,12 +1110,14 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
>>> /* Check if this node supports IOMMU. During parsing this flag will
>>> * translate to HSA_CAP_ATS_PRESENT
>>> */
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> iommu_info.flags = 0;
>>> if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) {
>>> if ((iommu_info.flags & required_iommu_flags) ==
>>> required_iommu_flags)
>>> cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
>>> }
>>> +#endif
>>>
>>> crat_table->length += sub_type_hdr->length;
>>> crat_table->total_entries++;
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>> index fafe971..5205b34 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>> @@ -20,7 +20,9 @@
>>> * OTHER DEALINGS IN THE SOFTWARE.
>>> */
>>>
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> #include <linux/amd-iommu.h>
>>> +#endif
>>> #include <linux/bsearch.h>
>>> #include <linux/pci.h>
>>> #include <linux/slab.h>
>>> @@ -31,6 +33,7 @@
>>>
>>> #define MQD_SIZE_ALIGNED 768
>>>
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> static const struct kfd_device_info kaveri_device_info = {
>>> .asic_family = CHIP_KAVERI,
>>> .max_pasid_bits = 16,
>>> @@ -41,6 +44,7 @@ static const struct kfd_device_info kaveri_device_info = {
>>> .num_of_watch_points = 4,
>>> .mqd_size_aligned = MQD_SIZE_ALIGNED,
>>> .supports_cwsr = false,
>>> + .needs_iommu_device = true,
>>> .needs_pci_atomics = false,
>>> };
>>>
>>> @@ -54,8 +58,10 @@ static const struct kfd_device_info carrizo_device_info = {
>>> .num_of_watch_points = 4,
>>> .mqd_size_aligned = MQD_SIZE_ALIGNED,
>>> .supports_cwsr = true,
>>> + .needs_iommu_device = true,
>>> .needs_pci_atomics = false,
>>> };
>>> +#endif
>>>
>>> struct kfd_deviceid {
>>> unsigned short did;
>>> @@ -64,6 +70,7 @@ struct kfd_deviceid {
>>>
>>> /* Please keep this sorted by increasing device id. */
>>> static const struct kfd_deviceid supported_devices[] = {
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> { 0x1304, &kaveri_device_info }, /* Kaveri */
>>> { 0x1305, &kaveri_device_info }, /* Kaveri */
>>> { 0x1306, &kaveri_device_info }, /* Kaveri */
>>> @@ -91,6 +98,7 @@ static const struct kfd_deviceid supported_devices[] = {
>>> { 0x9875, &carrizo_device_info }, /* Carrizo */
>>> { 0x9876, &carrizo_device_info }, /* Carrizo */
>>> { 0x9877, &carrizo_device_info } /* Carrizo */
>>> +#endif
>>> };
>>>
>>> static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
>>> @@ -161,6 +169,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>>> return kfd;
>>> }
>>>
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> static bool device_iommu_pasid_init(struct kfd_dev *kfd)
>>> {
>>> const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP |
>>> @@ -231,6 +240,7 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
>>>
>>> return AMD_IOMMU_INV_PRI_RSP_INVALID;
>>> }
>>> +#endif /* CONFIG_AMD_IOMMU_V2 */
>>>
>>> static void kfd_cwsr_init(struct kfd_dev *kfd)
>>> {
>>> @@ -321,12 +331,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>>> goto device_queue_manager_error;
>>> }
>>>
>>> - if (!device_iommu_pasid_init(kfd)) {
>>> - dev_err(kfd_device,
>>> - "Error initializing iommuv2 for device %x:%x\n",
>>> - kfd->pdev->vendor, kfd->pdev->device);
>>> - goto device_iommu_pasid_error;
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> + if (kfd->device_info->needs_iommu_device) {
>>> + if (!device_iommu_pasid_init(kfd)) {
>>> + dev_err(kfd_device, "Error initializing iommuv2\n");
>>> + goto device_iommu_pasid_error;
>>> + }
>>> }
>>> +#endif
>>>
>>> kfd_cwsr_init(kfd);
>>>
>>> @@ -386,11 +398,16 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
>>>
>>> kfd->dqm->ops.stop(kfd->dqm);
>>>
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> + if (!kfd->device_info->needs_iommu_device)
>>> + return;
>>> +
>>> kfd_unbind_processes_from_device(kfd);
>>>
>>> amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
>>> amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
>>> amd_iommu_free_device(kfd->pdev);
>>> +#endif
>>> }
>>>
>>> int kgd2kfd_resume(struct kfd_dev *kfd)
>>> @@ -405,19 +422,24 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
>>> static int kfd_resume(struct kfd_dev *kfd)
>>> {
>>> int err = 0;
>>> - unsigned int pasid_limit = kfd_get_pasid_limit();
>>>
>>> - err = amd_iommu_init_device(kfd->pdev, pasid_limit);
>>> - if (err)
>>> - return -ENXIO;
>>> - amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
>>> - iommu_pasid_shutdown_callback);
>>> - amd_iommu_set_invalid_ppr_cb(kfd->pdev,
>>> - iommu_invalid_ppr_cb);
>>> -
>>> - err = kfd_bind_processes_to_device(kfd);
>>> - if (err)
>>> - goto processes_bind_error;
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> + if (kfd->device_info->needs_iommu_device) {
>>> + unsigned int pasid_limit = kfd_get_pasid_limit();
>>> +
>>> + err = amd_iommu_init_device(kfd->pdev, pasid_limit);
>>> + if (err)
>>> + return -ENXIO;
>>> + amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
>>> + iommu_pasid_shutdown_callback);
>>> + amd_iommu_set_invalid_ppr_cb(kfd->pdev,
>>> + iommu_invalid_ppr_cb);
>>> +
>>> + err = kfd_bind_processes_to_device(kfd);
>>> + if (err)
>>> + goto processes_bind_error;
>>> + }
>>> +#endif
>>>
>>> err = kfd->dqm->ops.start(kfd->dqm);
>>> if (err) {
>>> @@ -431,8 +453,10 @@ static int kfd_resume(struct kfd_dev *kfd)
>>>
>>> dqm_start_error:
>>> processes_bind_error:
>>> - amd_iommu_free_device(kfd->pdev);
>>> -
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> + if (kfd->device_info->needs_iommu_device)
>>> + amd_iommu_free_device(kfd->pdev);
>>> +#endif
>>> return err;
>>> }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>> index 93aae5c..f770dc7 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>> @@ -837,6 +837,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
>>> }
>>> }
>>>
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
>>> unsigned long address, bool is_write_requested,
>>> bool is_execute_requested)
>>> @@ -905,6 +906,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
>>> mutex_unlock(&p->event_mutex);
>>> kfd_unref_process(p);
>>> }
>>> +#endif /* CONFIG_AMD_IOMMU_V2_MODULE */
>>>
>>> void kfd_signal_hw_exception_event(unsigned int pasid)
>>> {
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> index eebfb1e..9f4766c 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -158,6 +158,7 @@ struct kfd_device_info {
>>> uint8_t num_of_watch_points;
>>> uint16_t mqd_size_aligned;
>>> bool supports_cwsr;
>>> + bool needs_iommu_device;
>>> bool needs_pci_atomics;
>>> };
>>>
>>> @@ -617,9 +618,11 @@ void kfd_unref_process(struct kfd_process *p);
>>>
>>> struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
>>> struct kfd_process *p);
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> int kfd_bind_processes_to_device(struct kfd_dev *dev);
>>> void kfd_unbind_processes_from_device(struct kfd_dev *dev);
>>> void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid);
>>> +#endif
>>> struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
>>> struct kfd_process *p);
>>> struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
>>> @@ -784,9 +787,11 @@ int kfd_wait_on_events(struct kfd_process *p,
>>> uint32_t *wait_result);
>>> void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
>>> uint32_t valid_id_bits);
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> void kfd_signal_iommu_event(struct kfd_dev *dev,
>>> unsigned int pasid, unsigned long address,
>>> bool is_write_requested, bool is_execute_requested);
>>> +#endif
>>> void kfd_signal_hw_exception_event(unsigned int pasid);
>>> int kfd_set_event(struct kfd_process *p, uint32_t event_id);
>>> int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> index a22fb071..1d0e02c 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> @@ -173,14 +173,17 @@ static void kfd_process_wq_release(struct work_struct *work)
>>> {
>>> struct kfd_process *p = container_of(work, struct kfd_process,
>>> release_work);
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> struct kfd_process_device *pdd;
>>>
>>> pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid);
>>>
>>> list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
>>> - if (pdd->bound == PDD_BOUND)
>>> + if (pdd->bound == PDD_BOUND &&
>>> + pdd->dev->device_info->needs_iommu_device)
>>> amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
>>> }
>>> +#endif
>>>
>>> kfd_process_destroy_pdds(p);
>>>
>>> @@ -421,7 +424,6 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
>>> struct kfd_process *p)
>>> {
>>> struct kfd_process_device *pdd;
>>> - int err;
>>>
>>> pdd = kfd_get_process_device_data(dev, p);
>>> if (!pdd) {
>>> @@ -436,9 +438,14 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
>>> return ERR_PTR(-EINVAL);
>>> }
>>>
>>> - err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
>>> - if (err < 0)
>>> - return ERR_PTR(err);
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> + if (dev->device_info->needs_iommu_device) {
>>> + int err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
>>> + p->lead_thread);
>>> + if (err < 0)
>>> + return ERR_PTR(err);
>>> + }
>>> +#endif
>>>
>>> pdd->bound = PDD_BOUND;
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>>> index c6a7609..f57c305 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>>> @@ -875,6 +875,7 @@ static void find_system_memory(const struct dmi_header *dm,
>>> */
>>> static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
>>> {
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> struct kfd_perf_properties *props;
>>>
>>> if (amd_iommu_pc_supported()) {
>>> @@ -886,6 +887,7 @@ static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
>>> amd_iommu_pc_get_max_counters(0); /* assume one iommu */
>>> list_add_tail(&props->list, &kdev->perf_props);
>>> }
>>> +#endif
>>>
>>> return 0;
>>> }
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
>>> index 53fca1f..111fda2 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
>>> @@ -183,8 +183,10 @@ struct kfd_topology_device *kfd_create_topology_device(
>>> struct list_head *device_list);
>>> void kfd_release_topology_device_list(struct list_head *device_list);
>>>
>>> +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
>>> extern bool amd_iommu_pc_supported(void);
>>> extern u8 amd_iommu_pc_get_max_banks(u16 devid);
>>> extern u8 amd_iommu_pc_get_max_counters(u16 devid);
>>> +#endif
>>>
>>> #endif /* __KFD_TOPOLOGY_H__ */
>>> --
>>> 2.7.4
>>>
More information about the amd-gfx
mailing list