[PATCH v2 1/3] drm/amdkfd: force raven as "dgpu" path (v2)

Huang Rui ray.huang at amd.com
Wed Aug 19 02:53:15 UTC 2020


On Tue, Aug 18, 2020 at 11:01:25PM +0800, Kuehling, Felix wrote:
> Am 2020-08-18 um 9:09 a.m. schrieb Huang Rui:
> > We still have a few iommu issues which need to address, so force raven
> > as "dgpu" path for the moment.
> >
> > This is to add the fallback path to bypass IOMMU if IOMMU v2 is disabled
> > or ACPI CRAT table not correct.
> >
> > v2: use ignore_crat parameter to decide whether it will go with IOMMUv2.
> >
> > Signed-off-by: Huang Rui <ray.huang at amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |  5 +++-
> >  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  2 +-
> >  drivers/gpu/drm/amd/amdkfd/kfd_crat.c         | 27 ++++++++++++++++++-
> >  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c       |  4 +--
> >  drivers/gpu/drm/amd/amdkfd/kfd_device.c       |  2 +-
> >  .../amd/amdkfd/kfd_device_queue_manager_v9.c  |  2 +-
> >  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c  |  4 +--
> >  drivers/gpu/drm/amd/amdkfd/kfd_iommu.c        | 10 +++----
> >  drivers/gpu/drm/amd/amdkfd/kfd_topology.c     |  7 ++---
> >  9 files changed, 46 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index a9a4319c24ae..189f9d7e190d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -684,11 +684,14 @@ MODULE_PARM_DESC(debug_largebar,
> >   * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
> >   * table to get information about AMD APUs. This option can serve as a workaround on
> >   * systems with a broken CRAT table.
> > + *
> > + * Default is auto (according to asic type, iommu_v2, and crat table, to decide
> > + * whehter use CRAT)
> >   */
> >  int ignore_crat;
> >  module_param(ignore_crat, int, 0444);
> >  MODULE_PARM_DESC(ignore_crat,
> > -	"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
> > +	"Ignore CRAT table during KFD initialization (0 = auto (default), 1 = ignore CRAT)");
> >  
> >  /**
> >   * DOC: halt_if_hws_hang (int)
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index 1b60e0ed6b5c..20ef754dc62e 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -1258,7 +1258,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
> >  		return true;
> >  	}
> >  
> > -	if (dev->device_info->needs_iommu_device)
> > +	if (dev->device_info->needs_iommu_device && !ignore_crat)
> >  		return false;
> >  
> >  	amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > index 59557e3e206a..98993584802f 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> > @@ -22,6 +22,7 @@
> >  
> >  #include <linux/pci.h>
> >  #include <linux/acpi.h>
> > +#include <asm/processor.h>
> >  #include "kfd_crat.h"
> >  #include "kfd_priv.h"
> >  #include "kfd_topology.h"
> > @@ -740,6 +741,29 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
> >  	return 0;
> >  }
> >  
> > +
> > +#ifdef CONFIG_ACPI
> > +static void kfd_setup_ignore_crat_option(void)
> > +{
> > +
> > +	if (ignore_crat)
> > +		return;
> > +
> > +#ifndef KFD_SUPPORT_IOMMU_V2
> > +	ignore_crat = 1;
> > +#else
> > +	ignore_crat = 0;
> > +#endif
> > +
> > +	/* Raven series will go with the fallback path to use virtual CRAT */
> > +	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
> > +	    boot_cpu_data.x86 == 0x17) {
> > +		ignore_crat = 1;
> 
> Does your change require Thunk changes? If yes, then changing this by
> default breaks existing user mode and cannot be applied upstream.
> 
> If you expect that this is a temporary workaround, then don't apply it
> here, apply it in your grub config or modprobe config.

I see. Will keep the default using CRAT for Raven as well to make sure the
existing thunk workable.

Thanks,
Ray

> 
> Regards,
>   Felix
> 
> 
> > +	}
> > +
> > +	return;
> > +}
> > +
> >  /*
> >   * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
> >   * copies CRAT from ACPI (if available).
> > @@ -751,7 +775,6 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
> >   *
> >   *	Return 0 if successful else return error code
> >   */
> > -#ifdef CONFIG_ACPI
> >  int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
> >  {
> >  	struct acpi_table_header *crat_table;
> > @@ -775,6 +798,8 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
> >  		return -EINVAL;
> >  	}
> >  
> > +	kfd_setup_ignore_crat_option();
> > +
> >  	if (ignore_crat) {
> >  		pr_info("CRAT table disabled by module option\n");
> >  		return -ENODATA;
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
> > index 3e5904f8876a..0c4161ac4102 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
> > @@ -309,7 +309,7 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
> >  	for (i = 0; i < adw_info->num_watch_points; i++) {
> >  		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
> >  				&cntl, i, pdd->qpd.vmid,
> > -				dbgdev->dev->device_info->needs_iommu_device);
> > +				dbgdev->dev->device_info->needs_iommu_device && !ignore_crat);
> >  
> >  		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
> >  		pr_debug("\t\t%20s %08x\n", "register index :", i);
> > @@ -399,7 +399,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
> >  	for (i = 0; i < adw_info->num_watch_points; i++) {
> >  		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
> >  				&cntl, i, vmid,
> > -				dbgdev->dev->device_info->needs_iommu_device);
> > +				dbgdev->dev->device_info->needs_iommu_device && !ignore_crat);
> >  
> >  		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
> >  		pr_debug("\t\t%20s %08x\n", "register index :", i);
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > index 2c030c2b5b8d..dab44951c4d8 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> > @@ -112,6 +112,7 @@ static const struct kfd_device_info carrizo_device_info = {
> >  	.num_xgmi_sdma_engines = 0,
> >  	.num_sdma_queues_per_engine = 2,
> >  };
> > +#endif
> >  
> >  static const struct kfd_device_info raven_device_info = {
> >  	.asic_family = CHIP_RAVEN,
> > @@ -130,7 +131,6 @@ static const struct kfd_device_info raven_device_info = {
> >  	.num_xgmi_sdma_engines = 0,
> >  	.num_sdma_queues_per_engine = 2,
> >  };
> > -#endif
> >  
> >  static const struct kfd_device_info hawaii_device_info = {
> >  	.asic_family = CHIP_HAWAII,
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
> > index 95a82ac455f2..48b86bdbb680 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
> > @@ -62,7 +62,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
> >  				SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
> >  					SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
> >  		if (amdgpu_noretry &&
> > -		    !dqm->dev->device_info->needs_iommu_device)
> > +		    !(dqm->dev->device_info->needs_iommu_device && !ignore_crat))
> >  			qpd->sh_mem_config |=
> >  				1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
> >  
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> > index 78714f9a8b11..6044d36bceb6 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> > @@ -321,7 +321,7 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
> >  	pdd->lds_base = MAKE_LDS_APP_BASE_VI();
> >  	pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
> >  
> > -	if (!pdd->dev->device_info->needs_iommu_device) {
> > +	if (!(pdd->dev->device_info->needs_iommu_device && !ignore_crat)) {
> >  		/* dGPUs: SVM aperture starting at 0
> >  		 * with small reserved space for kernel.
> >  		 * Set them to CANONICAL addresses.
> > @@ -423,7 +423,7 @@ int kfd_init_apertures(struct kfd_process *process)
> >  				return -EINVAL;
> >  			}
> >  
> > -			if (!dev->device_info->needs_iommu_device) {
> > +			if (!(dev->device_info->needs_iommu_device && !ignore_crat)) {
> >  				/* dGPUs: the reserved space for kernel
> >  				 * before SVM
> >  				 */
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
> > index 4d3b4188b9a1..c767b524bea4 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
> > @@ -41,7 +41,7 @@ int kfd_iommu_check_device(struct kfd_dev *kfd)
> >  	struct amd_iommu_device_info iommu_info;
> >  	int err;
> >  
> > -	if (!kfd->device_info->needs_iommu_device)
> > +	if (!(kfd->device_info->needs_iommu_device && !ignore_crat))
> >  		return -ENODEV;
> >  
> >  	iommu_info.flags = 0;
> > @@ -63,7 +63,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
> >  	unsigned int pasid_limit;
> >  	int err;
> >  
> > -	if (!kfd->device_info->needs_iommu_device)
> > +	if (!(kfd->device_info->needs_iommu_device && !ignore_crat))
> >  		return 0;
> >  
> >  	iommu_info.flags = 0;
> > @@ -109,7 +109,7 @@ int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
> >  	struct kfd_process *p = pdd->process;
> >  	int err;
> >  
> > -	if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND)
> > +	if (!(dev->device_info->needs_iommu_device && !ignore_crat) || pdd->bound == PDD_BOUND)
> >  		return 0;
> >  
> >  	if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
> > @@ -284,7 +284,7 @@ static void kfd_unbind_processes_from_device(struct kfd_dev *kfd)
> >   */
> >  void kfd_iommu_suspend(struct kfd_dev *kfd)
> >  {
> > -	if (!kfd->device_info->needs_iommu_device)
> > +	if (!(kfd->device_info->needs_iommu_device && !ignore_crat))
> >  		return;
> >  
> >  	kfd_unbind_processes_from_device(kfd);
> > @@ -304,7 +304,7 @@ int kfd_iommu_resume(struct kfd_dev *kfd)
> >  	unsigned int pasid_limit;
> >  	int err;
> >  
> > -	if (!kfd->device_info->needs_iommu_device)
> > +	if (!(kfd->device_info->needs_iommu_device && !ignore_crat))
> >  		return 0;
> >  
> >  	pasid_limit = kfd_get_pasid_limit();
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > index cbb8535abf0c..ad8139594b6f 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> > @@ -545,7 +545,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
> >  		 * If the ASIC is APU except Kaveri, set local memory size
> >  		 * to 0 to disable local memory support
> >  		 */
> > -		if (!dev->gpu->device_info->needs_iommu_device
> > +		if (!(dev->gpu->device_info->needs_iommu_device && !ignore_crat)
> >  			|| dev->gpu->device_info->asic_family == CHIP_KAVERI) {
> >  			amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd,
> >  				&local_mem_info);
> > @@ -1090,6 +1090,7 @@ int kfd_topology_init(void)
> >  						    COMPUTE_UNIT_CPU, NULL,
> >  						    proximity_domain);
> >  		cpu_only_node = 1;
> > +		ignore_crat = 1;
> >  		if (ret) {
> >  			pr_err("Error creating VCRAT table for CPU\n");
> >  			return ret;
> > @@ -1197,7 +1198,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
> >  		/* Discrete GPUs need their own topology device list
> >  		 * entries. Don't assign them to CPU/APU nodes.
> >  		 */
> > -		if (!gpu->device_info->needs_iommu_device &&
> > +		if (!(gpu->device_info->needs_iommu_device && !ignore_crat) &&
> >  		    dev->node_props.cpu_cores_count)
> >  			continue;
> >  
> > @@ -1452,7 +1453,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
> >  	* Overwrite ATS capability according to needs_iommu_device to fix
> >  	* potential missing corresponding bit in CRAT of BIOS.
> >  	*/
> > -	if (dev->gpu->device_info->needs_iommu_device)
> > +	if (dev->gpu->device_info->needs_iommu_device && !ignore_crat)
> >  		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
> >  	else
> >  		dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;


More information about the amd-gfx mailing list