[PATCH 2/3] drm/amdgpu: Add peer-to-peer support among PCIe connected AMD GPUs
Felix Kuehling
felix.kuehling at amd.com
Fri Jun 3 22:26:41 UTC 2022
On 2022-06-03 06:52, Ramesh Errabolu wrote:
> Add support for peer-to-peer communication among AMD GPUs over PCIe
> bus. Support REQUIRES enablement of config HSA_AMD_P2P.
>
> Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 312 ++++++++++++++----
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 +
> 4 files changed, 294 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index f8b9f27adcf5..5c00ea1df21c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -48,6 +48,7 @@ enum kfd_mem_attachment_type {
> KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
> KFD_MEM_ATT_USERPTR, /* SG bo to DMA map pages from a userptr bo */
> KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */
> + KFD_MEM_ATT_SG /* Tag to DMA map SG BOs */
> };
>
> struct kfd_mem_attachment {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 054e4a76ae2e..8e37eae26e49 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -241,6 +241,42 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
> kfree(bo->kfd_bo);
> }
>
> +/**
> + * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information
> + * about USERPTR or DOOREBELL or MMIO BO.
> + * @adev: Device for which dmamap BO is being created
> + * @mem: BO of peer device that is being DMA mapped. Provides parameters
> + * in building the dmamap BO
> + * @bo_out: Output parameter updated with handle of dmamap BO
> + */
> +static int
> +create_dmamap_sg_bo(struct amdgpu_device *adev,
> + struct kgd_mem *mem, struct amdgpu_bo **bo_out)
> +{
> + struct drm_gem_object *gem_obj;
> + int ret, align;
> +
> + ret = amdgpu_bo_reserve(mem->bo, false);
> + if (ret)
> + return ret;
> +
> + align = 1;
> + ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align,
> + AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE,
> + ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
> +
> + amdgpu_bo_unreserve(mem->bo);
> +
> + if (ret) {
> + pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret);
> + return -EINVAL;
> + }
> +
> + *bo_out = gem_to_amdgpu_bo(gem_obj);
> + (*bo_out)->parent = amdgpu_bo_ref(mem->bo);
> + return ret;
> +}
> +
> /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
> * reservation object.
> *
> @@ -481,6 +517,38 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
> return pte_flags;
> }
>
> +/**
> + * create_sg_table() - Create an sg_table for a contiguous DMA addr range
> + * @addr: The starting address to point to
> + * @size: Size of memory area in bytes being pointed to
> + *
> + * Allocates an instance of sg_table and initializes it to point to memory
> + * area specified by input parameters. The address used to build is assumed
> + * to be DMA mapped, if needed.
> + *
> + * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table
> + * because they are physically contiguous.
> + *
> + * Return: Initialized instance of SG Table or NULL
> + */
> +static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
> +{
> + struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
> +
> + if (!sg)
> + return NULL;
> + if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
> + kfree(sg);
> + return NULL;
> + }
> + sg_dma_address(sg->sgl) = addr;
> + sg->sgl->length = size;
> +#ifdef CONFIG_NEED_SG_DMA_LENGTH
> + sg->sgl->dma_length = size;
> +#endif
> + return sg;
> +}
> +
> static int
> kfd_mem_dmamap_userptr(struct kgd_mem *mem,
> struct kfd_mem_attachment *attachment)
> @@ -545,6 +613,87 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
> return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> }
>
> +/**
> + * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO
> + * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
> + * @attachment: Virtual address attachment of the BO on accessing device
> + *
> + * An access request from the device that owns DOORBELL does not require DMA mapping.
> + * This is because the request doesn't go through PCIe root complex i.e. it instead
> + * loops back. The need to DMA map arises only when accessing peer device's DOORBELL
> + *
> + * In contrast, all access requests for MMIO need to be DMA mapped without regard to
> + * device ownership. This is because access requests for MMIO go through PCIe root
> + * complex.
> + *
> + * This is accomplished in two steps:
> + * - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used
> + * in updating requesting device's page table
> + * - Signal TTM to mark memory pointed to by requesting device's BO as GPU
> + * accessible. This allows an update of requesting device's page table
> + * with entries associated with DOOREBELL or MMIO memory
> + *
> + * This method is invoked in the following contexts:
> + * - Mapping of DOORBELL or MMIO BO of same or peer device
> + * - Validating an evicted DOOREBELL or MMIO BO on device seeking access
> + *
> + * Return: ZERO if successful, NON-ZERO otherwise
> + */
> +static int
> +kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
> + struct kfd_mem_attachment *attachment)
> +{
> + struct ttm_operation_ctx ctx = {.interruptible = true};
> + struct amdgpu_bo *bo = attachment->bo_va->base.bo;
> + struct amdgpu_device *adev = attachment->adev;
> + struct ttm_tt *ttm = bo->tbo.ttm;
> + enum dma_data_direction dir;
> + dma_addr_t dma_addr;
> + bool mmio;
> + int ret;
> +
> + /* Expect SG Table of dmapmap BO to be NULL */
> + mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
> + if (unlikely(ttm->sg)) {
> + pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio);
> + return -EINVAL;
> + }
> +
> + dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
> + DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
> + dma_addr = mem->bo->tbo.sg->sgl->dma_address;
> + pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
> + pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
> + dma_addr = dma_map_resource(adev->dev, dma_addr,
> + mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
> + ret = dma_mapping_error(adev->dev, dma_addr);
> + if (unlikely(ret))
> + return ret;
> + pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
> +
> + ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
> + if (unlikely(!ttm->sg)) {
> + ret = -ENOMEM;
> + goto unmap_sg;
> + }
> +
> + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
> + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> + if (unlikely(ret))
> + goto free_sg;
> +
> + return ret;
> +
> +free_sg:
> + sg_free_table(ttm->sg);
> + kfree(ttm->sg);
> + ttm->sg = NULL;
> +unmap_sg:
> + dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
> + dir, DMA_ATTR_SKIP_CPU_SYNC);
> + return ret;
> +}
> +
> static int
> kfd_mem_dmamap_attachment(struct kgd_mem *mem,
> struct kfd_mem_attachment *attachment)
> @@ -556,6 +705,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem,
> return kfd_mem_dmamap_userptr(mem, attachment);
> case KFD_MEM_ATT_DMABUF:
> return kfd_mem_dmamap_dmabuf(attachment);
> + case KFD_MEM_ATT_SG:
> + return kfd_mem_dmamap_sg_bo(mem, attachment);
> default:
> WARN_ON_ONCE(1);
> }
> @@ -596,6 +747,50 @@ kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
> ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> }
>
> +/**
> + * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO
> + * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
> + * @attachment: Virtual address attachment of the BO on accessing device
> + *
> + * The method performs following steps:
> + * - Signal TTM to mark memory pointed to by BO as GPU inaccessible
> + * - Free SG Table that is used to encapsulate DMA mapped memory of
> + * peer device's DOORBELL or MMIO memory
> + *
> + * This method is invoked in the following contexts:
> + * UNMapping of DOORBELL or MMIO BO on a device having access to its memory
> + * Eviction of DOOREBELL or MMIO BO on device having access to its memory
> + *
> + * Return: void
> + */
> +static void
> +kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
> + struct kfd_mem_attachment *attachment)
> +{
> + struct ttm_operation_ctx ctx = {.interruptible = true};
> + struct amdgpu_bo *bo = attachment->bo_va->base.bo;
> + struct amdgpu_device *adev = attachment->adev;
> + struct ttm_tt *ttm = bo->tbo.ttm;
> + enum dma_data_direction dir;
> +
> + if (unlikely(!ttm->sg)) {
> + pr_err("SG Table of BO is UNEXPECTEDLY NULL");
> + return;
> + }
> +
> + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
> + ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> +
> + dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
> + DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
> + dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address,
> + ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
> + sg_free_table(ttm->sg);
> + kfree(ttm->sg);
> + ttm->sg = NULL;
> + bo->tbo.sg = NULL;
> +}
> +
> static void
> kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
> struct kfd_mem_attachment *attachment)
> @@ -609,38 +804,14 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
> case KFD_MEM_ATT_DMABUF:
> kfd_mem_dmaunmap_dmabuf(attachment);
> break;
> + case KFD_MEM_ATT_SG:
> + kfd_mem_dmaunmap_sg_bo(mem, attachment);
> + break;
> default:
> WARN_ON_ONCE(1);
> }
> }
>
> -static int
> -kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
> - struct amdgpu_bo **bo)
> -{
> - unsigned long bo_size = mem->bo->tbo.base.size;
> - struct drm_gem_object *gobj;
> - int ret;
> -
> - ret = amdgpu_bo_reserve(mem->bo, false);
> - if (ret)
> - return ret;
> -
> - ret = amdgpu_gem_object_create(adev, bo_size, 1,
> - AMDGPU_GEM_DOMAIN_CPU,
> - AMDGPU_GEM_CREATE_PREEMPTIBLE,
> - ttm_bo_type_sg, mem->bo->tbo.base.resv,
> - &gobj);
> - amdgpu_bo_unreserve(mem->bo);
> - if (ret)
> - return ret;
> -
> - *bo = gem_to_amdgpu_bo(gobj);
> - (*bo)->parent = amdgpu_bo_ref(mem->bo);
> -
> - return 0;
> -}
> -
> static int
> kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
> struct amdgpu_bo **bo)
> @@ -691,6 +862,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
> uint64_t va = mem->va;
> struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
> struct amdgpu_bo *bo[2] = {NULL, NULL};
> + bool same_hive = false;
> + bool accessible = false;
> int i, ret;
>
> if (!va) {
> @@ -698,6 +871,31 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
> return -EINVAL;
> }
>
> + /* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices
> + *
> + * The access path of MMIO and DOORBELL BOs of is always over PCIe.
> + * In contrast the access path of VRAM BOs depens upon the type of
> + * link that connects the peer device. Access over PCIe is allowed
> + * if peer device has large BAR. In contrast, access over xGMI is
> + * allowed for both small and large BAR configurations of peer device
> + */
> +
> + if ((adev != bo_adev) &&
> + ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
The parentheses around == and != are unnecessary.
> + (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
> + (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
> + accessible = amdgpu_device_is_peer_accessible(bo_adev, adev);
This can still call amdgpu_device_is_peer_accessible unnecessarily on
XGMI connected GPUs.
This whole block could be simplified like this
if (adev != bo_adev &&
(mem->domain == AMDGPU_GEM_DOMAIN_VRAM ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev))
return -EINVAL;
}
The short-circuit of the && operator make sure
amdgpu_device_is_peer_accessible is only called if necessary. And you
don't need the bool accessible variable any more.
> + if ((mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
> + (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
> + if (!accessible)
> + return -EINVAL;
> + if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
> + same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
> + if (!same_hive && !accessible)
> + return -EINVAL;
> + }
> + }
> +
> for (i = 0; i <= is_aql; i++) {
> attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
> if (unlikely(!attachment[i])) {
> @@ -708,9 +906,9 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
> pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
> va + bo_size, vm);
>
> - if (adev == bo_adev ||
> - (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
> - (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && amdgpu_xgmi_same_hive(adev, bo_adev))) {
> + if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
> + (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
> + same_hive) {
> /* Mappings on the local GPU, or VRAM mappings in the
> * local hive, or userptr mapping IOMMU direct map mode
> * share the original BO
> @@ -726,26 +924,38 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
> } else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
> /* Create an SG BO to DMA-map userptrs on other GPUs */
> attachment[i]->type = KFD_MEM_ATT_USERPTR;
> - ret = kfd_mem_attach_userptr(adev, mem, &bo[i]);
> + ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
> if (ret)
> goto unwind;
> } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
> mem->bo->tbo.type != ttm_bo_type_sg) {
> - /* GTT BOs use DMA-mapping ability of dynamic-attach
> - * DMA bufs. TODO: The same should work for VRAM on
> - * large-BAR GPUs.
> - */
> + /* GTT BOs use DMA-mapping ability of dynamic-attach DMA bufs */
> + attachment[i]->type = KFD_MEM_ATT_DMABUF;
> + ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
> + if (ret)
> + goto unwind;
> + /* Enable acces to VRAM BOs of peer devices */
> +#if defined(CONFIG_HSA_AMD_P2P)
> + } else if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM &&
> + mem->bo->tbo.type == ttm_bo_type_device) {
> attachment[i]->type = KFD_MEM_ATT_DMABUF;
> ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
Now this looks identical to the GTT case. Since the CONFIG_HSA_AMD_P2P
check is already done in amdgpu_device_is_peer_accessible, you can
probably just merge the two if-cases into one now:
...
} else if ((mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
mem->bo->tbo.type != ttm_bo_type_sg) ||
mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
/* GTT and VRAM BOs use DMA-mapping ability of dynamic-attach
* DMA bufs.
*/
attachment[i]->type = KFD_MEM_ATT_DMABUF;
ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
if (ret)
goto unwind;
...
If you move this after the doorbell and MMIO case, you can even drop the
mem->bo->tbo.type != ttm_bo_type_sg check.
> if (ret)
> goto unwind;
> + pr_debug("Employ DMABUF mechanim to enable peer GPU access\n");
> +#endif
> + /* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */
> + } else if ((mem->bo->tbo.type == ttm_bo_type_sg) &&
> + ((mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
> + (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
I think userptr, doorbell and MMIO BOs are the only ones that use SGs.
So you don't really need to check the mem->alloc_flags here. Except
maybe as a WARN_ONCE sanity check.
> + attachment[i]->type = KFD_MEM_ATT_SG;
> + ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
> + if (ret)
> + goto unwind;
> } else {
> - /* FIXME: Need to DMA-map other BO types:
> - * large-BAR VRAM, doorbells, MMIO remap
> - */
> - attachment[i]->type = KFD_MEM_ATT_SHARED;
> - bo[i] = mem->bo;
> - drm_gem_object_get(&bo[i]->tbo.base);
> + WARN_ONCE(true, "Handling invalid ATTACH request");
> + ret = -EINVAL;
> + goto unwind;
> }
>
> /* Add BO to VM internal data structures */
> @@ -1146,24 +1356,6 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
> return ret;
> }
>
> -static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
> -{
> - struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
> -
> - if (!sg)
> - return NULL;
> - if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
> - kfree(sg);
> - return NULL;
> - }
> - sg->sgl->dma_address = addr;
> - sg->sgl->length = size;
> -#ifdef CONFIG_NEED_SG_DMA_LENGTH
> - sg->sgl->dma_length = size;
> -#endif
> - return sg;
> -}
> -
> static int process_validate_vms(struct amdkfd_process_info *process_info)
> {
> struct amdgpu_vm *peer_vm;
> @@ -1532,7 +1724,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> bo_type = ttm_bo_type_sg;
> if (size > UINT_MAX)
> return -EINVAL;
> - sg = create_doorbell_sg(*offset, size);
> + sg = create_sg_table(*offset, size);
> if (!sg)
> return -ENOMEM;
> } else {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index b5ee0eb984ee..acb9e934adc9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -32,6 +32,7 @@
> #include <linux/slab.h>
> #include <linux/iommu.h>
> #include <linux/pci.h>
> +#include <linux/pci-p2pdma.h>
>
> #include <drm/drm_atomic_helper.h>
> #include <drm/drm_probe_helper.h>
> @@ -127,6 +128,8 @@ const char *amdgpu_asic_name[] = {
> "LAST",
> };
>
> +extern bool pcie_p2p;
> +
> /**
> * DOC: pcie_replay_count
> *
> @@ -5434,6 +5437,36 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
> }
> }
>
> +/**
> + * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
> + *
> + * @adev: amdgpu_device pointer
> + * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
> + *
> + * Return true if @peer_adev can access (DMA) @adev through the PCIe
> + * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
> + * @peer_adev.
> + */
> +bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
> + struct amdgpu_device *peer_adev)
> +{
> +#ifdef CONFIG_HSA_AMD_P2P
> + bool p2p_access = false;
> + uint64_t address_mask = peer_adev->dev->dma_mask ?
> + ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
> + resource_size_t aper_limit =
> + adev->gmc.aper_base + adev->gmc.aper_size - 1;
> + p2p_access = !(pci_p2pdma_distance_many(adev->pdev, &peer_adev->dev, 1, true) < 0);
This would give you a checkpatch warning. Please run checkpatch.
Why can't you initialize p2p_access in the declaration above?
Regards,
Felix
> +
> + return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
> + adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
> + !(adev->gmc.aper_base & address_mask ||
> + aper_limit & address_mask));
> +#else
> + return false;
> +#endif
> +}
> +
> int amdgpu_device_baco_enter(struct drm_device *dev)
> {
> struct amdgpu_device *adev = drm_to_adev(dev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index bed4ed88951f..d1c82a9e8569 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -802,6 +802,14 @@ MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (
> module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
> #endif
>
> +/**
> + * DOC: pcie_p2p (bool)
> + * Enable PCIe P2P (requires large-BAR). Default value: true (on)
> + */
> +bool pcie_p2p = true;
> +module_param(pcie_p2p, bool, 0444);
> +MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). (N = off, Y = on(default))");
> +
> /**
> * DOC: dcfeaturemask (uint)
> * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
More information about the amd-gfx
mailing list