[PATCH 10/10] vfio/pci: Add dma-buf export support for MMIO regions

Robin Murphy robin.murphy at arm.com
Tue Jul 29 19:44:21 UTC 2025


On 2025-07-23 2:00 pm, Leon Romanovsky wrote:
[...]
> +static struct sg_table *
> +vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
> +		     enum dma_data_direction dir)
> +{
> +	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
> +	struct p2pdma_provider *provider = priv->vdev->provider;
> +	struct dma_iova_state *state = attachment->priv;
> +	struct phys_vec *phys_vec = &priv->phys_vec;
> +	struct scatterlist *sgl;
> +	struct sg_table *sgt;
> +	dma_addr_t addr;
> +	int ret;
> +
> +	dma_resv_assert_held(priv->dmabuf->resv);
> +
> +	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
> +	if (!sgt)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ret = sg_alloc_table(sgt, 1, GFP_KERNEL | __GFP_ZERO);
> +	if (ret)
> +		goto err_kfree_sgt;
> +
> +	sgl = sgt->sgl;
> +
> +	if (!state) {
> +		addr = pci_p2pdma_bus_addr_map(provider, phys_vec->paddr);
> +	} else if (dma_use_iova(state)) {
> +		ret = dma_iova_link(attachment->dev, state, phys_vec->paddr, 0,
> +				    phys_vec->len, dir, DMA_ATTR_SKIP_CPU_SYNC);

The supposed benefits of this API are only for replacing scatterlists 
where multiple disjoint pages are being mapped. In this case with just 
one single contiguous mapping, it is clearly objectively worse to have 
to bounce in and out of the IOMMU layer 3 separate times and store a 
dma_map_state, to achieve the exact same operations that a single call 
to iommu_dma_map_resource() will perform more efficiently and with no 
external state required.

Oh yeah, and mapping MMIO with regular memory attributes (IOMMU_CACHE) 
rather than appropriate ones (IOMMU_MMIO), as this will end up doing, 
isn't guaranteed not to end badly either (e.g. if the system 
interconnect ends up merging consecutive write bursts and exceeding the 
target root port's MPS.)

> +		if (ret)
> +			goto err_free_table;
> +
> +		ret = dma_iova_sync(attachment->dev, state, 0, phys_vec->len);
> +		if (ret)
> +			goto err_unmap_dma;
> +
> +		addr = state->addr;
> +	} else {
> +		addr = dma_map_phys(attachment->dev, phys_vec->paddr,
> +				    phys_vec->len, dir, DMA_ATTR_SKIP_CPU_SYNC);

And again, if the IOMMU is in bypass (the idea of P2P with vfio-noiommu 
simply isn't worth entertaining) then what purpose do you imagine this 
call serves at all, other than to hilariously crash under 
"swiotlb=force"? Even in the case that phys_to_dma(phys_vec->paddr) != 
phys_vec->paddr, in almost all circumstances (both hardware offsets and 
CoCo environments with address-based aliasing), it is more likely than 
not that the latter is still the address you want and the former is 
wrong (and liable to lead to corruption or fatal system errors), because 
MMIO and memory remain fundamentally different things.

AFAICS you're *depending* on this call being an effective no-op, and 
thus only demonstrating that the dma_map_phys() idea is still entirely 
unnecessary.

> +		ret = dma_mapping_error(attachment->dev, addr);
> +		if (ret)
> +			goto err_free_table;
> +	}
> +
> +	fill_sg_entry(sgl, phys_vec->len, addr);
> +	return sgt;
> +
> +err_unmap_dma:
> +	dma_iova_destroy(attachment->dev, state, phys_vec->len, dir,
> +			 DMA_ATTR_SKIP_CPU_SYNC);
> +err_free_table:
> +	sg_free_table(sgt);
> +err_kfree_sgt:
> +	kfree(sgt);
> +	return ERR_PTR(ret);
> +}
> +
> +static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
> +				   struct sg_table *sgt,
> +				   enum dma_data_direction dir)
> +{
> +	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
> +	struct dma_iova_state *state = attachment->priv;
> +	struct scatterlist *sgl;
> +	int i;
> +
> +	if (!state)
> +		; /* Do nothing */
> +	else if (dma_use_iova(state))
> +		dma_iova_destroy(attachment->dev, state, priv->phys_vec.len,
> +				 dir, DMA_ATTR_SKIP_CPU_SYNC);
> +	else
> +		for_each_sgtable_dma_sg(sgt, sgl, i)

The table always has exactly one entry...

Thanks,
Robin.

> +			dma_unmap_phys(attachment->dev, sg_dma_address(sgl),
> +				       sg_dma_len(sgl), dir,
> +				       DMA_ATTR_SKIP_CPU_SYNC);
> +
> +	sg_free_table(sgt);
> +	kfree(sgt);
> +}


More information about the dri-devel mailing list