[Spice-devel] [PATCH] drm/ttm: use dma_alloc_pages for the page pool
Christian König
christian.koenig at amd.com
Tue May 11 07:35:20 UTC 2021
On 11.05.21 at 08:05, Christoph Hellwig wrote:
> Use the dma_alloc_pages allocator for the TTM pool allocator.
> This allocator is a front end to the page allocator which takes the
> DMA mask of the device into account, combining the best of the two
> existing allocator versions. This conversion also removes the ugly
> layering violation where the TTM pool assumes what kind of virtual
> address dma_alloc_attrs can return.
Oh yes, please. But please split this up into multiple patches. At the
very least, switch from dma_alloc_attrs() to dma_alloc_pages() in a
separate patch.
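Roughly, that first patch could be limited to the allocation/free path in
ttm_pool.c. A minimal sketch of the allocation side, assuming struct
ttm_pool_dma carries the order instead of the CPU vaddr (as in the hunk
further down) while use_dma_alloc and the GFP_DMA32 handling stay
untouched:

/* Sketch only: ttm_pool_alloc_page() with just the allocator switched
 * from dma_alloc_attrs() to dma_alloc_pages(), everything else as today.
 */
static struct page *ttm_pool_alloc_page(struct ttm_pool *pool,
					gfp_t gfp_flags, unsigned int order)
{
	struct ttm_pool_dma *dma;
	struct page *p;

	/* Same restriction as today: no __GFP_COMP for higher orders. */
	if (order)
		gfp_flags |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN |
			__GFP_KSWAPD_RECLAIM;

	if (!pool->use_dma_alloc) {
		p = alloc_pages(gfp_flags, order);
		if (p)
			p->private = order;
		return p;
	}

	dma = kmalloc(sizeof(*dma), GFP_KERNEL);
	if (!dma)
		return NULL;

	/* dma_alloc_pages() returns the struct page directly and respects
	 * the device DMA mask, so DMA_ATTR_FORCE_CONTIGUOUS and the
	 * vmalloc_to_page()/virt_to_page() guessing go away.
	 */
	p = dma_alloc_pages(pool->dev, (1ULL << order) * PAGE_SIZE,
			    &dma->addr, DMA_BIDIRECTIONAL, gfp_flags);
	if (!p) {
		kfree(dma);
		return NULL;
	}

	dma->order = order;
	p->private = (unsigned long)dma;
	return p;
}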
We are certainly going to need drm_need_swiotlb() for userptr support
(unless we add some way for drivers to opt out of swiotlb).
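For illustration only (the helper name and the 44-bit limit below are
made up, this is not real driver code): a userptr path maps arbitrary
user pages with the streaming DMA API, so the driver wants to know up
front whether swiotlb may bounce those mappings instead of the device
reaching the pages directly, which is the question drm_need_swiotlb()
answers.

/* Hypothetical sketch of a driver-side check for a userptr path. */
static bool my_userptr_direct_mapping_possible(void)
{
	return !drm_need_swiotlb(44);
}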
Then, while I really want to get rid of GFP_DMA32 as well, I'm not 100%
sure we can handle this without the flag.
And last, we need a better way to store the DMA address and order than
allocating a separate memory object for each page.
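One possible direction, as a sketch only (and with the caveat that
dma_addr_t does not fit into an unsigned long on every configuration):
the DMA address coming back from dma_alloc_pages() should be page
aligned, so the order could be packed into its low bits and the pair
stored directly in page->private, the same trick the current code plays
with the CPU vaddr:

/* Sketch: pack DMA address and order into page->private, assuming a
 * page-aligned DMA address and a dma_addr_t that fits an unsigned long.
 * This would let the allocation path drop the kmalloc'ed per-page object.
 */
static void ttm_pool_page_set_dma(struct page *p, dma_addr_t addr,
				  unsigned int order)
{
	WARN_ON(addr & ~PAGE_MASK);
	p->private = (unsigned long)addr | order;
}

static dma_addr_t ttm_pool_page_dma_addr(struct page *p)
{
	return (dma_addr_t)(p->private & PAGE_MASK);
}

static unsigned int ttm_pool_page_order(struct page *p)
{
	return p->private & ~PAGE_MASK;
}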
Christian.
>
> Signed-off-by: Christoph Hellwig <hch at lst.de>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 +-
> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 1 -
> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 1 -
> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 1 -
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 1 -
> drivers/gpu/drm/drm_cache.c | 31 -----
> drivers/gpu/drm/drm_gem_vram_helper.c | 3 +-
> drivers/gpu/drm/nouveau/nouveau_ttm.c | 8 +-
> drivers/gpu/drm/qxl/qxl_ttm.c | 3 +-
> drivers/gpu/drm/radeon/radeon.h | 1 -
> drivers/gpu/drm/radeon/radeon_device.c | 1 -
> drivers/gpu/drm/radeon/radeon_ttm.c | 4 +-
> drivers/gpu/drm/ttm/ttm_device.c | 7 +-
> drivers/gpu/drm/ttm/ttm_pool.c | 178 ++++--------------------
> drivers/gpu/drm/ttm/ttm_tt.c | 25 +---
> drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 4 +-
> include/drm/drm_cache.h | 1 -
> include/drm/ttm/ttm_device.h | 3 +-
> include/drm/ttm/ttm_pool.h | 9 +-
> 20 files changed, 41 insertions(+), 246 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index dc3a69296321b3..5f40527eeef1ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -819,7 +819,6 @@ struct amdgpu_device {
> int usec_timeout;
> const struct amdgpu_asic_funcs *asic_funcs;
> bool shutdown;
> - bool need_swiotlb;
> bool accel_working;
> struct notifier_block acpi_nb;
> struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 3bef0432cac2f7..9bf17b44cba6fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1705,9 +1705,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
> /* No others user of address space so set it to 0 */
> r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
> adev_to_drm(adev)->anon_inode->i_mapping,
> - adev_to_drm(adev)->vma_offset_manager,
> - adev->need_swiotlb,
> - dma_addressing_limited(adev->dev));
> + adev_to_drm(adev)->vma_offset_manager);
> if (r) {
> DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
> return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> index 405d6ad09022ca..2d4fa754513033 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -846,7 +846,6 @@ static int gmc_v6_0_sw_init(void *handle)
> dev_warn(adev->dev, "No suitable DMA available.\n");
> return r;
> }
> - adev->need_swiotlb = drm_need_swiotlb(44);
>
> r = gmc_v6_0_init_microcode(adev);
> if (r) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index 210ada2289ec9c..a504db24f4c2a8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -1025,7 +1025,6 @@ static int gmc_v7_0_sw_init(void *handle)
> pr_warn("No suitable DMA available\n");
> return r;
> }
> - adev->need_swiotlb = drm_need_swiotlb(40);
>
> r = gmc_v7_0_init_microcode(adev);
> if (r) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index e4f27b3f28fb58..42e7b1eb84b3bc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -1141,7 +1141,6 @@ static int gmc_v8_0_sw_init(void *handle)
> pr_warn("No suitable DMA available\n");
> return r;
> }
> - adev->need_swiotlb = drm_need_swiotlb(40);
>
> r = gmc_v8_0_init_microcode(adev);
> if (r) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 455bb91060d0bc..f74784b3423740 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -1548,7 +1548,6 @@ static int gmc_v9_0_sw_init(void *handle)
> printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
> return r;
> }
> - adev->need_swiotlb = drm_need_swiotlb(44);
>
> if (adev->gmc.xgmi.supported) {
> r = adev->gfxhub.funcs->get_xgmi_info(adev);
> diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
> index 79a50ef1250fd1..7e6eb4b33d0069 100644
> --- a/drivers/gpu/drm/drm_cache.c
> +++ b/drivers/gpu/drm/drm_cache.c
> @@ -178,34 +178,3 @@ drm_clflush_virt_range(void *addr, unsigned long length)
> #endif
> }
> EXPORT_SYMBOL(drm_clflush_virt_range);
> -
> -bool drm_need_swiotlb(int dma_bits)
> -{
> - struct resource *tmp;
> - resource_size_t max_iomem = 0;
> -
> - /*
> - * Xen paravirtual hosts require swiotlb regardless of requested dma
> - * transfer size.
> - *
> - * NOTE: Really, what it requires is use of the dma_alloc_coherent
> - * allocator used in ttm_dma_populate() instead of
> - * ttm_populate_and_map_pages(), which bounce buffers so much in
> - * Xen it leads to swiotlb buffer exhaustion.
> - */
> - if (xen_pv_domain())
> - return true;
> -
> - /*
> - * Enforce dma_alloc_coherent when memory encryption is active as well
> - * for the same reasons as for Xen paravirtual hosts.
> - */
> - if (mem_encrypt_active())
> - return true;
> -
> - for (tmp = iomem_resource.child; tmp; tmp = tmp->sibling)
> - max_iomem = max(max_iomem, tmp->end);
> -
> - return max_iomem > ((u64)1 << dma_bits);
> -}
> -EXPORT_SYMBOL(drm_need_swiotlb);
> diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
> index 2b7c3a07956d59..6ce93d2d84d0e9 100644
> --- a/drivers/gpu/drm/drm_gem_vram_helper.c
> +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
> @@ -1034,8 +1034,7 @@ static int drm_vram_mm_init(struct drm_vram_mm *vmm, struct drm_device *dev,
>
> ret = ttm_device_init(&vmm->bdev, &bo_driver, dev->dev,
> dev->anon_inode->i_mapping,
> - dev->vma_offset_manager,
> - false, true);
> + dev->vma_offset_manager);
> if (ret)
> return ret;
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
> index e8b506a6685b56..4876a637933980 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
> @@ -285,7 +285,6 @@ nouveau_ttm_init(struct nouveau_drm *drm)
> struct nvkm_pci *pci = device->pci;
> struct nvif_mmu *mmu = &drm->client.mmu;
> struct drm_device *dev = drm->dev;
> - bool need_swiotlb = false;
> int typei, ret;
>
> ret = nouveau_ttm_init_host(drm, 0);
> @@ -320,14 +319,9 @@ nouveau_ttm_init(struct nouveau_drm *drm)
> drm->agp.cma = pci->agp.cma;
> }
>
> -#if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86)
> - need_swiotlb = is_swiotlb_active();
> -#endif
> -
> ret = ttm_device_init(&drm->ttm.bdev, &nouveau_bo_driver, drm->dev->dev,
> dev->anon_inode->i_mapping,
> - dev->vma_offset_manager, need_swiotlb,
> - drm->client.mmu.dmabits <= 32);
> + dev->vma_offset_manager);
> if (ret) {
> NV_ERROR(drm, "error initialising bo driver, %d\n", ret);
> return ret;
> diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
> index 47afe95d04a1a7..823eb562ba3d53 100644
> --- a/drivers/gpu/drm/qxl/qxl_ttm.c
> +++ b/drivers/gpu/drm/qxl/qxl_ttm.c
> @@ -186,8 +186,7 @@ int qxl_ttm_init(struct qxl_device *qdev)
> /* No others user of address space so set it to 0 */
> r = ttm_device_init(&qdev->mman.bdev, &qxl_bo_driver, NULL,
> qdev->ddev.anon_inode->i_mapping,
> - qdev->ddev.vma_offset_manager,
> - false, false);
> + qdev->ddev.vma_offset_manager);
> if (r) {
> DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
> return r;
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 42281fce552e6e..7d41302c55886d 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -2379,7 +2379,6 @@ struct radeon_device {
> struct radeon_wb wb;
> struct radeon_dummy_page dummy_page;
> bool shutdown;
> - bool need_swiotlb;
> bool accel_working;
> bool fastfb_working; /* IGP feature*/
> bool needs_reset, in_reset;
> diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
> index cc445c4cba2e3d..3efdb5fb55ad57 100644
> --- a/drivers/gpu/drm/radeon/radeon_device.c
> +++ b/drivers/gpu/drm/radeon/radeon_device.c
> @@ -1385,7 +1385,6 @@ int radeon_device_init(struct radeon_device *rdev,
> pr_warn("radeon: No suitable DMA available\n");
> return r;
> }
> - rdev->need_swiotlb = drm_need_swiotlb(dma_bits);
>
> /* Registers mapping */
> /* TODO: block userspace mapping of io register */
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> index 380b3007fd0b0d..28334f86eaa24a 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -717,9 +717,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
> /* No others user of address space so set it to 0 */
> r = ttm_device_init(&rdev->mman.bdev, &radeon_bo_driver, rdev->dev,
> rdev->ddev->anon_inode->i_mapping,
> - rdev->ddev->vma_offset_manager,
> - rdev->need_swiotlb,
> - dma_addressing_limited(&rdev->pdev->dev));
> + rdev->ddev->vma_offset_manager);
> if (r) {
> DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
> return r;
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index 510e3e001dabe0..289aab652486ef 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -198,8 +198,6 @@ static void ttm_device_delayed_workqueue(struct work_struct *work)
> * @dev: The core kernel device pointer for DMA mappings and allocations.
> * @mapping: The address space to use for this bo.
> * @vma_manager: A pointer to a vma manager.
> - * @use_dma_alloc: If coherent DMA allocation API should be used.
> - * @use_dma32: If we should use GFP_DMA32 for device memory allocations.
> *
> * Initializes a struct ttm_device:
> * Returns:
> @@ -207,8 +205,7 @@ static void ttm_device_delayed_workqueue(struct work_struct *work)
> */
> int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
> struct device *dev, struct address_space *mapping,
> - struct drm_vma_offset_manager *vma_manager,
> - bool use_dma_alloc, bool use_dma32)
> + struct drm_vma_offset_manager *vma_manager)
> {
> struct ttm_global *glob = &ttm_glob;
> int ret;
> @@ -223,7 +220,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
> bdev->funcs = funcs;
>
> ttm_init_sysman(bdev);
> - ttm_pool_init(&bdev->pool, dev, use_dma_alloc, use_dma32);
> + ttm_pool_init(&bdev->pool, dev);
>
> bdev->vma_manager = vma_manager;
> INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index cb38b1a17b0985..51b6585e54470c 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -50,11 +50,10 @@
> * struct ttm_pool_dma - Helper object for coherent DMA mappings
> *
> * @addr: original DMA address returned for the mapping
> - * @vaddr: original vaddr return for the mapping and order in the lower bits
> */
> struct ttm_pool_dma {
> dma_addr_t addr;
> - unsigned long vaddr;
> + unsigned int order;
> };
>
> static unsigned long page_pool_size;
> @@ -64,12 +63,6 @@ module_param(page_pool_size, ulong, 0644);
>
> static atomic_long_t allocated_pages;
>
> -static struct ttm_pool_type global_write_combined[MAX_ORDER];
> -static struct ttm_pool_type global_uncached[MAX_ORDER];
> -
> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
> -
> static struct mutex shrinker_lock;
> static struct list_head shrinker_list;
> static struct shrinker mm_shrinker;
> @@ -78,10 +71,8 @@ static struct shrinker mm_shrinker;
> static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
> unsigned int order)
> {
> - unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
> struct ttm_pool_dma *dma;
> struct page *p;
> - void *vaddr;
>
> /* Don't set the __GFP_COMP flag for higher order allocations.
> * Mapping pages directly into an userspace process and calling
> @@ -91,34 +82,16 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
> gfp_flags |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN |
> __GFP_KSWAPD_RECLAIM;
>
> - if (!pool->use_dma_alloc) {
> - p = alloc_pages(gfp_flags, order);
> - if (p)
> - p->private = order;
> - return p;
> - }
> -
> dma = kmalloc(sizeof(*dma), GFP_KERNEL);
> if (!dma)
> return NULL;
>
> - if (order)
> - attr |= DMA_ATTR_NO_WARN;
> -
> - vaddr = dma_alloc_attrs(pool->dev, (1ULL << order) * PAGE_SIZE,
> - &dma->addr, gfp_flags, attr);
> - if (!vaddr)
> + p = dma_alloc_pages(pool->dev, (1ULL << order) * PAGE_SIZE,
> + &dma->addr, DMA_BIDIRECTIONAL, gfp_flags);
> + if (!p)
> goto error_free;
>
> - /* TODO: This is an illegal abuse of the DMA API, but we need to rework
> - * TTM page fault handling and extend the DMA API to clean this up.
> - */
> - if (is_vmalloc_addr(vaddr))
> - p = vmalloc_to_page(vaddr);
> - else
> - p = virt_to_page(vaddr);
> -
> - dma->vaddr = (unsigned long)vaddr | order;
> + dma->order = order;
> p->private = (unsigned long)dma;
> return p;
>
> @@ -131,9 +104,7 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
> static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
> unsigned int order, struct page *p)
> {
> - unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
> struct ttm_pool_dma *dma;
> - void *vaddr;
>
> #ifdef CONFIG_X86
> /* We don't care that set_pages_wb is inefficient here. This is only
> @@ -143,18 +114,14 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
> set_pages_wb(p, 1 << order);
> #endif
>
> - if (!pool || !pool->use_dma_alloc) {
> + if (!pool) {
> __free_pages(p, order);
> return;
> }
>
> - if (order)
> - attr |= DMA_ATTR_NO_WARN;
> -
> dma = (void *)p->private;
> - vaddr = (void *)(dma->vaddr & PAGE_MASK);
> - dma_free_attrs(pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dma->addr,
> - attr);
> + dma_free_pages(pool->dev, (1UL << order) * PAGE_SIZE, p, dma->addr,
> + DMA_BIDIRECTIONAL);
> kfree(dma);
> }
>
> @@ -184,20 +151,12 @@ static int ttm_pool_apply_caching(struct page **first, struct page **last,
> static int ttm_pool_map(struct ttm_pool *pool, unsigned int order,
> struct page *p, dma_addr_t **dma_addr)
> {
> - dma_addr_t addr;
> + struct ttm_pool_dma *dma = (void *)p->private;
> + dma_addr_t addr = dma->addr;
> unsigned int i;
>
> - if (pool->use_dma_alloc) {
> - struct ttm_pool_dma *dma = (void *)p->private;
> -
> - addr = dma->addr;
> - } else {
> - size_t size = (1ULL << order) * PAGE_SIZE;
> -
> - addr = dma_map_page(pool->dev, p, 0, size, DMA_BIDIRECTIONAL);
> - if (dma_mapping_error(pool->dev, addr))
> - return -EFAULT;
> - }
> + dma_sync_single_for_device(pool->dev, addr,
> + (1ULL << dma->order) * PAGE_SIZE, DMA_BIDIRECTIONAL);
>
> for (i = 1 << order; i ; --i) {
> *(*dma_addr)++ = addr;
> @@ -211,12 +170,8 @@ static int ttm_pool_map(struct ttm_pool *pool, unsigned int order,
> static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
> unsigned int num_pages)
> {
> - /* Unmapped while freeing the page */
> - if (pool->use_dma_alloc)
> - return;
> -
> - dma_unmap_page(pool->dev, dma_addr, (long)num_pages << PAGE_SHIFT,
> - DMA_BIDIRECTIONAL);
> + dma_sync_single_for_cpu(pool->dev, dma_addr, num_pages * PAGE_SIZE,
> + DMA_BIDIRECTIONAL);
> }
>
> /* Give pages into a specific pool_type */
> @@ -286,27 +241,7 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
> enum ttm_caching caching,
> unsigned int order)
> {
> - if (pool->use_dma_alloc)
> - return &pool->caching[caching].orders[order];
> -
> -#ifdef CONFIG_X86
> - switch (caching) {
> - case ttm_write_combined:
> - if (pool->use_dma32)
> - return &global_dma32_write_combined[order];
> -
> - return &global_write_combined[order];
> - case ttm_uncached:
> - if (pool->use_dma32)
> - return &global_dma32_uncached[order];
> -
> - return &global_uncached[order];
> - default:
> - break;
> - }
> -#endif
> -
> - return NULL;
> + return &pool->caching[caching].orders[order];
> }
>
> /* Free pages using the global shrinker list */
> @@ -336,13 +271,9 @@ static unsigned int ttm_pool_shrink(void)
> /* Return the allocation order based for a page */
> static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
> {
> - if (pool->use_dma_alloc) {
> - struct ttm_pool_dma *dma = (void *)p->private;
> + struct ttm_pool_dma *dma = (void *)p->private;
>
> - return dma->vaddr & ~PAGE_MASK;
> - }
> -
> - return p->private;
> + return dma->order;
> }
>
> /**
> @@ -364,7 +295,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> dma_addr_t *dma_addr = tt->dma_address;
> struct page **caching = tt->pages;
> struct page **pages = tt->pages;
> - gfp_t gfp_flags = GFP_USER;
> + gfp_t gfp_flags = GFP_HIGHUSER;
> unsigned int i, order;
> struct page *p;
> int r;
> @@ -378,11 +309,6 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> if (ctx->gfp_retry_mayfail)
> gfp_flags |= __GFP_RETRY_MAYFAIL;
>
> - if (pool->use_dma32)
> - gfp_flags |= GFP_DMA32;
> - else
> - gfp_flags |= GFP_HIGHUSER;
> -
> for (order = min(MAX_ORDER - 1UL, __fls(num_pages)); num_pages;
> order = min_t(unsigned int, order, __fls(num_pages))) {
> bool apply_caching = false;
> @@ -489,28 +415,20 @@ EXPORT_SYMBOL(ttm_pool_free);
> *
> * @pool: the pool to initialize
> * @dev: device for DMA allocations and mappings
> - * @use_dma_alloc: true if coherent DMA alloc should be used
> - * @use_dma32: true if GFP_DMA32 should be used
> *
> * Initialize the pool and its pool types.
> */
> -void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
> - bool use_dma_alloc, bool use_dma32)
> +void ttm_pool_init(struct ttm_pool *pool, struct device *dev)
> {
> unsigned int i, j;
>
> - WARN_ON(!dev && use_dma_alloc);
> + WARN_ON(!dev);
>
> pool->dev = dev;
> - pool->use_dma_alloc = use_dma_alloc;
> - pool->use_dma32 = use_dma32;
> -
> - if (use_dma_alloc) {
> - for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> - for (j = 0; j < MAX_ORDER; ++j)
> - ttm_pool_type_init(&pool->caching[i].orders[j],
> - pool, i, j);
> - }
> + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> + for (j = 0; j < MAX_ORDER; ++j)
> + ttm_pool_type_init(&pool->caching[i].orders[j], pool, i,
> + j);
> }
>
> /**
> @@ -525,11 +443,9 @@ void ttm_pool_fini(struct ttm_pool *pool)
> {
> unsigned int i, j;
>
> - if (pool->use_dma_alloc) {
> - for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> - for (j = 0; j < MAX_ORDER; ++j)
> - ttm_pool_type_fini(&pool->caching[i].orders[j]);
> - }
> + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> + for (j = 0; j < MAX_ORDER; ++j)
> + ttm_pool_type_fini(&pool->caching[i].orders[j]);
> }
>
> /* As long as pages are available make sure to release at least one */
> @@ -603,18 +519,6 @@ static void ttm_pool_debugfs_footer(struct seq_file *m)
> static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data)
> {
> ttm_pool_debugfs_header(m);
> -
> - mutex_lock(&shrinker_lock);
> - seq_puts(m, "wc\t:");
> - ttm_pool_debugfs_orders(global_write_combined, m);
> - seq_puts(m, "uc\t:");
> - ttm_pool_debugfs_orders(global_uncached, m);
> - seq_puts(m, "wc 32\t:");
> - ttm_pool_debugfs_orders(global_dma32_write_combined, m);
> - seq_puts(m, "uc 32\t:");
> - ttm_pool_debugfs_orders(global_dma32_uncached, m);
> - mutex_unlock(&shrinker_lock);
> -
> ttm_pool_debugfs_footer(m);
>
> return 0;
> @@ -633,11 +537,6 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
> {
> unsigned int i;
>
> - if (!pool->use_dma_alloc) {
> - seq_puts(m, "unused\n");
> - return 0;
> - }
> -
> ttm_pool_debugfs_header(m);
>
> mutex_lock(&shrinker_lock);
> @@ -688,25 +587,12 @@ DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_shrink);
> */
> int ttm_pool_mgr_init(unsigned long num_pages)
> {
> - unsigned int i;
> -
> if (!page_pool_size)
> page_pool_size = num_pages;
>
> mutex_init(&shrinker_lock);
> INIT_LIST_HEAD(&shrinker_list);
>
> - for (i = 0; i < MAX_ORDER; ++i) {
> - ttm_pool_type_init(&global_write_combined[i], NULL,
> - ttm_write_combined, i);
> - ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
> -
> - ttm_pool_type_init(&global_dma32_write_combined[i], NULL,
> - ttm_write_combined, i);
> - ttm_pool_type_init(&global_dma32_uncached[i], NULL,
> - ttm_uncached, i);
> - }
> -
> #ifdef CONFIG_DEBUG_FS
> debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL,
> &ttm_pool_debugfs_globals_fops);
> @@ -727,16 +613,6 @@ int ttm_pool_mgr_init(unsigned long num_pages)
> */
> void ttm_pool_mgr_fini(void)
> {
> - unsigned int i;
> -
> - for (i = 0; i < MAX_ORDER; ++i) {
> - ttm_pool_type_fini(&global_write_combined[i]);
> - ttm_pool_type_fini(&global_uncached[i]);
> -
> - ttm_pool_type_fini(&global_dma32_write_combined[i]);
> - ttm_pool_type_fini(&global_dma32_uncached[i]);
> - }
> -
> unregister_shrinker(&mm_shrinker);
> WARN_ON(!list_empty(&shrinker_list));
> }
> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> index a1a25410ec742d..d8f30ce4c65655 100644
> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> @@ -51,7 +51,6 @@ MODULE_PARM_DESC(dma32_pages_limit, "Limit for the allocated DMA32 pages");
> module_param_named(dma32_pages_limit, ttm_dma32_pages_limit, ulong, 0644);
>
> static atomic_long_t ttm_pages_allocated;
> -static atomic_long_t ttm_dma32_pages_allocated;
>
> /*
> * Allocates a ttm structure for the given BO.
> @@ -317,17 +316,10 @@ int ttm_tt_populate(struct ttm_device *bdev,
> if (ttm_tt_is_populated(ttm))
> return 0;
>
> - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG))
> atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
> - if (bdev->pool.use_dma32)
> - atomic_long_add(ttm->num_pages,
> - &ttm_dma32_pages_allocated);
> - }
> -
> - while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit ||
> - atomic_long_read(&ttm_dma32_pages_allocated) >
> - ttm_dma32_pages_limit) {
>
> + while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit) {
> ret = ttm_global_swapout(ctx, GFP_KERNEL);
> if (ret == 0)
> break;
> @@ -355,12 +347,8 @@ int ttm_tt_populate(struct ttm_device *bdev,
> return 0;
>
> error:
> - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG))
> atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> - if (bdev->pool.use_dma32)
> - atomic_long_sub(ttm->num_pages,
> - &ttm_dma32_pages_allocated);
> - }
> return ret;
> }
> EXPORT_SYMBOL(ttm_tt_populate);
> @@ -390,13 +378,8 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
> else
> ttm_pool_free(&bdev->pool, ttm);
>
> - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> + if (!(ttm->page_flags & TTM_PAGE_FLAG_SG))
> atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> - if (bdev->pool.use_dma32)
> - atomic_long_sub(ttm->num_pages,
> - &ttm_dma32_pages_allocated);
> - }
> -
> ttm->page_flags &= ~TTM_PAGE_FLAG_PRIV_POPULATED;
> }
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index 399f70d340eb5b..0c5783651ae187 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -881,9 +881,7 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id)
> ret = ttm_device_init(&dev_priv->bdev, &vmw_bo_driver,
> dev_priv->drm.dev,
> dev_priv->drm.anon_inode->i_mapping,
> - &dev_priv->vma_manager,
> - dev_priv->map_mode == vmw_dma_alloc_coherent,
> - false);
> + &dev_priv->vma_manager);
> if (unlikely(ret != 0)) {
> DRM_ERROR("Failed initializing TTM buffer object driver.\n");
> goto out_no_bdev;
> diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
> index e9ad4863d91568..c3122588811731 100644
> --- a/include/drm/drm_cache.h
> +++ b/include/drm/drm_cache.h
> @@ -38,7 +38,6 @@
> void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
> void drm_clflush_sg(struct sg_table *st);
> void drm_clflush_virt_range(void *addr, unsigned long length);
> -bool drm_need_swiotlb(int dma_bits);
>
>
> static inline bool drm_arch_can_wc_memory(void)
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index 7c8f87bd52d383..8b5281b982b7da 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -310,8 +310,7 @@ static inline void ttm_set_driver_manager(struct ttm_device *bdev, int type,
>
> int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
> struct device *dev, struct address_space *mapping,
> - struct drm_vma_offset_manager *vma_manager,
> - bool use_dma_alloc, bool use_dma32);
> + struct drm_vma_offset_manager *vma_manager);
> void ttm_device_fini(struct ttm_device *bdev);
>
> #endif
> diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
> index 4321728bdd11f8..7c7800c934e272 100644
> --- a/include/drm/ttm/ttm_pool.h
> +++ b/include/drm/ttm/ttm_pool.h
> @@ -60,16 +60,10 @@ struct ttm_pool_type {
> /**
> * ttm_pool - Pool for all caching and orders
> *
> - * @use_dma_alloc: if coherent DMA allocations should be used
> - * @use_dma32: if GFP_DMA32 should be used
> * @caching: pools for each caching/order
> */
> struct ttm_pool {
> struct device *dev;
> -
> - bool use_dma_alloc;
> - bool use_dma32;
> -
> struct {
> struct ttm_pool_type orders[MAX_ORDER];
> } caching[TTM_NUM_CACHING_TYPES];
> @@ -79,8 +73,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> struct ttm_operation_ctx *ctx);
> void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt);
>
> -void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
> - bool use_dma_alloc, bool use_dma32);
> +void ttm_pool_init(struct ttm_pool *pool, struct device *dev);
> void ttm_pool_fini(struct ttm_pool *pool);
>
> int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m);