[PATCH 8/8] drm/ttm: nuke caching placement flags

Wed Oct 7 09:07:32 UTC 2020

Am 05.10.20 um 18:17 schrieb Ruhl, Michael J:
>> -----Original Message-----
>> From: dri-devel <dri-devel-bounces at lists.freedesktop.org> On Behalf Of
>> Christian König
>> Sent: Thursday, October 1, 2020 7:28 AM
>> To: dri-devel at lists.freedesktop.org; ray.huang at amd.com;
>> airlied at gmail.com; daniel at ffwll.ch
>> Subject: [PATCH 8/8] drm/ttm: nuke caching placement flags
>>
>> Changing the caching on the fly never really worked
>> flawlessly.
>>
>> So stop this completely and just let drivers specific the
>> desired caching in the tt or bus object.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 20 +++-------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 12 ++----
>> drivers/gpu/drm/drm_gem_vram_helper.c      |  7 +---
>> drivers/gpu/drm/nouveau/nouveau_bo.c       | 36 +++++------------
>> drivers/gpu/drm/qxl/qxl_object.c           | 10 ++---
>> drivers/gpu/drm/qxl/qxl_ttm.c              |  2 +-
>> drivers/gpu/drm/radeon/radeon_object.c     | 46 +++++-----------------
>> drivers/gpu/drm/radeon/radeon_ttm.c        | 18 ++-------
>> drivers/gpu/drm/ttm/ttm_agp_backend.c      |  2 +-
>> drivers/gpu/drm/ttm/ttm_bo.c               | 44 ++-------------------
>> drivers/gpu/drm/ttm/ttm_bo_util.c          | 10 ++---
>> drivers/gpu/drm/ttm/ttm_tt.c               | 29 --------------
>> drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 30 +++++++-------
>> include/drm/ttm/ttm_placement.h            | 14 -------
>> include/drm/ttm/ttm_tt.h                   | 15 -------
>> 15 files changed, 61 insertions(+), 234 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index 80bc7177cd45..964f9512dd6e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -137,7 +137,7 @@ void amdgpu_bo_placement_from_domain(struct
>> amdgpu_bo *abo, u32 domain)
>> 		places[c].fpfn = 0;
>> 		places[c].lpfn = 0;
>> 		places[c].mem_type = TTM_PL_VRAM;
>> -		places[c].flags = TTM_PL_FLAG_WC |
>> TTM_PL_FLAG_UNCACHED;
>> +		places[c].flags = 0;
>>
>> 		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
>> 			places[c].lpfn = visible_pfn;
>> @@ -154,11 +154,6 @@ void amdgpu_bo_placement_from_domain(struct
>> amdgpu_bo *abo, u32 domain)
>> 		places[c].lpfn = 0;
>> 		places[c].mem_type = TTM_PL_TT;
>> 		places[c].flags = 0;
>> -		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
>> -			places[c].flags |= TTM_PL_FLAG_WC |
>> -				TTM_PL_FLAG_UNCACHED;
>> -		else
>> -			places[c].flags |= TTM_PL_FLAG_CACHED;
>> 		c++;
>> 	}
>>
>> @@ -167,11 +162,6 @@ void amdgpu_bo_placement_from_domain(struct
>> amdgpu_bo *abo, u32 domain)
>> 		places[c].lpfn = 0;
>> 		places[c].mem_type = TTM_PL_SYSTEM;
>> 		places[c].flags = 0;
>> -		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
>> -			places[c].flags |= TTM_PL_FLAG_WC |
>> -				TTM_PL_FLAG_UNCACHED;
>> -		else
>> -			places[c].flags |= TTM_PL_FLAG_CACHED;
>> 		c++;
>> 	}
>>
>> @@ -179,7 +169,7 @@ void amdgpu_bo_placement_from_domain(struct
>> amdgpu_bo *abo, u32 domain)
>> 		places[c].fpfn = 0;
>> 		places[c].lpfn = 0;
>> 		places[c].mem_type = AMDGPU_PL_GDS;
>> -		places[c].flags = TTM_PL_FLAG_UNCACHED;
>> +		places[c].flags = 0;
>> 		c++;
>> 	}
>>
>> @@ -187,7 +177,7 @@ void amdgpu_bo_placement_from_domain(struct
>> amdgpu_bo *abo, u32 domain)
>> 		places[c].fpfn = 0;
>> 		places[c].lpfn = 0;
>> 		places[c].mem_type = AMDGPU_PL_GWS;
>> -		places[c].flags = TTM_PL_FLAG_UNCACHED;
>> +		places[c].flags = 0;
>> 		c++;
>> 	}
>>
>> @@ -195,7 +185,7 @@ void amdgpu_bo_placement_from_domain(struct
>> amdgpu_bo *abo, u32 domain)
>> 		places[c].fpfn = 0;
>> 		places[c].lpfn = 0;
>> 		places[c].mem_type = AMDGPU_PL_OA;
>> -		places[c].flags = TTM_PL_FLAG_UNCACHED;
>> +		places[c].flags = 0;
>> 		c++;
>> 	}
>>
>> @@ -203,7 +193,7 @@ void amdgpu_bo_placement_from_domain(struct
>> amdgpu_bo *abo, u32 domain)
>> 		places[c].fpfn = 0;
>> 		places[c].lpfn = 0;
>> 		places[c].mem_type = TTM_PL_SYSTEM;
>> -		places[c].flags = TTM_PL_MASK_CACHING;
>> +		places[c].flags = 0;
>> 		c++;
>> 	}
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 5b56a66063fd..8cdec58b9106 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -92,7 +92,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object
>> *bo,
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = TTM_PL_SYSTEM,
>> -		.flags = TTM_PL_MASK_CACHING
>> +		.flags = 0
>> 	};
>>
>> 	/* Don't handle scatter gather BOs */
>> @@ -538,19 +538,13 @@ static int amdgpu_move_vram_ram(struct
>> ttm_buffer_object *bo, bool evict,
>> 	placements.fpfn = 0;
>> 	placements.lpfn = 0;
>> 	placements.mem_type = TTM_PL_TT;
>> -	placements.flags = TTM_PL_MASK_CACHING;
>> +	placements.flags = 0;
>> 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
>> 	if (unlikely(r)) {
>> 		pr_err("Failed to find GTT space for blit from VRAM\n");
>> 		return r;
>> 	}
>>
>> -	/* set caching flags */
>> -	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
>> -	if (unlikely(r)) {
>> -		goto out_cleanup;
>> -	}
>> -
>> 	r = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
>> 	if (unlikely(r))
>> 		goto out_cleanup;
>> @@ -599,7 +593,7 @@ static int amdgpu_move_ram_vram(struct
>> ttm_buffer_object *bo, bool evict,
>> 	placements.fpfn = 0;
>> 	placements.lpfn = 0;
>> 	placements.mem_type = TTM_PL_TT;
>> -	placements.flags = TTM_PL_MASK_CACHING;
>> +	placements.flags = 0;
>> 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
>> 	if (unlikely(r)) {
>> 		pr_err("Failed to find GTT space for blit to VRAM\n");
>> diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c
>> b/drivers/gpu/drm/drm_gem_vram_helper.c
>> index 79151b1c157c..b9c0ea720efd 100644
>> --- a/drivers/gpu/drm/drm_gem_vram_helper.c
>> +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
>> @@ -147,15 +147,12 @@ static void drm_gem_vram_placement(struct
>> drm_gem_vram_object *gbo,
>>
>> 	if (pl_flag & DRM_GEM_VRAM_PL_FLAG_VRAM) {
>> 		gbo->placements[c].mem_type = TTM_PL_VRAM;
>> -		gbo->placements[c++].flags = TTM_PL_FLAG_WC |
>> -					     TTM_PL_FLAG_UNCACHED |
>> -					     invariant_flags;
>> +		gbo->placements[c++].flags = invariant_flags;
>> 	}
>>
>> 	if (pl_flag & DRM_GEM_VRAM_PL_FLAG_SYSTEM || !c) {
>> 		gbo->placements[c].mem_type = TTM_PL_SYSTEM;
>> -		gbo->placements[c++].flags = TTM_PL_MASK_CACHING |
>> -					     invariant_flags;
>> +		gbo->placements[c++].flags = invariant_flags;
>> 	}
>>
>> 	gbo->placement.num_placement = c;
>> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
>> b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> index 8ed30f471ec7..b37dfd12c7b9 100644
>> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
>> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> @@ -343,37 +343,23 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size,
>> int align,
>> }
>>
>> static void
>> -set_placement_list(struct nouveau_drm *drm, struct ttm_place *pl,
>> unsigned *n,
>> -		   uint32_t domain, uint32_t flags)
>> +set_placement_list(struct ttm_place *pl, unsigned *n, uint32_t domain)
>> {
>> 	*n = 0;
>>
>> 	if (domain & NOUVEAU_GEM_DOMAIN_VRAM) {
>> -		struct nvif_mmu *mmu = &drm->client.mmu;
>> -		const u8 type = mmu->type[drm->ttm.type_vram].type;
>> -
>> 		pl[*n].mem_type = TTM_PL_VRAM;
>> -		pl[*n].flags = flags & ~TTM_PL_FLAG_CACHED;
>> -
>> -		/* Some BARs do not support being ioremapped WC */
>> -		if (drm->client.device.info.family >=
>> NV_DEVICE_INFO_V0_TESLA &&
>> -		    type & NVIF_MEM_UNCACHED)
>> -			pl[*n].flags &= ~TTM_PL_FLAG_WC;
>> -
>> +		pl[*n].flags = 0;
>> 		(*n)++;
>> 	}
>> 	if (domain & NOUVEAU_GEM_DOMAIN_GART) {
>> 		pl[*n].mem_type = TTM_PL_TT;
>> -		pl[*n].flags = flags;
>> -
>> -		if (drm->agp.bridge)
>> -			pl[*n].flags &= ~TTM_PL_FLAG_CACHED;
>> -
>> +		pl[*n].flags = 0;
>> 		(*n)++;
>> 	}
>> 	if (domain & NOUVEAU_GEM_DOMAIN_CPU) {
>> 		pl[*n].mem_type = TTM_PL_SYSTEM;
>> -		pl[(*n)++].flags = flags;
>> +		pl[(*n)++].flags = 0;
>> 	}
>> }
>>
>> @@ -415,18 +401,14 @@ void
>> nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t domain,
>> 			 uint32_t busy)
>> {
>> -	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
>> 	struct ttm_placement *pl = &nvbo->placement;
>> -	uint32_t flags = nvbo->force_coherent ? TTM_PL_FLAG_UNCACHED :
>> -						TTM_PL_MASK_CACHING;
>>
>> 	pl->placement = nvbo->placements;
>> -	set_placement_list(drm, nvbo->placements, &pl->num_placement,
>> -			   domain, flags);
>> +	set_placement_list(nvbo->placements, &pl->num_placement,
>> domain);
>>
>> 	pl->busy_placement = nvbo->busy_placements;
>> -	set_placement_list(drm, nvbo->busy_placements, &pl-
>>> num_busy_placement,
>> -			   domain | busy, flags);
>> +	set_placement_list(nvbo->busy_placements, &pl-
>>> num_busy_placement,
>> +			   domain | busy);
>>
>> 	set_placement_range(nvbo, domain);
>> }
>> @@ -888,7 +870,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object
>> *bo, bool evict,
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = TTM_PL_TT,
>> -		.flags = TTM_PL_MASK_CACHING
>> +		.flags = 0
>> 	};
>> 	struct ttm_placement placement;
>> 	struct ttm_resource tmp_reg;
>> @@ -930,7 +912,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object
>> *bo, bool evict,
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = TTM_PL_TT,
>> -		.flags = TTM_PL_MASK_CACHING
>> +		.flags = 0
>> 	};
>> 	struct ttm_placement placement;
>> 	struct ttm_resource tmp_reg;
>> diff --git a/drivers/gpu/drm/qxl/qxl_object.c
>> b/drivers/gpu/drm/qxl/qxl_object.c
>> index c8b67e7a3f02..5ba8aac00f5c 100644
>> --- a/drivers/gpu/drm/qxl/qxl_object.c
>> +++ b/drivers/gpu/drm/qxl/qxl_object.c
>> @@ -64,21 +64,21 @@ void qxl_ttm_placement_from_domain(struct qxl_bo
>> *qbo, u32 domain)
>> 	qbo->placement.busy_placement = qbo->placements;
>> 	if (domain == QXL_GEM_DOMAIN_VRAM) {
>> 		qbo->placements[c].mem_type = TTM_PL_VRAM;
>> -		qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | pflag;
>> +		qbo->placements[c++].flags = pflag;
>> 	}
>> 	if (domain == QXL_GEM_DOMAIN_SURFACE) {
>> 		qbo->placements[c].mem_type = TTM_PL_PRIV;
>> -		qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | pflag;
>> +		qbo->placements[c++].flags = pflag;
>> 		qbo->placements[c].mem_type = TTM_PL_VRAM;
>> -		qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | pflag;
>> +		qbo->placements[c++].flags = pflag;
>> 	}
>> 	if (domain == QXL_GEM_DOMAIN_CPU) {
>> 		qbo->placements[c].mem_type = TTM_PL_SYSTEM;
>> -		qbo->placements[c++].flags = TTM_PL_MASK_CACHING |
>> pflag;
>> +		qbo->placements[c++].flags = pflag;
>> 	}
>> 	if (!c) {
>> 		qbo->placements[c].mem_type = TTM_PL_SYSTEM;
>> -		qbo->placements[c++].flags = TTM_PL_MASK_CACHING;
>> +		qbo->placements[c++].flags = 0;
>> 	}
>> 	qbo->placement.num_placement = c;
>> 	qbo->placement.num_busy_placement = c;
>> diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
>> index e79d4df99790..d535e836be72 100644
>> --- a/drivers/gpu/drm/qxl/qxl_ttm.c
>> +++ b/drivers/gpu/drm/qxl/qxl_ttm.c
>> @@ -56,7 +56,7 @@ static void qxl_evict_flags(struct ttm_buffer_object *bo,
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = TTM_PL_SYSTEM,
>> -		.flags = TTM_PL_MASK_CACHING
>> +		.flags = 0
>> 	};
>>
>> 	if (!qxl_ttm_bo_is_qxl_bo(bo)) {
>> diff --git a/drivers/gpu/drm/radeon/radeon_object.c
>> b/drivers/gpu/drm/radeon/radeon_object.c
>> index 8c285eb118f9..a0b2c4541a2a 100644
>> --- a/drivers/gpu/drm/radeon/radeon_object.c
>> +++ b/drivers/gpu/drm/radeon/radeon_object.c
>> @@ -113,57 +113,29 @@ void radeon_ttm_placement_from_domain(struct
>> radeon_bo *rbo, u32 domain)
>> 			rbo->placements[c].fpfn =
>> 				rbo->rdev->mc.visible_vram_size >>
>> PAGE_SHIFT;
>> 			rbo->placements[c].mem_type = TTM_PL_VRAM;
>> -			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
>> -						     TTM_PL_FLAG_UNCACHED;
>> +			rbo->placements[c++].flags = 0;
>> 		}
>>
>> 		rbo->placements[c].fpfn = 0;
>> 		rbo->placements[c].mem_type = TTM_PL_VRAM;
>> -		rbo->placements[c++].flags = TTM_PL_FLAG_WC |
>> -					     TTM_PL_FLAG_UNCACHED;
>> +		rbo->placements[c++].flags = 0;
>> 	}
>>
>> 	if (domain & RADEON_GEM_DOMAIN_GTT) {
>> -		if (rbo->flags & RADEON_GEM_GTT_UC) {
>> -			rbo->placements[c].fpfn = 0;
>> -			rbo->placements[c].mem_type = TTM_PL_TT;
>> -			rbo->placements[c++].flags =
>> TTM_PL_FLAG_UNCACHED;
>> -
>> -		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
>> -			   (rbo->rdev->flags & RADEON_IS_AGP)) {
>> -			rbo->placements[c].fpfn = 0;
>> -			rbo->placements[c].mem_type = TTM_PL_TT;
>> -			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
>> -				TTM_PL_FLAG_UNCACHED;
>> -		} else {
>> -			rbo->placements[c].fpfn = 0;
>> -			rbo->placements[c].mem_type = TTM_PL_TT;
>> -			rbo->placements[c++].flags =
>> TTM_PL_FLAG_CACHED;
>> -		}
>> +		rbo->placements[c].fpfn = 0;
>> +		rbo->placements[c].mem_type = TTM_PL_TT;
>> +		rbo->placements[c++].flags = 0;
>> 	}
>>
>> 	if (domain & RADEON_GEM_DOMAIN_CPU) {
>> -		if (rbo->flags & RADEON_GEM_GTT_UC) {
>> -			rbo->placements[c].fpfn = 0;
>> -			rbo->placements[c].mem_type = TTM_PL_SYSTEM;
>> -			rbo->placements[c++].flags =
>> TTM_PL_FLAG_UNCACHED;
>> -
>> -		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
>> -		    rbo->rdev->flags & RADEON_IS_AGP) {
>> -			rbo->placements[c].fpfn = 0;
>> -			rbo->placements[c].mem_type = TTM_PL_SYSTEM;
>> -			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
>> -				TTM_PL_FLAG_UNCACHED;
>> -		} else {
>> -			rbo->placements[c].fpfn = 0;
>> -			rbo->placements[c].mem_type = TTM_PL_SYSTEM;
>> -			rbo->placements[c++].flags =
>> TTM_PL_FLAG_CACHED;
>> -		}
>> +		rbo->placements[c].fpfn = 0;
>> +		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
>> +		rbo->placements[c++].flags = 0;
>> 	}
>> 	if (!c) {
>> 		rbo->placements[c].fpfn = 0;
>> 		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
>> -		rbo->placements[c++].flags = TTM_PL_MASK_CACHING;
>> +		rbo->placements[c++].flags = 0;
>> 	}
>>
>> 	rbo->placement.num_placement = c;
>> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c
>> b/drivers/gpu/drm/radeon/radeon_ttm.c
>> index 9b53a1d80632..d6f42fbc81f4 100644
>> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
>> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
>> @@ -89,7 +89,7 @@ static void radeon_evict_flags(struct ttm_buffer_object
>> *bo,
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = TTM_PL_SYSTEM,
>> -		.flags = TTM_PL_MASK_CACHING
>> +		.flags = 0
>> 	};
>>
>> 	struct radeon_bo *rbo;
>> @@ -225,17 +225,12 @@ static int radeon_move_vram_ram(struct
>> ttm_buffer_object *bo,
>> 	placements.fpfn = 0;
>> 	placements.lpfn = 0;
>> 	placements.mem_type = TTM_PL_TT;
>> -	placements.flags = TTM_PL_MASK_CACHING;
>> +	placements.flags = 0;
>> 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
>> 	if (unlikely(r)) {
>> 		return r;
>> 	}
>>
>> -	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
>> -	if (unlikely(r)) {
>> -		goto out_cleanup;
>> -	}
>> -
>> 	r = ttm_tt_populate(bo->bdev, bo->ttm, ctx);
>> 	if (unlikely(r)) {
>> 		goto out_cleanup;
>> @@ -275,7 +270,7 @@ static int radeon_move_ram_vram(struct
>> ttm_buffer_object *bo,
>> 	placements.fpfn = 0;
>> 	placements.lpfn = 0;
>> 	placements.mem_type = TTM_PL_TT;
>> -	placements.flags = TTM_PL_MASK_CACHING;
>> +	placements.flags = 0;
>> 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx);
>> 	if (unlikely(r)) {
>> 		return r;
>> @@ -389,12 +384,7 @@ static int radeon_ttm_io_mem_reserve(struct
>> ttm_bo_device *bdev, struct ttm_reso
>> 		 * Alpha: use bus.addr to hold the ioremap() return,
>> 		 * so we can modify bus.base below.
>> 		 */
>> -		if (mem->placement & TTM_PL_FLAG_WC)
>> -			mem->bus.addr =
>> -				ioremap_wc(mem->bus.offset, bus_size);
>> -		else
>> -			mem->bus.addr =
>> -				ioremap(mem->bus.offset, bus_size);
>> +		mem->bus.addr = ioremap_wc(mem->bus.offset, bus_size);
> Why can you eliminate the ioremap() here?
>
> does this need to be:
>
> if mem->bus.caching instead?

Radeon always uses WC for VRAM. So the uncached ioremap() path should 
have never been used.

But on the other hand Alpha systems are out of production for quite some 
time now (20 years or so?).

So I would be more than happy to just drop support for this :)

>
>
>> 		if (!mem->bus.addr)
>> 			return -ENOMEM;
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c
>> b/drivers/gpu/drm/ttm/ttm_agp_backend.c
>> index a723062d37e7..4f76c9287159 100644
>> --- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
>> +++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
>> @@ -54,7 +54,7 @@ int ttm_agp_bind(struct ttm_tt *ttm, struct
>> ttm_resource *bo_mem)
>> 	struct page *dummy_read_page = ttm_bo_glob.dummy_read_page;
>> 	struct drm_mm_node *node = bo_mem->mm_node;
>> 	struct agp_memory *mem;
>> -	int ret, cached = (bo_mem->placement & TTM_PL_FLAG_CACHED);
>> +	int ret, cached = ttm->caching == ttm_cached;
>> 	unsigned i;
>>
>> 	if (agp_be->mem)
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index e11e8eaa6602..2fe4cbefde28 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -252,10 +252,6 @@ static int ttm_bo_handle_move_mem(struct
>> ttm_buffer_object *bo,
>> 		if (ret)
>> 			goto out_err;
>>
>> -		ret = ttm_tt_set_placement_caching(bo->ttm, mem-
>>> placement);
>> -		if (ret)
>> -			goto out_err;
>> -
>> 		if (mem->mem_type != TTM_PL_SYSTEM) {
>> 			ret = ttm_tt_populate(bdev, bo->ttm, ctx);
>> 			if (ret)
>> @@ -854,29 +850,6 @@ static int ttm_bo_mem_force_space(struct
>> ttm_buffer_object *bo,
>> 	return ttm_bo_add_move_fence(bo, man, mem, ctx->no_wait_gpu);
>> }
>>
>> -static uint32_t ttm_bo_select_caching(struct ttm_resource_manager *man,
>> -				      uint32_t cur_placement,
>> -				      uint32_t proposed_placement)
>> -{
>> -	uint32_t caching = proposed_placement & TTM_PL_MASK_CACHING;
>> -	uint32_t result = proposed_placement & ~TTM_PL_MASK_CACHING;
>> -
>> -	/**
>> -	 * Keep current caching if possible.
>> -	 */
>> -
>> -	if ((cur_placement & caching) != 0)
>> -		result |= (cur_placement & caching);
>> -	else if ((TTM_PL_FLAG_CACHED & caching) != 0)
>> -		result |= TTM_PL_FLAG_CACHED;
>> -	else if ((TTM_PL_FLAG_WC & caching) != 0)
>> -		result |= TTM_PL_FLAG_WC;
>> -	else if ((TTM_PL_FLAG_UNCACHED & caching) != 0)
>> -		result |= TTM_PL_FLAG_UNCACHED;
>> -
>> -	return result;
>> -}
>> -
>> /**
>>   * ttm_bo_mem_placement - check if placement is compatible
>>   * @bo: BO to find memory for
>> @@ -895,18 +868,13 @@ static int ttm_bo_mem_placement(struct
>> ttm_buffer_object *bo,
>> {
>> 	struct ttm_bo_device *bdev = bo->bdev;
>> 	struct ttm_resource_manager *man;
>> -	uint32_t cur_flags = 0;
>>
>> 	man = ttm_manager_type(bdev, place->mem_type);
>> 	if (!man || !ttm_resource_manager_used(man))
>> 		return -EBUSY;
>>
>> -	cur_flags = ttm_bo_select_caching(man, bo->mem.placement,
>> -					  place->flags);
>> -	cur_flags |= place->flags & ~TTM_PL_MASK_CACHING;
>> -
>> 	mem->mem_type = place->mem_type;
>> -	mem->placement = cur_flags;
>> +	mem->placement = place->flags;
>>
>> 	spin_lock(&ttm_bo_glob.lru_lock);
>> 	ttm_bo_del_from_lru(bo);
>> @@ -1039,8 +1007,7 @@ static bool ttm_bo_places_compat(const struct
>> ttm_place *places,
>> 			continue;
>>
>> 		*new_flags = heap->flags;
>> -		if ((*new_flags & mem->placement &
>> TTM_PL_MASK_CACHING) &&
>> -		    (mem->mem_type == heap->mem_type) &&
>> +		if ((mem->mem_type == heap->mem_type) &&
>> 		    (!(*new_flags & TTM_PL_FLAG_CONTIGUOUS) ||
>> 		     (mem->placement & TTM_PL_FLAG_CONTIGUOUS)))
>> 			return true;
>> @@ -1094,9 +1061,6 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
>> 		ret = ttm_bo_move_buffer(bo, placement, ctx);
>> 		if (ret)
>> 			return ret;
>> -	} else {
>> -		bo->mem.placement &= TTM_PL_MASK_CACHING;
>> -		bo->mem.placement |= new_flags &
>> ~TTM_PL_MASK_CACHING;
>> 	}
>> 	/*
>> 	 * We might need to add a TTM.
>> @@ -1164,7 +1128,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device
>> *bdev,
>> 	bo->mem.bus.offset = 0;
>> 	bo->mem.bus.addr = NULL;
>> 	bo->moving = NULL;
>> -	bo->mem.placement = TTM_PL_FLAG_CACHED;
>> +	bo->mem.placement = 0;
>> 	bo->acc_size = acc_size;
>> 	bo->pin_count = 0;
>> 	bo->sg = sg;
>> @@ -1512,7 +1476,7 @@ int ttm_bo_swapout(struct ttm_bo_global *glob,
>> struct ttm_operation_ctx *ctx)
>>
>> 		evict_mem = bo->mem;
>> 		evict_mem.mm_node = NULL;
>> -		evict_mem.placement = TTM_PL_MASK_CACHING;
>> +		evict_mem.placement = 0;
>> 		evict_mem.mem_type = TTM_PL_SYSTEM;
>>
>> 		ret = ttm_bo_handle_move_mem(bo, &evict_mem, true,
>> &ctx);
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c
>> b/drivers/gpu/drm/ttm/ttm_bo_util.c
>> index 0542097dc419..ba7ab5ed85d0 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
>> @@ -72,10 +72,6 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
>> 		old_mem->mem_type = TTM_PL_SYSTEM;
>> 	}
>>
>> -	ret = ttm_tt_set_placement_caching(ttm, new_mem->placement);
>> -	if (unlikely(ret != 0))
>> -		return ret;
>> -
>> 	if (new_mem->mem_type != TTM_PL_SYSTEM) {
>>
>> 		ret = ttm_tt_populate(bo->bdev, ttm, ctx);
>> @@ -135,7 +131,7 @@ static int ttm_resource_ioremap(struct ttm_bo_device
>> *bdev,
>> 	} else {
>> 		size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
>>
>> -		if (mem->placement & TTM_PL_FLAG_WC)
>> +		if (mem->bus.caching == ttm_write_combined)
>> 			addr = ioremap_wc(mem->bus.offset, bus_size);
>> 		else
>> 			addr = ioremap(mem->bus.offset, bus_size);
>> @@ -427,7 +423,7 @@ static int ttm_bo_ioremap(struct ttm_buffer_object
>> *bo,
>> 		map->virtual = (void *)(((u8 *)bo->mem.bus.addr) + offset);
>> 	} else {
>> 		map->bo_kmap_type = ttm_bo_map_iomap;
>> -		if (mem->placement & TTM_PL_FLAG_WC)
>> +		if (mem->bus.caching == ttm_write_combined)
>> 			map->virtual = ioremap_wc(bo->mem.bus.offset +
>> offset,
>> 						  size);
>> 		else
>> @@ -457,7 +453,7 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object
>> *bo,
>> 	if (ret)
>> 		return ret;
>>
>> -	if (num_pages == 1 && (mem->placement &
>> TTM_PL_FLAG_CACHED)) {
>> +	if (num_pages == 1 && ttm->caching == ttm_cached) {
>> 		/*
>> 		 * We're mapping a single page, and the desired
>> 		 * page protection is consistent with the bo.
>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>> index a465f51df027..3e5dd6271d4c 100644
>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
>> @@ -114,35 +114,6 @@ static int ttm_sg_tt_alloc_page_directory(struct
>> ttm_dma_tt *ttm)
>> 	return 0;
>> }
>>
>> -static int ttm_tt_set_caching(struct ttm_tt *ttm, enum ttm_caching caching)
>> -{
>> -	if (ttm->caching == caching)
>> -		return 0;
>> -
>> -	/* Can't change the caching state after TT is populated */
>> -	if (WARN_ON_ONCE(ttm_tt_is_populated(ttm)))
>> -		return -EINVAL;
>> -
>> -	ttm->caching = caching;
>> -
>> -	return 0;
>> -}
>> -
>> -int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement)
>> -{
>> -	enum ttm_caching state;
>> -
>> -	if (placement & TTM_PL_FLAG_WC)
>> -		state = ttm_write_combined;
>> -	else if (placement & TTM_PL_FLAG_UNCACHED)
>> -		state = ttm_uncached;
>> -	else
>> -		state = ttm_cached;
>> -
>> -	return ttm_tt_set_caching(ttm, state);
>> -}
>> -EXPORT_SYMBOL(ttm_tt_set_placement_caching);
>> -
>> void ttm_tt_destroy_common(struct ttm_bo_device *bdev, struct ttm_tt
>> *ttm)
>> {
>> 	ttm_tt_unpopulate(bdev, ttm);
>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
>> b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
>> index 2a7b5f964776..975d06c2e35e 100644
>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
>> @@ -34,28 +34,28 @@ static const struct ttm_place vram_placement_flags = {
>> 	.fpfn = 0,
>> 	.lpfn = 0,
>> 	.mem_type = TTM_PL_VRAM,
>> -	.flags = TTM_PL_FLAG_CACHED
>> +	.flags = 0
>> };
>>
>> static const struct ttm_place sys_placement_flags = {
>> 	.fpfn = 0,
>> 	.lpfn = 0,
>> 	.mem_type = TTM_PL_SYSTEM,
>> -	.flags = TTM_PL_FLAG_CACHED
>> +	.flags = 0
>> };
>>
>> static const struct ttm_place gmr_placement_flags = {
>> 	.fpfn = 0,
>> 	.lpfn = 0,
>> 	.mem_type = VMW_PL_GMR,
>> -	.flags = TTM_PL_FLAG_CACHED
>> +	.flags = 0
>> };
>>
>> static const struct ttm_place mob_placement_flags = {
>> 	.fpfn = 0,
>> 	.lpfn = 0,
>> 	.mem_type = VMW_PL_MOB,
>> -	.flags = TTM_PL_FLAG_CACHED
>> +	.flags = 0
>> };
>>
>> struct ttm_placement vmw_vram_placement = {
>> @@ -70,12 +70,12 @@ static const struct ttm_place
>> vram_gmr_placement_flags[] = {
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = TTM_PL_VRAM,
>> -		.flags = TTM_PL_FLAG_CACHED
>> +		.flags = 0
>> 	}, {
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = VMW_PL_GMR,
>> -		.flags = TTM_PL_FLAG_CACHED
>> +		.flags = 0
>> 	}
>> };
>>
>> @@ -84,12 +84,12 @@ static const struct ttm_place
>> gmr_vram_placement_flags[] = {
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = VMW_PL_GMR,
>> -		.flags = TTM_PL_FLAG_CACHED
>> +		.flags = 0
>> 	}, {
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = TTM_PL_VRAM,
>> -		.flags = TTM_PL_FLAG_CACHED
>> +		.flags = 0
>> 	}
>> };
>>
>> @@ -119,22 +119,22 @@ static const struct ttm_place
>> evictable_placement_flags[] = {
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = TTM_PL_SYSTEM,
>> -		.flags = TTM_PL_FLAG_CACHED
>> +		.flags = 0
>> 	}, {
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = TTM_PL_VRAM,
>> -		.flags = TTM_PL_FLAG_CACHED
>> +		.flags = 0
>> 	}, {
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = VMW_PL_GMR,
>> -		.flags = TTM_PL_FLAG_CACHED
>> +		.flags = 0
>> 	}, {
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = VMW_PL_MOB,
>> -		.flags = TTM_PL_FLAG_CACHED
>> +		.flags = 0
>> 	}
>> };
>>
>> @@ -143,17 +143,17 @@ static const struct ttm_place
>> nonfixed_placement_flags[] = {
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = TTM_PL_SYSTEM,
>> -		.flags = TTM_PL_FLAG_CACHED
>> +		.flags = 0
>> 	}, {
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = VMW_PL_GMR,
>> -		.flags = TTM_PL_FLAG_CACHED
>> +		.flags = 0
>> 	}, {
>> 		.fpfn = 0,
>> 		.lpfn = 0,
>> 		.mem_type = VMW_PL_MOB,
>> -		.flags = TTM_PL_FLAG_CACHED
>> +		.flags = 0
>> 	}
>> };
>>
>> diff --git a/include/drm/ttm/ttm_placement.h
>> b/include/drm/ttm/ttm_placement.h
>> index 50e72df48b8d..aa6ba4d0cf78 100644
>> --- a/include/drm/ttm/ttm_placement.h
>> +++ b/include/drm/ttm/ttm_placement.h
>> @@ -43,27 +43,13 @@
>> #define TTM_PL_PRIV             3
>>
>> /*
>> - * Other flags that affects data placement.
>> - * TTM_PL_FLAG_CACHED indicates cache-coherent mappings
>> - * if available.
>> - * TTM_PL_FLAG_SHARED means that another application may
>> - * reference the buffer.
>> - * TTM_PL_FLAG_NO_EVICT means that the buffer may never
>> - * be evicted to make room for other buffers.
>>   * TTM_PL_FLAG_TOPDOWN requests to be placed from the
>>   * top of the memory area, instead of the bottom.
>>   */
>>
>> -#define TTM_PL_FLAG_CACHED      (1 << 16)
>> -#define TTM_PL_FLAG_UNCACHED    (1 << 17)
>> -#define TTM_PL_FLAG_WC          (1 << 18)
>> #define TTM_PL_FLAG_CONTIGUOUS  (1 << 19)
>> #define TTM_PL_FLAG_TOPDOWN     (1 << 22)
> Do these bit shifts mean anything anymore?

Nope, want to turn them into fields in the placement or encode them 
somehow else.

So I didn't bother renumbering them here.

>
> This series is a nice cleanup.  For me it makes the caching mechanism a lot more clear.
>
> If the removal of the ioremap() (comment above) is ok,:
>
> Reviewed-by: Michael J. Ruhl <michael.j.ruhl at intel.com>

Thanks,
Christian.

>
> m
>
>> -#define TTM_PL_MASK_CACHING     (TTM_PL_FLAG_CACHED | \
>> -				 TTM_PL_FLAG_UNCACHED | \
>> -				 TTM_PL_FLAG_WC)
>> -
>> /**
>>   * struct ttm_place
>>   *
>> diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
>> index c39c722d5184..e042dec5e6c1 100644
>> --- a/include/drm/ttm/ttm_tt.h
>> +++ b/include/drm/ttm/ttm_tt.h
>> @@ -164,21 +164,6 @@ void ttm_tt_destroy_common(struct ttm_bo_device
>> *bdev, struct ttm_tt *ttm);
>>   * Swap in a previously swap out ttm_tt.
>>   */
>> int ttm_tt_swapin(struct ttm_tt *ttm);
>> -
>> -/**
>> - * ttm_tt_set_placement_caching:
>> - *
>> - * @ttm A struct ttm_tt the backing pages of which will change caching policy.
>> - * @placement: Flag indicating the desired caching policy.
>> - *
>> - * This function will change caching policy of any default kernel mappings of
>> - * the pages backing @ttm. If changing from cached to uncached or
>> - * write-combined,
>> - * all CPU caches will first be flushed to make sure the data of the pages
>> - * hit RAM. This function may be very costly as it involves global TLB
>> - * and cache flushes and potential page splitting / combining.
>> - */
>> -int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement);
>> int ttm_tt_swapout(struct ttm_bo_device *bdev, struct ttm_tt *ttm);
>>
>> /**
>> --
>> 2.17.1
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel