[PATCH] drm/ttm: WIP limit the TTM pool to 32bit CPUs
Matthew Auld
matthew.auld at intel.com
Tue Aug 12 10:36:04 UTC 2025
On 06/08/2025 14:28, Christian König wrote:
> On some old x86 systems we had the problem that changing the caching flags
> of system memory required changing the global MTRR/PAT tables.
>
> But on any modern x86 system (CPUs introduced roughly after 2004) we
> don't actually need that anymore and can update the caching flags
> directly in the PTEs of the CPU mapping. The old path was just never
> disabled because of the fear of regressions.
>
> We have already been using the PTE flags for encryption on x86 64bit for
> quite a while, and all other supported platforms (SPARC, PowerPC, ARM,
> MIPS, LoongArch) have never done anything different either.
>
> So disable the page pool completely for 64bit systems and just insert a
> clflush to be on the safe side so that we never return memory with dirty
> cache lines.
>
> Testing on a Ryzen 5 and 7 shows that this has absolutely no performance
> impact, and offhand the AMD CI can't find a problem either.
>
> Let's see what the i915 and XE CI systems say to that.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/ttm/ttm_pool.c | 16 +++++++++++-----
> 1 file changed, 11 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index baf27c70a419..7487eac29398 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -38,7 +38,7 @@
> #include <linux/highmem.h>
> #include <linux/sched/mm.h>
>
> -#ifdef CONFIG_X86
> +#ifdef CONFIG_X86_32
> #include <asm/set_memory.h>
> #endif
>
> @@ -46,6 +46,7 @@
> #include <drm/ttm/ttm_pool.h>
> #include <drm/ttm/ttm_tt.h>
> #include <drm/ttm/ttm_bo.h>
> +#include <drm/drm_cache.h>
>
> #include "ttm_module.h"
>
> @@ -192,7 +193,7 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
> struct ttm_pool_dma *dma;
> void *vaddr;
>
> -#ifdef CONFIG_X86
> +#ifdef CONFIG_X86_32
> /* We don't care that set_pages_wb is inefficient here. This is only
> * used when we have to shrink and CPU overhead is irrelevant then.
> */
> @@ -218,7 +219,7 @@ static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
> /* Apply any cpu-caching deferred during page allocation */
> static int ttm_pool_apply_caching(struct ttm_pool_alloc_state *alloc)
> {
> -#ifdef CONFIG_X86
> +#ifdef CONFIG_X86_32
> unsigned int num_pages = alloc->pages - alloc->caching_divide;
>
> if (!num_pages)
> @@ -232,6 +233,11 @@ static int ttm_pool_apply_caching(struct ttm_pool_alloc_state *alloc)
> case ttm_uncached:
> return set_pages_array_uc(alloc->caching_divide, num_pages);
> }
> +
> +#elif defined(CONFIG_X86_64)
> + unsigned int num_pages = alloc->pages - alloc->caching_divide;
> +
> + drm_clflush_pages(alloc->caching_divide, num_pages);
Do we now also need manual clflushing for things like swap-in? What
caching type does the copy_highpage() in ttm_tt_swapin() use now? Is it
not wb? If so, is that not one of the nice advantages of using
set_pages_*, where a raw kmap will respect the PAT setting?
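
To make the concern concrete, here is roughly the shape of the swap-in
copy loop as I read it (needs <linux/shmem_fs.h>, <linux/highmem.h> and
<drm/drm_cache.h>); the flush at the end is made up by me, purely to
illustrate what I'd expect to be needed if the answer is wb:

	/*
	 * Illustrative sketch only: the loop follows ttm_tt_swapin(),
	 * the final flush is the hypothetical part. copy_highpage()
	 * goes through kmap_local_page(), which on x86-64 is just the
	 * cached linear mapping, so the copied lines sit dirty in the
	 * cache afterwards.
	 */
	struct address_space *swap_space = ttm->swap_storage->f_mapping;
	struct page *from_page, *to_page;
	pgoff_t i;

	for (i = 0; i < ttm->num_pages; ++i) {
		from_page = shmem_read_mapping_page(swap_space, i);
		if (IS_ERR(from_page))
			goto out_err;
		to_page = ttm->pages[i];
		copy_highpage(to_page, from_page); /* wb write on x86-64 */
		put_page(from_page);
	}

	/*
	 * Hypothetical addition: push the dirty lines out before the
	 * pages are handed back to a wc/uc consumer, mirroring what
	 * this patch does at allocation time.
	 */
	if (ttm->caching != ttm_cached)
		drm_clflush_pages(ttm->pages, ttm->num_pages);
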
> #endif
> alloc->caching_divide = alloc->pages;
> return 0;
> @@ -342,7 +348,7 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
> if (pool->use_dma_alloc)
> return &pool->caching[caching].orders[order];
>
> -#ifdef CONFIG_X86
> +#ifdef CONFIG_X86_32
> switch (caching) {
> case ttm_write_combined:
> if (pool->nid != NUMA_NO_NODE)
> @@ -980,7 +986,7 @@ long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt,
> pool->use_dma_alloc || ttm_tt_is_backed_up(tt))
> return -EBUSY;
>
> -#ifdef CONFIG_X86
> +#ifdef CONFIG_X86_32
> /* Anything returned to the system needs to be cached. */
> if (tt->caching != ttm_cached)
> set_pages_array_wb(tt->pages, tt->num_pages);
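
Presumably the same question applies in the swap-out direction here:
with set_pages_array_wb() compiled out on 64bit, the backup copy also
goes through the cached linear mapping, against pages that may have
been written through a wc alias in the meantime. Something like this
(untested sketch, both the placement and the #ifdef are my guess)
would be the conservative equivalent:

	#ifdef CONFIG_X86_64
		/*
		 * Sketch only: drm_clflush_pages() writes back and
		 * invalidates, so stale lines cached via the wb alias
		 * can't shadow data written through a wc mapping.
		 */
		if (tt->caching != ttm_cached)
			drm_clflush_pages(tt->pages, tt->num_pages);
	#endif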