[PATCH] drm/ttm: stop pooling cached NUMA pages v2
Felix Kuehling
felix.kuehling at amd.com
Mon Apr 15 13:53:48 UTC 2024
On 2024-04-15 9:48, Christian König wrote:
> From: Christian König <ckoenig.leichtzumerken at gmail.com>
>
> We only pool write-combined and uncached allocations because they
> incur extra overhead on allocation and release.
>
> If we also pool cached NUMA allocations, it not only means some extra
> unnecessary overhead, but also that under memory pressure pages from
> the wrong NUMA node can enter the pool and be re-used over and over
> again.
>
> This can lead to reduced performance after the system has run into
> memory pressure.
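
(Side note for context, not part of the patch: the "extra overhead" is
the x86 page-attribute change WC/UC pages need on the way out of and
back into the page allocator. A rough sketch below; the
example_apply_caching() helper is made up for illustration, only the
set_pages_array_*() calls are the real x86 set_memory API:)

#ifdef CONFIG_X86
static int example_apply_caching(struct page **pages,
				 unsigned int num_pages,
				 enum ttm_caching caching)
{
	switch (caching) {
	case ttm_write_combined:
		/* rewrite the kernel mapping attributes, flush TLBs */
		return set_pages_array_wc(pages, num_pages);
	case ttm_uncached:
		return set_pages_array_uc(pages, num_pages);
	default:
		/* ttm_cached: nothing to do, hence nothing worth pooling */
		return 0;
	}
}
#endif

Before such pages can go back to the page allocator the attributes have
to be restored (set_pages_array_wb()), so keeping them in a pool avoids
paying that cost twice per allocation; cached pages get no such benefit.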
>
> v2: restructure and clean up the code a bit compared to the internal
> hack used to test this.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> Fixes: 4482d3c94d7f ("drm/ttm: add NUMA node id to the pool")
> CC: stable at vger.kernel.org
> ---
> drivers/gpu/drm/ttm/ttm_pool.c | 38 +++++++++++++++++++++++++---------
> 1 file changed, 28 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index 112438d965ff..6e1fd6985ffc 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -288,17 +288,23 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
> enum ttm_caching caching,
> unsigned int order)
> {
> - if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
> + if (pool->use_dma_alloc)
> return &pool->caching[caching].orders[order];
>
> #ifdef CONFIG_X86
> switch (caching) {
> case ttm_write_combined:
> + if (pool->nid != NUMA_NO_NODE)
> + return &pool->caching[caching].orders[order];
Doesn't this break USWC allocations on NUMA systems, where we set a NUMA
node for the default pool (at least we were planning to at some point)?
Regards,
Felix
> +
> if (pool->use_dma32)
> return &global_dma32_write_combined[order];
>
> return &global_write_combined[order];
> case ttm_uncached:
> + if (pool->nid != NUMA_NO_NODE)
> + return &pool->caching[caching].orders[order];
> +
> if (pool->use_dma32)
> return &global_dma32_uncached[order];
>
> @@ -566,11 +572,17 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
> pool->use_dma_alloc = use_dma_alloc;
> pool->use_dma32 = use_dma32;
>
> - if (use_dma_alloc || nid != NUMA_NO_NODE) {
> - for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> - for (j = 0; j < NR_PAGE_ORDERS; ++j)
> - ttm_pool_type_init(&pool->caching[i].orders[j],
> - pool, i, j);
> + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
> + for (j = 0; j < NR_PAGE_ORDERS; ++j) {
> + struct ttm_pool_type *pt;
> +
> + /* Initialize only pool types which are actually used */
> + pt = ttm_pool_select_type(pool, i, j);
> + if (pt != &pool->caching[i].orders[j])
> + continue;
> +
> + ttm_pool_type_init(pt, pool, i, j);
> + }
> }
> }
> EXPORT_SYMBOL(ttm_pool_init);
> @@ -599,10 +611,16 @@ void ttm_pool_fini(struct ttm_pool *pool)
> {
> unsigned int i, j;
>
> - if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
> - for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> - for (j = 0; j < NR_PAGE_ORDERS; ++j)
> - ttm_pool_type_fini(&pool->caching[i].orders[j]);
> + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
> + for (j = 0; j < NR_PAGE_ORDERS; ++j) {
> + struct ttm_pool_type *pt;
> +
> + pt = ttm_pool_select_type(pool, i, j);
> + if (pt != &pool->caching[i].orders[j])
> + continue;
> +
> + ttm_pool_type_fini(pt);
> + }
> }
>
> /* We removed the pool types from the LRU, but we need to also make sure
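
As I read the end result (an illustration only, not code from the
patch; example_cached_numa_alloc() is a made-up helper and the fallback
path is paraphrased from how the pool handles a NULL pool type): a
cached pool that only has a NUMA node set no longer gets any pool type,
so its pages bypass pooling entirely:

/* ttm_pool_select_type() now returns NULL for ttm_cached even when
 * pool->nid is set, so allocation falls back to the page allocator on
 * the requested node and freeing returns the page immediately - stale
 * pages from another node can no longer be recycled.
 */
static struct page *example_cached_numa_alloc(struct ttm_pool *pool,
					      gfp_t gfp, unsigned int order)
{
	return alloc_pages_node(pool->nid, gfp, order);
}

That keeps pooling only where the page-attribute overhead justifies it;
the WC/UC NUMA cases retain their per-pool, per-node lists.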