[PATCH 4/6] shmem shrinker
Matthew Auld
matthew.auld at intel.com
Fri Aug 20 17:49:54 UTC 2021
---
drivers/gpu/drm/i915/gem/i915_gem_object.h | 9 ++
drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 17 ++-
drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 151 ++++++++++++++++++++-
3 files changed, 162 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 2d915f0ed84c..bfc5b16d12f9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -614,6 +614,15 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
enum intel_memory_type type);
+struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
+ size_t size, struct intel_memory_region *mr,
+ struct address_space *mapping,
+ unsigned int max_segment);
+void shmem_free_st(struct sg_table *st, struct address_space *mapping,
+ bool dirty, bool backup);
+void shmem_truncate(struct drm_i915_gem_object *obj);
+void __shmem_writeback(size_t size, struct address_space *mapping);
+
#ifdef CONFIG_MMU_NOTIFIER
static inline bool
i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 093fa2fa815c..b063b78bca54 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,8 +25,8 @@ static void check_release_pagevec(struct pagevec *pvec)
cond_resched();
}
-static void shmem_free_st(struct sg_table *st, struct address_space *mapping,
- bool dirty, bool backup)
+void shmem_free_st(struct sg_table *st, struct address_space *mapping,
+ bool dirty, bool backup)
{
struct sgt_iter sgt_iter;
struct pagevec pvec;
@@ -52,10 +52,10 @@ static void shmem_free_st(struct sg_table *st, struct address_space *mapping,
kfree(st);
}
-static struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
- size_t size, struct intel_memory_region *mr,
- struct address_space *mapping,
- unsigned int max_segment)
+struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
+ size_t size, struct intel_memory_region *mr,
+ struct address_space *mapping,
+ unsigned int max_segment)
{
const unsigned long page_count = size / PAGE_SIZE;
unsigned long i;
@@ -287,8 +287,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
return ret;
}
-static void
-shmem_truncate(struct drm_i915_gem_object *obj)
+void shmem_truncate(struct drm_i915_gem_object *obj)
{
/*
* Our goal here is to return as much of the memory as
@@ -301,7 +300,7 @@ shmem_truncate(struct drm_i915_gem_object *obj)
obj->mm.pages = ERR_PTR(-EFAULT);
}
-static void __shmem_writeback(size_t size, struct address_space *mapping)
+void __shmem_writeback(size_t size, struct address_space *mapping)
{
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 771eb2963123..868540799f4c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -48,6 +48,9 @@ struct i915_ttm_tt {
struct ttm_tt ttm;
struct device *dev;
struct sg_table *cached_st;
+
+ struct file *filp;
+ struct drm_i915_gem_object *obj;
};
static const struct ttm_place sys_placement_flags = {
@@ -167,6 +170,86 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
placement->busy_placement = busy;
}
+/**
+ * i915_ttm_tt_shmem_populate - fill a ttm_tt with pages from its shmem file
+ * @bdev: TTM device; must be the one embedded in a drm_i915_private.
+ * @ttm: tt to populate; embedded in an i915_ttm_tt whose ->filp is set.
+ * @ctx: TTM operation context (not used by this function).
+ *
+ * Builds an sg_table with shmem_alloc_st() from the file's address space,
+ * maps it for DMA on i915_tt->dev, copies the pages into ttm->pages[] and
+ * stores the table in i915_tt->cached_st. TTM_PAGE_FLAG_SWAPPED is cleared.
+ *
+ * Return: 0 on success, otherwise the error reported by shmem_alloc_st()
+ * or dma_map_sgtable(). On failure the sg_table is released again and
+ * cached_st is left untouched.
+ */
+static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
+{
+ struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
+ struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ const unsigned int max_segment = i915_sg_segment_size();
+ /*
+ * Widen before shifting so the byte size cannot wrap when num_pages
+ * is a 32-bit field (NOTE(review): confirm against struct ttm_tt).
+ */
+ const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
+ struct file *filp = i915_tt->filp;
+ struct sgt_iter sgt_iter;
+ struct sg_table *st;
+ struct page *page;
+ unsigned long i;
+ int err;
+
+ st = shmem_alloc_st(i915, size, mr, filp->f_mapping, max_segment);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
+ if (err)
+ goto err_free_st;
+
+ /* Mirror the sg_table pages into the flat array TTM works with. */
+ i = 0;
+ for_each_sgt_page(page, sgt_iter, st)
+ ttm->pages[i++] = page;
+
+ if (ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
+ ttm->page_flags &= ~TTM_PAGE_FLAG_SWAPPED;
+
+ i915_tt->cached_st = st;
+
+ return 0;
+
+err_free_st:
+ /* Nothing was written through the mapping: not dirty, no backup. */
+ shmem_free_st(st, filp->f_mapping, false, false);
+ return err;
+}
+
+/**
+ * i915_ttm_tt_shmem_unpopulate - release the shmem pages behind a ttm_tt
+ * @ttm: tt embedded in an i915_ttm_tt with ->filp, ->obj and ->cached_st set.
+ *
+ * Unmaps and frees i915_tt->cached_st. When the object is still marked
+ * I915_MADV_WILLNEED the pages are kept in the shmem file and the tt is
+ * flagged TTM_PAGE_FLAG_SWAPPED; otherwise the whole file is truncated.
+ * The caller is expected to reset i915_tt->cached_st afterwards.
+ */
+static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ struct drm_i915_gem_object *obj = i915_tt->obj;
+ bool backup = obj->mm.madv == I915_MADV_WILLNEED;
+
+ /*
+ * Pair with the dma_map_sgtable() done when populating. shmem_free_st()
+ * is not handed a device, so it cannot tear the mapping down itself.
+ */
+ dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
+ DMA_BIDIRECTIONAL, 0);
+
+ shmem_free_st(i915_tt->cached_st, i915_tt->filp->f_mapping,
+ obj->mm.dirty, backup);
+
+ if (backup)
+ ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
+ else
+ shmem_truncate_range(file_inode(i915_tt->filp), 0, (loff_t)-1);
+}
+
+/*
+ * Set up the shmem file that backs @i915_tt, sized to the GEM object behind
+ * @bo. On success the tt owns the file and holds a reference on the object;
+ * i915_ttm_tt_shmem_destroy() drops both.
+ *
+ * NOTE(review): the tt pins the object for its whole lifetime - confirm this
+ * cannot keep the object alive when the tt is only destroyed on object free.
+ *
+ * Returns 0, or the error from shmem_file_setup() with @i915_tt untouched.
+ */
+static int i915_ttm_tt_shmem_create(struct ttm_buffer_object *bo,
+ struct i915_ttm_tt *i915_tt)
+{
+ struct drm_i915_gem_object *gem = i915_ttm_to_gem(bo);
+ struct file *backing =
+ shmem_file_setup("i915-shmem-tt", gem->base.size, VM_NORESERVE);
+
+ if (IS_ERR(backing))
+ return PTR_ERR(backing);
+
+ i915_tt->filp = backing;
+ i915_tt->obj = i915_gem_object_get(gem);
+
+ return 0;
+}
+
+/*
+ * Release what i915_ttm_tt_shmem_create() acquired: the GEM object
+ * reference first, then the shmem file. @bdev is not used.
+ */
+static void i915_ttm_tt_shmem_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
+{
+ struct i915_ttm_tt *tt = container_of(ttm, struct i915_ttm_tt, ttm);
+
+ i915_gem_object_put(tt->obj);
+ fput(tt->filp);
+}
+
static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
uint32_t page_flags)
{
@@ -186,34 +269,63 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags,
i915_ttm_select_tt_caching(obj));
- if (ret) {
- kfree(i915_tt);
- return NULL;
+ if (ret)
+ goto err_free;
+
+ if (i915_gem_object_is_shrinkable(obj)) {
+ ret = i915_ttm_tt_shmem_create(bo, i915_tt);
+ if (ret)
+ goto err_tt_fini;
}
i915_tt->dev = obj->base.dev->dev;
return &i915_tt->ttm;
+
+err_tt_fini:
+ ttm_tt_fini(&i915_tt->ttm);
+err_free:
+ kfree(i915_tt);
+ return NULL;
+}
+
+/*
+ * .ttm_tt_populate hook. A tt that carries a shmem file is filled from that
+ * file; every other tt takes its pages from the device's TTM pool.
+ * Returns whatever the chosen backend returns.
+ */
+static int i915_ttm_tt_populate(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
+{
+ struct i915_ttm_tt *tt = container_of(ttm, struct i915_ttm_tt, ttm);
+
+ /* No shmem file attached: plain pool allocation. */
+ if (!tt->filp)
+ return ttm_pool_alloc(&bdev->pool, ttm, ctx);
+
+ return i915_ttm_tt_shmem_populate(bdev, ttm, ctx);
}
+/*
+ * .ttm_tt_unpopulate hook. shmem-backed tts (->filp set) are torn down by
+ * i915_ttm_tt_shmem_unpopulate(). For pool-backed tts the cached sg_table,
+ * if one exists, is unmapped and freed before the pages go back to the
+ * TTM pool. cached_st is reset in both cases.
+ */
static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
{
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
- if (i915_tt->cached_st) {
- dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
- DMA_BIDIRECTIONAL, 0);
- sg_free_table(i915_tt->cached_st);
- kfree(i915_tt->cached_st);
- i915_tt->cached_st = NULL;
+ if (i915_tt->filp) {
+ i915_ttm_tt_shmem_unpopulate(ttm);
+ } else {
+ /*
+ * Keep the NULL check the old code had: a pool-backed tt may
+ * reach here without a cached sg_table.
+ */
+ if (i915_tt->cached_st) {
+ dma_unmap_sgtable(i915_tt->dev, i915_tt->cached_st,
+ DMA_BIDIRECTIONAL, 0);
+ sg_free_table(i915_tt->cached_st);
+ kfree(i915_tt->cached_st);
+ }
+
+ /* Give the pages back only once the DMA mapping is gone. */
+ ttm_pool_free(&bdev->pool, ttm);
}
- ttm_pool_free(&bdev->pool, ttm);
+
+ i915_tt->cached_st = NULL;
}
static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
{
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ if (i915_tt->filp)
+ i915_ttm_tt_shmem_destroy(bdev, ttm);
+
ttm_tt_destroy_common(bdev, ttm);
ttm_tt_fini(ttm);
kfree(i915_tt);
@@ -317,6 +429,29 @@ static void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
i915_gem_object_set_cache_coherency(obj, cache_level);
}
+/*
+ * .writeback hook: push a shmem-backed object's pages towards swap.
+ * Does nothing when the bo has no tt or the tt has no shmem file. Otherwise
+ * the bo is validated against an empty placement (see "Unpopulate" below)
+ * and, if that succeeds, writeback is started on the file's mapping for the
+ * full object size. Validation errors are silently dropped.
+ */
+static void i915_ttm_writeback(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+ struct ttm_placement place = {};
+ struct i915_ttm_tt *i915_tt;
+
+ if (!bo->ttm)
+ return;
+
+ i915_tt = container_of(bo->ttm, typeof(*i915_tt), ttm);
+ if (!i915_tt->filp)
+ return;
+
+ GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED);
+
+ /* Unpopulate */
+ if (ttm_bo_validate(bo, &place, &ctx))
+ return;
+
+ __shmem_writeback(obj->base.size, i915_tt->filp->f_mapping);
+}
+
static void i915_ttm_purge(struct drm_i915_gem_object *obj)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
@@ -596,6 +731,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
static struct ttm_device_funcs i915_ttm_bo_driver = {
.ttm_tt_create = i915_ttm_tt_create,
+ .ttm_tt_populate = i915_ttm_tt_populate,
.ttm_tt_unpopulate = i915_ttm_tt_unpopulate,
.ttm_tt_destroy = i915_ttm_tt_destroy,
.eviction_valuable = i915_ttm_eviction_valuable,
@@ -865,9 +1001,12 @@ static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
.get_pages = i915_ttm_get_pages,
.put_pages = i915_ttm_put_pages,
.truncate = i915_ttm_purge,
+ .writeback = i915_ttm_writeback,
+
.adjust_lru = i915_ttm_adjust_lru,
.delayed_free = i915_ttm_delayed_free,
.migrate = i915_ttm_migrate,
+
.mmap_offset = i915_ttm_mmap_offset,
.mmap_ops = &vm_ops_ttm,
};
--
2.26.3
More information about the Intel-gfx-trybot
mailing list