[Intel-gfx] [PATCH] drm/i915: Migrate stolen objects before hibernation

Chris Wilson chris at chris-wilson.co.uk
Tue Jun 30 02:58:08 PDT 2015


Ville reminded us that stolen memory is not preserved across
hibernation, and as a result the context objects now being allocated
from stolen were being corrupted on S4 and promptly hanging the GPU on
resume.

We want to utilise stolen for as much as possible (nothing else will use
that wasted memory otherwise), so we need a strategy for handling
general objects allocated from stolen across hibernation. A simple
solution is to do a CPU copy through the GTT of the stolen object into a
fresh shmemfs backing store and thenceforth treat it as a normal object.
This can be refined in future to either use a GPU copy to avoid the slow
uncached reads (though it's hibernation!) or to recreate stolen objects
upon resume/first-use. For now, a simple approach should suffice for
testing the object migration.
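
In outline (a condensed sketch of the i915_gem_freeze() path added by
this patch, with error handling and the purged-marking of volatile
objects elided), the freeze hook walks every object still resident in
stolen and migrates the ones flagged as needing preservation:

	list_for_each_entry_safe(obj, tmp, &i915->mm.stolen_list, stolen_link) {
		if (obj->madv != I915_MADV_WILLNEED)
			continue;	/* volatile contents may be lost */

		/* copy each stolen page through a temporary GTT PTE into a
		 * fresh shmemfs file, then swap the object's backing store
		 * and ops over to the normal shmemfs-backed ones */
		ret = i915_gem_object_migrate_stolen_to_shmemfs(obj);
		if (ret)
			return ret;
	}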

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Ville Syrjala <ville.syrjala at linux.intel.com>
Cc: Akash Goel <akash.goel at intel.com>
Cc: Deepak S <deepak.s at linux.intel.com>
Cc: Imre Deak <imre.deak at linux.intel.com>
---
This uses a few constructs that are not upstream yet, but it should be
enough for sanity checking that my conversion of a stolen object to a 
shmemfs object is sane.
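
The caller-side convention is deliberately small: objects created from
stolen now default to I915_MADV_DONTNEED (volatile, allowed to be lost
across hibernation) and a caller that needs the contents preserved must
opt in, mirroring what the context and overlay hunks below do (sketch
only, allocation error handling elided):

	obj = i915_gem_object_create_stolen(dev, size);
	if (obj) {
		/* contents must survive hibernation, so request the
		 * stolen->shmemfs migration on freeze */
		obj->madv = I915_MADV_WILLNEED;
	}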
---
 drivers/gpu/drm/i915/i915_drv.c         |  17 +++-
 drivers/gpu/drm/i915/i915_drv.h         |   5 +
 drivers/gpu/drm/i915/i915_gem.c         | 156 +++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_gem_context.c |   3 +
 drivers/gpu/drm/i915/i915_gem_stolen.c  |   9 ++
 drivers/gpu/drm/i915/intel_overlay.c    |   3 +
 6 files changed, 180 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c45d5722e987..3ece56a98827 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -972,6 +972,21 @@ static int i915_pm_suspend(struct device *dev)
 	return i915_drm_suspend(drm_dev);
 }
 
+static int i915_pm_freeze(struct device *dev)
+{
+	int ret;
+
+	ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev)));
+	if (ret)
+		return ret;
+
+	ret = i915_pm_suspend(dev);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static int i915_pm_suspend_late(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_to_i915(dev)->dev;
@@ -1618,7 +1633,7 @@ static const struct dev_pm_ops i915_pm_ops = {
 	 * @restore, @restore_early : called after rebooting and restoring the
 	 *                            hibernation image [PMSG_RESTORE]
 	 */
-	.freeze = i915_pm_suspend,
+	.freeze = i915_pm_freeze,
 	.freeze_late = i915_pm_suspend_late,
 	.thaw_early = i915_pm_resume_early,
 	.thaw = i915_pm_resume,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ea7881367084..ec10f389886e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1258,6 +1258,9 @@ struct intel_l3_parity {
 struct i915_gem_mm {
 	/** Memory allocator for GTT stolen memory */
 	struct drm_mm stolen;
+	/** List of all objects allocated from stolen */
+	struct list_head stolen_list;
+
 	/** List of all objects in gtt_space. Used to restore gtt
 	 * mappings on resume */
 	struct list_head bound_list;
@@ -2024,6 +2027,7 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
 
+	struct list_head stolen_link;
 	struct list_head batch_pool_link;
 	struct list_head tmp_link;
 
@@ -3003,6 +3007,7 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
+int __must_check i915_gem_freeze(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void __i915_add_request(struct drm_i915_gem_request *req,
 			struct i915_vma *batch,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 477cd1cb7679..c501e20b7ed0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4580,12 +4580,27 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
 	.put_pages = i915_gem_object_put_pages_gtt,
 };
 
+static struct address_space *
+i915_gem_set_inode_gfp(struct drm_device *dev, struct file *file)
+{
+	struct address_space *mapping = file_inode(file)->i_mapping;
+	gfp_t mask;
+
+	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+	if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
+		/* 965gm cannot relocate objects above 4GiB. */
+		mask &= ~__GFP_HIGHMEM;
+		mask |= __GFP_DMA32;
+	}
+	mapping_set_gfp_mask(mapping, mask);
+
+	return mapping;
+}
+
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size)
 {
 	struct drm_i915_gem_object *obj;
-	struct address_space *mapping;
-	gfp_t mask;
 	int ret;
 
 	obj = i915_gem_object_alloc(dev);
@@ -4597,16 +4612,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 		i915_gem_object_free(obj);
 		return ERR_PTR(ret);
 	}
-
-	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
-	if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
-		/* 965gm cannot relocate objects above 4GiB. */
-		mask &= ~__GFP_HIGHMEM;
-		mask |= __GFP_DMA32;
-	}
-
-	mapping = file_inode(obj->base.filp)->i_mapping;
-	mapping_set_gfp_mask(mapping, mask);
+	i915_gem_set_inode_gfp(dev, obj->base.filp);
 
 	i915_gem_object_init(obj, &i915_gem_object_ops);
 
@@ -4738,6 +4744,132 @@ i915_gem_stop_ringbuffers(struct drm_device *dev)
 		dev_priv->gt.stop_ring(ring);
 }
 
+static int
+i915_gem_object_migrate_stolen_to_shmemfs(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_vma *vma, *vn;
+	struct drm_mm_node node;
+	struct file *file;
+	struct address_space *mapping;
+	int ret, i;
+
+	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+		return -EINVAL;
+
+	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
+		if (i915_vma_unbind(vma))
+			break;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	memset(&node, 0, sizeof(node));
+	ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm,
+						  &node,
+						  4096, 0, I915_CACHE_NONE,
+						  0, i915->gtt.mappable_end,
+						  DRM_MM_SEARCH_DEFAULT,
+						  DRM_MM_CREATE_DEFAULT);
+	if (ret)
+		goto err_unpin;
+
+	file = shmem_file_setup("drm mm object", obj->base.size, VM_NORESERVE);
+	if (IS_ERR(file)) {
+		ret = PTR_ERR(file);
+		goto err_unpin;
+	}
+
+	mapping = i915_gem_set_inode_gfp(obj->base.dev, file);
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		void *dst, *src;
+
+		wmb();
+		i915->gtt.base.insert_page(&i915->gtt.base,
+					   i915_gem_object_get_dma_address(obj, i),
+					   node.start,
+					   I915_CACHE_NONE,
+					   0);
+		wmb();
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page)) {
+			ret = PTR_ERR(page);
+			goto err_file;
+		}
+
+		src = io_mapping_map_atomic_wc(i915->gtt.mappable, node.start);
+		dst = kmap_atomic(page);
+		memcpy(dst, src, PAGE_SIZE);
+		kunmap_atomic(dst);
+		io_mapping_unmap_atomic(src);
+
+		page_cache_release(page);
+	}
+
+	wmb();
+	i915->gtt.base.clear_range(&i915->gtt.base,
+				   node.start, node.size,
+				   true);
+	drm_mm_remove_node(&node);
+
+	i915_gem_object_unpin_pages(obj);
+	ret = i915_gem_object_put_pages(obj);
+	if (ret) {
+		fput(file);
+		return ret;
+	}
+
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
+	obj->base.filp = file;
+	obj->ops = &i915_gem_object_ops;
+
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+
+	return 0;
+
+err_file:
+	fput(file);
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
+}
+
+int
+i915_gem_freeze(struct drm_device *dev)
+{
+	/* Called before freeze when hibernating */
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_object *obj, *tmp;
+	int ret;
+
+	/* Across hibernation, the stolen area is not preserved.
+	 * Anything inside stolen must be copied back to normal
+	 * memory if we wish to preserve it.
+	 */
+
+	list_for_each_entry_safe(obj, tmp, &i915->mm.stolen_list, stolen_link) {
+		if (obj->madv != I915_MADV_WILLNEED) {
+			/* XXX any point in setting this? The objects we
+			 * preserve never unset it afterwards.
+			 */
+			obj->madv = __I915_MADV_PURGED;
+			continue;
+		}
+
+		ret = i915_gem_object_migrate_stolen_to_shmemfs(obj);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 int
 i915_gem_suspend(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 03fa6e87f5ac..01e8fe7ae632 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -244,6 +244,9 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
 	if (IS_ERR_OR_NULL(obj))
 		return ERR_PTR(-ENOMEM);
 
+	/* contexts need to always be preserved across hibernation/swap-out */
+	obj->madv = I915_MADV_WILLNEED;
+
 	/*
 	 * Try to make the context utilize L3 as well as LLC.
 	 *
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 21ee83bf6307..86e84554e8ae 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -331,6 +331,7 @@ int i915_gem_init_stolen(struct drm_device *dev)
 	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->gtt.stolen_size -
 		    bios_reserved);
 
+	INIT_LIST_HEAD(&dev_priv->mm.stolen_list);
 	return 0;
 }
 
@@ -387,6 +388,7 @@ static void
 i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
 {
 	if (obj->stolen) {
+		list_del(&obj->stolen_link);
 		drm_mm_remove_node(obj->stolen);
 		kfree(obj->stolen);
 		obj->stolen = NULL;
@@ -419,6 +421,13 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
 	__i915_gem_object_pin_pages(obj);
 	obj->has_dma_mapping = true;
 	obj->stolen = stolen;
+	list_add(&obj->stolen_link, &to_i915(dev)->mm.stolen_list);
+
+	/* By default, treat the contents of stolen as volatile. If the object
+	 * must be saved across hibernation, then the caller must take
+	 * action and flag it as WILLNEED.
+	 */
+	obj->madv = I915_MADV_DONTNEED;
 
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
 	obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index cebe857f6df2..ec3ab351e9ff 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -1401,6 +1401,9 @@ void intel_setup_overlay(struct drm_device *dev)
 		goto out_free;
 	overlay->reg_bo = reg_bo;
 
+	/* XXX preserve register values across hibernation */
+	reg_bo->madv = I915_MADV_WILLNEED;
+
 	if (OVERLAY_NEEDS_PHYSICAL(dev)) {
 		ret = i915_gem_object_attach_phys(reg_bo, PAGE_SIZE);
 		if (ret) {
-- 
2.1.4


