[Intel-gfx] [PATCH] drm/i915: Migrate stolen objects before hibernation

Daniel Vetter daniel at ffwll.ch
Tue Jun 30 03:54:02 PDT 2015


On Tue, Jun 30, 2015 at 10:58:08AM +0100, Chris Wilson wrote:
> Ville reminded us that stolen memory is not preserved across
> hibernation, and a result of this was that context objects now being
> allocated from stolen were being corrupted on S4 and promptly hanging
> the GPU on resume.
> 
> We want to utilise stolen for as much as possible (nothing else will use
> that wasted memory otherwise), so we need a strategy for handling
> general objects allocated from stolen and hibernation. A simple solution
> is to do a CPU copy through the GTT of the stolen object into a fresh
> shmemfs backing store and thenceforth treat it as a normal object. This
> can be refined in future to either use a GPU copy to avoid the slow
> uncached reads (though it's hibernation!) and recreate stolen objects
> upon resume/first-use. For now, a simple approach should suffice for
> testing the object migration.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ville Syrjala <ville.syrjala at linux.intel.com>
> Cc: Akash Goel <akash.goel at intel.com>
> Cc: Deepak S <deepak.s at linux.intel.com>
> Cc: Imre Deak <imre.deak at linux.intel.com>
> ---
> This uses a few constructs that are not upstream yet, but it should be
> enough for sanity checking that my conversion of a stolen object to a 
> shmemfs object is sane.
> ---
>  drivers/gpu/drm/i915/i915_drv.c         |  17 +++-
>  drivers/gpu/drm/i915/i915_drv.h         |   5 +
>  drivers/gpu/drm/i915/i915_gem.c         | 156 +++++++++++++++++++++++++++++---
>  drivers/gpu/drm/i915/i915_gem_context.c |   3 +
>  drivers/gpu/drm/i915/i915_gem_stolen.c  |   9 ++
>  drivers/gpu/drm/i915/intel_overlay.c    |   3 +
>  6 files changed, 180 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index c45d5722e987..3ece56a98827 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -972,6 +972,21 @@ static int i915_pm_suspend(struct device *dev)
>  	return i915_drm_suspend(drm_dev);
>  }
>  
> +static int i915_pm_freeze(struct device *dev)
> +{
> +	int ret;
> +
> +	ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev)));
> +	if (ret)
> +		return ret;
> +
> +	ret = i915_pm_suspend(dev);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
>  static int i915_pm_suspend_late(struct device *dev)
>  {
>  	struct drm_device *drm_dev = dev_to_i915(dev)->dev;
> @@ -1618,7 +1633,7 @@ static const struct dev_pm_ops i915_pm_ops = {
>  	 * @restore, @restore_early : called after rebooting and restoring the
>  	 *                            hibernation image [PMSG_RESTORE]
>  	 */
> -	.freeze = i915_pm_suspend,
> +	.freeze = i915_pm_freeze,
>  	.freeze_late = i915_pm_suspend_late,
>  	.thaw_early = i915_pm_resume_early,
>  	.thaw = i915_pm_resume,
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ea7881367084..ec10f389886e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1258,6 +1258,9 @@ struct intel_l3_parity {
>  struct i915_gem_mm {
>  	/** Memory allocator for GTT stolen memory */
>  	struct drm_mm stolen;
> +	/** List of all objects allocated from stolen */
> +	struct list_head stolen_list;
> +
>  	/** List of all objects in gtt_space. Used to restore gtt
>  	 * mappings on resume */
>  	struct list_head bound_list;
> @@ -2024,6 +2027,7 @@ struct drm_i915_gem_object {
>  	struct drm_mm_node *stolen;
>  	struct list_head global_list;
>  
> +	struct list_head stolen_link;
>  	struct list_head batch_pool_link;
>  	struct list_head tmp_link;
>  
> @@ -3003,6 +3007,7 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
>  void i915_gem_init_swizzling(struct drm_device *dev);
>  void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
>  int __must_check i915_gpu_idle(struct drm_device *dev);
> +int __must_check i915_gem_freeze(struct drm_device *dev);
>  int __must_check i915_gem_suspend(struct drm_device *dev);
>  void __i915_add_request(struct drm_i915_gem_request *req,
>  			struct i915_vma *batch,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 477cd1cb7679..c501e20b7ed0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4580,12 +4580,27 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
>  	.put_pages = i915_gem_object_put_pages_gtt,
>  };
>  
> +static struct address_space *
> +i915_gem_set_inode_gfp(struct drm_device *dev, struct file *file)
> +{
> +	struct address_space *mapping = file_inode(file)->i_mapping;
> +	gfp_t mask;
> +
> +	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
> +	if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
> +		/* 965gm cannot relocate objects above 4GiB. */
> +		mask &= ~__GFP_HIGHMEM;
> +		mask |= __GFP_DMA32;
> +	}
> +	mapping_set_gfp_mask(mapping, mask);
> +
> +	return mapping;
> +}
> +
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size)
>  {
>  	struct drm_i915_gem_object *obj;
> -	struct address_space *mapping;
> -	gfp_t mask;
>  	int ret;
>  
>  	obj = i915_gem_object_alloc(dev);
> @@ -4597,16 +4612,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  		i915_gem_object_free(obj);
>  		return ERR_PTR(ret);
>  	}
> -
> -	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
> -	if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
> -		/* 965gm cannot relocate objects above 4GiB. */
> -		mask &= ~__GFP_HIGHMEM;
> -		mask |= __GFP_DMA32;
> -	}
> -
> -	mapping = file_inode(obj->base.filp)->i_mapping;
> -	mapping_set_gfp_mask(mapping, mask);
> +	i915_gem_set_inode_gfp(dev, obj->base.filp);
>  
>  	i915_gem_object_init(obj, &i915_gem_object_ops);
>  
> @@ -4738,6 +4744,132 @@ i915_gem_stop_ringbuffers(struct drm_device *dev)
>  		dev_priv->gt.stop_ring(ring);
>  }
>  
> +static int
> +i915_gem_object_migrate_stolen_to_shmemfs(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +	struct i915_vma *vma, *vn;
> +	struct drm_mm_node node;
> +	struct file *file;
> +	struct address_space *mapping;
> +	int ret, i;
> +
> +	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
> +		return -EINVAL;
> +
> +	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
> +		if (i915_vma_unbind(vma))
> +			break;
> +
> +	ret = i915_gem_object_pin_pages(obj);
> +	if (ret)
> +		return ret;
> +
> +	memset(&node, 0, sizeof(node));
> +	ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm,
> +						  &node,
> +						  4096, 0, I915_CACHE_NONE,
> +						  0, i915->gtt.mappable_end,
> +						  DRM_MM_SEARCH_DEFAULT,
> +						  DRM_MM_CREATE_DEFAULT);

Hm, I think the plan with stolen is to mostly use it for giant scanout
buffers where we never plan to access them with the gpu. Maybe go with a
per-page loop here instead? You have a low-level pte writing call below
anyway. Would mean we'd need a 1-entry onstack sg_table too, but that
won't hurt.

And performance doesn't matter either because hibernate is seriously slow.

> +	if (ret)
> +		goto err_unpin;
> +
> +	file = shmem_file_setup("drm mm object", obj->base.size, VM_NORESERVE);
> +	if (IS_ERR(file)) {
> +		ret = PTR_ERR(file);
> +		goto err_unpin;
> +	}
> +
> +	mapping = i915_gem_set_inode_gfp(obj->base.dev, file);
> +	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> +		struct page *page;
> +		void *dst, *src;
> +
> +		wmb();
> +		i915->gtt.base.insert_page(&i915->gtt.base,
> +					   i915_gem_object_get_dma_address(obj, i),
> +					   node.start,
> +					   I915_CACHE_NONE,
> +					   0);
> +		wmb();
> +
> +		page = shmem_read_mapping_page(mapping, i);
> +		if (IS_ERR(page)) {
> +			ret = PTR_ERR(page);
> +			goto err_file;
> +		}
> +
> +		src = io_mapping_map_atomic_wc(i915->gtt.mappable, node.start);
> +		dst = kmap_atomic(page);
> +		memcpy(dst, src, PAGE_SIZE);

memcpy_fromio to appease sparse I think.

> +		kunmap_atomic(dst);
> +		io_mapping_unmap_atomic(src);
> +
> +		page_cache_release(page);
> +	}
> +
> +	wmb();
> +	i915->gtt.base.clear_range(&i915->gtt.base,
> +				   node.start, node.size,
> +				   true);
> +	drm_mm_remove_node(&node);
> +
> +	i915_gem_object_unpin_pages(obj);
> +	ret = i915_gem_object_put_pages(obj);
> +	if (ret) {
> +		fput(file);
> +		return ret;
> +	}
> +
> +	if (obj->ops->release)
> +		obj->ops->release(obj);
> +
> +	obj->base.filp = file;
> +	obj->ops = &i915_gem_object_ops;
> +
> +	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> +	obj->base.write_domain = I915_GEM_DOMAIN_CPU;

maybe wrap up in a shmem_reinit function and reuse in the gem object creation
path for a bit more clarity (and to reduce chances we forget about this).

Wrt igt not sure whether we can do this in general, since it requires a
working swap partition and working initrd support for hibernate. And
hibernate is kinda a disregarded feature anyway, so imo meh. We could do
an igt_system_hibernate_autowake() if we do the test though.

Imo trying to migrate backwards would be serious pain since we'd need to
make sure no shm mmap is around and audit all the other places. Too much
trouble for no gain (userspace can just realloc if needed).

> +
> +	return 0;
> +
> +err_file:
> +	fput(file);
> +err_unpin:
> +	i915_gem_object_unpin_pages(obj);
> +	return ret;
> +}
> +
> +int
> +i915_gem_freeze(struct drm_device *dev)
> +{
> +	/* Called before freeze when hibernating */
> +	struct drm_i915_private *i915 = to_i915(dev);
> +	struct drm_i915_gem_object *obj, *tmp;
> +	int ret;
> +
> +	/* Across hibernation, the stolen area is not preserved.
> +	 * Anything inside stolen must be copied back to normal
> +	 * memory if we wish to preserve it.
> +	 */
> +
> +	list_for_each_entry_safe(obj, tmp, &i915->mm.stolen_list, stolen_link) {
> +		if (obj->madv != I915_MADV_WILLNEED) {
> +			/* XXX any point in setting this? The objects for which
> +			 * we preserve never unset it afterwards.
> +			 */
> +			obj->madv = __I915_MADV_PURGED;
> +			continue;
> +		}
> +
> +		ret = i915_gem_object_migrate_stolen_to_shmemfs(obj);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
>  int
>  i915_gem_suspend(struct drm_device *dev)
>  {
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 03fa6e87f5ac..01e8fe7ae632 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -244,6 +244,9 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
>  	if (IS_ERR_OR_NULL(obj))
>  		return ERR_PTR(-ENOMEM);
>  
> +	/* contexts need to always be preserved across hibernation/swap-out */
> +	obj->madv = I915_MADV_WILLNEED;
> +
>  	/*
>  	 * Try to make the context utilize L3 as well as LLC.
>  	 *
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 21ee83bf6307..86e84554e8ae 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -331,6 +331,7 @@ int i915_gem_init_stolen(struct drm_device *dev)
>  	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->gtt.stolen_size -
>  		    bios_reserved);
>  
> +	INIT_LIST_HEAD(&dev_priv->mm.stolen_list);
>  	return 0;
>  }
>  
> @@ -387,6 +388,7 @@ static void
>  i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
>  {
>  	if (obj->stolen) {
> +		list_del(&obj->stolen_link);
>  		drm_mm_remove_node(obj->stolen);
>  		kfree(obj->stolen);
>  		obj->stolen = NULL;
> @@ -419,6 +421,13 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>  	__i915_gem_object_pin_pages(obj);
>  	obj->has_dma_mapping = true;
>  	obj->stolen = stolen;
> +	list_add(&obj->stolen_link, &to_i915(dev)->mm.stolen_list);
> +
> +	/* By default, treat the contents of stolen as volatile. If the object
> +	 * must be saved across hibernation, then the caller must take
> +	 * action and flag it as WILLNEED.
> +	 */
> +	obj->madv = I915_MADV_DONTNEED;

Won't this interfere with autoreclaim of stolen objects (to make room for
users which really need it like fbc) which are still in use by userspace?
I think we need a new madv flag for "REAP_ON_HIBERNATE" or something
similar. Otherwise this is way too surprising for userspace.

And with the REAP_ON_HIBERNATE we could also try to just purge them if
they're not pinned or similar (otherwise garbage on screen, ugh).

I guess the condition would then be

	if (madv == REAP_ON_HIBERNATE && !pinned)
		purge(obj)
	else
		convert_to_shmem

Also I'd make the REAPING opt-in since hibernate is such a rarely-used
path. And then perhaps only use it with create2.

Thinking about this more maybe igt would be good indeed: Use create2 to
allocate stolen with autoreaping, wrap in framebuffer and use it,
hibernate.

The gpu should be idle by then, so pinned-for-scanout is the only corner
case I can think of atm.

But overall I think the approach looks good, just a lot of details to get
right as usual.
-Daniel

>  
>  	obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
>  	obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE;
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index cebe857f6df2..ec3ab351e9ff 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -1401,6 +1401,9 @@ void intel_setup_overlay(struct drm_device *dev)
>  		goto out_free;
>  	overlay->reg_bo = reg_bo;
>  
> +	/* XXX preserve register values across hibernation */
> +	reg_bo->madv = I915_MADV_WILLNEED;
> +
>  	if (OVERLAY_NEEDS_PHYSICAL(dev)) {
>  		ret = i915_gem_object_attach_phys(reg_bo, PAGE_SIZE);
>  		if (ret) {
> -- 
> 2.1.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the Intel-gfx mailing list