[Intel-gfx] [PATCH 08/10] drm/i915: Migrate stolen objects before hibernation
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Dec 22 04:33:41 PST 2015
On 22/12/15 06:20, ankitprasad.r.sharma at intel.com wrote:
> From: Chris Wilson <chris at chris-wilson.co.uk>
>
> Ville reminded us that stolen memory is not preserved across
> hibernation, and a result of this was that context objects now being
> allocated from stolen were being corrupted on S4 and promptly hanging
> the GPU on resume.
>
> We want to utilise stolen for as much as possible (nothing else will use
> that wasted memory otherwise), so we need a strategy for handling
> general objects allocated from stolen and hibernation. A simple solution
> is to do a CPU copy through the GTT of the stolen object into a fresh
> shmemfs backing store and thenceforth treat it as a normal object. This
> can be refined in future to either use a GPU copy to avoid the slow
> uncached reads (though it's hibernation!) or recreate stolen objects
> upon resume/first-use. For now, a simple approach should suffice for
> testing the object migration.
>
> v2:
> Swap PTE for pinned bindings over to the shmemfs. This adds a
> complicated dance, but is required as many stolen objects are likely to
> be pinned for use by the hardware. Swapping the PTEs should not result
> in externally visible behaviour, as each PTE update should be atomic and
> the two pages identical. (danvet)
>
> Safe-by-default, or the principle of least surprise: we need a new flag
> to mark objects that we can wilfully discard and recreate across
> hibernation. (danvet)
>
> Just use the global_list rather than invent a new stolen_list. This is
> the slowpath hibernate and so adding a new list and the associated
> complexity isn't worth it.
>
> v3: Rebased on drm-intel-nightly (Ankit)
>
> v4: Use insert_page to map stolen memory backed pages for migration to
> shmem (Chris)
>
> v5: Acquire mutex lock while copying stolen buffer objects to shmem (Chris)
>
> v6: Handled file leak, split the object migration function, added kerneldoc
> for the migrate_stolen_to_shmemfs() function (Tvrtko)
> Use i915 wrapper function for drm_mm_insert_node_in_range()
>
> v7: Kept the object in the CPU domain after get_pages, removed the object from
> the unbound list only when marked PURGED, corrected split of the object
> migration function (Chris)
>
> v8: Split i915_gem_freeze(), removed redundant use of barrier, corrected
> use of set_to_cpu_domain() (Chris)
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.c | 17 ++-
> drivers/gpu/drm/i915/i915_drv.h | 10 ++
> drivers/gpu/drm/i915/i915_gem.c | 194 ++++++++++++++++++++++++++++++--
> drivers/gpu/drm/i915/i915_gem_stolen.c | 49 ++++++++
> drivers/gpu/drm/i915/intel_display.c | 3 +
> drivers/gpu/drm/i915/intel_fbdev.c | 6 +
> drivers/gpu/drm/i915/intel_pm.c | 2 +
> drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +
> 8 files changed, 275 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index e6935f1..8f675ae7 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -979,6 +979,21 @@ static int i915_pm_suspend(struct device *dev)
> return i915_drm_suspend(drm_dev);
> }
>
> +static int i915_pm_freeze(struct device *dev)
> +{
> + int ret;
> +
> + ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev)));
> + if (ret)
> + return ret;
> +
> + ret = i915_pm_suspend(dev);
> + if (ret)
> + return ret;
> +
> + return 0;
> +}
> +
> static int i915_pm_suspend_late(struct device *dev)
> {
> struct drm_device *drm_dev = dev_to_i915(dev)->dev;
> @@ -1607,7 +1622,7 @@ static const struct dev_pm_ops i915_pm_ops = {
> * @restore, @restore_early : called after rebooting and restoring the
> * hibernation image [PMSG_RESTORE]
> */
> - .freeze = i915_pm_suspend,
> + .freeze = i915_pm_freeze,
> .freeze_late = i915_pm_suspend_late,
> .thaw_early = i915_pm_resume_early,
> .thaw = i915_pm_resume,
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2f21e71..492878a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2079,6 +2079,12 @@ struct drm_i915_gem_object {
> * Advice: are the backing pages purgeable?
> */
> unsigned int madv:2;
> + /**
> + * Whereas madv is for userspace, there are certain situations
> + * where we want I915_MADV_DONTNEED behaviour on internal objects
> + * without conflating the userspace setting.
> + */
> + unsigned int internal_volatile:1;
>
> /**
> * Current tiling mode for the object.
> @@ -3047,6 +3053,9 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
> void i915_gem_init_swizzling(struct drm_device *dev);
> void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
> int __must_check i915_gpu_idle(struct drm_device *dev);
> +int __must_check i915_gem_freeze(struct drm_device *dev);
> +int __must_check
> +i915_gem_object_migrate_stolen_to_shmemfs(struct drm_i915_gem_object *obj);
> int __must_check i915_gem_suspend(struct drm_device *dev);
> void __i915_add_request(struct drm_i915_gem_request *req,
> struct drm_i915_gem_object *batch_obj,
> @@ -3276,6 +3285,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
> u32 stolen_offset,
> u32 gtt_offset,
> u32 size);
> +int __must_check i915_gem_stolen_freeze(struct drm_i915_private *i915);
>
> /* i915_gem_shrinker.c */
> unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a0ec1a9..d27a41e 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4573,12 +4573,27 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
> .put_pages = i915_gem_object_put_pages_gtt,
> };
>
> +static struct address_space *
> +i915_gem_set_inode_gfp(struct drm_device *dev, struct file *file)
> +{
> + struct address_space *mapping = file_inode(file)->i_mapping;
> + gfp_t mask;
> +
> + mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
> + if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
> + /* 965gm cannot relocate objects above 4GiB. */
> + mask &= ~__GFP_HIGHMEM;
> + mask |= __GFP_DMA32;
> + }
> + mapping_set_gfp_mask(mapping, mask);
> +
> + return mapping;
> +}
> +
> struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
> size_t size)
> {
> struct drm_i915_gem_object *obj;
> - struct address_space *mapping;
> - gfp_t mask;
> int ret;
>
> obj = i915_gem_object_alloc(dev);
> @@ -4591,15 +4606,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
> return ERR_PTR(ret);
> }
>
> - mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
> - if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
> - /* 965gm cannot relocate objects above 4GiB. */
> - mask &= ~__GFP_HIGHMEM;
> - mask |= __GFP_DMA32;
> - }
> -
> - mapping = file_inode(obj->base.filp)->i_mapping;
> - mapping_set_gfp_mask(mapping, mask);
> + i915_gem_set_inode_gfp(dev, obj->base.filp);
>
> i915_gem_object_init(obj, &i915_gem_object_ops);
>
> @@ -4774,6 +4781,171 @@ i915_gem_stop_ringbuffers(struct drm_device *dev)
> dev_priv->gt.stop_ring(ring);
> }
>
> +static int
> +copy_content(struct drm_i915_gem_object *obj,
> + struct drm_i915_private *i915,
> + struct address_space *mapping)
> +{
> + struct drm_mm_node node;
> + int ret, i;
> +
> + ret = i915_gem_object_set_to_gtt_domain(obj, false);
> + if (ret)
> + return ret;
> +
> + /* stolen objects are already pinned to prevent shrinkage */
> + memset(&node, 0, sizeof(node));
> + ret = insert_mappable_node(i915, &node);
> + if (ret)
> + return ret;
> +
> + for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> + struct page *page;
> + void __iomem *src;
> + void *dst;
> +
> + i915->gtt.base.insert_page(&i915->gtt.base,
> + i915_gem_object_get_dma_address(obj, i),
> + node.start,
> + I915_CACHE_NONE,
> + 0);
> +
> + page = shmem_read_mapping_page(mapping, i);
> + if (IS_ERR(page)) {
> + ret = PTR_ERR(page);
> + break;
> + }
> +
> + src = io_mapping_map_atomic_wc(i915->gtt.mappable, node.start);
> + dst = kmap_atomic(page);
> + wmb();
> + memcpy_fromio(dst, src, PAGE_SIZE);
> + wmb();
> + kunmap_atomic(dst);
> + io_mapping_unmap_atomic(src);
> +
> + page_cache_release(page);
> + }
> +
> + i915->gtt.base.clear_range(&i915->gtt.base,
> + node.start, node.size,
> + true);
> + remove_mappable_node(&node);
> + if (ret)
> + return ret;
> +
> + return i915_gem_object_set_to_cpu_domain(obj, true);
> +}
> +
> +/**
> + * i915_gem_object_migrate_stolen_to_shmemfs() - migrates a stolen backed
> + * object to shmemfs
> + * @obj: stolen backed object to be migrated
> + *
> + * Returns: 0 on successful migration, errno on failure
> + */
> +
> +int
> +i915_gem_object_migrate_stolen_to_shmemfs(struct drm_i915_gem_object *obj)
> +{
> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
> + struct i915_vma *vma, *vn;
> + struct file *file;
> + struct address_space *mapping;
> + struct sg_table *stolen_pages, *shmemfs_pages;
> + int ret;
> +
> + if (WARN_ON_ONCE(i915_gem_object_needs_bit17_swizzle(obj)))
> + return -EINVAL;
> +
> + file = shmem_file_setup("drm mm object", obj->base.size, VM_NORESERVE);
> + if (IS_ERR(file))
> + return PTR_ERR(file);
> + mapping = i915_gem_set_inode_gfp(obj->base.dev, file);
> +
> + list_for_each_entry_safe(vma, vn, &obj->vma_list, vma_link)
> + if (i915_vma_unbind(vma))
> + continue;
> +
> + if (obj->madv != I915_MADV_WILLNEED && list_empty(&obj->vma_list)) {
> + /* Discard the stolen reservation, and replace with
> + * an unpopulated shmemfs object.
> + */
> + obj->madv = __I915_MADV_PURGED;
> + list_del(&obj->global_list);
> + } else {
> + ret = copy_content(obj, i915, mapping);
> + if (ret)
> + goto err_file;
> + }
> +
> + stolen_pages = obj->pages;
> + obj->pages = NULL;
> +
> + obj->base.filp = file;
> +
> + /* Recreate any pinned binding with pointers to the new storage */
> + if (!list_empty(&obj->vma_list)) {
> + ret = i915_gem_object_get_pages_gtt(obj);
> + if (ret) {
> + obj->pages = stolen_pages;
> + goto err_file;
> + }
> +
> + obj->get_page.sg = obj->pages->sgl;
> + obj->get_page.last = 0;
> +
> + list_for_each_entry(vma, &obj->vma_list, vma_link) {
> + if (!drm_mm_node_allocated(&vma->node))
> + continue;
> +
> + WARN_ON(i915_vma_bind(vma,
> + obj->cache_level,
> + PIN_UPDATE));
It looks like this should also fail (and restore) the migration.
Otherwise, if the bind fails, it leaves GTT mappings pointing at pages
which will be released below.
Or a big fat comment explaining why it cannot fail, ever.
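
Something along these lines, perhaps, as a rough and untested sketch
only (the release of the freshly acquired shmemfs pages is elided, and
whether a plain re-bind with PIN_UPDATE is enough to restore the old
mappings needs checking):

        ret = i915_vma_bind(vma, obj->cache_level, PIN_UPDATE);
        if (ret)
                break;

and then, after the loop:

        if (ret) {
                /* Point any already-updated PTEs back at the stolen
                 * pages so that nothing maps the shmemfs pages we are
                 * about to drop, then fail the migration.
                 */
                obj->pages = stolen_pages;
                list_for_each_entry(vma, &obj->vma_list, vma_link)
                        if (drm_mm_node_allocated(&vma->node))
                                WARN_ON(i915_vma_bind(vma,
                                                      obj->cache_level,
                                                      PIN_UPDATE));
                goto err_file;
        }
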
> + }
> + }
> +
> + /* drop the stolen pin and backing */
> + shmemfs_pages = obj->pages;
> + obj->pages = stolen_pages;
> +
> + i915_gem_object_unpin_pages(obj);
> + obj->ops->put_pages(obj);
> + if (obj->ops->release)
> + obj->ops->release(obj);
> +
> + obj->ops = &i915_gem_object_ops;
> + obj->pages = shmemfs_pages;
> +
> + return 0;
> +
> +err_file:
> + fput(file);
> + obj->base.filp = NULL;
> + return ret;
> +}
> +
> +int
> +i915_gem_freeze(struct drm_device *dev)
> +{
> + /* Called before i915_gem_suspend() when hibernating */
> + struct drm_i915_private *i915 = to_i915(dev);
> + int ret;
> +
> + ret = i915_mutex_lock_interruptible(dev);
> + if (ret)
> + return ret;
> +
> + /* Across hibernation, the stolen area is not preserved.
> + * Anything inside stolen must be copied back to normal
> + * memory if we wish to preserve it.
> + */
> + ret = i915_gem_stolen_freeze(i915);
> +
> + mutex_unlock(&dev->struct_mutex);
> + return ret;
> +}
> +
> int
> i915_gem_suspend(struct drm_device *dev)
> {
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 8db5752..6d1af9d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -839,3 +839,52 @@ err:
> drm_gem_object_unreference(&obj->base);
> return ERR_PTR(ret);
> }
> +
> +int i915_gem_stolen_freeze(struct drm_i915_private *i915)
> +{
> + struct drm_i915_gem_object *obj, *tmp;
> + struct list_head *phase[] = {
> + &i915->mm.unbound_list, &i915->mm.bound_list, NULL
> + }, **p;
> + int ret = 0;
> +
> + for (p = phase; *p; p++) {
> + struct list_head migrate;
> + int ret;
> +
> + INIT_LIST_HEAD(&migrate);
> + list_for_each_entry_safe(obj, tmp, *p, global_list) {
> + if (obj->stolen == NULL)
> + continue;
> +
> + if (obj->internal_volatile)
> + continue;
> +
> + /* In the general case, this object may only be alive
> + * due to an active reference, and that may disappear
> + * when we unbind any of the objects (and so wait upon
> + * the GPU and retire requests). To prevent one of the
> + * objects from disappearing beneath us, we need to
> + * take a reference to each as we build the migration
> + * list.
> + *
> + * This is similar to the strategy required whilst
> + * shrinking or evicting objects (for the same reason).
> + */
> + drm_gem_object_reference(&obj->base);
> + list_move(&obj->global_list, &migrate);
> + }
> +
> + ret = 0;
> + list_for_each_entry_safe(obj, tmp, &migrate, global_list) {
> + if (ret == 0)
> + ret = i915_gem_object_migrate_stolen_to_shmemfs(obj);
> + drm_gem_object_unreference(&obj->base);
> + }
> + list_splice(&migrate, *p);
> + if (ret)
> + break;
> + }
> +
> + return ret;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 006d43a..ca2cd44 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2546,6 +2546,9 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
> if (IS_ERR(obj))
> return false;
>
> + /* Not to be preserved across hibernation */
> + obj->internal_volatile = true;
> +
> obj->tiling_mode = plane_config->tiling;
> if (obj->tiling_mode == I915_TILING_X)
> obj->stride = fb->pitches[0];
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index b2f134a..e162249 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -156,6 +156,12 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> goto out;
> }
>
> + /* Discard the contents of the BIOS fb across hibernation.
> + * We really want to completely throw away the earlier fbdev
> + * and reconfigure it anyway.
> + */
> + obj->internal_volatile = true;
> +
> fb = __intel_framebuffer_create(dev, &mode_cmd, obj);
> if (IS_ERR(fb)) {
> drm_gem_object_unreference(&obj->base);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index c94b39b..3ffd181 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5198,6 +5198,8 @@ static void valleyview_setup_pctx(struct drm_device *dev)
> I915_WRITE(VLV_PCBR, pctx_paddr);
>
> out:
> + /* The power context need not be preserved across hibernation */
> + pctx->internal_volatile = true;
> DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
> dev_priv->vlv_pctx = pctx;
> }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 56d8375..412212e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2094,6 +2094,12 @@ static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
> if (IS_ERR(obj))
> return PTR_ERR(obj);
>
> + /* Ringbuffer objects are by definition volatile - only the commands
> + * between HEAD and TAIL need to be preserved and whilst there are
> + * any commands there, the ringbuffer is pinned by activity.
> + */
> + obj->internal_volatile = true;
> +
> /* mark ring buffers as read-only from GPU side by default */
> obj->gt_ro = 1;
>
>
Regards,
Tvrtko