[Intel-gfx] [PATCH 08/10] drm/i915: Migrate stolen objects before hibernation

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Dec 22 09:23:05 PST 2015


On 22/12/15 12:33, Tvrtko Ursulin wrote:
>
> On 22/12/15 06:20, ankitprasad.r.sharma at intel.com wrote:
>> From: Chris Wilson <chris at chris-wilson.co.uk>
>>
>> Ville reminded us that stolen memory is not preserved across
>> hibernation, and as a result context objects now being allocated from
>> stolen were being corrupted on S4, promptly hanging the GPU on resume.
>>
>> We want to utilise stolen for as much as possible (nothing else will use
>> that wasted memory otherwise), so we need a strategy for handling
>> general objects allocated from stolen and hibernation. A simple solution
>> is to do a CPU copy through the GTT of the stolen object into a fresh
>> shmemfs backing store and thenceforth treat it as a normal object. This
>> can be refined in future to either use a GPU copy to avoid the slow
>> uncached reads (though it's hibernation!) or recreate stolen objects
>> upon resume/first-use. For now, a simple approach should suffice for
>> testing the object migration.
>>
>> v2:
>> Swap PTE for pinned bindings over to the shmemfs. This adds a
>> complicated dance, but is required as many stolen objects are likely to
>> be pinned for use by the hardware. Swapping the PTEs should not result
>> in externally visible behaviour, as each PTE update should be atomic and
>> the two pages identical. (danvet)
>>
>> Be safe by default (the principle of least surprise): we need a new flag
>> to mark objects that we can wilfully discard and recreate across
>> hibernation. (danvet)
>>
>> Just use the global_list rather than invent a new stolen_list. This is
>> the slowpath hibernate and so adding a new list and the associated
>> complexity isn't worth it.
>>
>> v3: Rebased on drm-intel-nightly (Ankit)
>>
>> v4: Use insert_page to map stolen memory backed pages for migration to
>> shmem (Chris)
>>
>> v5: Acquire mutex lock while copying stolen buffer objects to shmem
>> (Chris)
>>
>> v6: Handled file leak, split object migration function, added kerneldoc
>> for the migrate_stolen_to_shmemfs() function (Tvrtko)
>> Use i915 wrapper function for drm_mm_insert_node_in_range()
>>
>> v7: Keep the object in cpu domain after get_pages, remove the object from
>> the unbound list only when marked PURGED, corrected split of object
>> migration function (Chris)
>>
>> v8: Split i915_gem_freeze(), removed redundant use of barrier, corrected
>> use of set_to_cpu_domain() (Chris)
>>
>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.c         |  17 ++-
>>   drivers/gpu/drm/i915/i915_drv.h         |  10 ++
>>   drivers/gpu/drm/i915/i915_gem.c         | 194 ++++++++++++++++++++++++++++++--
>>   drivers/gpu/drm/i915/i915_gem_stolen.c  |  49 ++++++++
>>   drivers/gpu/drm/i915/intel_display.c    |   3 +
>>   drivers/gpu/drm/i915/intel_fbdev.c      |   6 +
>>   drivers/gpu/drm/i915/intel_pm.c         |   2 +
>>   drivers/gpu/drm/i915/intel_ringbuffer.c |   6 +
>>   8 files changed, 275 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
>> index e6935f1..8f675ae7 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -979,6 +979,21 @@ static int i915_pm_suspend(struct device *dev)
>>       return i915_drm_suspend(drm_dev);
>>   }
>>
>> +static int i915_pm_freeze(struct device *dev)
>> +{
>> +    int ret;
>> +
>> +    ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev)));
>> +    if (ret)
>> +        return ret;
>> +
>> +    ret = i915_pm_suspend(dev);
>> +    if (ret)
>> +        return ret;
>> +
>> +    return 0;
>> +}
>> +
>>   static int i915_pm_suspend_late(struct device *dev)
>>   {
>>       struct drm_device *drm_dev = dev_to_i915(dev)->dev;
>> @@ -1607,7 +1622,7 @@ static const struct dev_pm_ops i915_pm_ops = {
>>        * @restore, @restore_early : called after rebooting and restoring the
>>        *                            hibernation image [PMSG_RESTORE]
>>        */
>> -    .freeze = i915_pm_suspend,
>> +    .freeze = i915_pm_freeze,
>>       .freeze_late = i915_pm_suspend_late,
>>       .thaw_early = i915_pm_resume_early,
>>       .thaw = i915_pm_resume,
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 2f21e71..492878a 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2079,6 +2079,12 @@ struct drm_i915_gem_object {
>>        * Advice: are the backing pages purgeable?
>>        */
>>       unsigned int madv:2;
>> +    /**
>> +     * Whereas madv is for userspace, there are certain situations
>> +     * where we want I915_MADV_DONTNEED behaviour on internal objects
>> +     * without conflating the userspace setting.
>> +     */
>> +    unsigned int internal_volatile:1;
>>
>>       /**
>>        * Current tiling mode for the object.
>> @@ -3047,6 +3053,9 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
>>   void i915_gem_init_swizzling(struct drm_device *dev);
>>   void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
>>   int __must_check i915_gpu_idle(struct drm_device *dev);
>> +int __must_check i915_gem_freeze(struct drm_device *dev);
>> +int __must_check
>> +i915_gem_object_migrate_stolen_to_shmemfs(struct drm_i915_gem_object *obj);
>>   int __must_check i915_gem_suspend(struct drm_device *dev);
>>   void __i915_add_request(struct drm_i915_gem_request *req,
>>               struct drm_i915_gem_object *batch_obj,
>> @@ -3276,6 +3285,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
>>                              u32 stolen_offset,
>>                              u32 gtt_offset,
>>                              u32 size);
>> +int __must_check i915_gem_stolen_freeze(struct drm_i915_private *i915);
>>
>>   /* i915_gem_shrinker.c */
>>   unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index a0ec1a9..d27a41e 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -4573,12 +4573,27 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
>>       .put_pages = i915_gem_object_put_pages_gtt,
>>   };
>>
>> +static struct address_space *
>> +i915_gem_set_inode_gfp(struct drm_device *dev, struct file *file)
>> +{
>> +    struct address_space *mapping = file_inode(file)->i_mapping;
>> +    gfp_t mask;
>> +
>> +    mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
>> +    if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
>> +        /* 965gm cannot relocate objects above 4GiB. */
>> +        mask &= ~__GFP_HIGHMEM;
>> +        mask |= __GFP_DMA32;
>> +    }
>> +    mapping_set_gfp_mask(mapping, mask);
>> +
>> +    return mapping;
>> +}
>> +
>>   struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>>                             size_t size)
>>   {
>>       struct drm_i915_gem_object *obj;
>> -    struct address_space *mapping;
>> -    gfp_t mask;
>>       int ret;
>>
>>       obj = i915_gem_object_alloc(dev);
>> @@ -4591,15 +4606,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>>           return ERR_PTR(ret);
>>       }
>>
>> -    mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
>> -    if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
>> -        /* 965gm cannot relocate objects above 4GiB. */
>> -        mask &= ~__GFP_HIGHMEM;
>> -        mask |= __GFP_DMA32;
>> -    }
>> -
>> -    mapping = file_inode(obj->base.filp)->i_mapping;
>> -    mapping_set_gfp_mask(mapping, mask);
>> +    i915_gem_set_inode_gfp(dev, obj->base.filp);
>>
>>       i915_gem_object_init(obj, &i915_gem_object_ops);
>>
>> @@ -4774,6 +4781,171 @@ i915_gem_stop_ringbuffers(struct drm_device *dev)
>>           dev_priv->gt.stop_ring(ring);
>>   }
>>
>> +static int
>> +copy_content(struct drm_i915_gem_object *obj,
>> +        struct drm_i915_private *i915,
>> +        struct address_space *mapping)
>> +{
>> +    struct drm_mm_node node;
>> +    int ret, i;
>> +
>> +    ret = i915_gem_object_set_to_gtt_domain(obj, false);
>> +    if (ret)
>> +        return ret;
>> +
>> +    /* stolen objects are already pinned to prevent shrinkage */
>> +    memset(&node, 0, sizeof(node));
>> +    ret = insert_mappable_node(i915, &node);
>> +    if (ret)
>> +        return ret;
>> +
>> +    for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
>> +        struct page *page;
>> +        void *__iomem src;
>> +        void *dst;
>> +
>> +        i915->gtt.base.insert_page(&i915->gtt.base,
>> +                       i915_gem_object_get_dma_address(obj, i),
>> +                       node.start,
>> +                       I915_CACHE_NONE,
>> +                       0);
>> +
>> +        page = shmem_read_mapping_page(mapping, i);
>> +        if (IS_ERR(page)) {
>> +            ret = PTR_ERR(page);
>> +            break;
>> +        }
>> +
>> +        src = io_mapping_map_atomic_wc(i915->gtt.mappable, node.start);
>> +        dst = kmap_atomic(page);
>> +        wmb();
>> +        memcpy_fromio(dst, src, PAGE_SIZE);
>> +        wmb();
>> +        kunmap_atomic(dst);
>> +        io_mapping_unmap_atomic(src);
>> +
>> +        page_cache_release(page);
>> +    }
>> +
>> +    i915->gtt.base.clear_range(&i915->gtt.base,
>> +                   node.start, node.size,
>> +                   true);
>> +    remove_mappable_node(&node);
>> +    if (ret)
>> +        return ret;
>> +
>> +    return i915_gem_object_set_to_cpu_domain(obj, true);
>> +}
>> +
>> +/**
>> + * i915_gem_object_migrate_stolen_to_shmemfs() - migrates a stolen backed
>> + * object to shmemfs
>> + * @obj: stolen backed object to be migrated
>> + *
>> + * Returns: 0 on successful migration, errno on failure
>> + */
>> +
>> +int
>> +i915_gem_object_migrate_stolen_to_shmemfs(struct drm_i915_gem_object *obj)
>> +{
>> +    struct drm_i915_private *i915 = to_i915(obj->base.dev);
>> +    struct i915_vma *vma, *vn;
>> +    struct file *file;
>> +    struct address_space *mapping;
>> +    struct sg_table *stolen_pages, *shmemfs_pages;
>> +    int ret;
>> +
>> +    if (WARN_ON_ONCE(i915_gem_object_needs_bit17_swizzle(obj)))
>> +        return -EINVAL;
>> +
>> +    file = shmem_file_setup("drm mm object", obj->base.size, VM_NORESERVE);
>> +    if (IS_ERR(file))
>> +        return PTR_ERR(file);
>> +    mapping = i915_gem_set_inode_gfp(obj->base.dev, file);
>> +
>> +    list_for_each_entry_safe(vma, vn, &obj->vma_list, vma_link)
>> +        if (i915_vma_unbind(vma))
>> +            continue;
>> +
>> +    if (obj->madv != I915_MADV_WILLNEED && list_empty(&obj->vma_list)) {
>> +        /* Discard the stolen reservation, and replace with
>> +         * an unpopulated shmemfs object.
>> +         */
>> +        obj->madv = __I915_MADV_PURGED;
>> +        list_del(&obj->global_list);
>> +    } else {
>> +        ret = copy_content(obj, i915, mapping);
>> +        if (ret)
>> +            goto err_file;
>> +    }
>> +
>> +    stolen_pages = obj->pages;
>> +    obj->pages = NULL;
>> +
>> +    obj->base.filp = file;
>> +
>> +    /* Recreate any pinned binding with pointers to the new storage */
>> +    if (!list_empty(&obj->vma_list)) {
>> +        ret = i915_gem_object_get_pages_gtt(obj);
>> +        if (ret) {
>> +            obj->pages = stolen_pages;
>> +            goto err_file;
>> +        }
>> +
>> +        obj->get_page.sg = obj->pages->sgl;
>> +        obj->get_page.last = 0;
>> +
>> +        list_for_each_entry(vma, &obj->vma_list, vma_link) {
>> +            if (!drm_mm_node_allocated(&vma->node))
>> +                continue;
>> +
>> +            WARN_ON(i915_vma_bind(vma,
>> +                          obj->cache_level,
>> +                          PIN_UPDATE));
>
> It looks like this should also fail (and restore) the migration.
> Otherwise if it fails it leaves GTT mappings to pages which will be
> released below.
>
> Or a big fat comment explaining why it cannot fail, ever.

On the basis that this is an impossible failure, just please add a 
comment and then you can add:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

You can also change the WARN_ON to a BUG_ON, which sounds justified in this 
case.
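
For reference, something along these lines is what I have in mind (an 
untested sketch only, and the comment wording is just an example; it 
assumes the shmemfs pages were already populated by 
i915_gem_object_get_pages_gtt() above, so nothing in the rebind can 
legitimately fail):

	list_for_each_entry(vma, &obj->vma_list, vma_link) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		/*
		 * The shmemfs backing pages are already allocated and
		 * pinned, and the vma keeps its existing node, so a
		 * PIN_UPDATE bind only rewrites the PTEs and must not
		 * fail at this point.
		 */
		BUG_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE));
	}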

Regards,

Tvrtko
