[Intel-gfx] [PATCH v6 7/9] drm/i915: Reduce the number of objects subject to memcpy recover
Thomas Hellström
thomas.hellstrom at linux.intel.com
Thu Sep 23 09:58:38 UTC 2021
On 9/23/21 11:44 AM, Matthew Auld wrote:
> On 22/09/2021 07:25, Thomas Hellström wrote:
>> We really only need memcpy restore for objects that affect the
>> operability of the migrate context. That is, primarily the page-table
>> objects of the migrate VM.
>>
>> Add an object flag, I915_BO_ALLOC_PM_EARLY for objects that need early
>> restores using memcpy and a way to assign LMEM page-table object flags
>> to be used by the vms.
>>
>> Restore objects without this flag with the gpu blitter and only objects
>> carrying the flag using TTM memcpy.
>>
>> Initially mark the migrate, gt, gtt and vgpu vms to use this flag, and
>> defer for a later audit which vms actually need it. Most importantly,
>> user-
>> allocated vms with pinned page-table objects can be restored using the
>> blitter.
>>
>> Performance-wise memcpy restore is probably as fast as gpu restore if
>> not
>> faster, but using gpu restore will help tackling future restrictions in
>> mappable LMEM size.
>>
>> v4:
>> - Don't mark the aliasing ppgtt page table flags for early resume, but
>> rather the ggtt page table flags as intended. (Matthew Auld)
>> - The check for user buffer objects during early resume is pointless,
>> since
>> they are never marked I915_BO_ALLOC_PM_EARLY. (Matthew Auld)
>> v5:
>> - Mark GuC LMEM objects with I915_BO_ALLOC_PM_EARLY to have them
>> restored
>> before we fire up the migrate context.
>>
>> Cc: Matthew Brost <matthew.brost at intel.com>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>> Reviewed-by: Matthew Auld <matthew.auld at intel.com>
>> ---
>> drivers/gpu/drm/i915/gem/i915_gem_context.c | 4 ++--
>> drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 9 ++++++---
>> drivers/gpu/drm/i915/gem/i915_gem_pm.c | 6 +++++-
>> drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 5 +++--
>> drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +-
>> drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 2 +-
>> drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 5 +++--
>> drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 4 +++-
>> drivers/gpu/drm/i915/gt/intel_ggtt.c | 3 ++-
>> drivers/gpu/drm/i915/gt/intel_gt.c | 2 +-
>> drivers/gpu/drm/i915/gt/intel_gtt.c | 3 ++-
>> drivers/gpu/drm/i915/gt/intel_gtt.h | 9 +++++++--
>> drivers/gpu/drm/i915/gt/intel_migrate.c | 2 +-
>> drivers/gpu/drm/i915/gt/intel_ppgtt.c | 13 ++++++++-----
>> drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +-
>> drivers/gpu/drm/i915/gt/uc/intel_guc.c | 3 ++-
>> drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 7 +++++--
>> drivers/gpu/drm/i915/gvt/scheduler.c | 2 +-
>> drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 ++--
>> 19 files changed, 56 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> index c2ab0e22db0a..8208fd5b72c3 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>> @@ -1287,7 +1287,7 @@ i915_gem_create_context(struct drm_i915_private
>> *i915,
>> } else if (HAS_FULL_PPGTT(i915)) {
>> struct i915_ppgtt *ppgtt;
>> - ppgtt = i915_ppgtt_create(&i915->gt);
>> + ppgtt = i915_ppgtt_create(&i915->gt, 0);
>> if (IS_ERR(ppgtt)) {
>> drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n",
>> PTR_ERR(ppgtt));
>> @@ -1465,7 +1465,7 @@ int i915_gem_vm_create_ioctl(struct drm_device
>> *dev, void *data,
>> if (args->flags)
>> return -EINVAL;
>> - ppgtt = i915_ppgtt_create(&i915->gt);
>> + ppgtt = i915_ppgtt_create(&i915->gt, 0);
>> if (IS_ERR(ppgtt))
>> return PTR_ERR(ppgtt);
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> index 118691ce81d7..fa2ba9e2a4d0 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> @@ -294,13 +294,16 @@ struct drm_i915_gem_object {
>> #define I915_BO_ALLOC_USER BIT(3)
>> /* Object is allowed to lose its contents on suspend / resume, even
>> if pinned */
>> #define I915_BO_ALLOC_PM_VOLATILE BIT(4)
>> +/* Object needs to be restored early using memcpy during resume */
>> +#define I915_BO_ALLOC_PM_EARLY BIT(5)
>> #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
>> I915_BO_ALLOC_VOLATILE | \
>> I915_BO_ALLOC_CPU_CLEAR | \
>> I915_BO_ALLOC_USER | \
>> - I915_BO_ALLOC_PM_VOLATILE)
>> -#define I915_BO_READONLY BIT(5)
>> -#define I915_TILING_QUIRK_BIT 6 /* unknown swizzling; do not
>> release! */
>> + I915_BO_ALLOC_PM_VOLATILE | \
>> + I915_BO_ALLOC_PM_EARLY)
>> +#define I915_BO_READONLY BIT(6)
>> +#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not
>> release! */
>> /**
>> * @mem_flags - Mutable placement-related flags
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>> index 12b37b4c1192..726b40e1fbb0 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
>> @@ -97,8 +97,12 @@ int i915_gem_backup_suspend(struct
>> drm_i915_private *i915)
>> * More objects may have become unpinned as requests were
>> * retired. Now try to evict again. The gt may be wedged here
>> * in which case we automatically fall back to memcpy.
>> + * We allow also backing up pinned objects that have not been
>> + * marked for early recover, and that may contain, for example,
>> + * page-tables for the migrate context.
>> */
>> - ret = lmem_suspend(i915, I915_TTM_BACKUP_ALLOW_GPU);
>> + ret = lmem_suspend(i915, I915_TTM_BACKUP_ALLOW_GPU |
>> + I915_TTM_BACKUP_PINNED);
>> if (ret)
>> goto out_recover;
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
>> index 03a00d193f40..3b6d14b5c604 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
>> @@ -57,7 +57,8 @@ static int i915_ttm_backup(struct
>> i915_gem_apply_to_region *apply,
>> if (pm_apply->allow_gpu && i915_gem_object_evictable(obj))
>> return ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx);
>> - if (!pm_apply->backup_pinned)
>> + if (!pm_apply->backup_pinned ||
>> + (pm_apply->allow_gpu && (obj->flags & I915_BO_ALLOC_PM_EARLY)))
>> return 0;
>> if (obj->flags & I915_BO_ALLOC_PM_VOLATILE)
>> @@ -155,7 +156,7 @@ static int i915_ttm_restore(struct
>> i915_gem_apply_to_region *apply,
>> if (!backup)
>> return 0;
>> - if (!pm_apply->allow_gpu && (obj->flags & I915_BO_ALLOC_USER))
>> + if (!pm_apply->allow_gpu && !(obj->flags & I915_BO_ALLOC_PM_EARLY))
>> return 0;
>> err = i915_gem_object_lock(backup, apply->ww);
>> diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
>> b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
>> index 0827634c842c..77d84a9e8789 100644
>> --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
>> +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
>> @@ -1645,7 +1645,7 @@ int i915_gem_huge_page_mock_selftests(void)
>> mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
>> mkwrite_device_info(dev_priv)->ppgtt_size = 48;
>> - ppgtt = i915_ppgtt_create(&dev_priv->gt);
>> + ppgtt = i915_ppgtt_create(&dev_priv->gt, 0);
>> if (IS_ERR(ppgtt)) {
>> err = PTR_ERR(ppgtt);
>> goto out_unlock;
>> diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
>> b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
>> index 1aee5e6b1b23..890191f286e3 100644
>> --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
>> +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
>> @@ -429,7 +429,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct
>> intel_gt *gt)
>> mutex_init(&ppgtt->flush);
>> mutex_init(&ppgtt->pin_mutex);
>> - ppgtt_init(&ppgtt->base, gt);
>> + ppgtt_init(&ppgtt->base, gt, 0);
>> ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K /
>> sizeof(gen6_pte_t));
>> ppgtt->base.vm.top = 1;
>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> index 6a5af995f5b1..037a9a6e4889 100644
>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> @@ -753,7 +753,8 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
>> * space.
>> *
>> */
>> -struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
>> +struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>> + unsigned long lmem_pt_obj_flags)
>> {
>> struct i915_ppgtt *ppgtt;
>> int err;
>> @@ -762,7 +763,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct
>> intel_gt *gt)
>> if (!ppgtt)
>> return ERR_PTR(-ENOMEM);
>> - ppgtt_init(ppgtt, gt);
>> + ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
>> ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
>> ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));
>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> index b9028c2ad3c7..f541d19264b4 100644
>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> @@ -12,7 +12,9 @@ struct i915_address_space;
>> struct intel_gt;
>> enum i915_cache_level;
>> -struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt);
>> +struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>> + unsigned long lmem_pt_obj_flags);
>> +
>> u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>> enum i915_cache_level level,
>> u32 flags);
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> index 8d71f67926f1..06576fc1310e 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> @@ -644,7 +644,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt
>> *ggtt)
>> struct i915_ppgtt *ppgtt;
>> int err;
>> - ppgtt = i915_ppgtt_create(ggtt->vm.gt);
>> + ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
>> if (IS_ERR(ppgtt))
>> return PTR_ERR(ppgtt);
>> @@ -909,6 +909,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>> size = gen8_get_total_gtt_size(snb_gmch_ctl);
>> ggtt->vm.alloc_pt_dma = alloc_pt_dma;
>> + ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
>
> The scratch page is still in system memory for the ggtt, so I guess
> this is not needed? Although maybe that will change, so probably good
> to keep?
Hmm, yes, I guess lets keep this for a possible future audit for now. I
think there are other vms that may not need this flag either.
Thanks for reviewing!
/Thomas
More information about the Intel-gfx
mailing list