[Intel-gfx] [PATCH v4] drm/i915/mtl: Media GT and Render GT share common GGTT

Iddamsetty, Aravind aravind.iddamsetty at intel.com
Tue Nov 29 06:03:15 UTC 2022



On 29-11-2022 11:24, Lucas De Marchi wrote:
> On Wed, Nov 23, 2022 at 09:47:03AM +0530, Iddamsetty, Aravind wrote:
>>
>>
>> On 23-11-2022 05:29, Matt Roper wrote:
>>> On Tue, Nov 22, 2022 at 12:31:26PM +0530, Aravind Iddamsetty wrote:
>>>> On XE_LPM+ platforms the media engines are carved out into a separate
>>>> GT but have a common GGTMMADR address range, which essentially makes
>>>> the GGTT address space shared between the media and render GTs. As a
>>>> result, any update to the GGTT shall invalidate the TLBs of all GTs
>>>> sharing it and, similarly, any operation on the GGTT requiring an
>>>> action on a GT will have to involve all GTs sharing it.
>>>> setup_private_pat was being done on a per-GGTT basis; as it doesn't
>>>> touch any GGTT structures, it is moved to a per-GT basis.
>>>>
>>>> BSPEC: 63834
>>>>
>>>> v2:
>>>> 1. Add details to commit msg
>>>> 2. includes fix for failure to add item to ggtt->gt_list, as suggested
>>>> by Lucas
>>>> 3. as ggtt_flush() is used only for ggtt, drop the i915_is_ggtt check
>>>> within it.
>>>> 4. setup_private_pat moved out of intel_gt_tiles_init
>>>>
>>>> v3:
>>>> 1. Move out for_each_gt from i915_driver.c (Jani Nikula)
>>>>
>>>> v4: drop using RCU primitives on ggtt->gt_list as it is not an RCU list
>>>> (Matt Roper)
>>>>
>>>> Cc: Matt Roper <matthew.d.roper at intel.com>
>>>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty at intel.com>
>>>
>>> Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
>>
>> Thanks Matt, could you also help with merging the change?
>>
>> Regards,
>> Aravind.
>>>
>>>> ---
>>>>  drivers/gpu/drm/i915/gt/intel_ggtt.c      | 54 +++++++++++++++++------
>>>>  drivers/gpu/drm/i915/gt/intel_gt.c        | 13 +++++-
>>>>  drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
>>>>  drivers/gpu/drm/i915/gt/intel_gtt.h       |  4 ++
>>>>  drivers/gpu/drm/i915/i915_driver.c        | 12 ++---
>>>>  drivers/gpu/drm/i915/i915_gem.c           |  2 +
>>>>  drivers/gpu/drm/i915/i915_gem_evict.c     | 51 +++++++++++++++------
>>>>  drivers/gpu/drm/i915/i915_vma.c           |  5 ++-
>>>>  drivers/gpu/drm/i915/selftests/i915_gem.c |  2 +
>>>>  9 files changed, 111 insertions(+), 35 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>>> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>>> index 8145851ad23d..7644738b9cdb 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>>> @@ -8,6 +8,7 @@
>>>>  #include <linux/types.h>
>>>>  #include <linux/stop_machine.h>
>>>>
>>>> +#include <drm/drm_managed.h>
>>>>  #include <drm/i915_drm.h>
>>>>  #include <drm/intel-gtt.h>
>>>>
>>>> @@ -196,10 +197,13 @@ void i915_ggtt_suspend_vm(struct
>>>> i915_address_space *vm)
>>>>
>>>>  void i915_ggtt_suspend(struct i915_ggtt *ggtt)
>>>>  {
>>>> +    struct intel_gt *gt;
>>>> +
>>>>      i915_ggtt_suspend_vm(&ggtt->vm);
>>>>      ggtt->invalidate(ggtt);
>>>>
>>>> -    intel_gt_check_and_clear_faults(ggtt->vm.gt);
>>>> +    list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
>>>> +        intel_gt_check_and_clear_faults(gt);
>>>>  }
>>>>
>>>>  void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
>>>> @@ -225,16 +229,21 @@ static void gen8_ggtt_invalidate(struct
>>>> i915_ggtt *ggtt)
>>>>
>>>>  static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>>>>  {
>>>> -    struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>>>>      struct drm_i915_private *i915 = ggtt->vm.i915;
>>>>
>>>>      gen8_ggtt_invalidate(ggtt);
>>>>
>>>> -    if (GRAPHICS_VER(i915) >= 12)
>>>> -        intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
>>>> -                      GEN12_GUC_TLB_INV_CR_INVALIDATE);
>>>> -    else
>>>> -        intel_uncore_write_fw(uncore, GEN8_GTCR,
>>>> GEN8_GTCR_INVALIDATE);
>>>> +    if (GRAPHICS_VER(i915) >= 12) {
>>>> +        struct intel_gt *gt;
>>>> +
>>>> +        list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
>>>> +            intel_uncore_write_fw(gt->uncore,
>>>> +                          GEN12_GUC_TLB_INV_CR,
>>>> +                          GEN12_GUC_TLB_INV_CR_INVALIDATE);
>>>> +    } else {
>>>> +        intel_uncore_write_fw(ggtt->vm.gt->uncore,
>>>> +                      GEN8_GTCR, GEN8_GTCR_INVALIDATE);
>>>> +    }
>>>>  }
>>>>
>>>>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>>> @@ -986,8 +995,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>>>
>>>>      ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>>>>
>>>> -    setup_private_pat(ggtt->vm.gt);
>>>> -
>>>>      return ggtt_probe_common(ggtt, size);
>>>>  }
>>>>
>>>> @@ -1196,7 +1203,14 @@ static int ggtt_probe_hw(struct i915_ggtt
>>>> *ggtt, struct intel_gt *gt)
>>>>   */
>>>>  int i915_ggtt_probe_hw(struct drm_i915_private *i915)
>>>>  {
>>>> -    int ret;
>>>> +    struct intel_gt *gt;
>>>> +    int ret, i;
>>>> +
>>>> +    for_each_gt(gt, i915, i) {
>>>> +        ret = intel_gt_assign_ggtt(gt);
> 
> in v3 the intel_gt_assign_ggtt() call is not in i915_driver.c anymore but
> rather moved here. We could make i915_ggtt_create() static, doing the
> allocation here and intel_gt_assign_ggtt() would be in charge of just
> assigning the ggtt. Not very important though and can be done later.

Well, we call intel_gt_assign_ggtt() in i915_gem_gtt_mock_selftests, but not
i915_ggtt_probe_hw().

> 
> pushed, thanks

Thanks a lot for the help.

Regards,
Aravind.
> 
> Lucas De Marchi
> 
>>>> +        if (ret)
>>>> +            return ret;
>>>> +    }
>>>>
>>>>      ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
>>>>      if (ret)
>>>> @@ -1208,6 +1222,19 @@ int i915_ggtt_probe_hw(struct
>>>> drm_i915_private *i915)
>>>>      return 0;
>>>>  }
>>>>
>>>> +struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
>>>> +{
>>>> +    struct i915_ggtt *ggtt;
>>>> +
>>>> +    ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL);
>>>> +    if (!ggtt)
>>>> +        return ERR_PTR(-ENOMEM);
>>>> +
>>>> +    INIT_LIST_HEAD(&ggtt->gt_list);
>>>> +
>>>> +    return ggtt;
>>>> +}
>>>> +
>>>>  int i915_ggtt_enable_hw(struct drm_i915_private *i915)
>>>>  {
>>>>      if (GRAPHICS_VER(i915) < 6)
>>>> @@ -1296,9 +1323,11 @@ bool i915_ggtt_resume_vm(struct
>>>> i915_address_space *vm)
>>>>
>>>>  void i915_ggtt_resume(struct i915_ggtt *ggtt)
>>>>  {
>>>> +    struct intel_gt *gt;
>>>>      bool flush;
>>>>
>>>> -    intel_gt_check_and_clear_faults(ggtt->vm.gt);
>>>> +    list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
>>>> +        intel_gt_check_and_clear_faults(gt);
>>>>
>>>>      flush = i915_ggtt_resume_vm(&ggtt->vm);
>>>>
>>>> @@ -1307,9 +1336,6 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
>>>>      if (flush)
>>>>          wbinvd_on_all_cpus();
>>>>
>>>> -    if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
>>>> -        setup_private_pat(ggtt->vm.gt);
>>>> -
>>>>      intel_ggtt_restore_fences(ggtt);
>>>>  }
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c
>>>> b/drivers/gpu/drm/i915/gt/intel_gt.c
>>>> index b5ad9caa5537..b03788d7674e 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
>>>> @@ -110,9 +110,18 @@ static int intel_gt_probe_lmem(struct intel_gt
>>>> *gt)
>>>>
>>>>  int intel_gt_assign_ggtt(struct intel_gt *gt)
>>>>  {
>>>> -    gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt),
>>>> GFP_KERNEL);
>>>> +    /* Media GT shares primary GT's GGTT */
>>>> +    if (gt->type == GT_MEDIA) {
>>>> +        gt->ggtt = to_gt(gt->i915)->ggtt;
>>>> +    } else {
>>>> +        gt->ggtt = i915_ggtt_create(gt->i915);
>>>> +        if (IS_ERR(gt->ggtt))
>>>> +            return PTR_ERR(gt->ggtt);
>>>> +    }
>>>>
>>>> -    return gt->ggtt ? 0 : -ENOMEM;
>>>> +    list_add_tail(&gt->ggtt_link, &gt->ggtt->gt_list);
>>>> +
>>>> +    return 0;
>>>>  }
>>>>
>>>>  int intel_gt_init_mmio(struct intel_gt *gt)
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>>> b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>>> index c1d9cd255e06..8d915640914b 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>>> @@ -277,6 +277,9 @@ struct intel_gt {
>>>>      struct kobject *sysfs_defaults;
>>>>
>>>>      struct i915_perf_gt perf;
>>>> +
>>>> +    /** link: &ggtt.gt_list */
>>>> +    struct list_head ggtt_link;
>>>>  };
>>>>
>>>>  struct intel_gt_definition {
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
>>>> b/drivers/gpu/drm/i915/gt/intel_gtt.h
>>>> index 4d75ba4bb41d..d1900fec6cd1 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>>>> @@ -390,6 +390,9 @@ struct i915_ggtt {
>>>>      struct mutex error_mutex;
>>>>      struct drm_mm_node error_capture;
>>>>      struct drm_mm_node uc_fw;
>>>> +
>>>> +    /** List of GTs mapping this GGTT */
>>>> +    struct list_head gt_list;
>>>>  };
>>>>
>>>>  struct i915_ppgtt {
>>>> @@ -584,6 +587,7 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt);
>>>>  int i915_init_ggtt(struct drm_i915_private *i915);
>>>>  void i915_ggtt_driver_release(struct drm_i915_private *i915);
>>>>  void i915_ggtt_driver_late_release(struct drm_i915_private *i915);
>>>> +struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915);
>>>>
>>>>  static inline bool i915_ggtt_has_aperture(const struct i915_ggtt
>>>> *ggtt)
>>>>  {
>>>> diff --git a/drivers/gpu/drm/i915/i915_driver.c
>>>> b/drivers/gpu/drm/i915/i915_driver.c
>>>> index 69103ae37779..4e1bb3c23c63 100644
>>>> --- a/drivers/gpu/drm/i915/i915_driver.c
>>>> +++ b/drivers/gpu/drm/i915/i915_driver.c
>>>> @@ -612,10 +612,6 @@ static int i915_driver_hw_probe(struct
>>>> drm_i915_private *dev_priv)
>>>>
>>>>      i915_perf_init(dev_priv);
>>>>
>>>> -    ret = intel_gt_assign_ggtt(to_gt(dev_priv));
>>>> -    if (ret)
>>>> -        goto err_perf;
>>>> -
>>>>      ret = i915_ggtt_probe_hw(dev_priv);
>>>>      if (ret)
>>>>          goto err_perf;
>>>> @@ -1316,7 +1312,8 @@ int i915_driver_suspend_switcheroo(struct
>>>> drm_i915_private *i915,
>>>>  static int i915_drm_resume(struct drm_device *dev)
>>>>  {
>>>>      struct drm_i915_private *dev_priv = to_i915(dev);
>>>> -    int ret;
>>>> +    struct intel_gt *gt;
>>>> +    int ret, i;
>>>>
>>>>      disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>>>>
>>>> @@ -1331,6 +1328,11 @@ static int i915_drm_resume(struct drm_device
>>>> *dev)
>>>>          drm_err(&dev_priv->drm, "failed to re-enable GGTT\n");
>>>>
>>>>      i915_ggtt_resume(to_gt(dev_priv)->ggtt);
>>>> +
>>>> +    for_each_gt(gt, dev_priv, i)
>>>> +        if (GRAPHICS_VER(gt->i915) >= 8)
>>>> +            setup_private_pat(gt);
>>>> +
>>>>      /* Must be called after GGTT is resumed. */
>>>>      intel_dpt_resume(dev_priv);
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>>>> b/drivers/gpu/drm/i915/i915_gem.c
>>>> index 8468ca9885fd..086c4702e1bf 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>>> @@ -1143,6 +1143,8 @@ int i915_gem_init(struct drm_i915_private
>>>> *dev_priv)
>>>>      for_each_gt(gt, dev_priv, i) {
>>>>          intel_uc_fetch_firmwares(&gt->uc);
>>>>          intel_wopcm_init(&gt->wopcm);
>>>> +        if (GRAPHICS_VER(dev_priv) >= 8)
>>>> +            setup_private_pat(gt);
>>>>      }
>>>>
>>>>      ret = i915_init_ggtt(dev_priv);
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c
>>>> b/drivers/gpu/drm/i915/i915_gem_evict.c
>>>> index f025ee4fa526..4cfe36b0366b 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
>>>> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
>>>> @@ -43,16 +43,25 @@ static bool dying_vma(struct i915_vma *vma)
>>>>      return !kref_read(&vma->obj->base.refcount);
>>>>  }
>>>>
>>>> -static int ggtt_flush(struct intel_gt *gt)
>>>> +static int ggtt_flush(struct i915_address_space *vm)
>>>>  {
>>>> -    /*
>>>> -     * Not everything in the GGTT is tracked via vma (otherwise we
>>>> -     * could evict as required with minimal stalling) so we are forced
>>>> -     * to idle the GPU and explicitly retire outstanding requests in
>>>> -     * the hopes that we can then remove contexts and the like only
>>>> -     * bound by their active reference.
>>>> -     */
>>>> -    return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
>>>> +    struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>>>> +    struct intel_gt *gt;
>>>> +    int ret = 0;
>>>> +
>>>> +    list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
>>>> +        /*
>>>> +         * Not everything in the GGTT is tracked via vma (otherwise we
>>>> +         * could evict as required with minimal stalling) so we are
>>>> forced
>>>> +         * to idle the GPU and explicitly retire outstanding
>>>> requests in
>>>> +         * the hopes that we can then remove contexts and the like
>>>> only
>>>> +         * bound by their active reference.
>>>> +         */
>>>> +        ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
>>>> +        if (ret)
>>>> +            return ret;
>>>> +    }
>>>> +    return ret;
>>>>  }
>>>>
>>>>  static bool grab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
>>>> @@ -149,6 +158,7 @@ i915_gem_evict_something(struct
>>>> i915_address_space *vm,
>>>>      struct drm_mm_node *node;
>>>>      enum drm_mm_insert_mode mode;
>>>>      struct i915_vma *active;
>>>> +    struct intel_gt *gt;
>>>>      int ret;
>>>>
>>>>      lockdep_assert_held(&vm->mutex);
>>>> @@ -174,7 +184,14 @@ i915_gem_evict_something(struct
>>>> i915_address_space *vm,
>>>>                      min_size, alignment, color,
>>>>                      start, end, mode);
>>>>
>>>> -    intel_gt_retire_requests(vm->gt);
>>>> +    if (i915_is_ggtt(vm)) {
>>>> +        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>>>> +
>>>> +        list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
>>>> +            intel_gt_retire_requests(gt);
>>>> +    } else {
>>>> +        intel_gt_retire_requests(vm->gt);
>>>> +    }
>>>>
>>>>  search_again:
>>>>      active = NULL;
>>>> @@ -246,7 +263,7 @@ i915_gem_evict_something(struct
>>>> i915_address_space *vm,
>>>>      if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
>>>>          return -EBUSY;
>>>>
>>>> -    ret = ggtt_flush(vm->gt);
>>>> +    ret = ggtt_flush(vm);
>>>>      if (ret)
>>>>          return ret;
>>>>
>>>> @@ -332,7 +349,15 @@ int i915_gem_evict_for_node(struct
>>>> i915_address_space *vm,
>>>>       * a stray pin (preventing eviction) that can only be resolved by
>>>>       * retiring.
>>>>       */
>>>> -    intel_gt_retire_requests(vm->gt);
>>>> +    if (i915_is_ggtt(vm)) {
>>>> +        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>>>> +        struct intel_gt *gt;
>>>> +
>>>> +        list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
>>>> +            intel_gt_retire_requests(gt);
>>>> +    } else {
>>>> +        intel_gt_retire_requests(vm->gt);
>>>> +    }
>>>>
>>>>      if (i915_vm_has_cache_coloring(vm)) {
>>>>          /* Expand search to cover neighbouring guard pages (or
>>>> lack!) */
>>>> @@ -438,7 +463,7 @@ int i915_gem_evict_vm(struct i915_address_space
>>>> *vm, struct i915_gem_ww_ctx *ww)
>>>>       * switch otherwise is ineffective.
>>>>       */
>>>>      if (i915_is_ggtt(vm)) {
>>>> -        ret = ggtt_flush(vm->gt);
>>>> +        ret = ggtt_flush(vm);
>>>>          if (ret)
>>>>              return ret;
>>>>      }
>>>> diff --git a/drivers/gpu/drm/i915/i915_vma.c
>>>> b/drivers/gpu/drm/i915/i915_vma.c
>>>> index 703fee6b5f75..726705b10637 100644
>>>> --- a/drivers/gpu/drm/i915/i915_vma.c
>>>> +++ b/drivers/gpu/drm/i915/i915_vma.c
>>>> @@ -1544,6 +1544,8 @@ static int __i915_ggtt_pin(struct i915_vma
>>>> *vma, struct i915_gem_ww_ctx *ww,
>>>>                 u32 align, unsigned int flags)
>>>>  {
>>>>      struct i915_address_space *vm = vma->vm;
>>>> +    struct intel_gt *gt;
>>>> +    struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>>>>      int err;
>>>>
>>>>      do {
>>>> @@ -1559,7 +1561,8 @@ static int __i915_ggtt_pin(struct i915_vma
>>>> *vma, struct i915_gem_ww_ctx *ww,
>>>>          }
>>>>
>>>>          /* Unlike i915_vma_pin, we don't take no for an answer! */
>>>> -        flush_idle_contexts(vm->gt);
>>>> +        list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
>>>> +            flush_idle_contexts(gt);
>>>>          if (mutex_lock_interruptible(&vm->mutex) == 0) {
>>>>              /*
>>>>               * We pass NULL ww here, as we don't want to unbind
>>>> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c
>>>> b/drivers/gpu/drm/i915/selftests/i915_gem.c
>>>> index e5dd82e7e480..2535b9684bd1 100644
>>>> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
>>>> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
>>>> @@ -127,6 +127,8 @@ static void igt_pm_resume(struct
>>>> drm_i915_private *i915)
>>>>       */
>>>>      with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
>>>>          i915_ggtt_resume(to_gt(i915)->ggtt);
>>>> +        if (GRAPHICS_VER(i915) >= 8)
>>>> +            setup_private_pat(to_gt(i915));
>>>>          i915_gem_resume(i915);
>>>>      }
>>>>  }
>>>> -- 
>>>> 2.25.1
>>>>
>>>
>>


More information about the dri-devel mailing list