[PATCH v2] tlb flush before releasing backing store
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Oct 22 11:29:59 UTC 2021
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
.../gpu/drm/i915/gem/i915_gem_object_types.h | 1 +
drivers/gpu/drm/i915/gem/i915_gem_pages.c | 10 ++
drivers/gpu/drm/i915/gt/intel_gt.c | 118 ++++++++++++++++++
drivers/gpu/drm/i915/gt/intel_gt.h | 2 +
drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 +
drivers/gpu/drm/i915/i915_vma.c | 3 +
6 files changed, 136 insertions(+)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 7c3da4e3e737..aaaa74271ab8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -305,6 +305,7 @@ struct drm_i915_gem_object {
#define I915_BO_READONLY BIT(6)
#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED BIT(8)
+#define I915_BO_WAS_BOUND_BIT 9
/**
* @mem_flags - Mutable placement-related flags
*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 8eb1c3a6fc9c..8d6c38a62201 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -10,6 +10,8 @@
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
+#include "gt/intel_gt.h"
+
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages,
unsigned int sg_page_sizes)
@@ -218,6 +220,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
__i915_gem_object_reset_page_iter(obj);
obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+ if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
+ intel_gt_invalidate_tlbs(&i915->gt);
+ }
+
return pages;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 1cb1948ac959..412e7ca979ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -30,6 +30,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
spin_lock_init(&gt->irq_lock);
+ mutex_init(&gt->tlb_invalidate_lock);
+
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
@@ -907,3 +909,119 @@ void intel_gt_info_print(const struct intel_gt_info *info,
intel_sseu_dump(&info->sseu, p);
}
+
+static bool
+get_reg_and_bit(const struct intel_engine_cs *engine,
+ const bool gen8, const i915_reg_t *regs, const unsigned int num,
+ i915_reg_t *reg, u32 *bit)
+{
+ const unsigned int class = engine->class;
+ unsigned int b;
+
+ if (drm_WARN_ON_ONCE(&engine->i915->drm,
+ class >= num || !regs[class].reg))
+ return false;
+
+ *reg = regs[class];
+
+ if (gen8 && class == VIDEO_DECODE_CLASS) {
+ reg->reg += 4 * engine->instance;
+ b = 0;
+ } else {
+ b = engine->instance;
+ }
+
+ *bit = BIT(b);
+
+ return true;
+}
+
+void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+{
+ static const i915_reg_t gen8_regs[] = {
+ [RENDER_CLASS] = _MMIO(0x4260),
+ [VIDEO_DECODE_CLASS] = _MMIO(0x4264), /* vcs1 0x4268 */
+ [VIDEO_ENHANCEMENT_CLASS] = _MMIO(0x4270),
+ [COPY_ENGINE_CLASS] = _MMIO(0x426c),
+ };
+ static const i915_reg_t gen12_regs[] = {
+ [RENDER_CLASS] = _MMIO(0xced8),
+ [VIDEO_DECODE_CLASS] = _MMIO(0xcedc),
+ [VIDEO_ENHANCEMENT_CLASS] = _MMIO(0xcee0),
+ [COPY_ENGINE_CLASS] = _MMIO(0xcee4),
+ };
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ const i915_reg_t *regs;
+ unsigned int num = 0;
+
+ if (IS_TIGERLAKE(i915)) {
+ regs = gen12_regs;
+ num = ARRAY_SIZE(gen12_regs);
+ } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
+ regs = gen8_regs;
+ num = ARRAY_SIZE(gen8_regs);
+ } else if (GRAPHICS_VER(i915) < 8) { /* ??? */
+ return;
+ }
+
+ if (drm_WARN_ON_ONCE(&i915->drm, !num))
+ return;
+
+ GEM_TRACE("\n");
+
+ /* FIXME FIXME FIXME
+ *
+ * Bspec 44366 says:
+ *
+ * """
+ * To ensure proper invalidation of the TLBs, SW has to ensure
+ * the corresponding engine's HW pipeline is flushed and cleared
+ * from all its memory accesses. Otherwise HW cannot guarantee
+ * the proper invalidation for TLBs.
+ * """
+ *
+ * Therefore I think this will not be 100% reliable when called with
+ * active engines. Instead of mmio flushing, an idle-barrier-based scheme
+ * is likely required.
+ */
+
+ mutex_lock(&gt->tlb_invalidate_lock);
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ for_each_engine(engine, gt, id) {
+ i915_reg_t reg;
+ u32 bit;
+
+ if (!get_reg_and_bit(engine, regs == gen8_regs, regs, num,
+ &reg, &bit))
+ continue;
+
+ intel_uncore_write_fw(uncore, reg, bit);
+ }
+
+ for_each_engine(engine, gt, id) {
+ const unsigned int timeout_ms = 1;
+ i915_reg_t reg;
+ u32 bit;
+ int ret;
+
+ if (!get_reg_and_bit(engine, regs == gen8_regs, regs, num,
+ &reg, &bit))
+ continue;
+
+ ret = __intel_wait_for_register_fw(uncore,
+ reg, bit, 0,
+ 50, timeout_ms,
+ NULL);
+ if (ret)
+ drm_notice(&gt->i915->drm,
+ "%s TLB invalidation did not complete in %ums!\n",
+ engine->name, timeout_ms);
+ }
+
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+ mutex_unlock(&gt->tlb_invalidate_lock);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 74e771871a9b..c0169d6017c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -90,4 +90,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
void intel_gt_watchdog_work(struct work_struct *work);
+void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 14216cc471b1..f20687796490 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -73,6 +73,8 @@ struct intel_gt {
struct intel_uc uc;
+ struct mutex tlb_invalidate_lock;
+
struct i915_wa_list wa_list;
struct intel_gt_timelines {
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4b7fc4647e46..dfd20060812b 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -434,6 +434,9 @@ int i915_vma_bind(struct i915_vma *vma,
vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
}
+ if (vma->obj)
+ set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+
atomic_or(bind_flags, &vma->flags);
return 0;
}
--
2.30.2
More information about the Intel-gfx-trybot
mailing list