[PATCH v2] tlb flush before releasing backing store

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Oct 22 11:29:59 UTC 2021


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   1 +
 drivers/gpu/drm/i915/gem/i915_gem_pages.c     |  10 ++
 drivers/gpu/drm/i915/gt/intel_gt.c            | 118 ++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gt.h            |   2 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |   2 +
 drivers/gpu/drm/i915/i915_vma.c               |   3 +
 6 files changed, 136 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 7c3da4e3e737..aaaa74271ab8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -305,6 +305,7 @@ struct drm_i915_gem_object {
 #define I915_BO_READONLY          BIT(6)
 #define I915_TILING_QUIRK_BIT     7 /* unknown swizzling; do not release! */
 #define I915_BO_PROTECTED         BIT(8)
+#define I915_BO_WAS_BOUND_BIT     9
 	/**
 	 * @mem_flags - Mutable placement-related flags
 	 *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 8eb1c3a6fc9c..8d6c38a62201 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -10,6 +10,8 @@
 #include "i915_gem_lmem.h"
 #include "i915_gem_mman.h"
 
+#include "gt/intel_gt.h"
+
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages,
 				 unsigned int sg_page_sizes)
@@ -218,6 +220,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 	__i915_gem_object_reset_page_iter(obj);
 	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
 
+	if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+		struct drm_i915_private *i915 = to_i915(obj->base.dev);
+		intel_wakeref_t wakeref;
+
+		with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
+			intel_gt_invalidate_tlbs(&i915->gt);
+	}
+
 	return pages;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 1cb1948ac959..412e7ca979ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -30,6 +30,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 
 	spin_lock_init(&gt->irq_lock);
 
+	mutex_init(&gt->tlb_invalidate_lock);
+
 	INIT_LIST_HEAD(&gt->closed_vma);
 	spin_lock_init(&gt->closed_lock);
 
@@ -907,3 +909,119 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 
 	intel_sseu_dump(&info->sseu, p);
 }
+
+static bool
+get_reg_and_bit(const struct intel_engine_cs *engine,
+		const bool gen8, const i915_reg_t *regs, const unsigned int num,
+		i915_reg_t *reg, u32 *bit)
+{
+	const unsigned int class = engine->class;
+	unsigned int b;
+
+	if (drm_WARN_ON_ONCE(&engine->i915->drm,
+			     class >= num || !regs[class].reg))
+		return false; /* class outside regs[] or entry is zero; outputs untouched */
+
+	*reg = regs[class]; /* start from the per-class table entry */
+
+	if (gen8 && class == VIDEO_DECODE_CLASS) {
+		reg->reg += 4 * engine->instance; /* own register per instance */
+		b = 0;
+	} else {
+		b = engine->instance; /* class register, one bit per instance */
+	}
+
+	*bit = BIT(b);
+
+	return true; /* *reg and *bit are now filled in for this engine */
+}
+
+void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+{
+	static const i915_reg_t gen8_regs[] = {
+		[RENDER_CLASS]			= _MMIO(0x4260),
+		[VIDEO_DECODE_CLASS]		= _MMIO(0x4264), /* vcs0; vcs1 is 0x4268 */
+		[VIDEO_ENHANCEMENT_CLASS]	= _MMIO(0x4270),
+		[COPY_ENGINE_CLASS]		= _MMIO(0x426c),
+	};
+	static const i915_reg_t gen12_regs[] = {
+		[RENDER_CLASS]			= _MMIO(0xced8),
+		[VIDEO_DECODE_CLASS]		= _MMIO(0xcedc),
+		[VIDEO_ENHANCEMENT_CLASS]	= _MMIO(0xcee0),
+		[COPY_ENGINE_CLASS]		= _MMIO(0xcee4),
+	};
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	const i915_reg_t *regs;
+	unsigned int num = 0;
+
+	if (IS_TIGERLAKE(i915)) {
+		regs = gen12_regs;
+		num = ARRAY_SIZE(gen12_regs);
+	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
+		regs = gen8_regs;
+		num = ARRAY_SIZE(gen8_regs);
+	} else if (GRAPHICS_VER(i915) < 8) { /* TODO confirm: nothing to flush before gen8? */
+		return;
+	}
+
+	if (drm_WARN_ON_ONCE(&i915->drm, !num))
+		return; /* no register table was selected for this platform */
+
+	GEM_TRACE("\n");
+
+	/*
+	 * Request a TLB invalidation on every engine of @gt via MMIO, then
+	 * wait (up to 1 ms per engine) for each request to complete.
+	 *
+	 * FIXME: Bspec 44366 says:
+	 * """
+	 * To ensure proper invalidation of the TLBs, SW has to ensure
+	 * the corresponding engine's HW pipeline is flushed and cleared
+	 * from all its memory accesses. Otherwise HW cannot guarantee
+	 * the proper invalidation for TLBs.
+	 * """
+	 *
+	 * Therefore I think this will not be 100% reliable when called with
+	 * active engines. Instead of MMIO flushing, an idle-barrier-based
+	 * scheme is likely required.
+	 */
+
+	mutex_lock(&gt->tlb_invalidate_lock);
+	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+	for_each_engine(engine, gt, id) { /* pass 1: write each engine's bit */
+		i915_reg_t reg;
+		u32 bit;
+
+		if (!get_reg_and_bit(engine, regs == gen8_regs, regs, num,
+				     &reg, &bit))
+			continue;
+
+		intel_uncore_write_fw(uncore, reg, bit);
+	}
+
+	for_each_engine(engine, gt, id) { /* pass 2: wait on each engine's bit */
+		const unsigned int timeout_ms = 1;
+		i915_reg_t reg;
+		u32 bit;
+		int ret;
+
+		if (!get_reg_and_bit(engine, regs == gen8_regs, regs, num,
+				     &reg, &bit))
+			continue;
+
+		ret = __intel_wait_for_register_fw(uncore,
+						   reg, bit, 0,
+						   50, timeout_ms,
+						   NULL);
+		if (ret)
+			drm_notice(&gt->i915->drm,
+				   "%s TLB invalidation did not complete in %ums!\n",
+				   engine->name, timeout_ms);
+	}
+
+	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+	mutex_unlock(&gt->tlb_invalidate_lock);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 74e771871a9b..c0169d6017c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -90,4 +90,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 
 void intel_gt_watchdog_work(struct work_struct *work);
 
+void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+
 #endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 14216cc471b1..f20687796490 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -73,6 +73,8 @@ struct intel_gt {
 
 	struct intel_uc uc;
 
+	struct mutex tlb_invalidate_lock;
+
 	struct i915_wa_list wa_list;
 
 	struct intel_gt_timelines {
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4b7fc4647e46..dfd20060812b 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -434,6 +434,9 @@ int i915_vma_bind(struct i915_vma *vma,
 		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
 	}
 
+	if (vma->obj)
+		set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+
 	atomic_or(bind_flags, &vma->flags);
 	return 0;
 }
-- 
2.30.2



More information about the Intel-gfx-trybot mailing list