[PATCH] drm/i915: Clear only scanout-buffer padding in GGTT when VT-d is enabled

When VT-d is enabled, add and clear padding before and after the surface for scanout buffers instead of clearing the full range of the GGTT during i915 startup. This reduces i915 startup time by about 100ms. Note that adding padding before the beginning of the surface is required for rotated surfaces.

edmund.j.dea at intel.com edmund.j.dea at intel.com
Wed Mar 14 19:12:00 UTC 2018


From: ejdea <edmund.j.dea at intel.com>

---
 drivers/gpu/drm/i915/i915_gem.c          |  2 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c      | 47 ++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_gtt.h      |  3 ++
 drivers/gpu/drm/i915/i915_gem_stolen.c   |  2 +-
 drivers/gpu/drm/i915/i915_pci.c          |  6 +++-
 drivers/gpu/drm/i915/i915_vma.c          | 57 ++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_vma.h          |  2 +-
 drivers/gpu/drm/i915/intel_device_info.h |  3 ++
 8 files changed, 110 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ab88ca5..e792756 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4094,6 +4094,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
+	flags |= PIN_DISPLAYABLE;
+
 	/* Mark the global pin early so that we account for the
 	 * display coherency whilst setting up the cache domains.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 21d72f6..c2d92a4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2495,6 +2495,35 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 		gen8_set_pte(&gtt_base[i], scratch_pte);
 }
 
+static void gen8_ggtt_clear_bo_padding(struct i915_address_space *vm,
+				       u64 start, u64 length)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+	unsigned int first_entry = start >> PAGE_SHIFT;
+	unsigned int num_entries = length >> PAGE_SHIFT;
+	const gen8_pte_t scratch_pte =
+		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
+	gen8_pte_t __iomem *gtt_base =
+		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
+	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+	int padding_nents = INTEL_INFO(vm->i915)->surf_padding_nents;
+	int overfetch_start = num_entries + padding_nents;
+	int i;
+
+	/* We write both padding regions, so bound-check the full span. */
+	if (WARN(overfetch_start + padding_nents > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		return;
+
+	/* Clear memory padding before the beginning of the scanout buffer */
+	for (i = 0; i < padding_nents; i++)
+		gen8_set_pte(&gtt_base[i], scratch_pte);
+
+	/* Clear memory padding after the end of the scanout buffer */
+	for (i = overfetch_start; i < overfetch_start + padding_nents; i++)
+		gen8_set_pte(&gtt_base[i], scratch_pte);
+}
+
 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
 {
 	struct drm_i915_private *dev_priv = vm->i915;
@@ -3342,8 +3371,22 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->base.clear_pages = clear_pages;
 	ggtt->base.insert_page = gen8_ggtt_insert_page;
 	ggtt->base.clear_range = nop_clear_range;
-	if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
-		ggtt->base.clear_range = gen8_ggtt_clear_range;
+
+	if (intel_scanout_needs_vtd_wa(dev_priv)) {
+		if (INTEL_INFO(dev_priv)->surf_padding_nents > 0) {
+			ggtt->base.clear_bo_padding = gen8_ggtt_clear_bo_padding;
+		} else {
+			ggtt->base.clear_range = gen8_ggtt_clear_range;
+			ggtt->base.clear_bo_padding = NULL;
+		}
+	} else if (!USES_FULL_PPGTT(dev_priv)) {
+		/* Aliasing PPGTT still needs the full GGTT range cleared. */
+		ggtt->base.clear_range = gen8_ggtt_clear_range;
+		ggtt->base.clear_bo_padding = NULL;
+	} else {
+		ggtt->base.clear_bo_padding = NULL;
+	}
 
 	ggtt->base.insert_entries = gen8_ggtt_insert_entries;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 6efc017..98fdf86 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -323,6 +323,8 @@ struct i915_address_space {
 				 u64 start, u64 length);
 	void (*clear_range)(struct i915_address_space *vm,
 			    u64 start, u64 length);
+	void (*clear_bo_padding)(struct i915_address_space *vm,
+				u64 start, u64 length);
 	void (*insert_page)(struct i915_address_space *vm,
 			    dma_addr_t addr,
 			    u64 offset,
@@ -640,6 +642,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 #define PIN_HIGH		BIT(9)
 #define PIN_OFFSET_BIAS		BIT(10)
 #define PIN_OFFSET_FIXED	BIT(11)
+#define PIN_DISPLAYABLE		BIT(12)
 #define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)
 
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 62aa679..effb426 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -643,7 +643,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 
 	vma->pages = obj->mm.pages;
 	vma->flags |= I915_VMA_GLOBAL_BIND;
-	__i915_vma_set_map_and_fenceable(vma);
+	__i915_vma_set_map_and_fenceable(vma, 0);
 	list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
 
 	spin_lock(&dev_priv->mm.obj_lock);
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 062e91b..f0549cd 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -463,7 +463,8 @@ static const struct intel_device_info intel_cherryview_info = {
 	.has_csr = 1, \
 	.has_guc = 1, \
 	.has_ipc = 1, \
-	.ddb_size = 896
+	.ddb_size = 896, \
+	.surf_padding_nents = 136
 
 #define SKL_PLATFORM \
 	GEN9_FEATURES, \
@@ -529,12 +530,14 @@ static const struct intel_device_info intel_broxton_info = {
 	GEN9_LP_FEATURES,
 	PLATFORM(INTEL_BROXTON),
 	.ddb_size = 512,
+	.surf_padding_nents = 136,
 };
 
 static const struct intel_device_info intel_geminilake_info = {
 	GEN9_LP_FEATURES,
 	PLATFORM(INTEL_GEMINILAKE),
 	.ddb_size = 1024,
+	.surf_padding_nents = 168,
 	GLK_COLORS,
 };
 
@@ -582,6 +585,7 @@ static const struct intel_device_info intel_coffeelake_gt3_info = {
 	GEN9_FEATURES, \
 	GEN(10), \
 	.ddb_size = 1024, \
+	.surf_padding_nents = 168, \
 	GLK_COLORS
 
 static const struct intel_device_info intel_cannonlake_info = {
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4bda3bd..10c8664 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -378,6 +378,22 @@ void i915_vma_unpin_and_release(struct i915_vma **p_vma)
 bool i915_vma_misplaced(const struct i915_vma *vma,
 			u64 size, u64 alignment, u64 flags)
 {
+	struct drm_i915_private *dev_priv = vma->vm->i915;
+	u64 start;
+
+	/* When VT-d is enabled, scanout buffers need padding before and after
+	 * the surface. Since the vma node start address accounted for padding
+	 * before the surface, set the start address back to the real vma start
+	 * address without padding when checking if the vma is misplaced.
+	 */
+	if (flags & PIN_DISPLAYABLE &&
+	    intel_scanout_needs_vtd_wa(dev_priv) &&
+	    vma->vm->clear_bo_padding)
+		start = vma->node.start -
+			((u64)INTEL_INFO(dev_priv)->surf_padding_nents << PAGE_SHIFT);
+	else
+		start = vma->node.start;
+
 	if (!drm_mm_node_allocated(&vma->node))
 		return false;
 
@@ -385,30 +401,42 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 		return true;
 
 	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
-	if (alignment && !IS_ALIGNED(vma->node.start, alignment))
+	if (alignment && !IS_ALIGNED(start, alignment))
 		return true;
 
 	if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
 		return true;
 
 	if (flags & PIN_OFFSET_BIAS &&
-	    vma->node.start < (flags & PIN_OFFSET_MASK))
+	    start < (flags & PIN_OFFSET_MASK))
 		return true;
 
 	if (flags & PIN_OFFSET_FIXED &&
-	    vma->node.start != (flags & PIN_OFFSET_MASK))
+	    start != (flags & PIN_OFFSET_MASK))
 		return true;
 
 	return false;
 }
 
-void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma, uint64_t flags)
 {
+	struct drm_i915_private *dev_priv = vma->vm->i915;
+	/* fence_size may grow below to cover VT-d scanout padding. */
+	u32 fence_size = vma->fence_size;
 	bool mappable, fenceable;
 
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 	GEM_BUG_ON(!vma->fence_size);
 
+	/* If clearing padding for scanout buffers, vma->node.size includes
+	 * padding and fence_size does not. Therefore, add padding to
+	 * fence_size to determine the map_and_fenceable flag.
+	 */
+	if (flags & PIN_DISPLAYABLE && intel_scanout_needs_vtd_wa(dev_priv) &&
+	    vma->vm->clear_bo_padding)
+		fence_size += INTEL_INFO(dev_priv)->surf_padding_nents *
+			      PAGE_SIZE * 2;
+
 	/*
 	 * Explicitly disable for rotated VMA since the display does not
 	 * need the fence and the VMA is not accessible to other users.
@@ -416,10 +444,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
 		return;
 
-	fenceable = (vma->node.size >= vma->fence_size &&
+	fenceable = (vma->node.size >= fence_size &&
 		     IS_ALIGNED(vma->node.start, vma->fence_alignment));
 
-	mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end;
+	mappable = vma->node.start + fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end;
 
 	if (mappable && fenceable)
 		vma->flags |= I915_VMA_CAN_FENCE;
@@ -532,6 +560,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	if (ret)
 		goto err_unpin;
 
+	if (flags & PIN_DISPLAYABLE && intel_scanout_needs_vtd_wa(dev_priv) &&
+	    vma->vm->clear_bo_padding)
+		size += INTEL_INFO(dev_priv)->surf_padding_nents * PAGE_SIZE * 2;
+
 	if (flags & PIN_OFFSET_FIXED) {
 		u64 offset = flags & PIN_OFFSET_MASK;
 		if (!IS_ALIGNED(offset, alignment) ||
@@ -591,6 +623,17 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
 
+	if (flags & PIN_DISPLAYABLE && intel_scanout_needs_vtd_wa(dev_priv) &&
+	    vma->vm->clear_bo_padding) {
+		vma->vm->clear_bo_padding(vma->vm, vma->node.start, vma->size);
+
+		/* When VT-d is enabled, padding is added before the start of
+		 * the surface, so advance the vma start past that padding.
+		 */
+		vma->node.start +=
+			INTEL_INFO(dev_priv)->surf_padding_nents << PAGE_SHIFT;
+	}
+
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 
 	spin_lock(&dev_priv->mm.obj_lock);
@@ -668,7 +711,7 @@ int __i915_vma_do_pin(struct i915_vma *vma,
 	GEM_BUG_ON((vma->flags & I915_VMA_BIND_MASK) == 0);
 
 	if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
-		__i915_vma_set_map_and_fenceable(vma);
+		__i915_vma_set_map_and_fenceable(vma, flags);
 
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 8c50220..cd7b1c2 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -280,7 +280,7 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level);
 bool i915_vma_misplaced(const struct i915_vma *vma,
 			u64 size, u64 alignment, u64 flags);
-void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma, uint64_t flags);
 void i915_vma_revoke_mmap(struct i915_vma *vma);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
 void i915_vma_unlink_ctx(struct i915_vma *vma);
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 0835752..01eb5a1 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -167,6 +167,9 @@ struct intel_device_info {
 #undef DEFINE_FLAG
 	u16 ddb_size; /* in blocks */
 
+	/* Number of PTEs for padding before/after PLANE_SURF (GEN:HAS:397078) */
+	u8 surf_padding_nents;
+
 	/* Register offsets for the various display pipes and transcoders */
 	int pipe_offsets[I915_MAX_TRANSCODERS];
 	int trans_offsets[I915_MAX_TRANSCODERS];
-- 
2.7.4



More information about the Intel-gfx-trybot mailing list