[PATCH] drm/i915: Replace ggtt_clear_range with ggtt_clear_bo_padding
edmund.j.dea at intel.com
edmund.j.dea at intel.com
Thu Mar 15 18:55:17 UTC 2018
From: ejdea <edmund.j.dea at intel.com>
When VT-d is enabled for GEN9+ platforms, clear the padding before and
after displayable surfaces instead of clearing the full range of the
GGTT during i915 startup. This reduces i915 startup time by about
60-100ms. Note that adding padding before the beginning of the surface
is required for rotated surfaces.
---
drivers/gpu/drm/i915/i915_gem.c | 2 ++
drivers/gpu/drm/i915/i915_gem_gtt.c | 47 ++++++++++++++++++++++++--
drivers/gpu/drm/i915/i915_gem_gtt.h | 3 ++
drivers/gpu/drm/i915/i915_gem_stolen.c | 2 +-
drivers/gpu/drm/i915/i915_pci.c | 6 +++-
drivers/gpu/drm/i915/i915_vma.c | 57 ++++++++++++++++++++++++++++----
drivers/gpu/drm/i915/i915_vma.h | 2 +-
drivers/gpu/drm/i915/intel_device_info.h | 3 ++
8 files changed, 110 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ab88ca5..e792756 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4094,6 +4094,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
lockdep_assert_held(&obj->base.dev->struct_mutex);
+ flags |= PIN_DISPLAYABLE;
+
/* Mark the global pin early so that we account for the
* display coherency whilst setting up the cache domains.
*/
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 21d72f6..8267452 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2495,6 +2495,35 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
gen8_set_pte(&gtt_base[i], scratch_pte);
}
+static void gen8_ggtt_clear_bo_padding(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ unsigned first_entry = start >> PAGE_SHIFT;
+ unsigned num_entries = length >> PAGE_SHIFT;
+ const gen8_pte_t scratch_pte =
+ gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
+ gen8_pte_t __iomem *gtt_base =
+ (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
+ const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+ int padding_nents = INTEL_INFO(vm->i915)->surf_padding_nents;
+ int overfetch_start = num_entries + padding_nents;
+ int i;
+
+ /*
+ * Scrub only the VT-d overfetch padding PTEs bracketing a scanout
+ * buffer: [start, start + padding) and
+ * [start + padding + length, start + 2 * padding + length).
+ * Bail out if the padded span would run past the end of the GGTT;
+ * clamping num_entries alone (as gen8_ggtt_clear_range does) would
+ * still let the trailing loop write PTEs out of bounds, since
+ * overfetch_start is derived from the unclamped num_entries.
+ */
+ if (WARN(overfetch_start + padding_nents > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ return;
+
+ /* Clear memory padding before the beginning of the scanout buffer */
+ for (i = 0; i < padding_nents; i++)
+ gen8_set_pte(&gtt_base[i], scratch_pte);
+
+ /* Clear memory padding after the end of the scanout buffer */
+ for (i = overfetch_start; i < (overfetch_start + padding_nents); i++)
+ gen8_set_pte(&gtt_base[i], scratch_pte);
+}
+
static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
{
struct drm_i915_private *dev_priv = vm->i915;
@@ -3342,8 +3371,22 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->base.clear_pages = clear_pages;
ggtt->base.insert_page = gen8_ggtt_insert_page;
ggtt->base.clear_range = nop_clear_range;
- if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
- ggtt->base.clear_range = gen8_ggtt_clear_range;
+
+ if (intel_scanout_needs_vtd_wa(dev_priv)) {
+ if (INTEL_INFO(dev_priv)->surf_padding_nents > 0) {
+ ggtt->base.clear_bo_padding = gen8_ggtt_clear_bo_padding;
+ } else {
+ ggtt->base.clear_range = gen8_ggtt_clear_range;
+ ggtt->base.clear_bo_padding = NULL;
+ }
+ } else if (!USES_FULL_PPGTT(dev_priv)) {
+ ggtt->base.clear_range = gen8_ggtt_clear_range;
+ ggtt->base.clear_bo_padding = NULL;
+ } else {
+ ggtt->base.clear_bo_padding = NULL;
+ }
ggtt->base.insert_entries = gen8_ggtt_insert_entries;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 6efc017..98fdf86 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -323,6 +323,8 @@ struct i915_address_space {
u64 start, u64 length);
void (*clear_range)(struct i915_address_space *vm,
u64 start, u64 length);
+ void (*clear_bo_padding)(struct i915_address_space *vm,
+ u64 start, u64 length);
void (*insert_page)(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
@@ -640,6 +642,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
#define PIN_HIGH BIT(9)
#define PIN_OFFSET_BIAS BIT(10)
#define PIN_OFFSET_FIXED BIT(11)
+#define PIN_DISPLAYABLE BIT(12)
#define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE)
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 62aa679..effb426 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -643,7 +643,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
vma->pages = obj->mm.pages;
vma->flags |= I915_VMA_GLOBAL_BIND;
- __i915_vma_set_map_and_fenceable(vma);
+ __i915_vma_set_map_and_fenceable(vma, 0);
list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
spin_lock(&dev_priv->mm.obj_lock);
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 062e91b..f0549cd 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -463,7 +463,8 @@ static const struct intel_device_info intel_cherryview_info = {
.has_csr = 1, \
.has_guc = 1, \
.has_ipc = 1, \
- .ddb_size = 896
+ .ddb_size = 896, \
+ .surf_padding_nents = 136
#define SKL_PLATFORM \
GEN9_FEATURES, \
@@ -529,12 +530,14 @@ static const struct intel_device_info intel_broxton_info = {
GEN9_LP_FEATURES,
PLATFORM(INTEL_BROXTON),
.ddb_size = 512,
+ .surf_padding_nents = 136,
};
static const struct intel_device_info intel_geminilake_info = {
GEN9_LP_FEATURES,
PLATFORM(INTEL_GEMINILAKE),
.ddb_size = 1024,
+ .surf_padding_nents = 168,
GLK_COLORS,
};
@@ -582,6 +585,7 @@ static const struct intel_device_info intel_coffeelake_gt3_info = {
GEN9_FEATURES, \
GEN(10), \
.ddb_size = 1024, \
+ .surf_padding_nents = 168, \
GLK_COLORS
static const struct intel_device_info intel_cannonlake_info = {
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4bda3bd..06266fa 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -378,6 +378,22 @@ void i915_vma_unpin_and_release(struct i915_vma **p_vma)
bool i915_vma_misplaced(const struct i915_vma *vma,
u64 size, u64 alignment, u64 flags)
{
+ struct drm_i915_private *dev_priv = vma->vm->i915;
+ u64 start;
+
 if (!drm_mm_node_allocated(&vma->node))
 return false;
+
+ /* When VT-d is enabled, scanout buffers need padding before and after
+ * the surface. Since the vma node start address accounted for padding
+ * before the surface, set the start address back to the real vma start
+ * address without padding when checking if the vma is misplaced.
+ * (Only read vma->node once the allocation check above has passed.)
+ */
+ if (flags & PIN_DISPLAYABLE &&
+ intel_scanout_needs_vtd_wa(dev_priv) &&
+ vma->vm->clear_bo_padding)
+ start = vma->node.start -
+ ((u64)INTEL_INFO(dev_priv)->surf_padding_nents * PAGE_SIZE);
+ else
+ start = vma->node.start;
@@ -385,30 +401,42 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
return true;
GEM_BUG_ON(alignment && !is_power_of_2(alignment));
- if (alignment && !IS_ALIGNED(vma->node.start, alignment))
+ if (alignment && !IS_ALIGNED(start, alignment))
return true;
if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma))
return true;
if (flags & PIN_OFFSET_BIAS &&
- vma->node.start < (flags & PIN_OFFSET_MASK))
+ start < (flags & PIN_OFFSET_MASK))
return true;
if (flags & PIN_OFFSET_FIXED &&
- vma->node.start != (flags & PIN_OFFSET_MASK))
+ start != (flags & PIN_OFFSET_MASK))
return true;
return false;
}
-void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma, uint64_t flags)
{
+ struct drm_i915_private *dev_priv = vma->vm->i915;
+ /* If clearing padding for scanout buffers, vma->node.size includes
+ * padding and fence_size does not. Therefore, add padding to
+ * fence_size to determine the map_and_fenceable flag. Declared here
+ * to keep all declarations ahead of statements (kernel builds with
+ * -Wdeclaration-after-statement).
+ */
+ u32 fence_size = vma->fence_size;
 bool mappable, fenceable;
 GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 GEM_BUG_ON(!vma->fence_size);
+ if (flags & PIN_DISPLAYABLE && intel_scanout_needs_vtd_wa(dev_priv) &&
+ vma->vm->clear_bo_padding)
+ fence_size += INTEL_INFO(dev_priv)->surf_padding_nents *
+ PAGE_SIZE * 2;
+
/*
* Explicitly disable for rotated VMA since the display does not
* need the fence and the VMA is not accessible to other users.
@@ -416,10 +444,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
return;
- fenceable = (vma->node.size >= vma->fence_size &&
+ fenceable = (vma->node.size >= fence_size &&
IS_ALIGNED(vma->node.start, vma->fence_alignment));
- mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end;
+ mappable = vma->node.start + fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end;
if (mappable && fenceable)
vma->flags |= I915_VMA_CAN_FENCE;
@@ -532,6 +560,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
if (ret)
goto err_unpin;
+ if (flags & PIN_DISPLAYABLE && intel_scanout_needs_vtd_wa(dev_priv) &&
+ vma->vm->clear_bo_padding)
+ size += INTEL_INFO(dev_priv)->surf_padding_nents * PAGE_SIZE * 2;
+
if (flags & PIN_OFFSET_FIXED) {
u64 offset = flags & PIN_OFFSET_MASK;
if (!IS_ALIGNED(offset, alignment) ||
@@ -591,6 +623,17 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level));
+ if (flags & PIN_DISPLAYABLE && intel_scanout_needs_vtd_wa(dev_priv) &&
+ vma->vm->clear_bo_padding) {
+ vma->vm->clear_bo_padding(vma->vm, vma->node.start, vma->size);
+
+ /* When VT-d is enabled, padding is added before the
+ * beginning of the surface. Therefore, set the vma start
+ * address after this padding.
+ * NOTE(review): mutating node.start on a node still owned by
+ * drm_mm bypasses the allocator; confirm this cannot corrupt
+ * the GGTT interval tree before merging.
+ */
+ vma->node.start +=
+ INTEL_INFO(dev_priv)->surf_padding_nents << PAGE_SHIFT;
+ }
+
+
list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
spin_lock(&dev_priv->mm.obj_lock);
@@ -668,7 +711,7 @@ int __i915_vma_do_pin(struct i915_vma *vma,
GEM_BUG_ON((vma->flags & I915_VMA_BIND_MASK) == 0);
if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND)
- __i915_vma_set_map_and_fenceable(vma);
+ __i915_vma_set_map_and_fenceable(vma, flags);
GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
return 0;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 8c50220..cd7b1c2 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -280,7 +280,7 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level);
bool i915_vma_misplaced(const struct i915_vma *vma,
u64 size, u64 alignment, u64 flags);
-void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma, uint64_t flags);
void i915_vma_revoke_mmap(struct i915_vma *vma);
int __must_check i915_vma_unbind(struct i915_vma *vma);
void i915_vma_unlink_ctx(struct i915_vma *vma);
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 0835752..41c0857 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -167,6 +167,9 @@ struct intel_device_info {
#undef DEFINE_FLAG
u16 ddb_size; /* in blocks */
+ /* Number of PTE's for padding before/after PLANE_SURF (GEN:HAS:397078) */
+ u8 surf_padding_nents;
+
/* Register offsets for the various display pipes and transcoders */
int pipe_offsets[I915_MAX_TRANSCODERS];
int trans_offsets[I915_MAX_TRANSCODERS];
--
2.7.4
More information about the Intel-gfx-trybot
mailing list