[Intel-gfx] [PATCH v2 01/40] drm/i915: Use the MRU stack search after evicting
Chris Wilson
chris at chris-wilson.co.uk
Fri Dec 16 07:46:39 UTC 2016
When we evict from the GTT to make room for an object, the hole we
create is put onto the MRU stack inside the drm_mm range manager. On the
next search pass, we can speed up a PIN_HIGH allocation by referencing
that stack for the new hole.
v2: Pull together the 3 identical implements (ahem, a couple were
outdated) into a common routine for allocating a node and evicting as
necessary.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com> #v1
---
drivers/gpu/drm/i915/gvt/aperture_gm.c | 33 +++++-----------
drivers/gpu/drm/i915/i915_gem_gtt.c | 72 ++++++++++++++++++++++++----------
drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +++
drivers/gpu/drm/i915/i915_vma.c | 40 ++-----------------
4 files changed, 70 insertions(+), 80 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 7d33b607bc89..1bb7a5b80d47 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -48,47 +48,34 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
{
struct intel_gvt *gvt = vgpu->gvt;
struct drm_i915_private *dev_priv = gvt->dev_priv;
- u32 alloc_flag, search_flag;
+ unsigned int flags;
u64 start, end, size;
struct drm_mm_node *node;
- int retried = 0;
int ret;
if (high_gm) {
- search_flag = DRM_MM_SEARCH_BELOW;
- alloc_flag = DRM_MM_CREATE_TOP;
node = &vgpu->gm.high_gm_node;
size = vgpu_hidden_sz(vgpu);
start = gvt_hidden_gmadr_base(gvt);
end = gvt_hidden_gmadr_end(gvt);
+ flags = PIN_HIGH;
} else {
- search_flag = DRM_MM_SEARCH_DEFAULT;
- alloc_flag = DRM_MM_CREATE_DEFAULT;
node = &vgpu->gm.low_gm_node;
size = vgpu_aperture_sz(vgpu);
start = gvt_aperture_gmadr_base(gvt);
end = gvt_aperture_gmadr_end(gvt);
+ flags = PIN_MAPPABLE;
}
mutex_lock(&dev_priv->drm.struct_mutex);
-search_again:
- ret = drm_mm_insert_node_in_range_generic(&dev_priv->ggtt.base.mm,
- node, size, 4096,
- I915_COLOR_UNEVICTABLE,
- start, end, search_flag,
- alloc_flag);
- if (ret) {
- ret = i915_gem_evict_something(&dev_priv->ggtt.base,
- size, 4096,
- I915_COLOR_UNEVICTABLE,
- start, end, 0);
- if (ret == 0 && ++retried < 3)
- goto search_again;
-
- gvt_err("fail to alloc %s gm space from host, retried %d\n",
- high_gm ? "high" : "low", retried);
- }
+ ret = i915_gem_gtt_insert(&dev_priv->ggtt.base, node,
+ size, 4096, I915_COLOR_UNEVICTABLE,
+ start, end, flags);
mutex_unlock(&dev_priv->drm.struct_mutex);
+ if (ret)
+ gvt_err("fail to alloc %s gm space from host\n",
+ high_gm ? "high" : "low");
+
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ef00d36680c9..4543d7fa7fc2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2056,7 +2056,6 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
struct i915_address_space *vm = &ppgtt->base;
struct drm_i915_private *dev_priv = ppgtt->base.i915;
struct i915_ggtt *ggtt = &dev_priv->ggtt;
- bool retried = false;
int ret;
/* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
@@ -2069,29 +2068,14 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
if (ret)
return ret;
-alloc:
- ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, &ppgtt->node,
- GEN6_PD_SIZE, GEN6_PD_ALIGN,
- I915_COLOR_UNEVICTABLE,
- 0, ggtt->base.total,
- DRM_MM_TOPDOWN);
- if (ret == -ENOSPC && !retried) {
- ret = i915_gem_evict_something(&ggtt->base,
- GEN6_PD_SIZE, GEN6_PD_ALIGN,
- I915_COLOR_UNEVICTABLE,
- 0, ggtt->base.total,
- 0);
- if (ret)
- goto err_out;
-
- retried = true;
- goto alloc;
- }
-
+ ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
+ GEN6_PD_SIZE, GEN6_PD_ALIGN,
+ I915_COLOR_UNEVICTABLE,
+ 0, ggtt->base.total,
+ PIN_HIGH);
if (ret)
goto err_out;
-
if (ppgtt->node.start < ggtt->mappable_end)
DRM_DEBUG("Forced to use aperture for PDEs\n");
@@ -3567,3 +3551,49 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
return ret;
}
+int i915_gem_gtt_insert(struct i915_address_space *vm,
+ struct drm_mm_node *node,
+ u64 size, u64 alignment, unsigned long colour,
+ u64 start, u64 end, unsigned int flags)
+{
+ u32 search_flag, alloc_flag;
+ int err;
+
+ lockdep_assert_held(&vm->i915->drm.struct_mutex);
+
+ if (flags & PIN_HIGH) {
+ search_flag = DRM_MM_SEARCH_BELOW;
+ alloc_flag = DRM_MM_CREATE_TOP;
+ } else {
+ search_flag = DRM_MM_SEARCH_DEFAULT;
+ alloc_flag = DRM_MM_CREATE_DEFAULT;
+ }
+
+ /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
+ * so we know that we always have a minimum alignment of 4096.
+ * The drm_mm range manager is optimised to return results
+ * with zero alignment, so where possible use the optimal
+ * path.
+ */
+ GEM_BUG_ON(size & 4095);
+ if (alignment <= 4096)
+ alignment = 0;
+
+ err = drm_mm_insert_node_in_range_generic(&vm->mm, node,
+ size, alignment, colour,
+ start, end,
+ search_flag, alloc_flag);
+ if (err != -ENOSPC)
+ return err;
+
+ err = i915_gem_evict_something(vm, size, alignment, colour,
+ start, end, flags);
+ if (err)
+ return err;
+
+ search_flag = DRM_MM_SEARCH_DEFAULT;
+ return drm_mm_insert_node_in_range_generic(&vm->mm, node,
+ size, alignment, colour,
+ start, end,
+ search_flag, alloc_flag);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8965bbb13db7..3a178978d3ce 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -520,6 +520,11 @@ int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages);
+int i915_gem_gtt_insert(struct i915_address_space *vm,
+ struct drm_mm_node *node,
+ u64 size, u64 alignment, unsigned long colour,
+ u64 start, u64 end, unsigned int flags);
+
/* Flags used by pin/bind&friends. */
#define PIN_NONBLOCK BIT(0)
#define PIN_MAPPABLE BIT(1)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 37c3eebe8316..f709c9b76358 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -410,43 +410,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
goto err_unpin;
}
} else {
- u32 search_flag, alloc_flag;
-
- if (flags & PIN_HIGH) {
- search_flag = DRM_MM_SEARCH_BELOW;
- alloc_flag = DRM_MM_CREATE_TOP;
- } else {
- search_flag = DRM_MM_SEARCH_DEFAULT;
- alloc_flag = DRM_MM_CREATE_DEFAULT;
- }
-
- /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
- * so we know that we always have a minimum alignment of 4096.
- * The drm_mm range manager is optimised to return results
- * with zero alignment, so where possible use the optimal
- * path.
- */
- if (alignment <= 4096)
- alignment = 0;
-
-search_free:
- ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
- &vma->node,
- size, alignment,
- obj->cache_level,
- start, end,
- search_flag,
- alloc_flag);
- if (ret) {
- ret = i915_gem_evict_something(vma->vm, size, alignment,
- obj->cache_level,
- start, end,
- flags);
- if (ret == 0)
- goto search_free;
-
+ ret = i915_gem_gtt_insert(vma->vm, &vma->node,
+ size, alignment, obj->cache_level,
+ start, end, flags);
+ if (ret)
goto err_unpin;
- }
GEM_BUG_ON(vma->node.start < start);
GEM_BUG_ON(vma->node.start + vma->node.size > end);
--
2.11.0
More information about the Intel-gfx
mailing list