[Intel-gfx] [PATCH 093/190] drm/i915: Move the forced switch back to the kernel context into eviction
Chris Wilson
chris at chris-wilson.co.uk
Mon Jan 11 02:44:37 PST 2016
Currently, we always switch back to the kernel context (if available,
i.e. legacy HW contexts not execlists) whenever we try and idle the GPU.
We actually only require the switch when trying to evict everything (in
order to prevent fragmentation from placement of the currently active
context) from the global GTT, so move the forced switch into that one
callsite.
In the process, update the comments regarding mode of operation in
particular the distinction between evicting from the global GTT (which
may contain untracked items and transient global pins) and the
per-process GTT.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem.c | 14 ----
drivers/gpu/drm/i915/i915_gem_evict.c | 140 +++++++++++++++++++++-------------
2 files changed, 88 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 054e11cff00f..989222eb107b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2691,21 +2691,7 @@ int i915_gpu_idle(struct drm_device *dev)
struct intel_engine_cs *ring;
int ret, i;
- /* Flush everything onto the inactive list. */
for_each_ring(ring, dev_priv, i) {
- if (!i915.enable_execlists) {
- struct drm_i915_gem_request *req;
-
- req = i915_gem_request_alloc(ring, ring->default_context);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- ret = i915_switch_context(req);
- i915_add_request_no_flush(req);
- if (ret)
- return ret;
- }
-
ret = intel_engine_idle(ring);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index ea1f8d1bd228..b7bcc324a7a7 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -33,6 +33,36 @@
#include "intel_drv.h"
#include "i915_trace.h"
+static int switch_to_pinned_context(struct drm_i915_private *dev_priv)
+{
+ struct intel_engine_cs *ring;
+ int ret, i;
+ int count = 0;
+
+ if (i915.enable_execlists)
+ return 0;
+
+ for_each_ring(ring, dev_priv, i) {
+ struct drm_i915_gem_request *req;
+
+ if (ring->last_context == ring->default_context)
+ continue;
+
+ req = i915_gem_request_alloc(ring, ring->default_context);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ ret = i915_switch_context(req);
+ i915_add_request_no_flush(req);
+ if (ret)
+ return ret;
+
+ count++;
+ }
+
+ return count;
+}
+
static bool
mark_free(struct i915_vma *vma, struct list_head *unwind)
{
@@ -76,37 +106,33 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
unsigned long start, unsigned long end,
unsigned flags)
{
- struct list_head eviction_list, unwind_list;
- struct i915_vma *vma;
- int ret = 0;
- int pass = 0;
+ struct list_head eviction_list;
+ struct list_head *phases[] = {
+ &vm->inactive_list,
+ &vm->active_list,
+ NULL,
+ }, **phase;
+ struct i915_vma *vma, *next;
+ int ret;
trace_i915_gem_evict(dev, min_size, alignment, flags);
/*
* The goal is to evict objects and amalgamate space in LRU order.
* The oldest idle objects reside on the inactive list, which is in
- * retirement order. The next objects to retire are those on the (per
- * ring) active list that do not have an outstanding flush. Once the
- * hardware reports completion (the seqno is updated after the
- * batchbuffer has been finished) the clean buffer objects would
- * be retired to the inactive list. Any dirty objects would be added
- * to the tail of the flushing list. So after processing the clean
- * active objects we need to emit a MI_FLUSH to retire the flushing
- * list, hence the retirement order of the flushing list is in
- * advance of the dirty objects on the active lists.
+ * retirement order. The next objects to retire are those in flight,
+ * on the active list, again in retirement order.
*
* The retirement sequence is thus:
* 1. Inactive objects (already retired)
- * 2. Clean active objects
- * 3. Flushing list
- * 4. Dirty active objects.
+ * 2. Active objects (will stall on unbinding)
*
* On each list, the oldest objects lie at the HEAD with the freshest
* object on the TAIL.
*/
- INIT_LIST_HEAD(&unwind_list);
+search_again:
+ INIT_LIST_HEAD(&eviction_list);
if (start != 0 || end != vm->total) {
drm_mm_init_scan_with_range(&vm->mm, min_size,
alignment, cache_level,
@@ -114,26 +140,19 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
} else
drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
-search_again:
- /* First see if there is a large enough contiguous idle region... */
- list_for_each_entry(vma, &vm->inactive_list, vm_link) {
- if (mark_free(vma, &unwind_list))
- goto found;
- }
-
if (flags & PIN_NONBLOCK)
- goto none;
+ phases[1] = NULL;
- /* Now merge in the soon-to-be-expired objects... */
- list_for_each_entry(vma, &vm->active_list, vm_link) {
- if (mark_free(vma, &unwind_list))
- goto found;
- }
+ phase = phases;
+ do {
+ list_for_each_entry(vma, *phase, vm_link)
+ if (mark_free(vma, &eviction_list))
+ goto found;
+ } while (*++phase);
-none:
/* Nothing found, clean up and bail out! */
- while (!list_empty(&unwind_list)) {
- vma = list_first_entry(&unwind_list,
+ while (!list_empty(&eviction_list)) {
+ vma = list_first_entry(&eviction_list,
struct i915_vma,
exec_list);
ret = drm_mm_scan_remove_block(&vma->node);
@@ -143,13 +162,24 @@ none:
}
/* Can we unpin some objects such as idle hw contents,
- * or pending flips?
+ * or pending flips? But since only the GGTT has global entries
+ * such as scanouts, rinbuffers and contexts, we can skip the
+ * purge when inspecting per-process local address spaces.
*/
- if (flags & PIN_NONBLOCK)
+ if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK)
return -ENOSPC;
- /* Only idle the GPU and repeat the search once */
- if (pass++ == 0) {
+ /* Not everything in the GGTT is tracked via vma (otherwise we
+ * could evict as required with minimal stalling) so we are forced
+ * to idle the GPU and explicitly retire outstanding requests in
+ * the hopes that we can then remove contexts and the like only
+ * bound by their active reference.
+ */
+ ret = switch_to_pinned_context(to_i915(dev));
+ if (ret < 0)
+ return ret;
+
+ if (ret > 0) {
ret = i915_gpu_idle(dev);
if (ret)
return ret;
@@ -166,19 +196,16 @@ none:
found:
/* drm_mm doesn't allow any other other operations while
- * scanning, therefore store to be evicted objects on a
- * temporary list. */
- INIT_LIST_HEAD(&eviction_list);
- while (!list_empty(&unwind_list)) {
- vma = list_first_entry(&unwind_list,
- struct i915_vma,
- exec_list);
- if (drm_mm_scan_remove_block(&vma->node)) {
- list_move(&vma->exec_list, &eviction_list);
+ * scanning, therefore store to-be-evicted objects on a
+ * temporary list and take a reference for all before
+ * calling unbind (which may remove the active reference
+ * of any of our objects, thus corrupting the list).
+ */
+ list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
+ if (drm_mm_scan_remove_block(&vma->node))
drm_gem_object_reference(&vma->obj->base);
- continue;
- }
- list_del_init(&vma->exec_list);
+ else
+ list_del_init(&vma->exec_list);
}
/* Unbinding will emit any required flushes */
@@ -195,7 +222,6 @@ found:
drm_gem_object_unreference(obj);
}
-
return ret;
}
@@ -261,12 +287,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
trace_i915_gem_evict_vm(vm);
if (do_idle) {
+ /* Switch back to the default context in order to unpin
+ * the existing context objects. However, such objects only
+ * pin themselves inside the global GTT and performing the
+ * switch otherwise is ineffective.
+ */
+ if (i915_is_ggtt(vm)) {
+ ret = switch_to_pinned_context(to_i915(vm->dev));
+ if (ret)
+ return ret;
+ }
+
ret = i915_gpu_idle(vm->dev);
- if (ret)
+ if (ret < 0)
return ret;
i915_gem_retire_requests(vm->dev);
-
WARN_ON(!list_empty(&vm->active_list));
}
--
2.7.0.rc3
More information about the Intel-gfx
mailing list