[Intel-gfx] [PATCH 06/12] drm/i915/scheduler: Execute requests in order of priorities

Chris Wilson chris at chris-wilson.co.uk
Fri Nov 4 09:20:50 UTC 2016


On Thu, Nov 03, 2016 at 07:47:39PM +0000, Chris Wilson wrote:
> On Thu, Nov 03, 2016 at 04:21:25PM +0000, Tvrtko Ursulin wrote:
> > >+static void update_priorities(struct i915_priotree *pt, int prio)
> > >+{
> > >+	struct drm_i915_gem_request *request =
> > >+		container_of(pt, struct drm_i915_gem_request, priotree);
> > >+	struct intel_engine_cs *engine = request->engine;
> > >+	struct i915_dependency *dep;
> > >+
> > >+	if (prio <= READ_ONCE(pt->priority))
> > >+		return;
> > >+
> > >+	/* Recursively bump all dependent priorities to match the new request */
> > >+	list_for_each_entry(dep, &pt->pre_list, pre_link)
> > >+		update_priorities(dep->signal, prio);
> > 
> > John got in trouble from recursion in his scheduler, used for the
> > same thing AFAIR. Or was it the priority bumping? Either way, it
> > could be imperative to avoid it.

Spent some time tuning (but not very well) for very deep pipelines:

static struct intel_engine_cs *
pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
{
	struct intel_engine_cs *engine;

	engine = container_of(pt,
			      struct drm_i915_gem_request,
			      priotree)->engine;
	if (engine != locked) {
		if (locked)
			spin_unlock_irq(&locked->timeline->lock);
		spin_lock_irq(&engine->timeline->lock);
	}

	return engine;
}

/* Bump the priority of @request and, transitively, of every request it
 * depends upon (priority inheritance), re-sorting any that are already
 * queued in their engine's execlist rbtree. Uses an iterative depth-first
 * walk over the dependency graph instead of recursion.
 */
static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
{
	struct intel_engine_cs *engine = NULL;
	struct i915_dependency *dep, *p;
	struct i915_dependency stack;
	LIST_HEAD(dfs);

	/* Fast path: nothing to do if the request already runs at or above
	 * the requested priority.
	 */
	if (prio <= READ_ONCE(request->priotree.priority))
		return;

	/* Need BKL in order to use the temporary link inside i915_dependency */
	lockdep_assert_held(&request->i915->drm.struct_mutex);

	/* Seed the DFS with the target request itself, using an on-stack
	 * dependency node so no allocation is needed.
	 */
	stack.signal = &request->priotree;
	list_add(&stack.dfs_link, &dfs);

	/* Recursively bump all dependent priorities to match the new request */
	list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
		struct i915_priotree *pt = dep->signal;

		/* Queue every signaler still below the new priority onto the
		 * tail of the DFS list; the outer loop visits them in turn.
		 * NOTE(review): this inner loop deliberately reuses p, the
		 * outer loop's safe-iteration cursor.
		 */
		list_for_each_entry(p, &pt->pre_list, pre_link)
			if (prio > READ_ONCE(p->signal->priority))
				list_move_tail(&p->dfs_link, &dfs);

		/* Restore the safe-iteration cursor clobbered above: the
		 * element following dep in the dfs list.
		 */
		p = list_first_entry(&dep->dfs_link, typeof(*p), dfs_link);
		/* Requests already sitting in an execlist rbtree are left on
		 * the dfs list; they get re-sorted in the second pass below.
		 */
		if (!RB_EMPTY_NODE(&pt->node))
			continue;

		engine = pt_lock_engine(pt, engine);

		/* Recheck under the engine's timeline lock: only bump in
		 * place if still unqueued and still below the new priority.
		 */
		if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) {
			pt->priority = prio;
			list_del_init(&dep->dfs_link);
		}
	}

	/* Fifo and depth-first replacement ensure our deps execute before us */
	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
		struct i915_priotree *pt = dep->signal;

		/* Reset the temporary link for reuse by a later schedule. */
		INIT_LIST_HEAD(&dep->dfs_link);

		engine = pt_lock_engine(pt, engine);

		/* May have been bumped by an earlier pass over the graph. */
		if (prio <= pt->priority)
			continue;

		GEM_BUG_ON(RB_EMPTY_NODE(&pt->node));

		/* Re-sort: remove from the rbtree and reinsert at the new
		 * priority; track the new leftmost (first-to-run) node.
		 */
		pt->priority = prio;
		rb_erase(&pt->node, &engine->execlist_queue);
		if (insert_request(pt, &engine->execlist_queue))
			engine->execlist_first = &pt->node;
	}

	/* Drop the last engine timeline lock taken by pt_lock_engine(). */
	if (engine)
		spin_unlock_irq(&engine->timeline->lock);

	/* XXX Do we need to preempt to make room for us and our deps? */
}

But, as always, any linear list scales poorly. It is just fortunate that
we typically don't see 10,000s of requests in the pipeline that need PI.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


More information about the Intel-gfx mailing list