[Intel-gfx] [RFC 01/10] move-timeline-to-ctx
Tvrtko Ursulin
tursulin at ursulin.net
Thu Jan 25 13:33:24 UTC 2018
From: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.h | 13 +-----
drivers/gpu/drm/i915/i915_gem.c | 9 ++--
drivers/gpu/drm/i915/i915_gem_context.c | 15 ++++++-
drivers/gpu/drm/i915/i915_gem_context.h | 2 +
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 24 +++++++++++
drivers/gpu/drm/i915/i915_gem_gtt.c | 3 --
drivers/gpu/drm/i915/i915_gem_gtt.h | 1 -
drivers/gpu/drm/i915/i915_gem_request.c | 2 +-
drivers/gpu/drm/i915/i915_gem_timeline.c | 54 +++++++++++++++++++++---
drivers/gpu/drm/i915/i915_gem_timeline.h | 4 ++
drivers/gpu/drm/i915/intel_engine_cs.c | 3 +-
drivers/gpu/drm/i915/intel_lrc.c | 2 +-
drivers/gpu/drm/i915/intel_ringbuffer.c | 9 +++-
drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ++-
drivers/gpu/drm/i915/selftests/mock_engine.c | 3 +-
drivers/gpu/drm/i915/selftests/mock_gem_device.c | 4 +-
drivers/gpu/drm/i915/selftests/mock_gtt.c | 1 -
17 files changed, 119 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2a5845a896b6..0c348f6ab386 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2305,7 +2305,8 @@ struct drm_i915_private {
void (*cleanup_engine)(struct intel_engine_cs *engine);
struct list_head timelines;
- struct i915_gem_timeline global_timeline;
+ struct i915_gem_timeline execution_timeline;
+ struct i915_gem_timeline legacy_timeline;
u32 active_requests;
/**
@@ -3448,16 +3449,6 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id)
return ctx;
}
-static inline struct intel_timeline *
-i915_gem_context_lookup_timeline(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
-{
- struct i915_address_space *vm;
-
- vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base;
- return &vm->timeline.engine[engine->id];
-}
-
int i915_perf_open_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 062b21408698..02f71eb5c9d9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3010,10 +3010,10 @@ static void engine_skip_context(struct drm_i915_gem_request *request)
{
struct intel_engine_cs *engine = request->engine;
struct i915_gem_context *hung_ctx = request->ctx;
- struct intel_timeline *timeline;
+ struct intel_timeline *timeline = request->timeline;
unsigned long flags;
- timeline = i915_gem_context_lookup_timeline(hung_ctx, engine);
+ GEM_BUG_ON(timeline == engine->timeline);
spin_lock_irqsave(&engine->timeline->lock, flags);
spin_lock(&timeline->lock);
@@ -3677,7 +3677,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
ret = wait_for_engines(i915);
} else {
- ret = wait_for_timeline(&i915->gt.global_timeline, flags);
+ ret = wait_for_timeline(&i915->gt.execution_timeline, flags);
}
return ret;
@@ -5536,7 +5536,8 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
WARN_ON(dev_priv->mm.object_count);
mutex_lock(&dev_priv->drm.struct_mutex);
- i915_gem_timeline_fini(&dev_priv->gt.global_timeline);
+ i915_gem_timeline_fini(&dev_priv->gt.legacy_timeline);
+ i915_gem_timeline_fini(&dev_priv->gt.execution_timeline);
WARN_ON(!list_empty(&dev_priv->gt.timelines));
mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 648e7536ff51..f72046adaace 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -121,6 +121,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
lockdep_assert_held(&ctx->i915->drm.struct_mutex);
GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
+ i915_gem_timeline_free(ctx->timeline);
i915_ppgtt_put(ctx->ppgtt);
for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -373,6 +374,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
ctx->desc_template = default_desc_template(dev_priv, ppgtt);
}
+ if (HAS_EXECLISTS(dev_priv)) {
+ struct i915_gem_timeline *timeline;
+
+ timeline = i915_gem_timeline_create(dev_priv, ctx->name);
+ if (IS_ERR(timeline)) {
+ __destroy_hw_context(ctx, file_priv);
+ return ERR_CAST(timeline);
+ }
+
+ ctx->timeline = timeline;
+ }
+
trace_i915_context_create(ctx);
return ctx;
@@ -574,7 +587,7 @@ static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine)
list_for_each_entry(timeline, &engine->i915->gt.timelines, link) {
struct intel_timeline *tl;
- if (timeline == &engine->i915->gt.global_timeline)
+ if (timeline == &engine->i915->gt.execution_timeline)
continue;
tl = &timeline->engine[engine->id];
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 4bfb72f8e1cb..cfa69b12a6b2 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -55,6 +55,8 @@ struct i915_gem_context {
/** file_priv: owning file descriptor */
struct drm_i915_file_private *file_priv;
+ struct i915_gem_timeline *timeline;
+
/**
* @ppgtt: unique address space (GTT)
*
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4401068ff468..cd482b981fdd 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1753,6 +1753,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
unsigned int flags = eb->flags[i];
struct i915_vma *vma = eb->vma[i];
struct drm_i915_gem_object *obj = vma->obj;
+ struct drm_i915_gem_request *order;
if (flags & EXEC_OBJECT_CAPTURE) {
struct i915_gem_capture_list *capture;
@@ -1783,6 +1784,29 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
flags &= ~EXEC_OBJECT_ASYNC;
}
+ /*
+ * XXX Multiple queues may share a vma while using different
+ * timelines, yet activity tracking still relies on a single
+ * timeline through the vm (see i915_vma_move_to_active() and
+ * i915_vma_retire()), so we impose that ordering constraint
+ * on the different timelines here.
+ *
+ * Note that this ordering constraint is undesirable as we
+ * want to keep our weakly ordered reads through the GEM
+ * interface. That will require us to be able to track
+ * multiple timelines (lifting the current limit of one
+ * per engine), like struct reservation_object but coupled
+ * into our activity tracking.
+ */
+ order = i915_gem_active_peek(&vma->last_read[eb->engine->id],
+ &eb->i915->drm.struct_mutex);
+ if (order) {
+ err = i915_gem_request_await_dma_fence(eb->request,
+ &order->fence);
+ if (err)
+ return err;
+ }
+
if (flags & EXEC_OBJECT_ASYNC)
continue;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index be227512430a..b355ba1eee22 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2116,8 +2116,6 @@ static void i915_address_space_init(struct i915_address_space *vm,
struct drm_i915_private *dev_priv,
const char *name)
{
- i915_gem_timeline_init(dev_priv, &vm->timeline, name);
-
drm_mm_init(&vm->mm, 0, vm->total);
vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
@@ -2134,7 +2132,6 @@ static void i915_address_space_fini(struct i915_address_space *vm)
if (pagevec_count(&vm->free_pages))
vm_free_pages_release(vm, true);
- i915_gem_timeline_fini(&vm->timeline);
drm_mm_takedown(&vm->mm);
list_del(&vm->global_link);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a42890d9af38..0028a0ccc9a0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -256,7 +256,6 @@ struct i915_pml4 {
struct i915_address_space {
struct drm_mm mm;
- struct i915_gem_timeline timeline;
struct drm_i915_private *i915;
struct device *dma;
/* Every address space belongs to a struct file - except for the global
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index ac2db7f716a1..160d81bf6d85 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -724,7 +724,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
}
}
- req->timeline = i915_gem_context_lookup_timeline(ctx, engine);
+ req->timeline = ring->timeline;
GEM_BUG_ON(req->timeline == engine->timeline);
spin_lock_init(&req->lock);
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
index e9fd87604067..1bf48bdb78c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -95,12 +95,28 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
int i915_gem_timeline_init__global(struct drm_i915_private *i915)
{
- static struct lock_class_key class;
+ static struct lock_class_key class1, class2;
+ int err;
+
+ err = __i915_gem_timeline_init(i915,
+ &i915->gt.execution_timeline,
+ "[execution]", &class1,
+ "i915_execution_timeline");
+ if (err)
+ return err;
+
+ err = __i915_gem_timeline_init(i915,
+ &i915->gt.legacy_timeline,
+ "[global]", &class2,
+ "i915_global_timeline");
+ if (err)
+ goto err_exec_timeline;
+
+ return 0;
- return __i915_gem_timeline_init(i915,
- &i915->gt.global_timeline,
- "[execution]",
- &class, "&global_timeline->lock");
+err_exec_timeline:
+ i915_gem_timeline_fini(&i915->gt.execution_timeline);
+ return err;
}
/**
@@ -148,6 +164,34 @@ void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
kfree(timeline->name);
}
+struct i915_gem_timeline *
+i915_gem_timeline_create(struct drm_i915_private *i915, const char *name)
+{
+ struct i915_gem_timeline *timeline;
+ int err;
+
+ timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
+ if (!timeline)
+ return ERR_PTR(-ENOMEM);
+
+ err = i915_gem_timeline_init(i915, timeline, name);
+ if (err) {
+ kfree(timeline);
+ return ERR_PTR(err);
+ }
+
+ return timeline;
+}
+
+void i915_gem_timeline_free(struct i915_gem_timeline *timeline)
+{
+ if (!timeline)
+ return;
+
+ i915_gem_timeline_fini(timeline);
+ kfree(timeline);
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_timeline.c"
#include "selftests/i915_gem_timeline.c"
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index b5a22400a01f..7ecf0a253d78 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -96,6 +96,10 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915);
void i915_gem_timelines_park(struct drm_i915_private *i915);
void i915_gem_timeline_fini(struct i915_gem_timeline *tl);
+struct i915_gem_timeline *
+i915_gem_timeline_create(struct drm_i915_private *i915, const char *name);
+void i915_gem_timeline_free(struct i915_gem_timeline *timeline);
+
static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
u64 context, u32 seqno)
{
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 46b2a92cb7a2..5d49f319220b 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -393,7 +393,8 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
static void intel_engine_init_timeline(struct intel_engine_cs *engine)
{
- engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id];
+ engine->timeline =
+ &engine->i915->gt.execution_timeline.engine[engine->id];
}
static bool csb_force_mmio(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6896ad1756c8..684303923ff7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2281,7 +2281,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
goto error_deref_obj;
}
- ring = intel_engine_create_ring(engine, ctx->ring_size);
+ ring = intel_engine_create_ring(engine, ctx->timeline, ctx->ring_size);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
goto error_deref_obj;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e2085820b586..66e87144f799 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1124,7 +1124,9 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
}
struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size)
+intel_engine_create_ring(struct intel_engine_cs *engine,
+ struct i915_gem_timeline *timeline,
+ int size)
{
struct intel_ring *ring;
struct i915_vma *vma;
@@ -1137,6 +1139,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&ring->request_list);
+ ring->timeline = &timeline->engine[engine->id];
ring->size = size;
/* Workaround an erratum on the i830 which causes a hang if
@@ -1333,7 +1336,9 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
if (err)
goto err;
- ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
+ ring = intel_engine_create_ring(engine,
+ &engine->i915->gt.legacy_timeline,
+ 32 * PAGE_SIZE);
if (IS_ERR(ring)) {
err = PTR_ERR(ring);
goto err;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0aefbf6849d1..aab7bd61ae10 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -123,6 +123,7 @@ struct intel_ring {
struct i915_vma *vma;
void *vaddr;
+ struct intel_timeline *timeline;
struct list_head request_list;
u32 head;
@@ -738,7 +739,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
#define CNL_HWS_CSB_WRITE_INDEX 0x2f
struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size);
+intel_engine_create_ring(struct intel_engine_cs *engine,
+ struct i915_gem_timeline *timeline,
+ int size);
int intel_ring_pin(struct intel_ring *ring,
struct drm_i915_private *i915,
unsigned int offset_bias);
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 55c0e2c15782..19c0d662f351 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -174,8 +174,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.emit_breadcrumb = mock_emit_breadcrumb;
engine->base.submit_request = mock_submit_request;
- engine->base.timeline =
- &i915->gt.global_timeline.engine[engine->base.id];
+ intel_engine_init_timeline(&engine->base);
intel_engine_init_breadcrumbs(&engine->base);
engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 1bc61f3f76fc..af598e671a8a 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -72,7 +72,9 @@ static void mock_device_release(struct drm_device *dev)
mutex_lock(&i915->drm.struct_mutex);
mock_fini_ggtt(i915);
- i915_gem_timeline_fini(&i915->gt.global_timeline);
+ i915_gem_timeline_fini(&i915->gt.legacy_timeline);
+ i915_gem_timeline_fini(&i915->gt.execution_timeline);
+ WARN_ON(!list_empty(&i915->gt.timelines));
mutex_unlock(&i915->drm.struct_mutex);
destroy_workqueue(i915->wq);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index e96873f96116..36c112088940 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -76,7 +76,6 @@ mock_ppgtt(struct drm_i915_private *i915,
INIT_LIST_HEAD(&ppgtt->base.global_link);
drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total);
- i915_gem_timeline_init(i915, &ppgtt->base.timeline, name);
ppgtt->base.clear_range = nop_clear_range;
ppgtt->base.insert_page = mock_insert_page;
--
2.14.1
More information about the Intel-gfx
mailing list