[Intel-gfx] [RFC 5/5] drm/i915: Per batch buffer VCS balancing
Tvrtko Ursulin
tursulin at ursulin.net
Mon Nov 13 13:09:09 UTC 2017
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Building on top of the three previously introduced new pieces of uAPI,
class/instance based engine selection, engine capabilities and concurrent
contexts, we can extend the execbuf2 uAPI to allow per batch buffer load
balancing on the VCS engines.
Legacy execbuf, with contexts mark as concurrent, can now load balance
between individual batches, instead of only statically per client.
This also means two batches submitted one after another, but both to
I915_EXEC_BSD, can potentially be running in parallel on VCS0 and VCS1
respectively.
For legacy (non-concurrent) contexts, behaviour is unchanged. VCS engine
is assigned to each context at the time of first submission and it
persists for the context lifetime.
In both cases trivial round-robin approach is used to load balance.
If execbuf requires a a particular engine feature, like for example HEVC
which is only available on vcs0, it needs to mark it's execbuf calls
appropriately using the engine capabilities uAPI.
For the class/instance based execbuf we add I915_EXEC_INSTANCE_ANY to
accomplish the same behaviour. This is only allowed to be used on
concurrent contexts or an error will be returned.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 7 +--
drivers/gpu/drm/i915/i915_gem.c | 2 +-
drivers/gpu/drm/i915/i915_gem_context.c | 2 +
drivers/gpu/drm/i915/i915_gem_context.h | 2 +
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 80 +++++++++++++++++++++++-------
include/uapi/drm/i915_drm.h | 6 +++
6 files changed, 78 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bf7216742452..9137127fad22 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1562,9 +1562,6 @@ struct i915_gem_mm {
u64 unordered_timeline;
- /* the indicator for dispatch video commands on two BSD rings */
- atomic_t bsd_engine_dispatch_index;
-
/** Bit 6 swizzling required for X tiling */
uint32_t bit_6_swizzle_x;
/** Bit 6 swizzling required for Y tiling */
@@ -2292,6 +2289,10 @@ struct drm_i915_private {
struct i915_gem_context *preempt_context;
struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
[MAX_ENGINE_INSTANCE + 1];
+
+ /* the indicator for dispatch video commands on two BSD rings */
+ atomic_t vcs_dispatch_index;
+
struct i915_vma *semaphore;
struct drm_dma_handle *status_page_dmah;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c5ddae2cc4ef..f39881bbd4ee 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5300,7 +5300,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
- atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
+ atomic_set(&dev_priv->vcs_dispatch_index, 0);
spin_lock_init(&dev_priv->fb_tracking.lock);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3ea1251924ea..fb7021ee35d5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -275,6 +275,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
list_add_tail(&ctx->link, &dev_priv->contexts.list);
ctx->i915 = dev_priv;
ctx->priority = I915_PRIORITY_NORMAL;
+ atomic_set(&ctx->vcs_dispatch_index,
+ atomic_fetch_xor(1, &dev_priv->vcs_dispatch_index));
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
INIT_LIST_HEAD(&ctx->handles_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 0531c5eedb2a..638c716afbef 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -192,6 +192,8 @@ struct i915_gem_context {
* context close.
*/
struct list_head handles_list;
+
+ atomic_t vcs_dispatch_index;
};
static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ecb6290ab6ee..a7bf4fedbdbc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -235,6 +235,7 @@ struct i915_execbuffer {
u64 invalid_flags; /** Set of execobj.flags that are invalid */
u32 context_flags; /** Set of execobj.flags to insert from the ctx */
+ bool ctx_concurrent;
u32 batch_start_offset; /** Location within object of batch */
u32 batch_len; /** Length of batch within object */
@@ -675,6 +676,8 @@ static int eb_select_context(struct i915_execbuffer *eb)
eb->ctx = ctx;
eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
+ eb->ctx_concurrent = i915_gem_context_is_concurrent(ctx);
+
eb->context_flags = 0;
if (ctx->flags & CONTEXT_NO_ZEROMAP)
eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
@@ -1987,26 +1990,63 @@ static int eb_submit(struct i915_execbuffer *eb)
return 0;
}
+static unsigned int select_vcs_engine(struct i915_execbuffer *eb, u64 eb_flags)
+{
+ struct drm_i915_private *i915 = eb->i915;
+ u8 eb_caps = (eb_flags & I915_EXEC_ENGINE_CAP_MASK) >>
+ I915_EXEC_ENGINE_CAP_SHIFT;
+ unsigned int instance;
+
+ if (!HAS_BSD2(i915))
+ return 0;
+
+ instance = atomic_fetch_xor(1, &eb->ctx->vcs_dispatch_index);
+
+ if (instance == 1 &&
+ (eb_caps & i915->engine[_VCS(instance)]->caps) != eb_caps)
+ instance = 0;
+
+ return instance;
+}
+
+static unsigned int
+client_static_vcs_choice(struct drm_i915_private *i915,
+ struct drm_i915_file_private *file_priv)
+{
+ /*
+ * Check whether the file_priv has already selected one engine
+ * and if not select one.
+ */
+ if ((int)file_priv->bsd_engine < 0)
+ file_priv->bsd_engine =
+ atomic_fetch_xor(1, &i915->vcs_dispatch_index);
+
+ return file_priv->bsd_engine;
+}
+
/**
* Find one BSD ring to dispatch the corresponding BSD command.
* The engine index is returned.
*/
static unsigned int
-gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
- struct drm_file *file)
+eb_select_vcs_engine(struct i915_execbuffer *eb,
+ struct drm_i915_file_private *file_priv,
+ u64 eb_flags)
{
- struct drm_i915_file_private *file_priv = file->driver_priv;
-
- /* Check whether the file_priv has already selected one ring. */
- if ((int)file_priv->bsd_engine < 0)
- file_priv->bsd_engine = atomic_fetch_xor(1,
- &dev_priv->mm.bsd_engine_dispatch_index);
-
- return file_priv->bsd_engine;
+ /*
+ * For concurrent contexts do a round-robin engine assignment
+ * for each batch buffer.
+ */
+ if (eb->ctx_concurrent)
+ return select_vcs_engine(eb, eb_flags);
+ else
+ return client_static_vcs_choice(eb->i915, file_priv);
}
static struct intel_engine_cs *
-eb_select_engine_class_instance(struct drm_i915_private *i915, u64 eb_flags)
+eb_select_engine_class_instance(struct i915_execbuffer *eb,
+ struct drm_i915_file_private *file_priv,
+ u64 eb_flags)
{
u8 class = eb_flags & I915_EXEC_RING_MASK;
u8 instance = (eb_flags & I915_EXEC_INSTANCE_MASK) >>
@@ -2015,7 +2055,10 @@ eb_select_engine_class_instance(struct drm_i915_private *i915, u64 eb_flags)
I915_EXEC_ENGINE_CAP_SHIFT;
struct intel_engine_cs *engine;
- engine = intel_engine_lookup_user(i915, class, instance);
+ if (instance == I915_EXEC_INSTANCE_ANY)
+ instance = eb_select_vcs_engine(eb, file_priv, eb_flags);
+
+ engine = intel_engine_lookup_user(eb->i915, class, instance);
if (engine && ((caps & engine->caps) != caps))
return NULL;
@@ -2034,15 +2077,17 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
};
static struct intel_engine_cs *
-eb_select_engine(struct drm_i915_private *dev_priv,
+eb_select_engine(struct i915_execbuffer *eb,
struct drm_file *file,
struct drm_i915_gem_execbuffer2 *args)
{
+ struct drm_i915_private *dev_priv = eb->i915;
unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
struct intel_engine_cs *engine;
if (args->flags & I915_EXEC_CLASS_INSTANCE)
- return eb_select_engine_class_instance(dev_priv, args->flags);
+ return eb_select_engine_class_instance(eb, file->driver_priv,
+ args->flags);
if (user_ring_id > I915_USER_RINGS) {
DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
@@ -2060,7 +2105,8 @@ eb_select_engine(struct drm_i915_private *dev_priv,
unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
- bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
+ bsd_idx = eb_select_vcs_engine(eb, file->driver_priv,
+ args->flags);
} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
bsd_idx <= I915_EXEC_BSD_RING2) {
bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2283,8 +2329,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_destroy;
err = -EINVAL;
- eb.engine = eb_select_engine(eb.i915, file, args);
- if (!eb.engine)
+ eb.engine = eb_select_engine(&eb, file, args);
+ if (unlikely(!eb.engine))
goto err_engine;
if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index bef377c4b4fc..35255d7802db 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1056,6 +1056,12 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_INSTANCE_SHIFT (21)
#define I915_EXEC_INSTANCE_MASK (0xff << I915_EXEC_INSTANCE_SHIFT)
+/*
+ * Batches sent with instance set to any can be load balanced by the driver
+ * is concurrent context flag is also enabled.
+ */
+#define I915_EXEC_INSTANCE_ANY (0xff)
+
/*
* Inform the kernel of what engine capabilities this batch buffer
* requires. For example only the first VCS engine has the HEVC block.
--
2.14.1
More information about the Intel-gfx
mailing list