[PATCH v3 3/8] drm/i915: context submission pvmmio optimization
Xiaolin Zhang
xiaolin.zhang at intel.com
Tue Nov 13 08:35:15 UTC 2018
This is a performance optimization that reduces the number of mmio traps
from 4 to 1 during ELSP port writes (context submission).
During context submission, the elsp_data[4] values are cached in
the shared page; only the last port write (elsp_data[0]) is trapped
to gvt for the real context submission.
Use PVMMIO_ELSP_SUBMIT to control this level of pvmmio optimization.
v0: RFC
v1: rebase
v2: added pv ops for pv context submission. To maximize code reuse,
introduced 2 more ops (submit_ports & preempt_context) instead of 1 op
(set_default_submission) in the engine structure. The pv versions of
submit_ports and preempt_context are implemented.
v3:
1. to further reduce code duplication, refactored the code and replaced
the 2 ops "submit_ports & preempt_context" from v2 with 1 op "write_desc"
in the engine structure. The pv version of write_desc is implemented.
2. added VGT_G2V_ELSP_SUBMIT for g2v pv notification.
Cc: Zhenyu Wang <zhenyuw at linux.intel.com>
Cc: Zhi Wang <zhi.a.wang at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: He Min <min.he at intel.com>
Cc: Jiang Fei <fei.jiang at intel.com>
Cc: Gong Zhipeng <zhipeng.gong at intel.com>
Cc: Yuan Hang <hang.yuan at intel.com>
Cc: Zhiyuan Lv <zhiyuan.lv at intel.com>
Signed-off-by: Xiaolin Zhang <xiaolin.zhang at intel.com>
---
drivers/gpu/drm/i915/i915_pvinfo.h | 1 +
drivers/gpu/drm/i915/i915_vgpu.c | 2 ++
drivers/gpu/drm/i915/intel_lrc.c | 33 +++++++++++++++++++++++++++++----
drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++
4 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pvinfo.h b/drivers/gpu/drm/i915/i915_pvinfo.h
index aa5eebc..3da644d 100644
--- a/drivers/gpu/drm/i915/i915_pvinfo.h
+++ b/drivers/gpu/drm/i915/i915_pvinfo.h
@@ -49,6 +49,7 @@ enum vgt_g2v_type {
VGT_G2V_EXECLIST_CONTEXT_CREATE,
VGT_G2V_EXECLIST_CONTEXT_DESTROY,
VGT_G2V_SHARED_PAGE_SETUP,
+ VGT_G2V_ELSP_SUBMIT,
VGT_G2V_MAX,
};
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 63f70bf..82120f6 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -85,6 +85,8 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv)
return;
}
+ dev_priv->vgpu.pv_caps = PVMMIO_ELSP_SUBMIT;
+
/* If guest wants to enable pvmmio, it needs to enable it explicitly
* through vgt_if interface, and then read back the enable state from
* gvt layer.
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ff0e2b3..660e24c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -391,8 +391,10 @@ static u64 execlists_update_context(struct i915_request *rq)
return ce->lrc_desc;
}
-static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
+static inline void write_desc(struct intel_engine_cs *engine,
+ u64 desc, u32 port)
{
+ struct intel_engine_execlists *execlists = &engine->execlists;
if (execlists->ctrl_reg) {
writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1);
@@ -402,6 +404,24 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
}
}
+static inline void write_desc_pv(struct intel_engine_cs *engine, /* pv counterpart of write_desc(): stages desc in the vgpu shared page instead of writing the submit registers */
+ u64 desc, u32 port) /* desc: 64-bit context descriptor; port: port index the descriptor belongs to */
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+ u32 *elsp_data;
+
+ spin_lock(&engine->i915->vgpu.shared_page_lock); /* held across both stores and the notification below */
+ elsp_data = engine->i915->vgpu.shared_page->elsp_data;
+ elsp_data[engine->id * 4 + port * 2] = upper_32_bits(desc); /* indexing gives 4 u32 slots per engine id, 2 per port; upper dword goes in the first slot */
+ elsp_data[engine->id * 4 + port * 2 + 1] = lower_32_bits(desc); /* lower dword goes in the second slot */
+ if (port == 0) { /* only the port 0 write notifies; writes to other ports just update the shared page */
+ engine->i915->vgpu.shared_page->ring_id = engine->id; /* record which engine this submission is for */
+ __raw_i915_write32(dev_priv, vgtif_reg(g2v_notify), /* single g2v_notify register write per submission */
+ VGT_G2V_ELSP_SUBMIT);
+ }
+ spin_unlock(&engine->i915->vgpu.shared_page_lock);
+}
+
static void execlists_submit_ports(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
@@ -450,7 +470,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
desc = 0;
}
- write_desc(execlists, desc, n);
+ engine->write_desc(engine, desc, n);
}
/* we need to manually load the submit queue */
@@ -504,9 +524,9 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
*/
GEM_TRACE("%s\n", engine->name);
for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
+ engine->write_desc(engine, 0, n);
- write_desc(execlists, ce->lrc_desc, n);
+ engine->write_desc(engine, ce->lrc_desc, n);
/* we need to manually load the submit queue */
if (execlists->ctrl_reg)
@@ -2134,6 +2154,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->reset.prepare = execlists_reset_prepare;
+ engine->write_desc = write_desc;
+
+ if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT))
+ engine->write_desc = write_desc_pv;
+
engine->park = NULL;
engine->unpark = NULL;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f6ec48a..b752aab 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -523,6 +523,9 @@ struct intel_engine_cs {
void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
void (*cleanup)(struct intel_engine_cs *engine);
+ void (*write_desc)(struct intel_engine_cs *engine,
+ u64 desc, u32 port);
+
/* GEN8 signal/wait table - never trust comments!
* signal to signal to signal to signal to signal to
* RCS VCS BCS VECS VCS2
--
2.7.4
More information about the intel-gvt-dev
mailing list