[Intel-gfx] [RFC 03/10] drm/i915/gvt: context submission pvmmio optimization
Xiaolin Zhang
xiaolin.zhang at intel.com
Thu Sep 27 16:37:48 UTC 2018
It is performance optimization to reduce mmio trap numbers from 4 to
1 durning ELSP porting writing (context submission).
When context subission, to cache elsp_data[4] values in
the shared page, the last elsp_data[0] port writing will be trapped
to gvt for real context submission.
Use PVMMIO_ELSP_SUBMIT to control this level of pvmmio optimization.
Signed-off-by: Xiaolin Zhang <xiaolin.zhang at intel.com>
---
drivers/gpu/drm/i915/i915_params.h | 2 +-
drivers/gpu/drm/i915/intel_lrc.c | 37 ++++++++++++++++++++++++++++++++++++-
2 files changed, 37 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 0f6a38b..6c81c87 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -69,7 +69,7 @@
param(bool, enable_dp_mst, true) \
param(bool, enable_dpcd_backlight, false) \
param(bool, enable_gvt, false) \
- param(int, enable_pvmmio, 0)
+ param(int, enable_pvmmio, PVMMIO_ELSP_SUBMIT)
#define MEMBER(T, member, ...) T member;
struct i915_params {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4b28225..cdc713c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -451,7 +451,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
struct execlist_port *port = execlists->port;
+ u32 __iomem *elsp =
+ engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+ u32 *elsp_data;
unsigned int n;
+ u32 descs[4];
+ int i = 0;
/*
* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -494,8 +499,24 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
GEM_BUG_ON(!n);
desc = 0;
}
+ if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT)) {
+ GEM_BUG_ON(i >= 4);
+ descs[i] = upper_32_bits(desc);
+ descs[i + 1] = lower_32_bits(desc);
+ i += 2;
+ } else {
+ write_desc(execlists, desc, n);
+ }
+ }
- write_desc(execlists, desc, n);
+ if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT)) {
+ spin_lock(&engine->i915->shared_page_lock);
+ elsp_data = engine->i915->shared_page->elsp_data;
+ *elsp_data = descs[0];
+ *(elsp_data + 1) = descs[1];
+ *(elsp_data + 2) = descs[2];
+ writel(descs[3], elsp);
+ spin_unlock(&engine->i915->shared_page_lock);
}
/* we need to manually load the submit queue */
@@ -538,11 +559,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
struct intel_engine_execlists *execlists = &engine->execlists;
struct intel_context *ce =
to_intel_context(engine->i915->preempt_context, engine);
+ u32 __iomem *elsp =
+ engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+ u32 *elsp_data;
unsigned int n;
GEM_BUG_ON(execlists->preempt_complete_status !=
upper_32_bits(ce->lrc_desc));
+ if (PVMMIO_LEVEL_ENABLE(engine->i915, PVMMIO_ELSP_SUBMIT)) {
+ spin_lock(&engine->i915->shared_page_lock);
+ elsp_data = engine->i915->shared_page->elsp_data;
+ *elsp_data = 0;
+ *(elsp_data + 1) = 0;
+ *(elsp_data + 2) = upper_32_bits(ce->lrc_desc);
+ writel(lower_32_bits(ce->lrc_desc), elsp);
+ spin_unlock(&engine->i915->shared_page_lock);
+ return;
+ }
+
/*
* Switch to our empty preempt context so
* the state of the GPU is known (idle).
--
1.8.3.1
More information about the Intel-gfx
mailing list