[Intel-gfx] [PATCH v10 5/9] drm/i915: vgpu context submission pv optimization
Chris Wilson
chris at chris-wilson.co.uk
Tue Sep 17 09:54:02 UTC 2019
Quoting Xiaolin Zhang (2019-09-17 06:48:16)
> This is a performance optimization that overrides the actual submission
> backend in order to eliminate execlists CSB processing and reduce the number
> of MMIO traps for workload submission, avoiding the context switch interrupt
> by talking to GVT via a PV submission notification mechanism between the
> guest and GVT.
>
> Use PV_SUBMISSION to control this level of pv optimization.
>
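As a reading aid for the diff below, here is a condensed view of the per-engine
shared-page slot and doorbell flow it adds (all names are taken from the patch;
this block is illustrative and not part of the diff):

/* One slot per engine inside the page shared between guest and GVT. */
struct pv_submission {
	u64 descs[EXECLIST_MAX_PORTS];	/* ELSP context descriptors written by the guest */
	bool submitted;			/* set by the guest, cleared by GVT when consumed */
	spinlock_t lock;
};

/*
 * Instead of writing the ELSP and processing CSB events, the guest fills
 * descs[], sets submitted and issues one trapped MMIO write
 * (PV_ACTION_ELSP_SUBMISSION) as a doorbell; GVT picks the descriptors up
 * from the shared page and clears submitted when it is done with them.
 */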
> v0: RFC.
> v1: rebase.
> v2: added pv ops for pv context submission. To maximize code reuse,
> introduced 2 more ops (submit_ports & preempt_context) instead of 1 op
> (set_default_submission) in the engine structure. pv versions of
> submit_ports and preempt_context implemented.
> v3:
> 1. to reduce code duplication further, refactored the code and replaced the
> 2 ops "submit_ports & preempt_context" from v2 with 1 op "write_desc"
> in the engine structure. pv version of write_desc implemented.
> 2. added VGT_G2V_ELSP_SUBMIT for g2v pv notification.
> v4: implemented the pv elsp submission tasklet as the backend workload
> submission by talking to GVT with the PV notification mechanism and renamed
> VGT_G2V_ELSP_SUBMIT to VGT_G2V_PV_SUBMISIION.
> v5: addressed v4 comments from Chris, intel_pv_submission.c added.
> v6: addressed v5 comments from Chris, replaced engine id by hw_id.
> v7: rebase.
> v8: addressed v7 comments from Chris.
>
> Signed-off-by: Xiaolin Zhang <xiaolin.zhang at intel.com>
> ---
> drivers/gpu/drm/i915/Makefile | 2 +-
> drivers/gpu/drm/i915/gt/intel_lrc.c | 12 +-
> drivers/gpu/drm/i915/i915_vgpu.c | 18 +-
> drivers/gpu/drm/i915/i915_vgpu.h | 15 ++
> drivers/gpu/drm/i915/intel_pv_submission.c | 300 +++++++++++++++++++++++++++++
> 5 files changed, 341 insertions(+), 6 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/intel_pv_submission.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 658b930..f500cf1 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -250,7 +250,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \
> selftests/igt_spinner.o
>
> # virtual gpu code
> -i915-y += i915_vgpu.o
> +i915-y += i915_vgpu.o intel_pv_submission.o
>
> ifeq ($(CONFIG_DRM_I915_GVT),y)
> i915-y += intel_gvt.o
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index a3f0e49..5587aff 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2933,10 +2933,14 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
> engine->unpark = NULL;
>
> engine->flags |= I915_ENGINE_SUPPORTS_STATS;
> - if (!intel_vgpu_active(engine->i915)) {
> - engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
> - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
> - engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> + engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
> + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
> + engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> +
> + if (intel_vgpu_active(engine->i915)) {
> + engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES;
> + engine->flags &= ~I915_ENGINE_HAS_PREEMPTION;
> + intel_vgpu_config_pv_caps(engine->i915, PV_SUBMISSION, engine);
> }
>
> if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */
> diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
> index e458892..a488b68 100644
> --- a/drivers/gpu/drm/i915/i915_vgpu.c
> +++ b/drivers/gpu/drm/i915/i915_vgpu.c
> @@ -97,7 +97,7 @@ void i915_detect_vgpu(struct drm_i915_private *dev_priv)
> mutex_init(&dev_priv->vgpu.lock);
>
> /* guest driver PV capability */
> - dev_priv->vgpu.pv_caps = PV_PPGTT_UPDATE;
> + dev_priv->vgpu.pv_caps = PV_PPGTT_UPDATE | PV_SUBMISSION;
>
> if (!intel_vgpu_check_pv_caps(dev_priv, shared_area)) {
> DRM_INFO("Virtual GPU for Intel GVT-g detected.\n");
> @@ -389,6 +389,7 @@ void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
> enum pv_caps cap, void *data)
> {
> struct i915_ppgtt *ppgtt;
> + struct intel_engine_cs *engine;
>
> if (!intel_vgpu_enabled_pv_caps(dev_priv, cap))
> return;
> @@ -399,6 +400,11 @@ void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
> ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl_pv;
> ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl_pv;
> }
> +
> + if (cap == PV_SUBMISSION) {
> + engine = (struct intel_engine_cs *)data;
> + vgpu_set_pv_submission(engine);
> + }
> }
>
> /**
> @@ -594,6 +600,8 @@ static int intel_vgpu_setup_shared_page(struct drm_i915_private *dev_priv,
> u64 gpa;
> u16 ver_maj, ver_min;
> int ret = 0;
> + int i;
> + u32 size;
>
> /* We allocate 1 page shared between guest and GVT for data exchange.
> * ___________.....................
> @@ -666,6 +674,14 @@ static int intel_vgpu_setup_shared_page(struct drm_i915_private *dev_priv,
> pv->notify = intel_vgpu_pv_notify_mmio;
> mutex_init(&pv->send_mutex);
>
> + /* setup PV per engine data exchange structure */
> + size = sizeof(struct pv_submission);
> + for (i = 0; i < PV_MAX_ENGINES_NUM; i++) {
> + pv->pv_elsp[i] = (void *)base + PV_ELSP_OFF + size * i;
> + pv->pv_elsp[i]->submitted = false;
> + spin_lock_init(&pv->pv_elsp[i]->lock);
> + }
> +
> return ret;
> err:
> __free_page(virt_to_page(base));
> diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h
> index b0fee5b..c33cb05 100644
> --- a/drivers/gpu/drm/i915/i915_vgpu.h
> +++ b/drivers/gpu/drm/i915/i915_vgpu.h
> @@ -29,6 +29,8 @@
>
> #define PV_MAJOR 1
> #define PV_MINOR 0
> +#define PV_MAX_ENGINES_NUM (VECS1_HW + 1)
> +#define PV_ELSP_OFF (PAGE_SIZE/8)
> #define PV_DESC_OFF (PAGE_SIZE/4)
> #define PV_CMD_OFF (PAGE_SIZE/2)
>
> @@ -37,6 +39,7 @@
> */
> enum pv_caps {
> PV_PPGTT_UPDATE = 0x1,
> + PV_SUBMISSION = 0x2,
> };
>
> /* PV actions */
> @@ -45,6 +48,7 @@ enum intel_vgpu_pv_action {
> PV_ACTION_PPGTT_L4_ALLOC,
> PV_ACTION_PPGTT_L4_CLEAR,
> PV_ACTION_PPGTT_L4_INSERT,
> + PV_ACTION_ELSP_SUBMISSION,
> };
>
> /*
> @@ -56,6 +60,13 @@ struct gvt_shared_page {
> u16 ver_minor;
> };
>
> +/* workload submission pv support data structure */
> +struct pv_submission {
> + u64 descs[EXECLIST_MAX_PORTS];
> + bool submitted;
> + spinlock_t lock;
> +};
> +
> /*
> * Definition of the command transport message header (DW0)
> *
> @@ -100,6 +111,9 @@ struct i915_virtual_gpu_pv {
> struct gvt_shared_page *shared_page;
> bool enabled;
>
> + /* per engine PV workload submission data */
> + struct pv_submission *pv_elsp[PV_MAX_ENGINES_NUM];
> +
> /* PV command buffer support */
> struct vgpu_pv_ct_buffer ctb;
> u32 next_fence;
> @@ -164,4 +178,5 @@ bool intel_vgpu_check_pv_caps(struct drm_i915_private *dev_priv,
> void __iomem *shared_area);
> void intel_vgpu_config_pv_caps(struct drm_i915_private *dev_priv,
> enum pv_caps cap, void *data);
> +void vgpu_set_pv_submission(struct intel_engine_cs *engine);
> #endif /* _I915_VGPU_H_ */
> diff --git a/drivers/gpu/drm/i915/intel_pv_submission.c b/drivers/gpu/drm/i915/intel_pv_submission.c
> new file mode 100644
> index 0000000..1083d56
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_pv_submission.c
> @@ -0,0 +1,300 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#include "i915_vgpu.h"
> +#include "gt/intel_lrc_reg.h"
> +#include "gt/intel_gt_pm.h"
> +#include "i915_trace.h"
> +
> +#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
> +
> +static u64 execlists_update_context(struct i915_request *rq)
> +{
> + struct intel_context *ce = rq->hw_context;
> + u32 *reg_state = ce->lrc_reg_state;
> +
> + reg_state[CTX_RING_TAIL + 1] = intel_ring_set_tail(rq->ring, rq->tail);
> +
> + return ce->lrc_desc;
> +}
> +
> +static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +{
> + return rb_entry(rb, struct i915_priolist, node);
> +}
> +
> +static void pv_submit(struct intel_engine_cs *engine,
> + struct i915_request **out,
> + struct i915_request **end)
> +{
> + struct intel_engine_execlists * const execlists = &engine->execlists;
> + struct i915_virtual_gpu_pv *pv = engine->i915->vgpu.pv;
> + struct pv_submission *pv_elsp = pv->pv_elsp[engine->hw_id];
> + struct i915_request *rq;
> + int n, err;
> +
> + memset(pv_elsp->descs, 0, sizeof(pv_elsp->descs));
> + n = 0;
> +
> + do {
> + rq = *out++;
> + pv_elsp->descs[n++] = execlists_update_context(rq);
> + } while (out != end);
> +
> + spin_lock(&pv_elsp->lock);
> + pv_elsp->submitted = true;
> + writel(PV_ACTION_ELSP_SUBMISSION, execlists->submit_reg);
> +
> +#define done (READ_ONCE(pv_elsp->submitted) == false)
> + err = wait_for_atomic_us(done, 1000);
Please tell me you have a better plan than adding a 1ms wait with
interrupts off.
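For illustration only, a sketch of one direction that avoids spinning with
interrupts disabled after every doorbell: treat submitted as a slot-ownership
flag and only wait, briefly, before reusing the slot. This is not part of the
patch; the helper name is hypothetical, and it assumes the engine's tasklet is
the sole producer for its pv_elsp slot so pv_elsp->lock can be dropped here.

static int pv_submit_async(struct intel_engine_cs *engine,
			   struct i915_request **out,
			   struct i915_request **end)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;
	struct i915_virtual_gpu_pv *pv = engine->i915->vgpu.pv;
	struct pv_submission *pv_elsp = pv->pv_elsp[engine->hw_id];
	int n = 0;

	/* Only wait if GVT still owns the slot from the last doorbell. */
	if (wait_for_atomic_us(!READ_ONCE(pv_elsp->submitted), 10))
		return -ETIMEDOUT; /* let the caller decide how to recover */

	memset(pv_elsp->descs, 0, sizeof(pv_elsp->descs));
	do {
		pv_elsp->descs[n++] = execlists_update_context(*out++);
	} while (out != end);

	/* Descriptors must be visible before the flag and the doorbell. */
	smp_wmb();
	WRITE_ONCE(pv_elsp->submitted, true);
	writel(PV_ACTION_ELSP_SUBMISSION, execlists->submit_reg);

	return 0;
}

Another option along the same lines would be for GVT to ack completion via an
interrupt handled by the tasklet rather than having the guest poll submitted at
all, which ties into the park/unpark question below.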
> +#undef done
> + spin_unlock(&pv_elsp->lock);
> +
> + if (unlikely(err))
> + DRM_ERROR("PV (%s) workload submission failed\n", engine->name);
> +
> +}
> +void vgpu_set_pv_submission(struct intel_engine_cs *engine)
> +{
> + /*
> + * We inherit a bunch of functions from execlists that we'd like
> + * to keep using:
> + *
> + * engine->submit_request = execlists_submit_request;
> + * engine->cancel_requests = execlists_cancel_requests;
> + * engine->schedule = execlists_schedule;
> + *
> + * But we need to override the actual submission backend in order
> + * to talk to the GVT with PV notification message.
> + */
> +
> + engine->execlists.tasklet.func = vgpu_pv_submission_tasklet;
> +
> + /* do not use execlists park/unpark */
> + engine->park = engine->unpark = NULL;
But this was used to enable the interrupts so that your tasklet was
called. GuC dropped it because they only plan to run on HW that always
sends the interrupts; your supported HW needs the interrupts enabled on
demand.
Please go and add a test to demonstrate this.
-Chris
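
For illustration of the point above (not a claim about the current tree):
park/unpark hooks modelled on what the old GuC backend did, keeping the user
interrupt pinned while the engine is in use so that the breadcrumb interrupt
kicks the PV submission tasklet. The intel_engine_pin_breadcrumbs_irq() /
intel_engine_unpin_breadcrumbs_irq() helpers and the
I915_ENGINE_NEEDS_BREADCRUMB_TASKLET flag are assumptions borrowed from that
code.

static void pv_submission_park(struct intel_engine_cs *engine)
{
	/* Engine going idle: the user interrupt is no longer needed. */
	engine->flags &= ~I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
	intel_engine_unpin_breadcrumbs_irq(engine);
}

static void pv_submission_unpark(struct intel_engine_cs *engine)
{
	/* Engine in use: have the user interrupt schedule the tasklet. */
	engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
	intel_engine_pin_breadcrumbs_irq(engine);
}

vgpu_set_pv_submission() would then install these instead of clearing
engine->park/unpark, and a selftest could park and unpark the engine and check
that a PV submission still signals its breadcrumb without any CSB interrupt.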